Example #1
0
def sz(fname):
    """Return (width, height) for a png/gif/jpeg file.

    Returns -1 for files imghdr does not classify as one of those three
    formats, and None when the header is too short or the stream is
    malformed.
    """
    with open(fname, 'rb') as stream:
        header = stream.read(24)
        if len(header) != 24:
            return
        kind = imghdr.what(fname)
        if kind == 'png':
            # Bytes 4-8 of a valid PNG are the \r\n\x1a\n signature tail.
            if struct.unpack('>i', header[4:8])[0] != 0x0d0a1a0a:
                return
            width, height = struct.unpack('>ii', header[16:24])
            return width, height
        if kind == 'gif':
            # GIF keeps width/height little-endian right after 'GIF8xa'.
            width, height = struct.unpack('<HH', header[6:10])
            return width, height
        if kind == 'jpeg':
            try:
                stream.seek(0)
                segment_len = 2
                marker = 0
                # Walk marker segments until an SOFn block (0xc0..0xcf).
                while not 0xc0 <= marker <= 0xcf:
                    stream.seek(segment_len, 1)
                    b = stream.read(1)
                    while ord(b) == 0xff:
                        b = stream.read(1)
                    marker = ord(b)
                    segment_len = struct.unpack('>H', stream.read(2))[0] - 2
                stream.seek(1, 1)  # skip the SOFn precision byte
                height, width = struct.unpack('>HH', stream.read(4))
                return width, height
            except Exception:
                return
        return -1
Example #2
0
def get_image_size_old(fname):
    '''Determine the image type of fname and return its size info.

    Reads the first 24 bytes and parses png/gif/jpeg headers directly.
    Returns a dict with width ('w'), height ('h'), aspect ratio
    ('ar', height / width -- true division only under Python 3; would
    truncate to int under Python 2) and pixel count ('sz'), or None when
    the file is short, unrecognized or malformed.
    from draco'''
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            return
        if imghdr.what(fname) == 'png':
            # Bytes 4-8 of a valid PNG are the \r\n\x1a\n signature tail.
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                return
            width, height = struct.unpack('>ii', head[16:24])
        elif imghdr.what(fname) == 'gif':
            # GIF stores width/height little-endian right after 'GIF8xa'.
            width, height = struct.unpack('<HH', head[6:10])
        elif imghdr.what(fname) == 'jpeg':
            try:
                fhandle.seek(0) # Read 0xff next
                size = 2
                ftype = 0
                # Walk the marker segments until an SOFn block (0xc0..0xcf).
                while not 0xc0 <= ftype <= 0xcf:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack('>HH', fhandle.read(4))
            except Exception: #IGNORE:W0703
                return
        else:
            return
        return {'w': width, 'h': height, 'ar': height / width, 'sz': height * width }
Example #3
0
 def is_image_allowed(self):
     """Validate the uploaded avatar by extension and detected content.

     Raises ValidationError unless the filename's last four characters
     are in ALLOWED_IMG_EXTENSIONS and imghdr identifies the content as
     jpeg or png.  Returns True when both checks pass.
     """
     field = self.user_avatar.data
     # NOTE(review): [-4:] only captures 4-character suffixes like '.jpg';
     # a 5-character extension such as '.jpeg' would be truncated to
     # 'jpeg' -- confirm against the configured ALLOWED_IMG_EXTENSIONS.
     if field.filename.lower()[-4:] not in current_app.config['ALLOWED_IMG_EXTENSIONS']:
         raise ValidationError('Invalid file extension')
     # NOTE(review): imghdr.what is given the *filename*, so this assumes
     # the upload already exists on disk at that path -- verify caller.
     if imghdr.what(field.filename) != 'jpeg' and imghdr.what(field.filename) != 'png':
         raise ValidationError('Invalid image format')
     return True
Example #4
0
File: main.py Project: catmes/ImOrg
def mk_clean_images(user_images):
    """ A function that sorts non images and non jpegs.

    Predicting that the given directory won't always be consisting of photos and only jpegs,
    mk_clean_images filters out everything that is not a jpeg. After the filtering,
    a list of clean_images is left - images that won't make the program crash.

    Note: chdir's into user_images as a side effect.

    Args:
        user_images - chosen user image dir

    Returns:
        clean_images - list of only jpegs
    """
    clean_images = []
    # Changes cwd to images dir to loop.
    os.chdir(user_images)
    # For all images in user_images directory.
    for lone_image in os.listdir(os.getcwd()):
        try:
            # If image has a jpeg signature, add it to clean_images.
            if imghdr.what(lone_image) == 'jpeg':
                clean_images.append(lone_image)
            else:
                # BUG FIX: the original used `is not "jpeg"` -- an identity
                # comparison on strings, not an equality test; the branch is
                # simply the non-jpeg case, so a plain else is correct.
                # Will pass True if the file is actually an image.
                if imghdr.what(lone_image) in image_values:
                    print("%s: not a jpeg. \n" % lone_image)

        # To handle those pesky non images.
        except IOError:
            print("%s: not an image. \n" % lone_image)
            continue

    return clean_images
Example #5
0
 def _dimensions(self):
   """Return (width, height) of the image at self._filepath, or None
   when the header is short, the format is unsupported, or the stream
   is malformed."""
   import struct
   import imghdr
   with open(self._filepath, "rb") as fh:
     header = fh.read(24)
     if len(header) != 24:
       return
     kind = imghdr.what(self._filepath)
     if kind == "png":
       # bytes 4-8 must be the \r\n\x1a\n signature tail
       if struct.unpack(">i", header[4:8])[0] != 0x0d0a1a0a:
         return
       width, height = struct.unpack(">ii", header[16:24])
       return width, height
     if kind == "gif":
       width, height = struct.unpack("<HH", header[6:10])
       return width, height
     if kind == "jpeg":
       try:
         fh.seek(0)
         seg_len = 2
         marker = 0
         # advance segment by segment until an SOFn marker (0xc0..0xcf)
         while not 0xc0 <= marker <= 0xcf:
           fh.seek(seg_len, 1)
           b = fh.read(1)
           while ord(b) == 0xff:
             b = fh.read(1)
           marker = ord(b)
           seg_len = struct.unpack(">H", fh.read(2))[0] - 2
         fh.seek(1, 1)  # skip the SOFn precision byte
         height, width = struct.unpack(">HH", fh.read(4))
         return width, height
       except Exception:
         return
     return
Example #6
0
    def validate_image_file(self, field):
        """Validate an optional image upload by extension and content.

        Empty filenames pass (the field is optional).  Raises
        ValidationError when the extension is not .jpg/.png or when
        imghdr does not recognize the data as jpeg/png.
        """
        if len(field.data.filename) != 0:
            if field.data.filename[-4:].lower() != '.jpg' and field.data.filename[-4:].lower() != '.png':
                raise ValidationError('Invalid file extension: please select a jpg or png file')

            # NOTE(review): field.data is handed straight to imghdr.what,
            # which expects a path or file object -- confirm the form
            # framework supplies a file-like object here.
            if imghdr.what(field.data) != 'jpeg' and imghdr.what(field.data) != 'png':
                raise ValidationError('Invalid image format: please select a jpg or png file')
Example #7
0
def get_image_size(filename):
    '''Determine the image type of filename and return its size.

    Supports png, gif and jpeg; returns a (width, height) tuple and
    raises ValueError for short, malformed or unsupported files.
    '''
    # FIX: use a context manager so the handle is closed on every path
    # (the original leaked the file descriptor, including on raises).
    with open(filename, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            raise ValueError('{} is less than 24 bytes, cannot be a valid image.'.format(filename))
        if imghdr.what(filename) == 'png':
            check = struct.unpack(str('>i'), head[4:8])[0]
            if check != 0x0d0a1a0a:
                raise ValueError('{} is not a valid png file.'.format(filename))
            width, height = struct.unpack(str('>ii'), head[16:24])
        elif imghdr.what(filename) == 'gif':
            width, height = struct.unpack(str('<HH'), head[6:10])
        elif imghdr.what(filename) == 'jpeg':
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                # Walk the marker segments until an SOFn block (0xc0..0xcf).
                while not 0xc0 <= ftype <= 0xcf:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack(str('>H'), fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack(str('>HH'), fhandle.read(4))
            except Exception:
                # FIX: narrowed from a bare except so KeyboardInterrupt /
                # SystemExit are no longer converted into ValueError.
                raise ValueError('{} is not a valid jpeg file.'.format(filename))
        else:
            raise ValueError('{} must be a jpeg, gif or png image.'.format(filename))
        return width, height
def get_image_size(image_fhandle):
    """Return (width, height) of an in-memory image buffer.

    image_fhandle must be a BytesIO-like object (it is probed with
    getvalue()).  Supports png, gif and jpeg; returns (None, None) for
    short, malformed or unsupported data.
    """
    img_bytes = image_fhandle.getvalue()
    img_header = image_fhandle.read(24)
    if len(img_header) != 24:
        return None, None
    if imghdr.what(None, img_bytes) == 'png':
        check = struct.unpack('>i', img_header[4:8])[0]
        if check != 0x0d0a1a0a:
            return None, None
        width, height = struct.unpack('>ii', img_header[16:24])
    elif imghdr.what(None, img_bytes) == 'gif':
        width, height = struct.unpack('<HH', img_header[6:10])
    elif imghdr.what(None, img_bytes) == 'jpeg':
        try:
            image_fhandle.seek(0)
            size = 2
            ftype = 0
            # Walk the marker segments until an SOFn block (0xc0..0xcf).
            while not 0xc0 <= ftype <= 0xcf:
                image_fhandle.seek(size, 1)
                byte = image_fhandle.read(1)
                while ord(byte) == 0xff:
                    byte = image_fhandle.read(1)
                ftype = ord(byte)
                size = struct.unpack('>H', image_fhandle.read(2))[0] - 2
            image_fhandle.seek(1, 1)  # skip the SOFn precision byte
            # BUG FIX: a JPEG SOFn block stores HEIGHT first, then width;
            # the original unpacked them as (width, height) and therefore
            # returned the two dimensions swapped for every jpeg.
            height, width = struct.unpack('>HH', image_fhandle.read(4))
        except Exception:
            return None, None
    else:
        return None, None
    return width, height
Example #9
0
def get_image_size(fname):
    """Determine the image type of fname and return its (width, height).

    Returns None for short, malformed or unsupported files.
    from draco
    """
    # FIX: context manager closes the handle on every exit path (the
    # original opened the file and never closed it).
    with open(fname, "rb") as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            return
        if imghdr.what(fname) == "png":
            check = struct.unpack(">i", head[4:8])[0]
            if check != 0x0D0A1A0A:
                return
            width, height = struct.unpack(">ii", head[16:24])
        elif imghdr.what(fname) == "gif":
            width, height = struct.unpack("<HH", head[6:10])
        elif imghdr.what(fname) == "jpeg":
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                # Walk the marker segments until an SOFn block (0xc0..0xcf).
                while not 0xC0 <= ftype <= 0xCF:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xFF:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack(">H", fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack(">HH", fhandle.read(4))
            except Exception:  # IGNORE:W0703
                return
        else:
            return
        return width, height
def getImageSize(fname):
    """Return (width, height) of a png/gif/jpeg file.

    Raises RuntimeError for short headers, bad png signatures and
    unsupported formats; a malformed jpeg stream propagates whatever
    the low-level parsing raises.
    """
    with open(fname, 'rb') as stream:
        header = stream.read(24)
        if len(header) != 24:
            raise RuntimeError("Invalid Header")
        kind = imghdr.what(fname)
        if kind == 'png':
            # bytes 4-8 must be the \r\n\x1a\n signature tail
            if struct.unpack('>i', header[4:8])[0] != 0x0d0a1a0a:
                raise RuntimeError("PNG: Invalid check")
            width, height = struct.unpack('>ii', header[16:24])
        elif kind == 'gif':
            width, height = struct.unpack('<HH', header[6:10])
        elif kind == 'jpeg':
            stream.seek(0)
            seg_len = 2
            marker = 0
            # scan marker segments until an SOFn block (0xc0..0xcf)
            while not 0xc0 <= marker <= 0xcf:
                stream.seek(seg_len, 1)
                b = stream.read(1)
                while ord(b) == 0xff:
                    b = stream.read(1)
                marker = ord(b)
                seg_len = struct.unpack('>H', stream.read(2))[0] - 2
            stream.seek(1, 1)  # skip the SOFn precision byte
            height, width = struct.unpack('>HH', stream.read(4))
        else:
            raise RuntimeError("Unsupported format")
        return width, height
Example #11
0
def test_thumbnails():
    """
    Test thumbnails generation.
    1. Create a video (+audio) from gst's videotestsrc
    2. Capture thumbnail
    3. Everything should get removed because of temp files usage

    The captured thumbnail is verified only by its detected file format
    (imghdr), not by its dimensions.
    """
    # data  create_data() as (video_name, thumbnail_name):
    test_formats = [(".png", "png"), (".jpg", "jpeg"), (".gif", "gif")]
    for suffix, format in test_formats:
        with create_data(suffix) as (video_name, thumbnail_name):
            capture_thumb(video_name, thumbnail_name, width=40)
            # check result file format
            assert imghdr.what(thumbnail_name) == format
            # TODO: check height and width
            # FIXME: it doesn't work with small width, say, 10px. This should be
            # fixed somehow
    suffix, format = test_formats[0]
    # Repeat for the first format with audio enabled, at several widths.
    with create_data(suffix, True) as (video_name, thumbnail_name):
        capture_thumb(video_name, thumbnail_name, width=40)
        assert imghdr.what(thumbnail_name) == format
    with create_data(suffix, True) as (video_name, thumbnail_name):
        capture_thumb(video_name, thumbnail_name, width=10)  # smaller width
        assert imghdr.what(thumbnail_name) == format
    with create_data(suffix, True) as (video_name, thumbnail_name):
        capture_thumb(video_name, thumbnail_name, width=100)  # bigger width
        assert imghdr.what(thumbnail_name) == format
def get_image_size(fname):
    '''Determine the image type of fname and return its (width, height).

    Raises TypeError for short headers, bad png signatures and
    unsupported formats.
    from draco'''
    # FIX: context manager closes the handle on every path, including
    # the TypeError raises (the original leaked the file descriptor).
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            raise TypeError
        if imghdr.what(fname) == 'png':
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                raise TypeError
            width, height = struct.unpack('>ii', head[16:24])
        elif imghdr.what(fname) == 'gif':
            width, height = struct.unpack('<HH', head[6:10])
        elif imghdr.what(fname) == 'jpeg':
            fhandle.seek(0)  # Read 0xff next
            size = 2
            ftype = 0
            # Walk the marker segments until an SOFn block (0xc0..0xcf).
            while not 0xc0 <= ftype <= 0xcf:
                fhandle.seek(size, 1)
                byte = fhandle.read(1)
                while ord(byte) == 0xff:
                    byte = fhandle.read(1)
                ftype = ord(byte)
                size = struct.unpack('>H', fhandle.read(2))[0] - 2
            # We are at a SOFn block
            fhandle.seek(1, 1)  # Skip `precision' byte.
            height, width = struct.unpack('>HH', fhandle.read(4))
        else:
            raise TypeError
        return width, height
Example #13
0
def get_image_size(fname):
    # https://stackoverflow.com/questions/8032642/how-to-obtain-image-size-using-standard-python-class-without-using-external-lib
    '''Determine the image type of fname and return its size.

    Returns (width, height, format) where format is imghdr's name for
    the detected type.  Raises Exception for short, malformed or
    unsupported files.
    from draco'''
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            raise Exception("Invalid header")
        # IMPROVEMENT: detect the format once instead of re-reading the
        # file header up to four times (the original called imghdr.what
        # in every branch and again in the return statement).
        fmt = imghdr.what(fname)
        if fmt == 'png':
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                raise Exception("png checksum failed")
            width, height = struct.unpack('>ii', head[16:24])
        elif fmt == 'gif':
            width, height = struct.unpack('<HH', head[6:10])
        elif fmt == 'jpeg':
            fhandle.seek(0) # Read 0xff next
            size = 2
            ftype = 0
            # Walk the marker segments until an SOFn block (0xc0..0xcf).
            while not 0xc0 <= ftype <= 0xcf:
                fhandle.seek(size, 1)
                byte = fhandle.read(1)
                while ord(byte) == 0xff:
                    byte = fhandle.read(1)
                ftype = ord(byte)
                size = struct.unpack('>H', fhandle.read(2))[0] - 2
            # We are at a SOFn block
            fhandle.seek(1, 1)  # Skip `precision' byte.
            height, width = struct.unpack('>HH', fhandle.read(4))
        else:
            raise Exception("Invalid handle")
        return width, height, fmt
Example #14
0
def zip(base_dir):
    """Create one .cbz archive per folder under base_dir, containing only
    the files imghdr detects as jpeg; then hand off to move(base_dir).

    NOTE(review): shadows the builtin `zip`; kept for caller
    compatibility.  FIX: print statements converted to print() calls so
    the function parses on Python 3 as well as 2 (single-argument form
    behaves identically on both).
    """

    from contextlib import closing
    from zipfile import ZipFile, ZIP_DEFLATED
    import imghdr

    # walk the directory tree
    for root, dirs, files in os.walk(base_dir):

        # archive name derived from the folder holding the images
        folder_name = os.path.split(root)[1] + r".cbz"

        # skip folders that would produce an empty archive
        has_imgs = any(imghdr.what(os.path.join(root, name)) == "jpeg"
                       for name in files)
        if has_imgs:
            print("criando : " + folder_name)
            # build the archive with jpegs only
            with closing(ZipFile(folder_name, "w", ZIP_DEFLATED)) as zipcomic:
                for name in files:
                    page = os.path.join(root, name)
                    # IMPROVEMENT: sniff each file once per pass instead of twice
                    if imghdr.what(page) == "jpeg":
                        # strip the base_dir prefix from the stored path
                        arc_name = page[len(base_dir) + len(os.sep):]
                        zipcomic.write(page, arc_name)
                    else:
                        print("ignorado : " + name + " -  incompatível com img")
    move(base_dir)
Example #15
0
def download():
    """Download the images listed in each class file under module-level
    `path` into all_10_classes/<class>/, skipping classes whose output
    directory already exists and deleting downloads imghdr cannot
    identify as images at all.

    Relies on module-level `files` (list of class-file names) and
    `path` (directory prefix); best-effort per URL.
    """
    for file in files:
        print(file)
        if os.path.isdir("all_10_classes/" + file[:-4]) is False:
            os.makedirs("all_10_classes/" + file[:-4])
        else:
            print(file, " already exists, skipping")
            continue
        count = 0
        line = 0
        # FIX: close the class-list file when done (was leaked).
        with open(path + file) as f:
            for url in f:
                line += 1
                try:
                    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
                    file_name = "all_10_classes/" + file[:-4] + "/" + file[:-4] + "_" + str(count)
                    response = urllib.request.urlopen(req, timeout=2)
                    # FIX: close the output file deterministically (the
                    # original leaked one handle per downloaded image).
                    with open(file_name, 'wb') as out_file:
                        shutil.copyfileobj(response, out_file)
                    count += 1
                except Exception:
                    # FIX: narrowed from a bare except so Ctrl-C still aborts.
                    print("bad format, or impossible to download line -> ", line)
                    continue
                if os.path.isfile(file_name):
                    if imghdr.what(file_name) != "jpeg":
                        print(imghdr.what(file_name))
                        # Unrecognized content: not an image at all, drop it.
                        if (imghdr.what(file_name)) is None:
                            os.remove(file_name)

                print(line, " saving image # ", count)
Example #16
0
def from_html_links_to_inline_imgs(content, inline=True, full_path=True):
  """
  Replaces (html) links to attachs with embeded inline images
  """
  # Make absolute site URLs relative before matching attachment links.
  content = re.sub("%s" % settings.SITE_URL, '', content)
  attach_re = r'"(?:/text/(?P<key>\w*))?/attach/(?P<attach_key>\w*)/'
  for text_key, attach_key in re.findall(attach_re, content):
    if text_key:
      link = r'/text/%s/attach/%s/' % (text_key, attach_key)
    else:
      link = r'/attach/%s/' % attach_key

    attach = Attachment.objects.get(key=attach_key)
    if inline:
      # Embed the file as a base64 data URI with its sniffed image type.
      img_fmt = imghdr.what(attach.data.path)
      img = open(attach.data.path, 'rb')
      data = base64.b64encode(img.read())
      img.close()
      content = content.replace(link, 'data:image/'+img_fmt+';base64,'+data)
    elif full_path:
      content = content.replace(link, attach.data.path)
    else:
      img_fmt = imghdr.what(attach.data.path)
      content = content.replace(link, attach_key+'.'+img_fmt)
  return content
    def test_write_bench_results_correct_num(self):
        """Correctly writes the bench results with numerical labels.

        Checks the summarized results table, the fitted-curve file, and
        that both plot figures are genuine PNG files (via imghdr.what).
        """
        write_bench_results('bench_data', self.num_data, self.output_dir)

        # Correctly generates the summarized_results.txt file
        fp = join(self.output_dir, 'summarized_results.txt')
        with open(fp, 'U') as f:
            obs = f.read()
        exp = ("#label\twall_mean\twall_std\tuser_mean\tuser_std\t"
               "kernel_mean\tkernel_std\tmem_mean\tmem_std\n"
               "100\t25\t1\t23\t0.9\t2\t0.1\t1048576\t0.0\n"
               "200\t50\t2\t46\t2\t4\t0.0\t2097152\t0.0\n"
               "300\t75\t3\t70\t2.9\t5\t0.001\t3145728\t0.0\n"
               "400\t100\t4\t94\t4.1\t6\t0.2\t4194304\t0.2\n"
               "500\t125\t5\t123\t5\t2\t0.02\t5242880\t0.0\n")
        self.assertEqual(obs, exp)

        # Correctly generates the curves.txt file
        fp = join(self.output_dir, 'curves.txt')
        with open(fp, 'U') as f:
            obs = f.read()
        exp = ("Wall time fitted curve\n0.25*x^1 + 0\n"
               "Memory usage fitted curve\n10486*x^1 + 0\n")
        self.assertEqual(obs, exp)

        # Correctly generates the plot figures
        fp = join(self.output_dir, 'time_fig.png')
        self.assertEqual(what(fp), 'png')
        fp = join(self.output_dir, 'mem_fig.png')
        self.assertEqual(what(fp), 'png')
    def validate_image_file(self, field):
        """Reject uploads whose extension or sniffed content is not jpg/png.

        Empty filenames pass (the field is optional); otherwise raises
        ValidationError on a bad extension or unrecognized image data.
        """
        filename = field.data.filename
        if len(filename) == 0:
            return
        if filename[-4:].lower() not in (".jpg", ".png"):
            raise ValidationError("Invalid file extension: please select a jpg or png file")
        if imghdr.what(field.data) not in ("jpeg", "png"):
            raise ValidationError("Invalid image format: please select a jpg or png file")
Example #19
0
 def _get_image_type(image):
     """Return the MIME type for a jpeg/gif/png image, or None otherwise.

     IMPROVEMENT: the original called imghdr.what up to three times,
     re-reading the file header for each branch; detect once instead.
     """
     kind = imghdr.what(image)
     if kind in ('jpeg', 'gif', 'png'):
         return 'image/' + kind
Example #20
0
def get_image_info(fname):
    '''Determine the image type of fname and return type/size metadata.

    Returns a list of (label, value) tuples for png/gif/jpeg/pgm/bmp
    files, always ending with the file size in kb, or None when the
    file is short, unrecognized or malformed.
    from draco'''
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(32)
        if len(head) != 32:
            return
        if imghdr.what(fname) == 'png':
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                return
            info = [("Type", "png"),
                    ("Dimensions", "%sx%s" % (struct.unpack('>ii', head[16:24])))]
        elif imghdr.what(fname) == 'gif':
            info = [("Type", "gif"),
                    ("Dimensions", "%sx%s" % (struct.unpack('<HH', head[6:10])))]
        elif imghdr.what(fname) == 'jpeg':
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                # Walk the marker segments until an SOFn block (0xc0..0xcf).
                while not 0xc0 <= ftype <= 0xcf:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack('>HH', fhandle.read(4))
                info = [("Type", "jpeg"),
                        ("Dimensions", "%sx%s" % (width, height)),
                        ("Progressive", is_progressive(fname))]
            except Exception:  # IGNORE:W0703
                return
        elif imghdr.what(fname) == 'pgm':
            match = re.search(
                b"(^P5\s(?:\s*#.*[\r\n])*"
                b"(\d+)\s(?:\s*#.*[\r\n])*"
                b"(\d+)\s(?:\s*#.*[\r\n])*"
                b"(\d+)\s(?:\s*#.*[\r\n]\s)*)", head)
            if match is None:  # malformed header: fail soft like other branches
                return
            header, width, height, maxval = match.groups()
            info = [("Type", "pgm"),
                    ("Dimensions", "%sx%s" % (width, height)),
                    ("maxval", maxval),
                    ("Header", header)]
        elif imghdr.what(fname) == 'bmp':
            # BUG FIX: the original called re.search(..., str) -- passing
            # the *builtin str type* instead of the header bytes -- so the
            # bmp branch raised TypeError on every bmp file.
            match = re.search(
                b"((\d+)\sx\s"
                b"(\d+)\sx\s"
                b"(\d+))", head)
            if match is None:
                return
            _, width, height, depth = match.groups()
            info = [("Type", "bmp"),
                    ("Dimensions", "%sx%s" % (width, height)),
                    ("Depth", depth)]
        else:
            return
        info.append(("File size", "%.2f kb" % (os.path.getsize(fname) / 1024)))
        return info
Example #21
0
 def test_closed_file(self):
     """imghdr.what must raise ValueError for closed streams.

     Covers both a closed real file object and a closed BytesIO.
     """
     stream = open(self.testfile, 'rb')
     stream.close()
     with self.assertRaises(ValueError) as cm:
         imghdr.what(stream)
     stream = io.BytesIO(self.testdata)
     stream.close()
     with self.assertRaises(ValueError) as cm:
         imghdr.what(stream)
    def test_write_comp_results_correct_str(self):
        """Correctly writes the comp results with string labels.

        Both emitted figures are verified as genuine PNG files (imghdr).
        """
        write_comp_results('comp_data', self.str_comp_data, self.output_dir)

        # Correctly generates the plot figures
        fp = join(self.output_dir, 'time_fig.png')
        self.assertEqual(what(fp), 'png')
        fp = join(self.output_dir, 'mem_fig.png')
        self.assertEqual(what(fp), 'png')
Example #23
0
 def test_data(self):
     """Every sample file is recognized identically from a path, an open
     stream, raw bytes and a bytearray."""
     for filename, expected in TEST_FILES:
         filename = findfile(filename, subdir='imghdrdata')
         self.assertEqual(imghdr.what(filename), expected)
         with open(filename, 'rb') as stream:
             self.assertEqual(imghdr.what(stream), expected)
         with open(filename, 'rb') as stream:
             data = stream.read()
         self.assertEqual(imghdr.what(None, data), expected)
         self.assertEqual(imghdr.what(None, bytearray(data)), expected)
Example #24
0
 def test_string_data(self):
     """imghdr.what must raise TypeError for str (non-bytes) input,
     whether given as a StringIO stream or as the h= argument."""
     with warnings.catch_warnings():
         # bytes/str comparisons inside imghdr would warn under -b
         warnings.simplefilter("ignore", BytesWarning)
         for filename, _ in TEST_FILES:
             filename = findfile(filename, subdir='imghdrdata')
             with open(filename, 'rb') as stream:
                 data = stream.read().decode('latin1')
             with self.assertRaises(TypeError):
                 imghdr.what(io.StringIO(data))
             with self.assertRaises(TypeError):
                 imghdr.what(None, data)
Example #25
0
 def test_data(self):
     """Every sample file is recognized from a (bytes) path, a unicode
     path, an open stream and raw bytes (Python 2 variant)."""
     for filename, expected in TEST_FILES:
         filename = findfile(filename, subdir='imghdrdata')
         self.assertEqual(imghdr.what(filename), expected)
         # same lookup through a unicode path
         ufilename = filename.decode(sys.getfilesystemencoding())
         self.assertEqual(imghdr.what(ufilename), expected)
         with open(filename, 'rb') as stream:
             self.assertEqual(imghdr.what(stream), expected)
         with open(filename, 'rb') as stream:
             data = stream.read()
         self.assertEqual(imghdr.what(None, data), expected)
Example #26
0
def imgToTrack(tag, image):
    """Attach `image` as the cover art of `tag`.

    Non-gif images are embedded directly with their sniffed MIME type;
    gifs are first converted to a temporary RGB JPEG, embedded, and the
    temp file removed.
    """
    if imghdr.what(image) != "gif":
        tag.addImage(3, image, u"cover")
        tag.getImages()[0].mimeType='image/'+(imghdr.what(image))
    else:
        im = Image.open(image)
        if im.mode != "RGB":
            im = im.convert("RGB")
        im.save("imgs/temp.jpg", "JPEG")
        tag.addImage(3, "imgs/temp.jpg", u"cover")
        tag.getImages()[0].mimeType='image/jpeg'
        # BUG FIX: was os.remove("imgs/temp.jpg" % imgs) -- `imgs` is
        # undefined and the string has no format specifier, so cleanup
        # always crashed (NameError / TypeError).
        os.remove("imgs/temp.jpg")
Example #27
0
 def test_bad_args(self):
     """imghdr.what rejects unsupported argument kinds.

     No args -> TypeError; None alone, bytes paths and raw file
     descriptors -> AttributeError; positional h with a path -> TypeError.
     """
     with self.assertRaises(TypeError):
         imghdr.what()
     with self.assertRaises(AttributeError):
         imghdr.what(None)
     with self.assertRaises(TypeError):
         imghdr.what(self.testfile, 1)
     with self.assertRaises(AttributeError):
         imghdr.what(os.fsencode(self.testfile))
     with open(self.testfile, 'rb') as f:
         with self.assertRaises(AttributeError):
             imghdr.what(f.fileno())
Example #28
0
def validate_upload_file(upload, mime_type):
    # Make sure files are not corrupted.
    """Check that an uploaded file's content matches its claimed MIME type.

    Images are sniffed with imghdr; PDFs are probed by parsing the
    document.  Returns False for any other MIME type or on mismatch.
    """
    image_formats = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'image/gif': 'gif',
    }
    if mime_type in image_formats:
        return imghdr.what(upload) == image_formats[mime_type]
    if mime_type == 'application/pdf':
        doc = PdfFileReader(upload)
        if doc.numPages >= 0:
            return True
    return False
Example #29
0
	def addImage(self, img):
		"""Store raw image bytes in the epub and return their relative href.

		The image type is sniffed with imghdr by round-tripping the data
		through a temp file, falling back to 'jpeg' when unrecognized;
		the file is registered in the manifest with that MIME type.
		"""
		# BUG FIX: image data is binary and must be written in 'wb' mode --
		# text mode corrupts it on Windows and rejects bytes on Python 3.
		fp = open('com.pyepub.temp', 'wb')
		fp.write(img)
		fp.close()
		if imghdr.what('com.pyepub.temp'):
			img_type = imghdr.what('com.pyepub.temp')
		else:
			img_type = 'jpeg'
		os.remove('com.pyepub.temp')
		fileName = 'i'+str(self.imageIndex)+'.'+img_type
		self.epub.writestr('Images/' + fileName, img)
		self.manifest.append('<item id="%(fileName)s" href="Images/%(fileName)s" media-type="image/%(type)s"/>' % {'fileName': fileName, 'type': img_type})
		self.imageIndex += 1
		return '../Images/'+fileName
Example #30
0
def get_image_size(fname):
    '''Determine the image type of fname and return its (width, height).

    Handles png, gif, jpeg, pgm and bmp; returns None when the file is
    shorter than 32 bytes, unrecognized or malformed.
    from draco'''
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(32)
        if len(head) != 32:
            return
        if imghdr.what(fname) == 'png':
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                return
            width, height = struct.unpack('>ii', head[16:24])
        elif imghdr.what(fname) == 'gif':
            width, height = struct.unpack('<HH', head[6:10])
        elif imghdr.what(fname) == 'jpeg':
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                # Walk the marker segments until an SOFn block (0xc0..0xcf).
                while not 0xc0 <= ftype <= 0xcf:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack('>HH', fhandle.read(4))
            except Exception:  # IGNORE:W0703
                return
        elif imghdr.what(fname) == 'pgm':
            match = re.search(
                b"(^P5\s(?:\s*#.*[\r\n])*"
                b"(\d+)\s(?:\s*#.*[\r\n])*"
                b"(\d+)\s(?:\s*#.*[\r\n])*"
                b"(\d+)\s(?:\s*#.*[\r\n]\s)*)", head)
            if match is None:  # malformed header: fail soft like other branches
                return
            header, width, height, maxval = match.groups()
            width = int(width)
            height = int(height)
        elif imghdr.what(fname) == 'bmp':
            # BUG FIX: the original called re.search(..., str) -- passing the
            # *builtin str type* instead of the header bytes -- so the bmp
            # branch raised TypeError on every bmp file.
            match = re.search(
                b"((\d+)\sx\s"
                b"(\d+)\sx\s"
                b"(\d+))", head)
            if match is None:
                return
            _, width, height, depth = match.groups()
            width = int(width)
            height = int(height)
        else:
            return
        return width, height
Example #31
0
def find_plot_pipelines(need_init=True, filter_threshold=FILTER_THRESHOLD):
    r'''
    Exercise find_event_pipeline() and plot_event_pipeline()

    Parameters
    ----------
    need_init : bool
        When True, (re)initialize the temp directory and download the
        HDF5 test files before building the DAT files.
    filter_threshold : int
        Threshold forwarded to find_event_pipeline().

    Raises ValueError when the event CSV is missing or the number of
    produced PNG plots differs from N_EVENTS.
    '''

    main_time_start = time()

    # If configured to do so, initialize temp directory
    # and fetch all of the HDF5 files from the Internet.
    if need_init:
        initialize()
        for filename_h5 in H5_FILE_LIST:
            wgetter(filename_h5)
        # Make all of the DAT files.

    # For each h5, make a dat file.
    make_all_dat_files()

    print('find_plot_pipelines: Filter threshold = ', filter_threshold)
    number_in_cadence = len(H5_FILE_LIST)
    print('find_plot_pipelines: Cadence length = ', number_in_cadence)
    print('find_plot_pipelines: find_event_pipeline({}) ...'
          .format(PATH_DAT_LIST_FILE))

    # With the list of DAT files, do find_event_pipeline()
    df_event = find_event_pipeline(PATH_DAT_LIST_FILE,
                                   filter_threshold=filter_threshold,
                                   number_in_cadence=number_in_cadence,
                                   user_validation=False,
                                   saving=True,
                                   csv_name=PATH_CSVF)

    # CSV file created?
    if not Path(PATH_CSVF).exists():
        raise ValueError('find_plot_pipelines: No CSV of events created')

    # An event CSV was created.
    # Validate the hit table file.
    utl.validate_hittbl(df_event, PATH_CSVF, 'test_pipe_lines', N_EVENTS)

    # Make a list of the HDF5 files.
    print('find_plot_pipelines: making a list of HDF5 files in ({}) ...'
          .format(PATH_DAT_LIST_FILE))
    with open(PATH_H5_LIST_FILE, 'w') as file_handle:
        for filename_h5 in H5_FILE_LIST:
            file_handle.write('{}\n'.format(TESTDIR + filename_h5))

    # Do the plots for all of the HDF5/DAT file pairs.
    print('find_plot_pipelines: plot_event_pipeline({}, {}) ...'
          .format(PATH_CSVF, PATH_H5_LIST_FILE))
    plot_event_pipeline(PATH_CSVF,
                        PATH_H5_LIST_FILE,
                        filter_spec='f{}'.format(filter_threshold),
                        user_validation=False)

    # Check that the right number of PNG files were created.
    # Every *.png in TESTDIR must be a real PNG (sniffed with imghdr).
    outdir_list = listdir(TESTDIR)
    npngs = 0
    for cur_file in outdir_list:
        if cur_file.split('.')[-1] == 'png':
            if imghdr.what(TESTDIR + cur_file) != 'png':
                raise ValueError('find_plot_pipelines: File {} is not a PNG file'
                                 .format(cur_file))
            npngs += 1
    if npngs != N_EVENTS:
        raise ValueError('find_plot_pipelines: Expected to find {} PNG files but observed {}'
                         .format(N_EVENTS, npngs))

    # Stop the clock - we're done.
    main_time_stop = time()

    print('find_plot_pipelines: End, et = {:.1f} seconds'
          .format(main_time_stop - main_time_start))
Example #32
0
# -*- coding: utf-8 -*-
"""
imghdr_test.py
Created on Sat Apr 20 11:19:17 2019

@author: madhu

Minimal demo: print the image format imghdr detects for a sample file.
Expects 'oreilly.png' to exist in the working directory.
"""


import imghdr

file_name = 'oreilly.png'

# imghdr.what sniffs the header and returns e.g. 'png' (None if unknown).
print('File', file_name,'is a:', imghdr.what(file_name))
Example #33
0
 def _get_internal_emoji(self, fname):
     """Return (base64-encoded contents, sniffed image type) for a bundled emoji file."""
     emoji_path = os.path.join(INTERNAL_EMOJI_DIR, fname)
     encoded = get_file_b64(emoji_path)
     return encoded, imghdr.what(emoji_path)
Example #34
0
 def read(self, csv_row):
     """Read the image referenced by *csv_row* and return (data, format).

     The path is taken from the column configured in self._column. When
     self._is_raw_blob is true the raw file bytes are wrapped in np.void;
     otherwise the file is loaded through nibabel. The format string comes
     from imghdr's header sniffing (None when unrecognized).
     """
     path = str(csv_row[self._column.columns_indexes[0]])
     if self._is_raw_blob:
         # Bug fix: the original opened the file in text mode ('r'), which
         # breaks on binary image bytes under Python 3, and never closed the
         # handle. Read binary via a context manager instead.
         with open(path, 'rb') as img_file:
             img_data = np.void(img_file.read())
     else:
         img_data = nibabel.load(path).get_data()
     img_fmt = imghdr.what(path)
     return img_data, img_fmt
Example #35
0
    def _avatar_set(self, msg, avatar_name):
        """Set avatar for Ludolph (admin only)

        Looks *avatar_name* up in the configured avatar directories, then
        publishes it as both a XEP-0084 user avatar and a XEP-0153 vCard
        avatar. Raises CommandError on any validation or publishing failure.
        """
        # Only whitelisted file extensions may be requested.
        if os.path.splitext(
                avatar_name)[-1] not in self._avatar_allowed_extensions:
            raise CommandError(
                'You have requested a file that is not supported')

        avatar = None
        available_avatar_directories = self._get_avatar_dirs()

        for avatar_dir in available_avatar_directories:
            # Create full path to file requested by user
            avatar_file = os.path.join(avatar_dir, avatar_name)
            # Split absolute path for check if user is not trying to jump outside allowed dirs
            path, name = os.path.split(os.path.abspath(avatar_file))

            if path not in available_avatar_directories:
                raise CommandError(
                    'You are not allowed to set avatar outside defined directories'
                )

            try:
                with open(avatar_file, 'rb') as f:
                    avatar = f.read()
            except (OSError, IOError):
                # Not readable in this directory - keep trying the others.
                avatar = None
            else:
                break

        if not avatar:
            raise CommandError(
                'Avatar "%s" has not been found.\n'
                'You can list available avatars with the command: **avatar-list**'
                % avatar_name)

        self.xmpp.msg_reply(
            msg,
            'I have found the selected avatar, changing it might take few seconds...',
            preserve_msg=True)
        xep_0084 = self.xmpp.client.plugin['xep_0084']
        # imghdr sniffs the type from the raw bytes; the '' filename is ignored.
        avatar_type = 'image/%s' % imghdr.what('', avatar)
        avatar_id = xep_0084.generate_id(avatar)
        avatar_bytes = len(avatar)

        try:
            logger.debug('Publishing XEP-0084 avatar data')
            xep_0084.publish_avatar(avatar)
        except XMPPError as e:
            logger.error('Could not publish XEP-0084 avatar: %s' % e.text)
            raise CommandError('Could not publish selected avatar')

        try:
            logger.debug('Publishing XEP-0153 avatar vCard data')
            self.xmpp.client.plugin['xep_0153'].set_avatar(avatar=avatar,
                                                           mtype=avatar_type)
        except XMPPError as e:
            logger.error('Could not publish XEP-0153 vCard avatar: %s' %
                         e.text)
            raise CommandError('Could not set vCard avatar')

        self.xmpp.msg_reply(msg,
                            'Almost done, please be patient',
                            preserve_msg=True)

        # Metadata must advertise the same id/type/size as the published data.
        try:
            logger.debug('Advertise XEP-0084 avatar metadata')
            xep_0084.publish_avatar_metadata([{
                'id': avatar_id,
                'type': avatar_type,
                'bytes': avatar_bytes
            }])
        except XMPPError as e:
            logger.error('Could not publish XEP-0084 metadata: %s' % e.text)
            raise CommandError('Could not publish avatar metadata')

        return 'Avatar has been changed :)'
Example #36
0
 def validate_image(path):
     """Return the normalized image extension for *path* ('jpg' for JPEG),
     or None when the file is not a recognized image."""
     kind = imghdr.what(path)
     return 'jpg' if kind == 'jpeg' else kind
Example #37
0
    def readability(self, article, url, opts=None, user=None):
        """Process full-text content with readability-lxml.

        Implemented as a generator to save memory, because image payloads
        would otherwise all be held in memory at once.
        """
        content = self.preprocess(article)
        if not content:
            return

        # Extract the main article body.
        try:
            doc = readability.Document(content,
                                       positive_keywords=self.positive_classes)
            summary = doc.summary(html_partial=False)
        except:
            # Extraction failed: the payload may be a bare image served as an
            # article without any HTML wrapper.
            imgtype = imghdr.what(None, content)
            if imgtype:  #if it is an image, wrap it in a minimal HTML container
                imgmime = r"image/" + imgtype
                fnimg = "img%d.%s" % (self.imgindex,
                                      'jpg' if imgtype == 'jpeg' else imgtype)
                yield (imgmime, url, fnimg, content, None, None)
                tmphtml = '<html><head><title>Picture</title></head><body><img src="%s" /></body></html>' % fnimg
                yield ('Picture', None, None, tmphtml, '', None)
            else:
                self.log.warn('article is invalid.[%s]' % url)
            return

        title = doc.short_title()
        if not title:
            self.log.warn('article has no title.[%s]' % url)
            return

        title = self.processtitle(title)

        soup = BeautifulSoup(summary, "lxml")

        #if readability parsing failed, fall back to a simpler extractor
        #(not as good, but copes with almost anything)
        body = soup.find('body')
        head = soup.find('head')
        if len(body.contents) == 0:
            from simpleextract import simple_extract
            summary = simple_extract(content)
            soup = BeautifulSoup(summary, "lxml")
            body = soup.find('body')
            if not body:
                self.log.warn('extract article content failed.[%s]' % url)
                return

            head = soup.find('head')
            #add a notice that the fallback extractor was used
            info = soup.new_tag(
                'p', style='color:#555555;font-size:60%;text-align:right;')
            info.string = 'extracted by alternative algorithm.'
            body.append(info)

            self.log.info('use alternative algorithm to extract content.')

        if not head:
            head = soup.new_tag('head')
            soup.html.insert(0, head)

        if not head.find('title'):
            t = soup.new_tag('title')
            t.string = title
            head.append(t)

        #add a content heading if the article body has none
        t = body.find(['h1', 'h2'])
        if not t:
            t = soup.new_tag('h2')
            t.string = title
            body.insert(0, t)
        else:
            totallen = 0
            for ps in t.previous_siblings:
                totallen += len(string_of_tag(ps))
                if totallen > 40:  #this H1/H2 appears mid-article, so it is not the title
                    t = soup.new_tag('h2')
                    t.string = title
                    body.insert(0, t)
                    break

        # Drop configured tags/ids/classes/attributes and all HTML comments.
        if self.remove_tags:
            for tag in soup.find_all(self.remove_tags):
                tag.decompose()
        for id in self.remove_ids:
            for tag in soup.find_all(attrs={"id": id}):
                tag.decompose()
        for cls in self.remove_classes:
            for tag in soup.find_all(attrs={"class": cls}):
                tag.decompose()
        for attr in self.remove_attrs:
            for tag in soup.find_all(attrs={attr: True}):
                del tag[attr]
        for cmt in soup.find_all(text=lambda text: isinstance(text, Comment)):
            cmt.extract()

        #strip all attributes from <body> so InsertToc can match <body> with a regex
        bodyattrs = [attr for attr in body.attrs]
        for attr in bodyattrs:
            del body[attr]

        if self.extra_css:
            sty = soup.new_tag('style', type="text/css")
            sty.string = self.extra_css
            soup.html.head.append(sty)

        self.soupbeforeimage(soup)

        has_imgs = False
        thumbnail = None

        if self.keep_image:
            opener = URLOpener(self.host, timeout=self.timeout)
            for img in soup.find_all('img'):
                #more and more sites lazy-load images, so try to recover the real url
                #note: if data-src etc. does not hold the real url, nothing we can do
                imgurl = img['src'] if 'src' in img.attrs else ''
                if not imgurl:
                    for attr in img.attrs:
                        if attr != 'src' and 'src' in attr:  #many sites use data-src
                            imgurl = img[attr]
                            break
                if not imgurl:
                    img.decompose()
                    continue
                if not imgurl.startswith('http'):
                    imgurl = self.urljoin(url, imgurl)
                if self.fetch_img_via_ssl and url.startswith('https://'):
                    imgurl = imgurl.replace('http://', 'https://')
                if self.isfiltered(imgurl):
                    self.log.warn('img filtered : %s' % imgurl)
                    img.decompose()
                    continue
                imgresult = opener.open(imgurl)
                imgcontent = self.process_image(
                    imgresult.content,
                    opts) if imgresult.status_code == 200 else None
                if imgcontent:
                    if len(imgcontent
                           ) < self.img_min_size:  #rexdf too small image
                        img.decompose()
                        continue

                    imgtype = imghdr.what(None, imgcontent)
                    if imgtype:
                        imgmime = r"image/" + imgtype
                        fnimg = "img%d.%s" % (self.imgindex, 'jpg' if imgtype
                                              == 'jpeg' else imgtype)
                        img['src'] = fnimg

                        #use the first image as the TOC thumbnail
                        if not has_imgs:
                            has_imgs = True
                            thumbnail = imgurl
                            yield (imgmime, imgurl, fnimg, imgcontent, None,
                                   True)
                        else:
                            yield (imgmime, imgurl, fnimg, imgcontent, None,
                                   None)
                    else:
                        img.decompose()
                else:
                    self.log.warn('fetch img failed(err:%d):%s' %
                                  (imgresult.status_code, imgurl))
                    img.decompose()

            #strip links wrapping images so a stray tap does not open the browser
            for img in soup.find_all('img'):
                if img.parent and img.parent.parent and \
                    img.parent.name == 'a':
                    img.parent.replace_with(img)
        else:
            for img in soup.find_all('img'):
                img.decompose()

        #convert HTML5 semantic tags to <div> for reader compatibility
        for x in soup.find_all([
                'article', 'aside', 'header', 'footer', 'nav', 'figcaption',
                'figure', 'section', 'time'
        ]):
            x.name = 'div'

        self.soupprocessex(soup)

        #insert share links
        if user:
            self.AppendShareLinksToArticle(soup, user, url)

        content = unicode(soup)

        #use the leading part of the article text as the TOC brief
        brief = u''
        if GENERATE_TOC_DESC:
            for h in body.find_all(['h1', 'h2']):  # drop h1/h2 to avoid duplicating the title
                h.decompose()
            for s in body.stripped_strings:
                brief += unicode(s) + u' '
                if len(brief) >= TOC_DESC_WORD_LIMIT:
                    brief = brief[:TOC_DESC_WORD_LIMIT]
                    break
        soup = None

        yield (title, None, None, content, brief, thumbnail)
Example #38
0
    def Items(self, opts=None, user=None):
        """Generator yielding one tuple per feed item.

        For HTML pages: (section, url, title, content, brief, thumbnail).
        For images: (mime, url, filename, content, brief, thumbnail).
        Only the first image of an article yields thumbnail=True; the
        rest yield None in that slot.
        """
        decoder = AutoDecoder(False)
        timeout = self.timeout
        for section, url in self.feeds:
            opener = URLOpener(self.host, timeout=timeout)
            result = opener.open(url)
            status_code, content = result.status_code, result.content
            if status_code != 200 or not content:
                self.log.warn('fetch article failed(%d):%s.' %
                              (status_code, url))
                continue

            # Decode with the declared page encoding first; fall back to
            # autodetection when it fails or none is configured.
            if self.page_encoding:
                try:
                    content = content.decode(self.page_encoding)
                except UnicodeDecodeError:
                    content = decoder.decode(content, opener.realurl,
                                             result.headers)
            else:
                content = decoder.decode(content, opener.realurl,
                                         result.headers)

            content = self.preprocess(content)
            soup = BeautifulSoup(content, "lxml")

            head = soup.find('head')
            if not head:
                head = soup.new_tag('head')
                soup.html.insert(0, head)
            if not head.find('title'):
                t = soup.new_tag('title')
                t.string = section
                head.append(t)

            try:
                title = soup.html.head.title.string
            except AttributeError:
                title = section
                #self.log.warn('object soup invalid!(%s)'%url)
                #continue

            title = self.processtitle(title)

            # Keep only whitelisted tags when configured, rebuilding <body>.
            if self.keep_only_tags:
                body = soup.new_tag('body')
                try:
                    if isinstance(self.keep_only_tags, dict):
                        keep_only_tags = [self.keep_only_tags]
                    else:
                        keep_only_tags = self.keep_only_tags
                    for spec in keep_only_tags:
                        for tag in soup.find('body').find_all(**spec):
                            body.insert(len(body.contents), tag)
                    soup.find('body').replace_with(body)
                except AttributeError:  # soup has no body element
                    pass

            for spec in self.remove_tags_after:
                tag = soup.find(**spec)
                remove_beyond(tag, 'next_sibling')

            for spec in self.remove_tags_before:
                tag = soup.find(**spec)
                remove_beyond(tag, 'previous_sibling')

            # Drop configured tags/ids/classes/attributes and HTML comments.
            remove_tags = self.insta_remove_tags + self.remove_tags
            remove_ids = self.insta_remove_ids + self.remove_ids
            remove_classes = self.insta_remove_classes + self.remove_classes
            remove_attrs = self.insta_remove_attrs + self.remove_attrs
            for tag in soup.find_all(remove_tags):
                tag.decompose()
            for id in remove_ids:
                for tag in soup.find_all(attrs={"id": id}):
                    tag.decompose()
            for cls in remove_classes:
                for tag in soup.find_all(attrs={"class": cls}):
                    tag.decompose()
            for attr in remove_attrs:
                for tag in soup.find_all(attrs={attr: True}):
                    del tag[attr]
            for cmt in soup.find_all(
                    text=lambda text: isinstance(text, Comment)):
                cmt.extract()

            #strip all attributes from <body> so InsertToc can match <body> with a regex
            body = soup.html.body
            bodyattrs = [attr for attr in body.attrs]
            for attr in bodyattrs:
                del body[attr]

            if self.extra_css:
                sty = soup.new_tag('style', type="text/css")
                sty.string = self.extra_css
                soup.html.head.append(sty)

            has_imgs = False
            thumbnail = None
            if self.keep_image:
                self.soupbeforeimage(soup)
                for img in soup.find_all('img'):
                    #more and more sites lazy-load images, so try to recover the real url
                    #note: if data-src etc. does not hold the real url, nothing we can do
                    imgurl = img['src'] if 'src' in img.attrs else ''
                    if not imgurl:
                        for attr in img.attrs:
                            if attr != 'src' and 'src' in attr:  #many sites use data-src
                                imgurl = img[attr]
                                break
                    if not imgurl:
                        img.decompose()
                        continue
                    if not imgurl.startswith('http'):
                        imgurl = self.urljoin(url, imgurl)
                    if self.fetch_img_via_ssl and url.startswith('https://'):
                        imgurl = imgurl.replace('http://', 'https://')
                    if self.isfiltered(imgurl):
                        self.log.warn('img filtered:%s' % imgurl)
                        img.decompose()
                        continue
                    imgresult = opener.open(imgurl)
                    imgcontent = self.process_image(
                        imgresult.content,
                        opts) if imgresult.status_code == 200 else None
                    if imgcontent:
                        if len(imgcontent
                               ) < self.img_min_size:  #rexdf too small image
                            img.decompose()
                            continue

                        imgtype = imghdr.what(None, imgcontent)
                        if imgtype:
                            imgmime = r"image/" + imgtype
                            fnimg = "img%d.%s" % (self.imgindex,
                                                  'jpg' if imgtype == 'jpeg'
                                                  else imgtype)
                            img['src'] = fnimg

                            #use the first image as the TOC thumbnail
                            if not has_imgs:
                                has_imgs = True
                                thumbnail = imgurl
                                yield (imgmime, imgurl, fnimg, imgcontent,
                                       None, True)
                            else:
                                yield (imgmime, imgurl, fnimg, imgcontent,
                                       None, None)
                        else:
                            img.decompose()
                    else:
                        self.log.warn('fetch img failed(err:%d):%s' %
                                      (imgresult.status_code, imgurl))
                        img.decompose()

                #strip links wrapping images
                for img in soup.find_all('img'):
                    if img.parent and img.parent.parent and \
                        img.parent.name == 'a':
                        img.parent.replace_with(img)

            else:
                for img in soup.find_all('img'):
                    img.decompose()

            self.soupprocessex(soup)
            content = unicode(soup)

            #use the leading part of the article text as the TOC brief
            brief = u''
            if GENERATE_TOC_DESC:
                for h in body.find_all(['h1', 'h2']):  # drop h1/h2 to avoid duplicating the title
                    h.decompose()
                for s in body.stripped_strings:
                    brief += unicode(s) + u' '
                    if len(brief) >= TOC_DESC_WORD_LIMIT:
                        brief = brief[:TOC_DESC_WORD_LIMIT]
                        break
            soup = None

            content = self.postprocess(content)
            yield (section, url, title, content, brief, thumbnail)
Example #39
0
 def serve(self, rendition):
     """Stream the rendition's underlying image file.

     The content type is sniffed from the file bytes via imghdr.
     """
     # Open and serve the file
     rendition.file.open('rb')
     detected_format = imghdr.what(rendition.file)
     wrapped_file = FileWrapper(rendition.file)
     return StreamingHttpResponse(wrapped_file,
                                  content_type='image/' + detected_format)
Example #40
0
#!/usr/bin/env python3

import imghdr
import os
import tempfile
import shutil
import uuid

# Source and destination roots for the photo-sorting sweep.
rootDir = '/Volumes/Vanessa/2020-IPHONE-BACKUP-FILE/'
newRootDir = '/Volumes/Vanessa/Jane/'

# Walk the backup tree and copy every JPEG out under a fresh UUID name.
for dirName, subdirList, fileList in os.walk(rootDir):
    print('Found directory: %s' % dirName)
    for fname in fileList:
        candidate = os.path.join(dirName, fname)
        if imghdr.what(candidate) != 'jpeg':
            continue
        target = os.path.join(newRootDir, str(uuid.uuid4()) + '.jpeg')
        print(target)
        shutil.copy(candidate, target)
    def get_file_extension(self, file_name, decoded_file):
        """Return the image extension sniffed from *decoded_file* bytes,
        normalizing 'jpeg' to 'jpg' (None when not a recognized image)."""
        detected = imghdr.what(file_name, decoded_file)
        if detected == "jpeg":
            detected = "jpg"

        return detected
Example #42
0
def _main(args):
    """Run YOLO_v2 detection over every image in args.test_path and write
    annotated copies (bounding boxes + class labels) to args.output_path.

    Expects args to provide: model_path (a Keras .h5 file), anchors_path,
    classes_path, test_path, output_path, score_threshold, iou_threshold.
    """
    model_path = os.path.expanduser(args.model_path)
    assert model_path.endswith('.h5'), 'Keras model must be a .h5 file.'
    anchors_path = os.path.expanduser(args.anchors_path)
    classes_path = os.path.expanduser(args.classes_path)
    test_path = os.path.expanduser(args.test_path)
    output_path = os.path.expanduser(args.output_path)

    if not os.path.exists(output_path):
        print('Creating output path {}'.format(output_path))
        os.mkdir(output_path)

    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.

    # One class name per line in the classes file.
    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]

    # The anchors file holds a single comma-separated line of floats,
    # reshaped to (num_anchors, 2).
    with open(anchors_path) as f:
        anchors = f.readline()
        print("  ancharos ", anchors)
        if len(anchors) > 0:
            anchors = [float(x) for x in anchors.split(',')]
            anchors = np.array(anchors).reshape(-1, 2)

    yolo_model = load_model(model_path)

    # Verify model, anchors, and classes are compatible
    num_classes = len(class_names)
    num_anchors = len(anchors)
    # TODO: Assumes dim ordering is channel last
    model_output_channels = yolo_model.layers[-1].output_shape[-1]
    print(num_classes, num_anchors, model_output_channels)
    assert model_output_channels == num_anchors * (num_classes + 5), \
        'Mismatch between model and given anchor and class sizes. ' \
        'Specify matching anchors and classes with --anchors_path and ' \
        '--classes_path flags.'
    print('{} model, anchors, and classes loaded.'.format(model_path))

    # Check if model is fully convolutional, assuming channel last order.
    model_image_size = yolo_model.layers[0].input_shape[1:3]
    is_fixed_size = model_image_size != (None, None)

    # Generate colors for drawing bounding boxes.
    hsv_tuples = [(x / len(class_names), 1., 1.)
                  for x in range(len(class_names))]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            colors))
    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    random.seed(None)  # Reset seed to default.

    # Generate output tensor targets for filtered bounding boxes.
    # TODO: Wrap these backend operations with Keras layers.
    yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    boxes, scores, classes = yolo_eval(yolo_outputs,
                                       input_image_shape,
                                       score_threshold=args.score_threshold,
                                       iou_threshold=args.iou_threshold)

    for image_file in os.listdir(test_path):
        # Skip anything imghdr cannot identify as an image (or cannot read).
        try:
            image_type = imghdr.what(os.path.join(test_path, image_file))
            if not image_type:
                continue
        except:
            continue

        image = Image.open(os.path.join(test_path, image_file))
        if is_fixed_size:  # TODO: When resizing we can use minibatch input.
            resized_image = image.resize(tuple(reversed(model_image_size)),
                                         Image.BICUBIC)
            image_data = np.array(resized_image, dtype='float32')
        else:
            # Due to skip connection + max pooling in YOLO_v2, inputs must have
            # width and height as multiples of 32.
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            resized_image = image.resize(new_image_size, Image.BICUBIC)
            image_data = np.array(resized_image, dtype='float32')
            print(image_data.shape)

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        out_boxes, out_scores, out_classes = sess.run(
            [boxes, scores, classes],
            feed_dict={
                yolo_model.input: image_data,
                input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })
        print('Found {} boxes for {}'.format(len(out_boxes), image_file))

        # Label font scales with image height; box thickness with image size.
        font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * image.size[1] +
                                                0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{} {:.2f}'.format(predicted_class, score)

            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            # Clamp the box to the image bounds.
            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            print(label, (left, top), (right, bottom))

            # Draw the label above the box when it fits, otherwise inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # My kingdom for a good redistributable image drawing library.
            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=colors[c])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        image.save(os.path.join(output_path, image_file), quality=90)
    sess.close()
Example #43
0
def get_notes(files_found, report_folder, seeker):
    """Parse Apple Notes sqlite database(s) and produce Notes artifact reports.

    Scans *files_found* for '.sqlite' databases, queries note metadata plus
    attachment details, builds inline thumbnails for image attachments, and
    writes an HTML report, a TSV export and a timeline into *report_folder*.
    Only rows from the last matching database are reported (this matches the
    original behavior of overwriting ``all_rows`` per file).

    *seeker* is accepted for interface compatibility but is not used here.
    """
    data_list = []
    all_rows = []        # rows from the most recent matching database
    analyzed_file = ''   # path of the database those rows came from

    for file_found in files_found:
        file_found = str(file_found)

        if not file_found.endswith('.sqlite'):
            continue

        db = open_sqlite_db_readonly(file_found)
        try:
            cursor = db.cursor()
            cursor.execute('''
                SELECT 
                DATETIME(TabA.ZCREATIONDATE1+978307200,'UNIXEPOCH'), 
                TabA.ZTITLE1,
                TabA.ZSNIPPET,
                TabB.ZTITLE2,
                TabC.ZNAME,
                DATETIME(TabA.ZMODIFICATIONDATE1+978307200,'UNIXEPOCH'),
                case TabA.ZISPASSWORDPROTECTED
                when 0 then "No"
                when 1 then "Yes"
                end,
                TabA.ZPASSWORDHINT,
                case TabA.ZMARKEDFORDELETION
                when 0 then "No"
                when 1 then "Yes"
                end,
                case TabA.ZISPINNED
                when 0 then "No"
                when 1 then "Yes"
                end,
                TabE.ZFILENAME,
                TabE.ZIDENTIFIER,
                TabD.ZFILESIZE,
                TabD.ZTYPEUTI,
                DATETIME(TabD.ZCREATIONDATE+978307200,'UNIXEPOCH'),
                DATETIME(TabD.ZMODIFICATIONDATE+978307200,'UNIXEPOCH')
                FROM ZICCLOUDSYNCINGOBJECT TabA
                INNER JOIN ZICCLOUDSYNCINGOBJECT TabB on TabA.ZFOLDER = TabB.Z_PK
                INNER JOIN ZICCLOUDSYNCINGOBJECT TabC on TabA.ZACCOUNT3 = TabC.Z_PK
                LEFT JOIN ZICCLOUDSYNCINGOBJECT TabD on TabA.Z_PK = TabD.ZNOTE
                LEFT JOIN ZICCLOUDSYNCINGOBJECT TabE on TabD.Z_PK = TabE.ZATTACHMENT1
                WHERE TabA.ZTITLE1 <> ''
                ''')

            all_rows = cursor.fetchall()
            analyzed_file = file_found
        finally:
            # BUG FIX: the original closed only the last database at the very
            # end of the function (leaking earlier connections) and raised
            # NameError on db/all_rows when no .sqlite file was present.
            db.close()

    if all_rows:
        for row in all_rows:

            # Build an inline thumbnail when the attachment is an image.
            # row[10] = attachment filename, row[11] = attachment identifier.
            if row[10] is not None and row[11] is not None:
                attachment_file = join(dirname(analyzed_file),
                                       'Accounts/LocalAccount/Media', row[11],
                                       row[10])
                attachment_storage_path = dirname(attachment_file)
                # BUG FIX: probe the file once instead of calling imghdr.what
                # up to three times ('jpg' kept for fidelity with the original
                # comparison, though imghdr reports JPEGs as 'jpeg').
                if imghdr.what(attachment_file) in ('jpeg', 'jpg', 'png'):
                    thumbnail_path = join(report_folder,
                                          'thumbnail_' + row[10])
                    save_original_attachment_as_thumbnail(
                        attachment_file, thumbnail_path)
                    thumbnail = '<img src="{}">'.format(thumbnail_path)
                else:
                    thumbnail = 'File is not an image or the filetype is not supported yet.'
            else:
                thumbnail = ''
                attachment_storage_path = ''

            # Render the byte count grouped in threes from the LEFT with '.'
            # separators, exactly as the original did.
            # NOTE(review): left-grouping is not a true thousands separator
            # (1234 -> "123.4"), and the header labels this "Size in KB" while
            # the value looks like raw bytes — confirm intended formatting.
            if row[12] is not None:
                filesize = '.'.join(
                    str(row[12])[i:i + 3]
                    for i in range(0, len(str(row[12])), 3))
            else:
                filesize = ''

            data_list.append(
                (row[0], row[1], row[2], row[3], row[4], row[5], row[6],
                 row[7], row[8], row[9], thumbnail, row[10],
                 attachment_storage_path, filesize, row[13], row[14], row[15]))

        report = ArtifactHtmlReport('Notes')
        report.start_artifact_report(report_folder, 'Notes')
        report.add_script()
        data_headers = ('Creation Date', 'Note', 'Snippet', 'Folder',
                        'Storage Place', 'Last Modified', 'Password Protected',
                        'Password Hint', 'Marked for Deletion', 'Pinned',
                        'Attachment Thumbnail', 'Attachment Original Filename',
                        'Attachment Storage Folder', 'Attachment Size in KB',
                        'Attachment Type', 'Attachment Creation Date',
                        'Attachment Last Modified')
        report.write_artifact_data_table(
            data_headers,
            data_list,
            analyzed_file,
            html_no_escape=['Attachment Thumbnail'])
        report.end_artifact_report()

        tsvname = 'Notes'
        tsv(report_folder, data_headers, data_list, tsvname)

        tlactivity = 'Notes'
        timeline(report_folder, tlactivity, data_list, data_headers)
    else:
        logfunc('No Notes available')

    return
Example #44
0
    def load(filename, **kwargs):
        """Load an image, atlas entry, or zip image sequence from *filename*.

        Three kinds of sources are handled:
        - 'atlas://<path>/<id>' references, resolved through the 'kv.atlas'
          cache (loading and caching the atlas on first use);
        - '.zip' archives, delegated to ImageLoader.zip_loader (which may
          return a sequence of images);
        - ordinary files, dispatched to the first registered loader whose
          extension list matches the detected format.

        Raises ValueError for a malformed atlas name and a plain Exception
        when the atlas file cannot be found or no loader accepts the file.
        """

        # atlas ?
        if filename[:8] == 'atlas://':
            # remove the url
            rfn = filename[8:]
            # last field is the ID
            try:
                rfn, uid = rfn.rsplit('/', 1)
            except ValueError:
                raise ValueError('Image: Invalid %s name for atlas' % filename)

            # search if we already got the atlas loaded
            atlas = Cache.get('kv.atlas', rfn)

            # atlas already loaded, so reupload the missing texture in cache,
            # because when it's not in use, the texture can be removed from the
            # kv.texture cache.
            if atlas:
                texture = atlas[uid]
                fn = 'atlas://%s/%s' % (rfn, uid)
                # cache id encodes (filename, mipmap-flag, count) — keep in
                # sync with the cid built below for freshly loaded atlases
                cid = '{}|{:d}|{:d}'.format(fn, False, 0)
                Cache.append('kv.texture', cid, texture)
                return Image(texture)

            # search with resource
            afn = rfn
            if not afn.endswith('.atlas'):
                afn += '.atlas'
            afn = resource_find(afn)
            if not afn:
                raise Exception('Unable to found %r atlas' % afn)
            atlas = Atlas(afn)
            Cache.append('kv.atlas', rfn, atlas)
            # first time, fill our texture cache.
            for nid, texture in atlas.textures.items():
                fn = 'atlas://%s/%s' % (rfn, nid)
                cid = '{}|{:d}|{:d}'.format(fn, False, 0)
                Cache.append('kv.texture', cid, texture)
            return Image(atlas[uid])

        # extract extensions
        ext = filename.split('.')[-1].lower()

        # prevent url querystrings
        if filename.startswith((('http://', 'https://'))):
            ext = ext.split('?')[0]

        filename = resource_find(filename)

        # special case. When we are trying to load a "zip" file with image, we
        # will use the special zip_loader in ImageLoader. This might return a
        # sequence of images contained in the zip.
        if ext == 'zip':
            return ImageLoader.zip_loader(filename)
        else:
            im = None
            # Get actual image format instead of extension if possible
            # (the extension may lie about the actual on-disk format)
            ext = imghdr.what(filename) or ext
            for loader in ImageLoader.loaders:
                if ext not in loader.extensions():
                    continue
                # loader.__name__[11:] strips the 'ImageLoader' prefix from
                # the class name for a shorter log tag
                Logger.debug('Image%s: Load <%s>' %
                             (loader.__name__[11:], filename))
                im = loader(filename, **kwargs)
                break
            if im is None:
                raise Exception('Unknown <%s> type, no loader found.' % ext)
            return im
Example #45
0
# Send the same multipart message (plain text + HTML alternative + image
# attachment) individually to every address in `receivers`.
for receiver in receivers:
    msg = EmailMessage()
    msg['Subject'] = 'Look at me!'
    msg['From'] = EMAIL_ADDRESS
    msg['To'] = receiver
    msg.set_content('This is message text!')

    msg.add_alternative("""\
		<!DOCTYPE html>
		<html>
		    <body>
		        <h1 style="color:SlateGray;">This is message text, but in html!</h1>
		    </body>
		</html>
		""",
                        subtype='html')

    # Read the attachment bytes, then sniff the real image subtype from the
    # file contents rather than trusting the extension.
    with open('./Resources/smiley.jpg', 'rb') as img_file:
        attachment_name = img_file.name
        attachment_bytes = img_file.read()
    attachment_subtype = imghdr.what(attachment_name)

    msg.add_attachment(attachment_bytes,
                       maintype='image',
                       subtype=attachment_subtype,
                       filename=attachment_name)

    # One fresh SSL connection per recipient, closed by the context manager.
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
        smtp.login(EMAIL_ADDRESS, EMAIL_PASS)
        smtp.send_message(msg)
Example #46
0
    def readability_by_soup(self, article, url, opts=None, user=None):
        """Manually parse a web page with BeautifulSoup and extract the main
        content. Because image files occupy memory, this function is also a
        generator (to save memory).

        Yields one tuple per downloaded image, then a final tuple for the
        article itself: (title, None, None, content, brief, thumbnail).
        Image tuples have the shape (mime, imgurl, filename, bytes, None,
        is_thumbnail_flag).
        """
        content = self.preprocess(article)
        soup = BeautifulSoup(content, "lxml")

        # A page without a parseable <title> is treated as invalid input.
        try:
            title = soup.html.head.title.string
        except AttributeError:
            self.log.warn('object soup invalid!(%s)' % url)
            return
        if not title:
            self.log.warn('article has no title.[%s]' % url)
            return

        title = self.processtitle(title)
        soup.html.head.title.string = title

        # If configured, rebuild <body> from only the whitelisted tags.
        if self.keep_only_tags:
            body = soup.new_tag('body')
            try:
                if isinstance(self.keep_only_tags, dict):
                    keep_only_tags = [self.keep_only_tags]
                else:
                    keep_only_tags = self.keep_only_tags
                for spec in keep_only_tags:
                    for tag in soup.find('body').find_all(**spec):
                        body.insert(len(body.contents), tag)
                soup.find('body').replace_with(body)
            except AttributeError:  # soup has no body element
                pass

        # Truncate everything after/before the configured marker tags.
        for spec in self.remove_tags_after:
            tag = soup.find(**spec)
            remove_beyond(tag, 'next_sibling')

        for spec in self.remove_tags_before:
            tag = soup.find(**spec)
            remove_beyond(tag, 'previous_sibling')

        remove_tags = self.insta_remove_tags + self.remove_tags
        remove_ids = self.insta_remove_ids + self.remove_ids
        remove_classes = self.insta_remove_classes + self.remove_classes
        remove_attrs = self.insta_remove_attrs + self.remove_attrs

        # Strip unwanted tags, ids, classes, attributes and HTML comments.
        for tag in soup.find_all(remove_tags):
            tag.decompose()
        for id in remove_ids:
            for tag in soup.find_all(attrs={"id": id}):
                tag.decompose()
        for cls in remove_classes:
            for tag in soup.find_all(attrs={"class": cls}):
                tag.decompose()
        for attr in remove_attrs:
            for tag in soup.find_all(attrs={attr: True}):
                del tag[attr]
        for cmt in soup.find_all(text=lambda text: isinstance(text, Comment)):
            cmt.extract()

        if self.extra_css:
            sty = soup.new_tag('style', type="text/css")
            sty.string = self.extra_css
            soup.html.head.append(sty)

        self.soupbeforeimage(soup)

        has_imgs = False
        thumbnail = None

        if self.keep_image:
            opener = URLOpener(self.host, timeout=self.timeout)
            for img in soup.find_all('img'):
                # More and more sites lazy-load images now, so handle that here.
                # Note: if data-src-like attributes do not hold the real URL,
                # there is nothing we can do.
                imgurl = img['src'] if 'src' in img.attrs else ''
                if not imgurl:
                    for attr in img.attrs:
                        if attr != 'src' and 'src' in attr:  # many sites use data-src
                            imgurl = img[attr]
                            break
                if not imgurl:
                    img.decompose()
                    continue
                if not imgurl.startswith('http'):
                    imgurl = self.urljoin(url, imgurl)
                if self.fetch_img_via_ssl and url.startswith('https://'):
                    imgurl = imgurl.replace('http://', 'https://')
                if self.isfiltered(imgurl):
                    self.log.warn('img filtered:%s' % imgurl)
                    img.decompose()
                    continue
                imgresult = opener.open(imgurl)
                imgcontent = self.process_image(
                    imgresult.content,
                    opts) if imgresult.status_code == 200 else None
                if imgcontent:
                    if len(imgcontent
                           ) < self.img_min_size:  #rexdf too small image
                        img.decompose()
                        continue

                    # Detect the real image type from the downloaded bytes.
                    imgtype = imghdr.what(None, imgcontent)
                    if imgtype:
                        imgmime = r"image/" + imgtype
                        fnimg = "img%d.%s" % (self.imgindex, 'jpg' if imgtype
                                              == 'jpeg' else imgtype)
                        img['src'] = fnimg

                        # Use the first image as the TOC thumbnail.
                        if not has_imgs:
                            has_imgs = True
                            thumbnail = imgurl
                            yield (imgmime, imgurl, fnimg, imgcontent, None,
                                   True)
                        else:
                            yield (imgmime, imgurl, fnimg, imgcontent, None,
                                   None)
                    else:
                        img.decompose()
                else:
                    self.log.warn('fetch img failed(err:%d):%s' %
                                  (imgresult.status_code, imgurl))
                    img.decompose()

            # Strip links wrapping images, so an accidental tap does not open
            # the browser.
            for img in soup.find_all('img'):
                if img.parent and img.parent.parent and \
                    img.parent.name == 'a':
                    img.parent.replace_with(img)
        else:
            for img in soup.find_all('img'):
                img.decompose()

        # Add a content heading if the article body has none.
        body = soup.html.body
        t = body.find(['h1', 'h2'])
        if not t:
            t = soup.new_tag('h2')
            t.string = title
            body.insert(0, t)
        else:
            totallen = 0
            for ps in t.previous_siblings:
                totallen += len(string_of_tag(ps))
                if totallen > 40:  # this H1/H2 appears mid-article, so it is not the article title
                    t = soup.new_tag('h2')
                    t.string = title
                    body.insert(0, t)
                    break

        # Delete all attributes of body so InsertToc can match <body> with a
        # regular expression.
        bodyattrs = [attr for attr in body.attrs]
        for attr in bodyattrs:
            del body[attr]

        # Convert HTML5 structural tags to plain <div>.
        for x in soup.find_all([
                'article', 'aside', 'header', 'footer', 'nav', 'figcaption',
                'figure', 'section', 'time'
        ]):
            x.name = 'div'

        self.soupprocessex(soup)

        # Insert share links.
        if user:
            self.AppendShareLinksToArticle(soup, user, url)

        content = unicode(soup)

        # Use the leading part of the article content as the brief/summary.
        brief = u''
        if GENERATE_TOC_DESC:
            for h in body.find_all(['h1', 'h2']):  # drop h1/h2 to avoid duplicating the title
                h.decompose()
            for s in body.stripped_strings:
                brief += unicode(s) + u' '
                if len(brief) >= TOC_DESC_WORD_LIMIT:
                    brief = brief[:TOC_DESC_WORD_LIMIT]
                    break
        soup = None

        yield (title, None, None, content, brief, thumbnail)
Example #47
0
def _allowed_file(file_obj, ext):
    """Return True when *file_obj* is an image whose detected type is in *ext*.

    The type is sniffed from the file contents via imghdr, so the check does
    not trust the filename extension. *ext* is a container of imghdr type
    names such as 'png', 'jpeg', 'gif'.
    """
    import imghdr
    # BUG FIX: the original called imghdr.what() twice, discarding the first
    # result; probe the file once and test membership directly.
    return imghdr.what(file_obj) in ext
Example #48
0
    def test(self):
        """The detector's captured JPEG bytes must be identified as JPEG."""
        jpeg_bytes = self.detector.read_jpeg_bytes()
        detected = imghdr.what(None, jpeg_bytes)
        self.assertEqual(detected, 'jpeg')
Example #49
0
def create_tf_example(labels, filename, annotations, debug=False):
    """
    Build a tf.train.Example for one image and its bounding-box annotations.

    Based on:
    https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md

    labels      -- label map consumed by mapLabel() to turn class names into ids
    filename    -- path to a PNG or JPEG image
    annotations -- iterable of dicts with 'class', 'x', 'y', 'width', 'height'
                   (pixel units; normalized here by image width/height)
    debug       -- when True, print the filename being processed

    Raises RuntimeError for any image format other than PNG/JPEG and
    AssertionError when a normalized coordinate falls outside [0, 1].
    """
    if debug:
        print(filename)

    width, height = getSize(filename)  # Image width and height
    encoded_image_data = loadImage(filename)  # Encoded image bytes

    # BUG FIX: detect the actual on-disk format once instead of probing the
    # file with imghdr.what() twice.
    detected_format = imghdr.what(filename)
    if detected_format == 'png':
        image_format = b'png'  # b'jpeg' or b'png'
    elif detected_format == 'jpeg':
        image_format = b'jpeg'
    else:
        raise RuntimeError("Only supports PNG or JPEG images")

    xmins = [
    ]  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [
    ]  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = [
    ]  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [
    ]  # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)

    def _valid(coord):
        """A normalized coordinate must land in the closed interval [0, 1]."""
        return 0 <= coord <= 1

    for a in annotations:
        # Numeric and text class labels
        classes.append(mapLabel(labels, a['class']))
        classes_text.append(a['class'].encode())

        # Scaled min/maxes
        xmins.append(bounds(a['x'] / width))
        ymins.append(bounds(a['y'] / height))
        xmaxs.append(bounds((a['x'] + a['width']) / width))
        ymaxs.append(bounds((a['y'] + a['height']) / height))

        # We got errors: maximum box coordinate value is larger than 1.010000
        assert _valid(xmins[-1]) and _valid(ymins[-1]) and _valid(xmaxs[-1]) and _valid(ymaxs[-1]), \
                "Invalid values for "+filename+": "+ \
                str(xmins[-1])+","+str(ymins[-1])+","+str(xmaxs[-1])+","+str(ymaxs[-1])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode()),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode()),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Example #50
0
def is_image(file_path):
    """Return True when imghdr recognizes *file_path* as an image file."""
    detected_format = imghdr.what(file_path)
    return detected_format is not None
Example #51
0
def getpath():
    """Interactively prompt for original/modified (and optionally diff)
    texture file paths, validate them, then run the diff/process pipeline.

    Validation reads the FourCC at offset 0x54 (must be b'DXT5') and the
    width/height as little-endian 32-bit ints at offset 0x0C — presumably a
    DDS-style header; TODO confirm.
    """
    # Prompt for the original texture until a valid DXT5 file is given.
    while True:
        OgTex = input(
            'Enter original texture filepath or type \"exit()\" to exit the application\n'
        )
        if OgTex == 'exit()': sys.exit()
        elif os.path.isfile(OgTex):
            with open(OgTex, 'rb+') as f:
                f.seek(0x0054)
                check = f.read(4)
                if check == b'DXT5':
                    f.seek(0x000C)
                    ogw = int.from_bytes(f.read(4), "little")
                    ogh = int.from_bytes(f.read(4), "little")
                    break
                else:
                    print('Error: not a DXT5 texture file\n')
        else:
            print('Error: file not found\n')
    # Prompt for the modified texture; it must be a different file with the
    # same dimensions as the original.
    while True:
        ModTex = input(
            '\nEnter modified texture filepath or type \"exit()\" to exit the application\n'
        )
        if ModTex == 'exit()': sys.exit()
        elif ModTex == OgTex: print('Error: texture is the same')
        elif os.path.isfile(ModTex):
            with open(ModTex, 'rb+') as f:
                f.seek(0x0054)
                check = f.read(4)
                if check == b'DXT5':
                    f.seek(0x000C)
                    mdw = int.from_bytes(f.read(4), "little")
                    mdh = int.from_bytes(f.read(4), "little")
                    if mdw == ogw and mdh == ogh: break
                    else:
                        print(
                            'Error: modified texture dimensions do not match original texture dimensions'
                        )
                else:
                    print('Error: not a DXT5 texture file')
        else:
            print('Error: file not found')
    # Ask whether the user supplies a diff texture or one should be generated.
    option = input(
        '\nDo you want to supply a diff texture (Y/N)? (Must be equal or 1/4 the size of the supplied textures)\n'
    )
    while True:
        if not option.upper() in ['Y', 'N', 'YES', 'NO']:
            print('Error: invalid option')
        else:
            break
        option = input()
    if option.upper() in ['Y', 'YES']:
        # User-supplied diff: must be a recognizable image whose dimensions
        # equal the originals or exactly one quarter of them per axis.
        while True:
            DiffTex = input('\nEnter diff texture filepath\n')
            if DiffTex == OgTex or DiffTex == ModTex:
                print('Error: texture is the same')
            elif os.path.isfile(DiffTex):
                isimg = imghdr.what(DiffTex)
                if isimg != None:
                    with Image(filename=DiffTex) as dif:
                        difw, difh = dif.size
                        if (difw == ogw
                                and difh == ogh) or (difw == ogw / 4
                                                     and difh == ogh / 4):
                            break
                        else:
                            print(
                                'Error: diff texture size is not equal or 1/4 the size of the supplied textures'
                            )
                else:
                    print('Error: file is not a recognized image format')
            else:
                print('Error: file not found')
    else:
        gendiff(OgTex, ModTex)
    # NOTE(review): when the user answers N, DiffTex is never assigned, so the
    # call below raises NameError — confirm how gendiff/process are meant to
    # interact before fixing.
    process(OgTex, ModTex, DiffTex)
def up_photo():
    """Handle a photo upload from the web page and store it into HDFS.

    Saves the uploaded file under static/photo/, extracts its type (via
    imghdr), its inner text (via Common.readImageText) and a base64 data URI,
    then appends a (path, features, binary) row to the HDFS parquet dataset.
    Redirects back to '/' on success or when no file was posted; returns a
    JSON error for unsupported file types.
    """
    img = request.files.get('photo')  # photo field from the upload form

    if not img:
        return redirect('/')
    if not (img and allowed_file(img.filename)):
        return jsonify({"error": 1001, "msg": "Only support .png .PNG .jpg .JPG .bmp .gif"})

    # Save the image into the local static/photo directory first.
    path = basedir+"/static/photo/"
    imgfilename=img.filename.encode("utf-8").decode("latin1")
    file_path = path+imgfilename
    img.save(file_path)

    # Get type, inner text and base64 of that image.
    # NOTE(review): imgType is None when imghdr cannot identify the file,
    # which would make the string concatenation below raise — confirm that
    # allowed_file() guarantees a recognizable image.
    imgType = imghdr.what(file_path)
    # BUG FIX: read via a context manager; the original leaked the handle
    # from open(file_path, 'rb').read().
    with open(file_path, 'rb') as image_fh:
        imagebase64 = base64.b64encode(image_fh.read())
    commonF = Common()
    x=commonF.readImageText(file_path,"all")

    # Normalize the extracted text: strip whitespace/pipes, lower-case,
    # fall back to "NoTag" when nothing was recognized.
    x=re.sub('\s','',x)
    x=x.replace('\n', '').replace(' ', '').replace('|','')
    x=("NoTag") if x == "" else (x.lower())
    sstring = img.filename +"|"+ x + "|data:image/" +imgType+";base64," + str(imagebase64, 'utf-8')
    nowstring=sstring.encode("utf-8").decode("latin1")

    # Save it into HDFS through Spark.
    conf = SparkConf()
    sc = SparkContext.getOrCreate(conf=conf)
    sqlContext = SQLContext(sc)

    uploadedDF = sc.parallelize( [ (img.filename,x,"data:image/" +imgType+";base64," + str(imagebase64, 'utf-8')) ]).toDF(["path","features","binary"])
    uploadedDF.write.mode('append').parquet(dataFrameUrl)

    print (nowstring)
    return redirect('/')
Example #53
0
def extract_images(resultsDir, pcapFilePath, pathtoBro):
    """Extract image files from a pcap using a Bro/Zeek script.

    Runs extract-all-images.bro over *pcapFilePath* with the Bro binary at
    *pathtoBro*, then scans the local 'extract_files' output directory and
    copies every file that imghdr recognizes as an image into
    <resultsDir>/extractedImages/, recreating the first two path components.
    The 'extract_files' directory is removed afterwards.

    Returns (True, "Success") on success, (False, 'DependencyError') when the
    Bro binary is missing, or (False, "PermissionError") on permission errors.
    """
    imageTypes = [
        'rgb', 'gif', 'pbm', 'pgm', 'ppm', 'tiff', 'rast', 'xbm', 'jpeg',
        'bmp', 'png', 'webp', 'exr'
    ]

    broScript = './apps/Bro/bro/extract-all-images.bro'

    if not os.path.exists(resultsDir):
        os.makedirs(resultsDir)

    if not os.path.exists(pathtoBro):
        return False, 'DependencyError'

    args = [pathtoBro, '-C', '-r', pcapFilePath, broScript]
    p = subprocess.Popen(args)
    # BUG FIX: block until Bro finishes instead of busy-spinning on p.poll().
    p.wait()

    extractedImagesDir = 'extractedImages'
    extractedFilesDir = 'extract_files'
    imagesRoot = os.path.join(resultsDir, extractedImagesDir)
    if not os.path.exists(imagesRoot):
        os.makedirs(imagesRoot)

    for root, dirs, files in os.walk(extractedFilesDir):
        path = os.path.split(root)

        for fil in files:
            try:
                filesDirPath = os.path.join(root, fil)
                print(len(path) * '---', filesDirPath)

                # Path relative to the extraction root, e.g. "a/b/file.png".
                imagesDirPath = filesDirPath[len(extractedFilesDir) + 1:]

                imageType = imghdr.what(filesDirPath)
                if imageType in imageTypes:
                    # Recreate the first two directory levels under the
                    # extracted-images root before copying.
                    imagesDirs = imagesDirPath.split("/")
                    levelOne = os.path.join(imagesRoot, imagesDirs[0])
                    if not os.path.exists(levelOne):
                        os.makedirs(levelOne)
                    levelTwo = os.path.join(levelOne, imagesDirs[1])
                    if not os.path.exists(levelTwo):
                        os.makedirs(levelTwo)

                    print(fil, 'is:', imageType)
                    try:
                        shutil.copyfile(
                            filesDirPath,
                            os.path.join(imagesRoot, imagesDirPath))
                    except Exception as e:
                        print(e)

            except PermissionError as e:
                print(e)
                return False, "PermissionError"
            except FileNotFoundError as e:
                # Best-effort: skip files that vanished during the walk.
                print(e)

    shutil.rmtree(extractedFilesDir)
    return True, "Success"
Example #54
0
def path_is_image(p: Path) -> bool:
    """Report whether the file at *p* is in a format imghdr can identify."""
    detected = imghdr.what(p)
    return detected is not None
Example #55
0
 def try_use(f):
     """Return f[0] when it names a recognizable image file, else None.

     None is returned both when *f* is empty/falsy and when imghdr cannot
     identify the format of the file at f[0].
     """
     if not f:
         return None
     return f[0] if imghdr.what(f[0]) else None
Example #56
0
    def generateCountsGraph(
        self,
        counts,
        sitename,
        widthpx=648,
        resol=72,
    ):
        '''
            Static function to generate graph file via R.
            Graphs *all* of the counts records contained in counts List.

            Returns an open file object for the rendered PNG, or for the
            configured "no data" image when counts is empty.
        '''
        import os

        log = logging.getLogger()
        from rpy import r as robj

        log.info('Generating graph for %d counts from site %s' %
                 (len(counts), sitename))

        # Calculate graph image information
        widthpx = int(widthpx)
        imgwidth = int(float(widthpx) / float(resol))
        ratio = float(self.config.get('data', 'graphratio'))
        # BUG FIX: the original line ended with a stray trailing comma, which
        # made imgheight a 1-tuple instead of an int (compare the sibling
        # generateCountsGraph2, which has no comma).
        imgheight = int((float(widthpx) * ratio) / float(resol))

        counts_data = {"datetime": [], "c1": []}
        (fd, tmpgraphfile) = mkstemp()
        # BUG FIX: close the raw descriptor from mkstemp; R writes to the
        # path itself and the fd was previously leaked.
        os.close(fd)
        log.debug("Temp graph filename = %s" % tmpgraphfile)

        # Unpack CountsRecords into parallel epoch-seconds / counts lists.
        for cr in counts:
            epochsecs = time.mktime(cr.datetime.timetuple())
            counts_data["datetime"].append(epochsecs)
            counts_data["c1"].append(cr.c1)

        cts = counts_data['c1']
        ctm = counts_data['datetime']
        if len(cts) > 0:
            # Render the plot through Rpy into the temp PNG file.
            robj.bitmap(
                tmpgraphfile,
                type="png256",
                width=imgwidth,
                height=imgheight,
                res=resol,
            )

            ymin = int(self.config.get('data', 'counts.graph.ylim.min'))
            ymax = int(self.config.get('data', 'counts.graph.ylim.max'))
            robj.plot(ctm,
                      cts,
                      col="black",
                      main="Counts: %s" % sitename,
                      xlab="Time: (secs since 1970)",
                      ylab="Counts/min",
                      type="l",
                      ylim=(ymin, ymax))
            robj.dev_off()
            import imghdr
            imgtype = imghdr.what(tmpgraphfile)
            log.debug(
                "DataHandler.generateCountsGraph(): OK: What is our tempfile? = %s"
                % tmpgraphfile)
            # NOTE(review): opened in text mode although the content is a
            # binary PNG — callers appear to expect this; confirm before
            # switching to 'rb'.
            f = open(tmpgraphfile)
        else:
            log.debug(
                "DataHandler.generateCountsGraph(): No data. Generating proper error image..."
            )
            f = open(self.config.get('data', 'nodatapng'))
        return f
Example #57
0
    # NOTE(review): this block sits inside a product-scraping loop whose
    # header is above this excerpt; pdt/products/url/shop_id/headers are bound
    # there — confirm against the full script.
    # Download the main product image when the product belongs to one of the
    # tracked brands, then record its final path and content hash.
    if 'pdct_img_main_url' in pdt and pdt[
            'pdct_img_main_url'] and brm.find_brand(
                pdt['pdct_name_on_eretailer'])['brand'] in mh_brands:
        print(pdt['pdct_name_on_eretailer'] + "." +
              pdt['pdct_img_main_url'].split('.')[-1])
        response = requests.get(pdt['pdct_img_main_url'],
                                stream=True,
                                verify=False,
                                headers=headers)
        # response.raw.decode_content = True
        # Temp file name is keyed by the hash of the image URL.
        tmp_file_path = '/tmp/' + shop_id + 'mhers_tmp_{}.imgtype'.format(
            abs(hash(pdt['pdct_img_main_url'])))
        img_path = img_path_namer(shop_id, pdt['pdct_name_on_eretailer'])
        with open(tmp_file_path, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
        # Only keep the download if imghdr recognizes it as an image; swap
        # the placeholder extension for the detected format.
        if imghdr.what(tmp_file_path) is not None:
            img_path = img_path.split('.')[0] + '.' + imghdr.what(
                '/tmp/' + shop_id + 'mhers_tmp_{}.imgtype'.format(
                    abs(hash(pdt['pdct_img_main_url']))))
            shutil.copyfile(
                '/tmp/' + shop_id + 'mhers_tmp_{}.imgtype'.format(
                    abs(hash(pdt['pdct_img_main_url']))), img_path)
            products[url].update({
                'img_path': img_path,
                'img_hash': file_hash(img_path)
            })

# Final export: write the CSVs, validate them, and shut down the browser.
create_csvs(products, categories, searches, shop_id,
            fpath_namer(shop_id, 'raw_csv'), COLLECTION_DATE)
validate_raw_files(fpath_namer(shop_id, 'raw_csv'))
driver.quit()
Example #58
0
    def generateCountsGraph2(
        self,
        counts,
        sitename,
        widthpx=648,
        resol=72,
    ):
        '''
                Static function to generate graph file via R.
                Graphs *all* of the counts records contained in counts List
                This one uses more in-R processing to handle dates/times (since
                Rpy doesn't do automatic conversions). 

                Returns an open file object for the rendered PNG, or for the
                configured "no data" image when counts is empty.
            '''
        log = logging.getLogger()
        log.info('Generating graph for %d counts from site %s' %
                 (len(counts), sitename))

        from rpy import r as robj

        # Calculate graph image information
        ratio = float(self.config.get('data', 'graphratio'))
        widthpx = int(widthpx)
        imgwidth = int(float(widthpx) / float(resol))
        imgheight = int(((float(widthpx) * ratio) / float(resol)))
        resol = int(resol)

        # Get unused file/name to put image data into...
        # NOTE(review): the fd returned by mkstemp is never closed here — a
        # descriptor leak; R only needs the path.
        (fd, tmpgraphfile) = mkstemp()
        log.debug("Temp graph filename = %s" % tmpgraphfile)

        # Unpack CountsRecords into counts and timestamps.
        cts = []
        ctm = []
        for cr in counts:
            # cr.datetime = "2008-02-11 12:07:08.112117"
            # cr.c1 = 5440
            cts.append(cr.c1)
            ctm.append(str(cr.datetime))

        log.debug("Got list of %d counts." % len(cts))

        # If there is data for a graph, import into R.
        if len(cts) > 0:
            robj.assign('rcts', cts)
            robj.assign('rctm', ctm)

            # Convert timestamps to POSIXct objects within R.
            # datpt <- as.POSIXct(strptime(dat,format="%Y-%m-%d %H:%M:%S"))
            robj(
                '''rctmpct <- as.POSIXct(strptime(rctm, format="%Y-%m-%d %H:%M:%S"))'''
            )
            # Open the PNG bitmap device targeting the temp file.
            cmdstring = 'bitmap( "%s", type="png256", width=%s, height=%s, res=%s)' % (
                tmpgraphfile, imgwidth, imgheight, resol)
            log.debug("R cmdstring is %s" % cmdstring)
            robj(cmdstring)
            log.debug("Completed R command string %s" % cmdstring)

            ymin = int(self.config.get('data', 'counts.graph.ylim.min'))
            ymax = int(self.config.get('data', 'counts.graph.ylim.max'))
            #xlabel = " ctm[%s] -- ctm[%s] " % ("0",str( len(ctm)-1))
            # Label the x axis with the first and last timestamps.
            xlabel = " %s -- %s " % (ctm[0], ctm[len(ctm) - 1])
            cmdstring = 'plot( rctmpct, rcts, col="black",main="Counts: %s", xlab="Dates:  %s",ylab="Counts/min",type="l", ylim=c(%d,%d) )' % (
                sitename, xlabel, ymin, ymax)
            log.debug("R cmdstring is %s" % cmdstring)
            robj(cmdstring)
            log.debug("Completed R command string %s" % cmdstring)
            robj.dev_off()

            # Pull written image and return to caller
            import imghdr
            imgtype = imghdr.what(tmpgraphfile)
            log.debug("OK: What is our tempfile? = %s" % tmpgraphfile)
            # NOTE(review): the PNG is opened in text mode — confirm callers
            # expect this before changing to 'rb'.
            f = open(tmpgraphfile)
        else:
            log.debug("No data. Generating proper error image...")
            f = open(self.config.get('data', 'nodatapng'))
        return f
Example #59
0
	def getTile(self, laykey, col, row, zoom, toDstGrid=True, useCache=True):
		"""
		Return bytes data of requested tile
		Return None if unable to get valid data
		Tile is downloaded from map service or directly pick up from cache database if useCache option is True
		"""

		#Select tile matrix set
		if toDstGrid:
			if self.dstGridKey is not None:
				tm = self.dstTms
			else:
				raise ValueError('No destination grid defined')
		else:
			tm = self.srcTms

		#don't try to get tiles out of map bounds
		x,y = tm.getTileCoords(col, row, zoom) #top left
		if row < 0 or col < 0:
			return None
		elif not tm.xmin <= x < tm.xmax or not tm.ymin < y <= tm.ymax:
			return None

		if useCache:
			#check if tile already exists in cache
			cache = self.getCache(laykey, toDstGrid)
			data = cache.getTile(col, row, zoom)

			#if so check if its a valid image (sniff the format from the bytes)
			if data is not None:
				format = imghdr.what(None, data)
				if format is not None:
					return data

		#if tile does not exists in cache or is corrupted, try to download it from map service
		if not toDstGrid:

			data = self.downloadTile(laykey, col, row, zoom)

		else: # build a reprojected tile

			#get tile bbox
			bbox = self.dstTms.getTileBbox(col, row, zoom)
			xmin, ymin, xmax, ymax = bbox

			#get closest zoom level
			res = self.dstTms.getRes(zoom)
			if self.dstTms.units == 'degrees' and self.srcTms.units == 'meters':
				res2 = dd2meters(res)
			elif self.srcTms.units == 'degrees' and self.dstTms.units == 'meters':
				res2 = meters2dd(res)
			else:
				res2 = res
			_zoom = self.srcTms.getNearestZoom(res2)
			_res = self.srcTms.getRes(_zoom)

			#reproj bbox
			crs1, crs2 = self.srcTms.CRS, self.dstTms.CRS
			try:
				_bbox = reprojBbox(crs2, crs1, bbox)
			except Exception as e:
				print('WARN : cannot reproj tile bbox - ' + str(e))
				return None

			#list, download and merge the tiles required to build this one (recursive call)
			mosaic = self.getImage(laykey, _bbox, _zoom, toDstGrid=False, useCache=True, nbThread=4, cpt=False, allowEmptyTile=False)

			if mosaic is None:
				return None

			tileSize = self.dstTms.tileSize

			img = reprojImg(crs1, crs2, mosaic, out_ul=(xmin,ymax), out_size=(tileSize,tileSize), out_res=res, resamplAlg=self.RESAMP_ALG)

			#Get BLOB
			b = io.BytesIO()
			img.save(b, format='PNG')
			data = b.getvalue() #convert bytesio to bytes

		#put the tile in cache database
		if useCache and data is not None:
			# BUG FIX: cache under the requested zoom level; the original used
			# self.zoom, which can differ from the 'zoom' argument and would
			# store the tile at the wrong level.
			cache.putTile(col, row, zoom, data)

		return data
Example #60
0
def main():

    ################################
    ''' Makes sure that the path to the URL text file is provided. If not, an error message is displayed. '''

    if len(sys.argv) < 2:
        print 'Please run the code with a valid path to the URL text file as its argument.'
        sys.exit(2)

    ################################
    ''' Storing the path to the file. '''

    filename = sys.argv[1]

    ################################
    ''' Tries to open the file specified, and store its content in a list, and if the procedure fails, an error message is displayed. '''

    try:
        with open(filename) as f:
            list_of_urls = f.readlines()
    except IOError:
        print 'Path or filename provided is not valid.'
        sys.exit(2)

    ################################
    ''' Strips the list of urls of blank lines '''

    list_of_urls = map(lambda s: s.strip(), list_of_urls)
    list_of_urls = filter(None, list_of_urls)

    ################################
    ''' If it does not yet exist, creates a local directory for saving the images. '''

    path_images = './IMAGES'
    if not os.path.exists(path_images):
        os.makedirs(path_images)
    print 'Saving images in {}'.format(path_images)

    ################################
    ''' If it does not yet exist, creates a directory for storing the error log. Also opens the error log file.'''

    path_error_log = './ERROR_LOG'
    if not os.path.exists(path_error_log):
        os.makedirs(path_error_log)
    error_log_file = open("{}/error_log.txt".format(path_error_log), "w")
    print 'Saving error log in {}'.format(path_error_log)

    ################################
    ''' Creates two counter indices to count the number of failures, either in the validity of the image files or the urls. '''

    index_image_fail = 0
    index_url_fail = 0

    ################################
    '''  For-loop going through each of the urls and downloading the images. '''

    for i in range(0, len(list_of_urls)):
        # Tries connecting to the URL and retrieving the image file.
        try:
            # Path for saving the image specified.
            image = "{}/image_{}.jpg".format(path_images, i)
            # Retrieves the image from the URL.
            urllib.urlretrieve(list_of_urls[i], image)
            # If the image retrieved is a corrupted/not valid file type, it deletes it.
            if imghdr.what(image) == None:
                os.remove(image)
                # If it is the first image file failure, creates a list and stores the URL of the file in it.
                # Then keeps adding subsequent failures to the list
                if index_image_fail == 0:
                    list_image_fail = [list_of_urls[i]]
                else:
                    list_image_fail.append(list_of_urls[i])
                # Counts the number of image file failures
                index_image_fail += 1
        # If retrieving the file from the URL fails, stores the url failures in a separate failure list and counts the instances.
        except IOError:
            if index_url_fail == 0:
                list_url_fail = [list_of_urls[i]]
            else:
                list_url_fail.append(list_of_urls[i])
            index_url_fail += 1

    ################################
    ''' ERROR LOG FILE '''
    # If the list of image file failures is not empty, stores the URLs matching the error in the error log file.
    try:
        length = len(list_image_fail)
        error_log_file.write(
            'The image in the following URLs is not a valid image:\n')
        for i in range(0, length):
            error_log_file.write('{}\n'.format(list_image_fail[i]))
    # If the list is empty, says all files are valid in the error log file.
    except NameError:
        error_log_file.write('All files downloaded were valid images.\n')

    # If the list of url failures is not empty, stores the URLs matching the error in the error log file
    try:
        length = len(list_url_fail)
        error_log_file.write('The following URLs could not be reached:\n')
        for i in range(0, length):
            error_log_file.write('{}\n'.format(list_url_fail[i]))
    # If the list is empty, says all files are valid in the error log file.
    except NameError:
        error_log_file.write('All URLs were valid.\n')
    # Closes error log file.
    error_log_file.close()

    ################################

    return None