def sz(fname):
    """Return the pixel size of a PNG/GIF/JPEG image file.

    Returns:
        (width, height) on success, None when the file is too short or
        malformed, and -1 when the format is unrecognized (kept for
        backward compatibility with existing callers).
    """
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            return
        # Probe the format once; the original called imghdr.what() per
        # branch, re-opening and re-reading the file up to three times.
        kind = imghdr.what(fname)
        if kind == 'png':
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                return
            width, height = struct.unpack('>ii', head[16:24])
        elif kind == 'gif':
            width, height = struct.unpack('<HH', head[6:10])
        elif kind == 'jpeg':
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                # Scan segment-by-segment until a SOFn marker (0xC0-0xCF).
                while not 0xc0 <= ftype <= 0xcf:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack('>HH', fhandle.read(4))
            except Exception:  # IGNORE:W0703
                return
        else:
            return -1
        return width, height
def get_image_size_old(fname):
    '''Determine the image type of *fname* and return its size info. from draco

    Returns a dict with keys 'w' (width), 'h' (height), 'ar' (aspect
    ratio height/width) and 'sz' (pixel count), or None when the file is
    too short, malformed, or not a PNG/GIF/JPEG.
    '''
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            return
        # Probe the format once instead of re-reading the file per branch.
        kind = imghdr.what(fname)
        if kind == 'png':
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                return
            width, height = struct.unpack('>ii', head[16:24])
        elif kind == 'gif':
            width, height = struct.unpack('<HH', head[6:10])
        elif kind == 'jpeg':
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                # Walk JPEG segments until a SOFn marker (0xC0-0xCF).
                while not 0xc0 <= ftype <= 0xcf:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack('>HH', fhandle.read(4))
            except Exception:  # IGNORE:W0703
                return
        else:
            return
    return {'w': width, 'h': height, 'ar': height / width, 'sz': height * width}
def is_image_allowed(self):
    """Validate the uploaded avatar: the file name suffix must be in the
    configured whitelist and the actual content must be JPEG or PNG."""
    upload = self.user_avatar.data
    suffix = upload.filename.lower()[-4:]
    if suffix not in current_app.config['ALLOWED_IMG_EXTENSIONS']:
        raise ValidationError('Invalid file extension')
    if imghdr.what(upload.filename) not in ('jpeg', 'png'):
        raise ValidationError('Invalid image format')
    return True
def mk_clean_images(user_images): """ A function that sorts non images and non jpegs. Predicting that the given directory won't always be consisting of photos and only jpegs, mk_clean_images filters out everything that is not a jpeg. After the filtering, a list of clean_images is left - images that won't make the program crash. Args: user_images - chosen user image dir Returns: clean_images - list of only jpegs """ clean_images = [] # Changes cwd to images dir to loop. os.chdir(user_images) # For all images in user_images directory. for lone_image in os.listdir(os.getcwd()): try: # If image has a jpeg extension, add it to clean_images. if imghdr.what(lone_image) == 'jpeg': clean_images.append(lone_image) # If image does not have jpeg extension, do not add to clean_images. elif imghdr.what(lone_image) is not "jpeg": # Will pass True if the file is actually an image. if imghdr.what(lone_image) in image_values: print "%s: not a jpeg. \n" % lone_image # To handle those pesky non images. except IOError: print "%s: not an image. \n" % lone_image continue return clean_images
def _dimensions(self):
    """Return (width, height) of the image at ``self._filepath``.

    Supports PNG, GIF and JPEG; returns None when the file is too short,
    malformed, or an unsupported format.
    """
    import struct
    import imghdr
    with open(self._filepath, "rb") as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            return
        # Probe the format once; the original re-opened the file for each
        # of the three imghdr.what() comparisons.
        kind = imghdr.what(self._filepath)
        if kind == "png":
            check = struct.unpack(">i", head[4:8])[0]
            if check != 0x0d0a1a0a:
                return
            width, height = struct.unpack(">ii", head[16:24])
        elif kind == "gif":
            width, height = struct.unpack("<HH", head[6:10])
        elif kind == "jpeg":
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                # Walk JPEG segments until a SOFn marker (0xC0-0xCF).
                while not 0xc0 <= ftype <= 0xcf:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack(">H", fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip precision byte.
                height, width = struct.unpack(">HH", fhandle.read(4))
            except Exception:  # IGNORE:W0703
                return
        else:
            return
        return width, height
def validate_image_file(self, field):
    """Reject non-empty uploads whose file name or decoded content is not
    a jpg/png image."""
    if len(field.data.filename) != 0:
        suffix = field.data.filename[-4:].lower()
        if suffix != '.jpg' and suffix != '.png':
            raise ValidationError('Invalid file extension: please select a jpg or png file')
        if imghdr.what(field.data) not in ('jpeg', 'png'):
            raise ValidationError('Invalid image format: please select a jpg or png file')
def get_image_size(filename):
    '''Determine the image type of *filename* and return (width, height).

    Raises:
        ValueError: when the file is too short, malformed, or not a
            jpeg/gif/png image.
    '''
    # Fixed: the original opened the file without ever closing it.
    with open(filename, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            raise ValueError('{} is less than 24 bytes, cannot be a valid image.'.format(filename))
        # Probe the format once instead of re-reading the file per branch.
        kind = imghdr.what(filename)
        if kind == 'png':
            check = struct.unpack(str('>i'), head[4:8])[0]
            if check != 0x0d0a1a0a:
                raise ValueError('{} is not a valid png file.'.format(filename))
            width, height = struct.unpack(str('>ii'), head[16:24])
        elif kind == 'gif':
            width, height = struct.unpack(str('<HH'), head[6:10])
        elif kind == 'jpeg':
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                # Walk JPEG segments until a SOFn marker (0xC0-0xCF).
                while not 0xc0 <= ftype <= 0xcf:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack(str('>H'), fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack(str('>HH'), fhandle.read(4))
            except Exception:  # narrowed from a bare except
                raise ValueError('{} is not a valid jpeg file.'.format(filename))
        else:
            raise ValueError('{} must be a jpeg, gif or png image.'.format(filename))
    return width, height
def get_image_size(image_fhandle):
    """Return (width, height) for a PNG/GIF/JPEG held in a BytesIO-like
    buffer, or (None, None) when the size cannot be determined."""
    img_bytes = image_fhandle.getvalue()
    img_header = image_fhandle.read(24)
    if len(img_header) != 24:
        return None, None
    # Probe the format once instead of once per branch.
    kind = imghdr.what(None, img_bytes)
    if kind == 'png':
        check = struct.unpack('>i', img_header[4:8])[0]
        if check != 0x0d0a1a0a:
            return None, None
        width, height = struct.unpack('>ii', img_header[16:24])
    elif kind == 'gif':
        width, height = struct.unpack('<HH', img_header[6:10])
    elif kind == 'jpeg':
        try:
            image_fhandle.seek(0)
            size = 2
            ftype = 0
            # Walk JPEG segments until a SOFn marker (0xC0-0xCF).
            while not 0xc0 <= ftype <= 0xcf:
                image_fhandle.seek(size, 1)
                byte = image_fhandle.read(1)
                while ord(byte) == 0xff:
                    byte = image_fhandle.read(1)
                ftype = ord(byte)
                size = struct.unpack('>H', image_fhandle.read(2))[0] - 2
            image_fhandle.seek(1, 1)  # skip the SOFn precision byte
            # Fixed: a SOFn block stores height before width; the original
            # unpacked them as (width, height) and returned swapped
            # dimensions for every JPEG.
            height, width = struct.unpack('>HH', image_fhandle.read(4))
        except Exception:
            return None, None
    else:
        return None, None
    return width, height
def get_image_size(fname):
    """Determine the image type of *fname* and return (width, height). from draco

    Returns None when the file is too short, malformed, or unsupported.
    """
    # Fixed: the original opened the file without ever closing it.
    with open(fname, "rb") as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            return
        # Probe the format once instead of re-reading the file per branch.
        kind = imghdr.what(fname)
        if kind == "png":
            check = struct.unpack(">i", head[4:8])[0]
            if check != 0x0D0A1A0A:
                return
            width, height = struct.unpack(">ii", head[16:24])
        elif kind == "gif":
            width, height = struct.unpack("<HH", head[6:10])
        elif kind == "jpeg":
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                # Walk JPEG segments until a SOFn marker (0xC0-0xCF).
                while not 0xC0 <= ftype <= 0xCF:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xFF:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack(">H", fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack(">HH", fhandle.read(4))
            except Exception:  # IGNORE:W0703
                return
        else:
            return
        return width, height
def getImageSize(fname):
    """Return (width, height) for a PNG/GIF/JPEG file.

    Raises RuntimeError for short headers, a failed PNG check, or an
    unrecognized format.
    """
    with open(fname, 'rb') as handle:
        header = handle.read(24)
        if len(header) != 24:
            raise RuntimeError("Invalid Header")
        detected = imghdr.what(fname)
        if detected == 'png':
            check = struct.unpack('>i', header[4:8])[0]
            if check != 0x0d0a1a0a:
                raise RuntimeError("PNG: Invalid check")
            width, height = struct.unpack('>ii', header[16:24])
        elif detected == 'gif':
            width, height = struct.unpack('<HH', header[6:10])
        elif detected == 'jpeg':
            handle.seek(0)
            segment_len = 2
            marker = 0
            # Hop between JPEG segments until a SOFn marker turns up.
            while not 0xc0 <= marker <= 0xcf:
                handle.seek(segment_len, 1)
                current = handle.read(1)
                while ord(current) == 0xff:
                    current = handle.read(1)
                marker = ord(current)
                segment_len = struct.unpack('>H', handle.read(2))[0] - 2
            handle.seek(1, 1)  # skip the SOFn precision byte
            height, width = struct.unpack('>HH', handle.read(4))
        else:
            raise RuntimeError("Unsupported format")
        return width, height
def test_thumbnails():
    """
    Test thumbnails generation.
    1. Create a video (+audio) from gst's videotestsrc
    2. Capture thumbnail
    3. Everything should get removed because of temp files usage
    """
    # Removed a garbled leftover fragment ("# data create_data() as ...")
    # that was dead text from an earlier edit.
    test_formats = [(".png", "png"), (".jpg", "jpeg"), (".gif", "gif")]
    # `expected_format` renamed from `format`, which shadowed the builtin.
    for suffix, expected_format in test_formats:
        with create_data(suffix) as (video_name, thumbnail_name):
            capture_thumb(video_name, thumbnail_name, width=40)
            # check result file format
            assert imghdr.what(thumbnail_name) == expected_format
            # TODO: check height and width

    # FIXME: it doesn't work with small width, say, 10px. This should be
    # fixed somehow
    suffix, expected_format = test_formats[0]
    with create_data(suffix, True) as (video_name, thumbnail_name):
        capture_thumb(video_name, thumbnail_name, width=40)
        assert imghdr.what(thumbnail_name) == expected_format
    with create_data(suffix, True) as (video_name, thumbnail_name):
        capture_thumb(video_name, thumbnail_name, width=10)  # smaller width
        assert imghdr.what(thumbnail_name) == expected_format
    with create_data(suffix, True) as (video_name, thumbnail_name):
        capture_thumb(video_name, thumbnail_name, width=100)  # bigger width
        assert imghdr.what(thumbnail_name) == expected_format
def get_image_size(fname):
    '''Determine the image type of *fname* and return (width, height). from draco

    Raises:
        TypeError: when the file is too short, an invalid PNG, or an
            unsupported format.
    '''
    # Fixed: use a context manager; the original leaked the file handle.
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            raise TypeError
        # Probe the format once instead of re-reading the file per branch.
        kind = imghdr.what(fname)
        if kind == 'png':
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                raise TypeError
            width, height = struct.unpack('>ii', head[16:24])
        elif kind == 'gif':
            width, height = struct.unpack('<HH', head[6:10])
        elif kind == 'jpeg':
            fhandle.seek(0)  # Read 0xff next
            size = 2
            ftype = 0
            # Walk JPEG segments until a SOFn marker (0xC0-0xCF).
            while not 0xc0 <= ftype <= 0xcf:
                fhandle.seek(size, 1)
                byte = fhandle.read(1)
                while ord(byte) == 0xff:
                    byte = fhandle.read(1)
                ftype = ord(byte)
                size = struct.unpack('>H', fhandle.read(2))[0] - 2
            # We are at a SOFn block
            fhandle.seek(1, 1)  # Skip `precision' byte.
            height, width = struct.unpack('>HH', fhandle.read(4))
        else:
            raise TypeError
        return width, height
def get_image_size(fname):
    # https://stackoverflow.com/questions/8032642/how-to-obtain-image-size-using-standard-python-class-without-using-external-lib
    '''Determine the image type of *fname* and return (width, height, type).

    Raises:
        Exception: when the header is short, the PNG check fails, or the
            format is unsupported.
    '''
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            raise Exception("Invalid header")
        # Probe the format once; the original called imghdr.what() up to
        # four times (including in the return statement).
        kind = imghdr.what(fname)
        if kind == 'png':
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                raise Exception("png checksum failed")
            width, height = struct.unpack('>ii', head[16:24])
        elif kind == 'gif':
            width, height = struct.unpack('<HH', head[6:10])
        elif kind == 'jpeg':
            fhandle.seek(0)  # Read 0xff next
            size = 2
            ftype = 0
            # Walk JPEG segments until a SOFn marker (0xC0-0xCF).
            while not 0xc0 <= ftype <= 0xcf:
                fhandle.seek(size, 1)
                byte = fhandle.read(1)
                while ord(byte) == 0xff:
                    byte = fhandle.read(1)
                ftype = ord(byte)
                size = struct.unpack('>H', fhandle.read(2))[0] - 2
            # We are at a SOFn block
            fhandle.seek(1, 1)  # Skip `precision' byte.
            height, width = struct.unpack('>HH', fhandle.read(4))
        else:
            raise Exception("Invalid handle")
        return width, height, kind
def zip(base_dir):
    """Pack every folder under *base_dir* containing JPEG images into a
    .cbz (zip) comic archive, skipping files that are not JPEGs.

    NOTE(review): this function shadows the builtin ``zip``; Python 2
    (print statements) — confirm before porting.
    """
    from contextlib import closing
    from zipfile import ZipFile, ZIP_DEFLATED
    import imghdr
    # walk the folder tree
    for root, dirs, files in os.walk(base_dir):
        # name of the folder holding the images
        folder_name = os.path.split(root)[1] + r".cbz"
        # check first so we don't create empty zips
        has_imgs = False
        for name in files:
            page = os.path.join(root, name)
            if (imghdr.what(page)) == "jpeg":
                has_imgs = True
        if has_imgs:
            print "criando : " + folder_name
            # build the zip with images only
            with closing(ZipFile(folder_name, "w", ZIP_DEFLATED)) as zipcomic:
                for name in files:
                    page = os.path.join(root, name)
                    if (imghdr.what(page)) == "jpeg":
                        # strip the base path from the archived file name
                        arc_name = page[len(base_dir) + len(os.sep):]
                        zipcomic.write(page, arc_name)
                    else:
                        print "ignorado : " + name + " - incompatível com img"
    # `move` is defined elsewhere in this module; assumed to run once after
    # the walk completes — TODO confirm original indentation.
    move(base_dir)
def download():
    """Download the images listed in each class URL file under
    ``all_10_classes/<class>/``, dropping anything that is not an image.

    Relies on module-level `files` (list of listing file names) and `path`
    (directory holding them).
    """
    for listing in files:  # renamed from `file`, which shadowed the builtin
        print(listing)
        target_dir = "all_10_classes/" + listing[:-4]
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        else:
            print(listing, " already exists, skipping")
            continue
        count = 0
        line = 0
        # Fixed: the listing file and each output file are now closed via
        # context managers (the original leaked both handles).
        with open(path + listing) as f:
            for url in f:
                line += 1
                try:
                    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
                    file_name = target_dir + "/" + listing[:-4] + "_" + str(count)
                    response = urllib.request.urlopen(req, timeout=2)
                    with open(file_name, 'wb') as out_file:
                        shutil.copyfileobj(response, out_file)
                    count += 1
                except Exception:  # narrowed from a bare except
                    print("bad format, or impossible to download line -> ", line)
                    continue
                if os.path.isfile(file_name):
                    # Probe the header once; the original probed three times.
                    kind = imghdr.what(file_name)
                    if kind != "jpeg":
                        print(kind)
                        if kind is None:
                            # Not an image at all — discard it.
                            os.remove(file_name)
                print(line, " saving image # ", count)
def from_html_links_to_inline_imgs(content, inline=True, full_path=True):
    """ Replaces (html) links to attachs with embeded inline images """
    # Strip the site prefix so absolute URLs become relative ones.
    content = re.sub("%s" %settings.SITE_URL, '', content) # changes absolute urls to relative urls
    # Matches both /text/<key>/attach/<attach_key>/ and /attach/<attach_key>/.
    attach_re = r'"(?:/text/(?P<key>\w*))?/attach/(?P<attach_key>\w*)/'
    attach_str_textversion = r'/text/%s/attach/%s/'
    attach_str = r'/attach/%s/'
    for match in re.findall(attach_re, content):
        # match is a (key, attach_key) tuple; key is '' for the short form.
        if match[0]:
            link = attach_str_textversion %match
        else:
            link = attach_str %match[1]
        attach = Attachment.objects.get(key=match[1])
        if inline:
            # Embed the attachment bytes as a base64 data: URI.
            img_fmt = imghdr.what(attach.data.path)
            img = open(attach.data.path, 'rb')
            data = base64.b64encode(img.read())
            img.close()
            # NOTE(review): on Python 3, b64encode returns bytes, so this
            # str concatenation would raise TypeError — presumably Python 2.
            content = content.replace(link, 'data:image/'+img_fmt+';base64,'+data)
        else:
            if full_path:
                # Point the link at the file on disk instead of embedding.
                content = content.replace(link, attach.data.path)
            else:
                # Replace the link with "<attach_key>.<ext>".
                img_fmt = imghdr.what(attach.data.path)
                content = content.replace(link, match[1]+'.'+img_fmt)
    return content
def test_write_bench_results_correct_num(self):
    """Correctly writes the bench results with numerical labels"""
    write_bench_results('bench_data', self.num_data, self.output_dir)

    # Correctly generates the summarized_results.txt file
    summary_path = join(self.output_dir, 'summarized_results.txt')
    with open(summary_path, 'U') as summary_file:
        observed = summary_file.read()
    expected = ("#label\twall_mean\twall_std\tuser_mean\tuser_std\t"
                "kernel_mean\tkernel_std\tmem_mean\tmem_std\n"
                "100\t25\t1\t23\t0.9\t2\t0.1\t1048576\t0.0\n"
                "200\t50\t2\t46\t2\t4\t0.0\t2097152\t0.0\n"
                "300\t75\t3\t70\t2.9\t5\t0.001\t3145728\t0.0\n"
                "400\t100\t4\t94\t4.1\t6\t0.2\t4194304\t0.2\n"
                "500\t125\t5\t123\t5\t2\t0.02\t5242880\t0.0\n")
    self.assertEqual(observed, expected)

    # Correctly generates the curves.txt file
    curves_path = join(self.output_dir, 'curves.txt')
    with open(curves_path, 'U') as curves_file:
        observed = curves_file.read()
    expected = ("Wall time fitted curve\n0.25*x^1 + 0\n"
                "Memory usage fitted curve\n10486*x^1 + 0\n")
    self.assertEqual(observed, expected)

    # Correctly generates the plot figures
    for figure_name in ('time_fig.png', 'mem_fig.png'):
        self.assertEqual(what(join(self.output_dir, figure_name)), 'png')
def validate_image_file(self, field):
    """Reject non-empty uploads whose file name or decoded content is not
    a jpg/png image."""
    if len(field.data.filename) != 0:
        suffix = field.data.filename[-4:].lower()
        if suffix != ".jpg" and suffix != ".png":
            raise ValidationError("Invalid file extension: please select a jpg or png file")
        if imghdr.what(field.data) not in ("jpeg", "png"):
            raise ValidationError("Invalid image format: please select a jpg or png file")
def _get_image_type(image):
    """Return the MIME type ('image/jpeg', 'image/gif' or 'image/png') for
    *image*, or None when the format is not one of those three."""
    # Probe the header once; the original called imghdr.what() up to three
    # times, re-reading the input for each comparison.
    kind = imghdr.what(image)
    if kind in ('jpeg', 'gif', 'png'):
        return 'image/' + kind
def get_image_info(fname):
    '''Determine the image type of *fname* and return a list of
    (label, value) tuples describing it (type, dimensions, extras such as
    JPEG progressiveness or PGM maxval), plus the file size in kb.
    Returns None for short, malformed or unsupported files. from draco'''
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(32)
        if len(head) != 32:
            return
        if imghdr.what(fname) == 'png':
            # Bytes 4-8 of a PNG are the fixed \r\n\x1a\n signature tail.
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                return
            info = [("Type", "png"), ("Dimensions", "%sx%s" % (struct.unpack('>ii', head[16:24])))]
        elif imghdr.what(fname) == 'gif':
            # GIF stores width/height little-endian at offset 6.
            info = [("Type", "gif"), ("Dimensions", "%sx%s" % (struct.unpack('<HH', head[6:10])))]
        elif imghdr.what(fname) == 'jpeg':
            try:
                fhandle.seek(0)
                # Read 0xff next
                size = 2
                ftype = 0
                # Walk JPEG segments until a SOFn marker (0xC0-0xCF).
                while not 0xc0 <= ftype <= 0xcf:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack('>HH', fhandle.read(4))
                # is_progressive() is defined elsewhere in this module.
                info = [("Type", "jpeg"), ("Dimensions", "%sx%s" % (width, height)), ("Progressive", is_progressive(fname))]
            except Exception:  # IGNORE:W0703
                return
        elif imghdr.what(fname) == 'pgm':
            # Parse the ASCII P5 header (comments allowed between fields).
            header, width, height, maxval = re.search(
                b"(^P5\s(?:\s*#.*[\r\n])*"
                b"(\d+)\s(?:\s*#.*[\r\n])*"
                b"(\d+)\s(?:\s*#.*[\r\n])*"
                b"(\d+)\s(?:\s*#.*[\r\n]\s)*)", head).groups()
            info = [("Type", "pgm"), ("Dimensions", "%sx%s" % (width, height)), ("maxval", maxval), ("Header", header)]
        elif imghdr.what(fname) == 'bmp':
            # NOTE(review): this searches the builtin `str`, not `head` —
            # the branch raises TypeError if ever reached; likely a bug
            # carried over from a tool-output parser. Verify and fix.
            _, width, height, depth = re.search(
                b"((\d+)\sx\s"
                b"(\d+)\sx\s"
                b"(\d+))", str).groups()
            info = [("Type", "bmp"), ("Dimensions", "%sx%s" % (width, height)), ("Depth", depth)]
        else:
            return
        info.append(("File size", "%.2f kb" % (os.path.getsize(fname) / 1024)))
        return info
def test_closed_file(self):
    """imghdr.what must raise ValueError for closed file objects, both a
    real file handle and an in-memory BytesIO stream."""
    # Removed the unused `as cm` bindings from both assertRaises contexts.
    stream = open(self.testfile, 'rb')
    stream.close()
    with self.assertRaises(ValueError):
        imghdr.what(stream)
    stream = io.BytesIO(self.testdata)
    stream.close()
    with self.assertRaises(ValueError):
        imghdr.what(stream)
def test_write_comp_results_correct_str(self):
    """Correctly writes the comp results with string labels"""
    write_comp_results('comp_data', self.str_comp_data, self.output_dir)
    # Correctly generates the plot figures
    for figure_name in ('time_fig.png', 'mem_fig.png'):
        self.assertEqual(what(join(self.output_dir, figure_name)), 'png')
def test_data(self):
    """Each sample is recognized whether passed as a path, an open stream,
    raw bytes, or a bytearray."""
    for name, fmt in TEST_FILES:
        path = findfile(name, subdir='imghdrdata')
        self.assertEqual(imghdr.what(path), fmt)
        with open(path, 'rb') as fp:
            self.assertEqual(imghdr.what(fp), fmt)
        with open(path, 'rb') as fp:
            payload = fp.read()
        self.assertEqual(imghdr.what(None, payload), fmt)
        self.assertEqual(imghdr.what(None, bytearray(payload)), fmt)
def test_string_data(self):
    """str (non-bytes) input must raise TypeError, both as a text stream
    and as raw header data."""
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", BytesWarning)
        for name, _ in TEST_FILES:
            path = findfile(name, subdir='imghdrdata')
            with open(path, 'rb') as fp:
                text = fp.read().decode('latin1')
            with self.assertRaises(TypeError):
                imghdr.what(io.StringIO(text))
            with self.assertRaises(TypeError):
                imghdr.what(None, text)
def test_data(self):
    """Each sample is recognized via byte path, unicode path, open stream
    and raw data."""
    for name, fmt in TEST_FILES:
        path = findfile(name, subdir='imghdrdata')
        self.assertEqual(imghdr.what(path), fmt)
        upath = path.decode(sys.getfilesystemencoding())
        self.assertEqual(imghdr.what(upath), fmt)
        with open(path, 'rb') as fp:
            self.assertEqual(imghdr.what(fp), fmt)
        with open(path, 'rb') as fp:
            payload = fp.read()
        self.assertEqual(imghdr.what(None, payload), fmt)
def imgToTrack(tag, image):
    """Attach *image* as the front cover of *tag*.

    GIFs are converted to RGB JPEG via a temporary file first; any other
    recognized format is embedded as-is with a matching MIME type.
    """
    # Probe the format once; the original probed twice.
    kind = imghdr.what(image)
    if kind != "gif":
        tag.addImage(3, image, u"cover")
        tag.getImages()[0].mimeType = 'image/' + kind
    else:
        im = Image.open(image)
        if im.mode != "RGB":
            im = im.convert("RGB")
        im.save("imgs/temp.jpg", "JPEG")
        tag.addImage(3, "imgs/temp.jpg", u"cover")
        tag.getImages()[0].mimeType = 'image/jpeg'
        # Fixed: the original had `os.remove("imgs/temp.jpg" % imgs)`,
        # which raised (no format specifier, and `imgs` was undefined),
        # so the temp file was never deleted.
        os.remove("imgs/temp.jpg")
def test_bad_args(self):
    """Unsupported argument types raise TypeError or AttributeError."""
    with self.assertRaises(TypeError):
        imghdr.what()
    with self.assertRaises(AttributeError):
        imghdr.what(None)
    with self.assertRaises(TypeError):
        imghdr.what(self.testfile, 1)
    with self.assertRaises(AttributeError):
        imghdr.what(os.fsencode(self.testfile))
    with open(self.testfile, 'rb') as stream, self.assertRaises(AttributeError):
        imghdr.what(stream.fileno())
def validate_upload_file(upload, mime_type):
    """Return True when the uploaded file's real content matches the
    declared *mime_type* (jpeg/png/gif image, or a readable PDF);
    False otherwise. Guards against corrupted or mislabeled files."""
    image_kinds = {'image/jpeg': 'jpeg', 'image/png': 'png', 'image/gif': 'gif'}
    if mime_type in image_kinds:
        return imghdr.what(upload) == image_kinds[mime_type]
    if mime_type == 'application/pdf':
        doc = PdfFileReader(upload)
        if doc.numPages >= 0:
            return True
    return False
def addImage(self, img):
    """Store raw image bytes *img* inside the epub under Images/, register
    it in the manifest, and return its relative href.

    The format is sniffed from the bytes themselves, defaulting to 'jpeg'
    when unrecognized (matching the original fallback).
    """
    # Fixed: the original round-tripped the bytes through a temp file
    # opened in TEXT mode ('w'), which breaks for binary data on Python 3
    # and was unnecessary — imghdr can sniff a byte string directly.
    img_type = imghdr.what(None, img) or 'jpeg'
    fileName = 'i' + str(self.imageIndex) + '.' + img_type
    self.epub.writestr('Images/' + fileName, img)
    self.manifest.append('<item id="%(fileName)s" href="Images/%(fileName)s" media-type="image/%(type)s"/>' % {'fileName': fileName, 'type': img_type})
    self.imageIndex += 1
    return '../Images/' + fileName
def get_image_size(fname):
    '''Determine the image type of *fname* and return (width, height).
    Supports PNG, GIF, JPEG, PGM and (nominally) BMP; returns None for
    short, malformed or unsupported files. from draco'''
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(32)
        if len(head) != 32:
            return
        if imghdr.what(fname) == 'png':
            # Bytes 4-8 of a PNG are the fixed \r\n\x1a\n signature tail.
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                return
            width, height = struct.unpack('>ii', head[16:24])
        elif imghdr.what(fname) == 'gif':
            # GIF stores width/height little-endian at offset 6.
            width, height = struct.unpack('<HH', head[6:10])
        elif imghdr.what(fname) == 'jpeg':
            try:
                fhandle.seek(0)
                # Read 0xff next
                size = 2
                ftype = 0
                # Walk JPEG segments until a SOFn marker (0xC0-0xCF).
                while not 0xc0 <= ftype <= 0xcf:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack('>HH', fhandle.read(4))
            except Exception:  # IGNORE:W0703
                return
        elif imghdr.what(fname) == 'pgm':
            # Parse the ASCII P5 header (comments allowed between fields).
            header, width, height, maxval = re.search(
                b"(^P5\s(?:\s*#.*[\r\n])*"
                b"(\d+)\s(?:\s*#.*[\r\n])*"
                b"(\d+)\s(?:\s*#.*[\r\n])*"
                b"(\d+)\s(?:\s*#.*[\r\n]\s)*)", head).groups()
            width = int(width)
            height = int(height)
        elif imghdr.what(fname) == 'bmp':
            # NOTE(review): this searches the builtin `str`, not `head` —
            # the branch raises TypeError if ever reached; verify and fix.
            _, width, height, depth = re.search(
                b"((\d+)\sx\s"
                b"(\d+)\sx\s"
                b"(\d+))", str).groups()
            width = int(width)
            height = int(height)
        else:
            return
        return width, height
def find_plot_pipelines(need_init=True, filter_threshold=FILTER_THRESHOLD):
    r'''Exercise find_event_pipeline() and plot_event_pipeline().

    End-to-end test: optionally fetches the HDF5 cadence, builds DAT
    files, runs event finding, validates the resulting CSV/hit table,
    plots each event, and checks that exactly N_EVENTS PNG files result.
    '''
    main_time_start = time()

    # If configured to do so, initialize temp directory
    # and fetch all of the HDF5 files from the Internet.
    if need_init:
        initialize()
        for filename_h5 in H5_FILE_LIST:
            wgetter(filename_h5)

    # Make all of the DAT files.
    # For each h5, make a dat file.
    make_all_dat_files()

    print('find_plot_pipelines: Filter threshold = ', filter_threshold)
    number_in_cadence = len(H5_FILE_LIST)
    print('find_plot_pipelines: Cadence length = ', number_in_cadence)
    print('find_plot_pipelines: find_event_pipeline({}) ...'
          .format(PATH_DAT_LIST_FILE))

    # With the list of DAT files, do find_event_pipeline()
    df_event = find_event_pipeline(PATH_DAT_LIST_FILE,
                                   filter_threshold=filter_threshold,
                                   number_in_cadence=number_in_cadence,
                                   user_validation=False,
                                   saving=True,
                                   csv_name=PATH_CSVF)

    # CSV file created?
    if not Path(PATH_CSVF).exists():
        raise ValueError('find_plot_pipelines: No CSV of events created')

    # An event CSV was created.
    # Validate the hit table file.
    utl.validate_hittbl(df_event, PATH_CSVF, 'test_pipe_lines', N_EVENTS)

    # Make a list of the HDF5 files.
    print('find_plot_pipelines: making a list of HDF5 files in ({}) ...'
          .format(PATH_DAT_LIST_FILE))
    with open(PATH_H5_LIST_FILE, 'w') as file_handle:
        for filename_h5 in H5_FILE_LIST:
            file_handle.write('{}\n'.format(TESTDIR + filename_h5))

    # Do the plots for all of the HDF5/DAT file pairs.
    print('find_plot_pipelines: plot_event_pipeline({}, {}) ...'
          .format(PATH_CSVF, PATH_H5_LIST_FILE))
    plot_event_pipeline(PATH_CSVF,
                        PATH_H5_LIST_FILE,
                        filter_spec='f{}'.format(filter_threshold),
                        user_validation=False)

    # Check that the right number of PNG files were created.
    outdir_list = listdir(TESTDIR)
    npngs = 0
    for cur_file in outdir_list:
        if cur_file.split('.')[-1] == 'png':
            # Every .png must actually contain PNG data.
            if imghdr.what(TESTDIR + cur_file) != 'png':
                raise ValueError('find_plot_pipelines: File {} is not a PNG file'
                                 .format(cur_file))
            npngs += 1
    if npngs != N_EVENTS:
        raise ValueError('find_plot_pipelines: Expected to find {} PNG files but observed {}'
                         .format(N_EVENTS, npngs))

    # Stop the clock - we're done.
    main_time_stop = time()
    print('find_plot_pipelines: End, et = {:.1f} seconds'
          .format(main_time_stop - main_time_start))
# -*- coding: utf-8 -*-
"""
imghdr_test.py

Demo: detect the real format of an image file from its header bytes.

Created on Sat Apr 20 11:19:17 2019

@author: madhu
"""
import imghdr

file_name = 'oreilly.png'
detected = imghdr.what(file_name)
print('File', file_name, 'is a:', detected)
def _get_internal_emoji(self, fname):
    """Load a bundled emoji image: return (base64 payload, detected format)."""
    full_path = os.path.join(INTERNAL_EMOJI_DIR, fname)
    return get_file_b64(full_path), imghdr.what(full_path)
def read(self, csv_row):
    """Load the image referenced by the configured column of *csv_row*.

    Returns:
        (img_data, img_fmt): raw bytes wrapped in np.void when
        ``self._is_raw_blob`` is set, otherwise the nibabel volume data;
        plus the imghdr-detected format (or None).
    """
    path = str(csv_row[self._column.columns_indexes[0]])
    if self._is_raw_blob:
        # Fixed: the original opened the file in TEXT mode and never
        # closed it; binary mode + context manager.
        with open(path, 'rb') as blob_file:
            img_data = np.void(blob_file.read())
    else:
        img_data = nibabel.load(path).get_data()
    img_fmt = imghdr.what(path)
    return img_data, img_fmt
def _avatar_set(self, msg, avatar_name):
    """Set avatar for Ludolph (admin only).

    Locates *avatar_name* in the allowed avatar directories, then
    publishes it via XEP-0084 (avatar data + metadata) and XEP-0153
    (vCard). Raises CommandError on any validation or publishing failure.
    """
    if os.path.splitext(avatar_name)[-1] not in self._avatar_allowed_extensions:
        raise CommandError('You have requested a file that is not supported')

    avatar = None
    available_avatar_directories = self._get_avatar_dirs()

    for avatar_dir in available_avatar_directories:
        # Create full path to file requested by user
        avatar_file = os.path.join(avatar_dir, avatar_name)
        # Split absolute path for check if user is not trying to jump outside allowed dirs
        path, name = os.path.split(os.path.abspath(avatar_file))
        if path not in available_avatar_directories:
            raise CommandError('You are not allowed to set avatar outside defined directories')
        try:
            with open(avatar_file, 'rb') as f:
                avatar = f.read()
        except (OSError, IOError):
            # Not in this directory — try the next one.
            avatar = None
        else:
            break

    if not avatar:
        raise CommandError('Avatar "%s" has not been found.\n'
                           'You can list available avatars with the command: **avatar-list**' % avatar_name)

    self.xmpp.msg_reply(msg, 'I have found the selected avatar, changing it might take few seconds...', preserve_msg=True)

    xep_0084 = self.xmpp.client.plugin['xep_0084']
    # Sniff the MIME subtype from the bytes ('' filename forces data probe).
    avatar_type = 'image/%s' % imghdr.what('', avatar)
    avatar_id = xep_0084.generate_id(avatar)
    avatar_bytes = len(avatar)

    try:
        logger.debug('Publishing XEP-0084 avatar data')
        xep_0084.publish_avatar(avatar)
    except XMPPError as e:
        logger.error('Could not publish XEP-0084 avatar: %s' % e.text)
        raise CommandError('Could not publish selected avatar')

    try:
        logger.debug('Publishing XEP-0153 avatar vCard data')
        self.xmpp.client.plugin['xep_0153'].set_avatar(avatar=avatar, mtype=avatar_type)
    except XMPPError as e:
        logger.error('Could not publish XEP-0153 vCard avatar: %s' % e.text)
        raise CommandError('Could not set vCard avatar')

    self.xmpp.msg_reply(msg, 'Almost done, please be patient', preserve_msg=True)

    try:
        logger.debug('Advertise XEP-0084 avatar metadata')
        xep_0084.publish_avatar_metadata([{'id': avatar_id, 'type': avatar_type, 'bytes': avatar_bytes}])
    except XMPPError as e:
        logger.error('Could not publish XEP-0084 metadata: %s' % e.text)
        raise CommandError('Could not publish avatar metadata')

    return 'Avatar has been changed :)'
def validate_image(path):
    """Detect the image format of *path*.

    Returns 'jpg' for JPEG files, the imghdr format name for other
    recognized images, and None when the file is not a valid image.
    """
    detected = imghdr.what(path)
    return 'jpg' if detected == 'jpeg' else detected
def readability(self, article, url, opts=None, user=None):
    """Extract the main article content with readability-lxml.

    Because image payloads are held in memory, this function is written
    as a generator to keep peak memory low. Yields image tuples
    (mime, url, filename, content, None, thumbnail-flag) followed by the
    final article tuple (title, None, None, html, brief, thumbnail).
    Python 2 code (uses ``unicode``).
    """
    content = self.preprocess(article)
    if not content:
        return

    # Extract the article body.
    try:
        doc = readability.Document(content, positive_keywords=self.positive_classes)
        summary = doc.summary(html_partial=False)
    except:
        # Extraction failed: the payload may be a bare image (one image
        # serving as a whole article, with no HTML wrapper).
        imgtype = imghdr.what(None, content)
        if imgtype:  # It is an image — wrap it in a minimal HTML container.
            imgmime = r"image/" + imgtype
            fnimg = "img%d.%s" % (self.imgindex, 'jpg' if imgtype == 'jpeg' else imgtype)
            yield (imgmime, url, fnimg, content, None, None)
            tmphtml = '<html><head><title>Picture</title></head><body><img src="%s" /></body></html>' % fnimg
            yield ('Picture', None, None, tmphtml, '', None)
        else:
            self.log.warn('article is invalid.[%s]' % url)
        return

    title = doc.short_title()
    if not title:
        self.log.warn('article has no title.[%s]' % url)
        return

    title = self.processtitle(title)
    soup = BeautifulSoup(summary, "lxml")

    # If readability failed to parse, fall back to the alternative
    # algorithm (not as good, but works in all conditions).
    body = soup.find('body')
    head = soup.find('head')
    if len(body.contents) == 0:
        from simpleextract import simple_extract
        summary = simple_extract(content)
        soup = BeautifulSoup(summary, "lxml")
        body = soup.find('body')
        if not body:
            self.log.warn('extract article content failed.[%s]' % url)
            return
        head = soup.find('head')
        # Add a fallback-algorithm notice — a disclaimer of sorts :)
        info = soup.new_tag('p', style='color:#555555;font-size:60%;text-align:right;')
        info.string = 'extracted by alternative algorithm.'
        body.append(info)
        self.log.info('use alternative algorithm to extract content.')

    if not head:
        head = soup.new_tag('head')
        soup.html.insert(0, head)
    if not head.find('title'):
        t = soup.new_tag('title')
        t.string = title
        head.append(t)

    # Add a content heading if there is none.
    t = body.find(['h1', 'h2'])
    if not t:
        t = soup.new_tag('h2')
        t.string = title
        body.insert(0, t)
    else:
        totallen = 0
        for ps in t.previous_siblings:
            totallen += len(string_of_tag(ps))
            if totallen > 40:  # This H1/H2 appears mid-article — it is not the title.
                t = soup.new_tag('h2')
                t.string = title
                body.insert(0, t)
                break

    if self.remove_tags:
        for tag in soup.find_all(self.remove_tags):
            tag.decompose()
    for id in self.remove_ids:
        for tag in soup.find_all(attrs={"id": id}):
            tag.decompose()
    for cls in self.remove_classes:
        for tag in soup.find_all(attrs={"class": cls}):
            tag.decompose()
    for attr in self.remove_attrs:
        for tag in soup.find_all(attrs={attr: True}):
            del tag[attr]
    for cmt in soup.find_all(text=lambda text: isinstance(text, Comment)):
        cmt.extract()

    # Strip all attributes from <body> so InsertToc can match it with a regex.
    bodyattrs = [attr for attr in body.attrs]
    for attr in bodyattrs:
        del body[attr]

    if self.extra_css:
        sty = soup.new_tag('style', type="text/css")
        sty.string = self.extra_css
        soup.html.head.append(sty)

    self.soupbeforeimage(soup)

    has_imgs = False
    thumbnail = None

    if self.keep_image:
        opener = URLOpener(self.host, timeout=self.timeout)
        for img in soup.find_all('img'):
            # More and more sites lazy-load images; handle the common case.
            # Note: if data-src-style attributes hold a non-real URL,
            # there is nothing we can do.
            imgurl = img['src'] if 'src' in img.attrs else ''
            if not imgurl:
                for attr in img.attrs:
                    if attr != 'src' and 'src' in attr:  # many sites use data-src
                        imgurl = img[attr]
                        break
            if not imgurl:
                img.decompose()
                continue
            if not imgurl.startswith('http'):
                imgurl = self.urljoin(url, imgurl)
            if self.fetch_img_via_ssl and url.startswith('https://'):
                imgurl = imgurl.replace('http://', 'https://')
            if self.isfiltered(imgurl):
                self.log.warn('img filtered : %s' % imgurl)
                img.decompose()
                continue
            imgresult = opener.open(imgurl)
            imgcontent = self.process_image(imgresult.content, opts) if imgresult.status_code == 200 else None
            if imgcontent:
                if len(imgcontent) < self.img_min_size:  # rexdf too small image
                    img.decompose()
                    continue
                imgtype = imghdr.what(None, imgcontent)
                if imgtype:
                    imgmime = r"image/" + imgtype
                    fnimg = "img%d.%s" % (self.imgindex, 'jpg' if imgtype == 'jpeg' else imgtype)
                    img['src'] = fnimg
                    # Use the first image as the TOC thumbnail.
                    if not has_imgs:
                        has_imgs = True
                        thumbnail = imgurl
                        yield (imgmime, imgurl, fnimg, imgcontent, None, True)
                    else:
                        yield (imgmime, imgurl, fnimg, imgcontent, None, None)
                else:
                    img.decompose()
            else:
                self.log.warn('fetch img failed(err:%d):%s' % (imgresult.status_code, imgurl))
                img.decompose()

        # Remove links wrapping images, so a stray tap does not open the browser.
        for img in soup.find_all('img'):
            if img.parent and img.parent.parent and \
                img.parent.name == 'a':
                img.parent.replace_with(img)
    else:
        for img in soup.find_all('img'):
            img.decompose()

    # Convert HTML5 tags to <div>.
    for x in soup.find_all(['article', 'aside', 'header', 'footer', 'nav', 'figcaption', 'figure', 'section', 'time']):
        x.name = 'div'

    self.soupprocessex(soup)

    # Insert share links.
    if user:
        self.AppendShareLinksToArticle(soup, user, url)

    content = unicode(soup)

    # Take the leading part of the article content as the brief/summary.
    brief = u''
    if GENERATE_TOC_DESC:
        for h in body.find_all(['h1', 'h2']):  # drop h1/h2 to avoid duplicating the title
            h.decompose()
        for s in body.stripped_strings:
            brief += unicode(s) + u' '
            if len(brief) >= TOC_DESC_WORD_LIMIT:
                brief = brief[:TOC_DESC_WORD_LIMIT]
                break
    soup = None

    yield (title, None, None, content, brief, thumbnail)
def Items(self, opts=None, user=None):
    """Generator yielding one tuple per article/image in self.feeds.

    For HTML articles: (section, url, title, content, brief, thumbnail).
    For images: (mime, url, filename, content, brief, thumbnail); only the
    first image of an article gets thumbnail=True, the rest get None.
    """
    decoder = AutoDecoder(False)
    timeout = self.timeout
    for section, url in self.feeds:
        opener = URLOpener(self.host, timeout=timeout)
        result = opener.open(url)
        status_code, content = result.status_code, result.content
        if status_code != 200 or not content:
            self.log.warn('fetch article failed(%d):%s.' % (status_code, url))
            continue
        # Prefer the configured page encoding; fall back to auto-detection.
        if self.page_encoding:
            try:
                content = content.decode(self.page_encoding)
            except UnicodeDecodeError:
                content = decoder.decode(content, opener.realurl, result.headers)
        else:
            content = decoder.decode(content, opener.realurl, result.headers)
        content = self.preprocess(content)
        soup = BeautifulSoup(content, "lxml")
        # Ensure the document has a <head> with a <title>.
        head = soup.find('head')
        if not head:
            head = soup.new_tag('head')
            soup.html.insert(0, head)
        if not head.find('title'):
            t = soup.new_tag('title')
            t.string = section
            head.append(t)
        try:
            title = soup.html.head.title.string
        except AttributeError:
            title = section
            #self.log.warn('object soup invalid!(%s)'%url)
            #continue
        title = self.processtitle(title)
        # Keep only the whitelisted tags, rebuilding <body> from matches.
        if self.keep_only_tags:
            body = soup.new_tag('body')
            try:
                if isinstance(self.keep_only_tags, dict):
                    keep_only_tags = [self.keep_only_tags]
                else:
                    keep_only_tags = self.keep_only_tags
                for spec in keep_only_tags:
                    for tag in soup.find('body').find_all(**spec):
                        body.insert(len(body.contents), tag)
                soup.find('body').replace_with(body)
            except AttributeError:  # soup has no body element
                pass
        # Trim everything after/before the configured marker tags.
        for spec in self.remove_tags_after:
            tag = soup.find(**spec)
            remove_beyond(tag, 'next_sibling')
        for spec in self.remove_tags_before:
            tag = soup.find(**spec)
            remove_beyond(tag, 'previous_sibling')
        remove_tags = self.insta_remove_tags + self.remove_tags
        remove_ids = self.insta_remove_ids + self.remove_ids
        remove_classes = self.insta_remove_classes + self.remove_classes
        remove_attrs = self.insta_remove_attrs + self.remove_attrs
        for tag in soup.find_all(remove_tags):
            tag.decompose()
        for id in remove_ids:
            for tag in soup.find_all(attrs={"id": id}):
                tag.decompose()
        for cls in remove_classes:
            for tag in soup.find_all(attrs={"class": cls}):
                tag.decompose()
        for attr in remove_attrs:
            for tag in soup.find_all(attrs={attr: True}):
                del tag[attr]
        for cmt in soup.find_all(
                text=lambda text: isinstance(text, Comment)):
            cmt.extract()
        # Strip every attribute from <body> so InsertToc can later match
        # a bare <body> with a regular expression.
        body = soup.html.body
        bodyattrs = [attr for attr in body.attrs]
        for attr in bodyattrs:
            del body[attr]
        if self.extra_css:
            sty = soup.new_tag('style', type="text/css")
            sty.string = self.extra_css
            soup.html.head.append(sty)
        has_imgs = False
        thumbnail = None
        if self.keep_image:
            self.soupbeforeimage(soup)
            for img in soup.find_all('img'):
                # Many sites lazy-load images; try data-src style attributes
                # when 'src' is missing. If the alternative attribute does
                # not hold a real URL there is nothing more we can do.
                imgurl = img['src'] if 'src' in img.attrs else ''
                if not imgurl:
                    for attr in img.attrs:
                        if attr != 'src' and 'src' in attr:  # e.g. data-src
                            imgurl = img[attr]
                            break
                if not imgurl:
                    img.decompose()
                    continue
                if not imgurl.startswith('http'):
                    imgurl = self.urljoin(url, imgurl)
                if self.fetch_img_via_ssl and url.startswith('https://'):
                    imgurl = imgurl.replace('http://', 'https://')
                if self.isfiltered(imgurl):
                    self.log.warn('img filtered:%s' % imgurl)
                    img.decompose()
                    continue
                imgresult = opener.open(imgurl)
                imgcontent = self.process_image(
                    imgresult.content,
                    opts) if imgresult.status_code == 200 else None
                if imgcontent:
                    if len(imgcontent
                           ) < self.img_min_size:  #rexdf too small image
                        img.decompose()
                        continue
                    # Detect the real image type from the bytes, not the URL.
                    imgtype = imghdr.what(None, imgcontent)
                    if imgtype:
                        imgmime = r"image/" + imgtype
                        fnimg = "img%d.%s" % (self.imgindex, 'jpg'
                                              if imgtype == 'jpeg' else imgtype)
                        img['src'] = fnimg
                        # The first fetched image becomes the TOC thumbnail.
                        if not has_imgs:
                            has_imgs = True
                            thumbnail = imgurl
                            yield (imgmime, imgurl, fnimg, imgcontent, None,
                                   True)
                        else:
                            yield (imgmime, imgurl, fnimg, imgcontent, None,
                                   None)
                    else:
                        img.decompose()
                else:
                    self.log.warn('fetch img failed(err:%d):%s' %
                                  (imgresult.status_code, imgurl))
                    img.decompose()
            # Unwrap <a><img></a> so tapping an image does not open a browser.
            for img in soup.find_all('img'):
                if img.parent and img.parent.parent and \
                   img.parent.name == 'a':
                    img.parent.replace_with(img)
        else:
            for img in soup.find_all('img'):
                img.decompose()
        self.soupprocessex(soup)
        content = unicode(soup)
        # Use the first part of the article text as the TOC description.
        brief = u''
        if GENERATE_TOC_DESC:
            for h in body.find_all(['h1', 'h2']):  # drop h1/h2: duplicate title
                h.decompose()
            for s in body.stripped_strings:
                brief += unicode(s) + u' '
                if len(brief) >= TOC_DESC_WORD_LIMIT:
                    brief = brief[:TOC_DESC_WORD_LIMIT]
                    break
        soup = None
        content = self.postprocess(content)
        yield (section, url, title, content, brief, thumbnail)
def serve(self, rendition):
    """Stream the rendition's image file back to the client.

    Opens the underlying file in binary mode, sniffs the image type with
    imghdr and streams the file with a matching ``image/*`` content type.
    """
    # Open and serve the file
    rendition.file.open('rb')
    # imghdr.what() reads from the stream but restores its position, so the
    # FileWrapper below still streams from the start of the file.
    image_format = imghdr.what(rendition.file)
    # BUGFIX: imghdr.what() returns None for unrecognised data; the original
    # 'image/' + None raised TypeError. Fall back to a generic binary type.
    content_type = ('image/' + image_format) if image_format \
        else 'application/octet-stream'
    return StreamingHttpResponse(FileWrapper(rendition.file),
                                 content_type=content_type)
#!/usr/bin/env python3 import imghdr import os import tempfile import shutil import uuid # Set the directory you want to start from rootDir = '/Volumes/Vanessa/2020-IPHONE-BACKUP-FILE/' newRootDir = '/Volumes/Vanessa/Jane/' for dirName, subdirList, fileList in os.walk(rootDir): print('Found directory: %s' % dirName) for fname in fileList: file = os.path.join(dirName, fname) ext = imghdr.what(file) if ext == 'jpeg': newFile = os.path.join(newRootDir, str(uuid.uuid4()) + '.' + ext) print(newFile) shutil.copy(file, newFile)
def get_file_extension(self, file_name, decoded_file):
    """Detect the image type of *decoded_file* via imghdr and return it as a
    file extension, normalising imghdr's 'jpeg' to the conventional 'jpg'."""
    detected = imghdr.what(file_name, decoded_file)
    if detected == "jpeg":
        return "jpg"
    return detected
def _main(args):
    """Run YOLO_v2 inference over every image in args.test_path and write
    annotated copies (bounding boxes + class labels) to args.output_path.

    Expects: args.model_path (.h5 Keras model), args.anchors_path (one CSV
    line of anchor pairs), args.classes_path (one class name per line),
    args.score_threshold and args.iou_threshold for box filtering.
    """
    model_path = os.path.expanduser(args.model_path)
    assert model_path.endswith('.h5'), 'Keras model must be a .h5 file.'
    anchors_path = os.path.expanduser(args.anchors_path)
    classes_path = os.path.expanduser(args.classes_path)
    test_path = os.path.expanduser(args.test_path)
    output_path = os.path.expanduser(args.output_path)
    if not os.path.exists(output_path):
        print('Creating output path {}'.format(output_path))
        os.mkdir(output_path)
    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.
    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    # Anchors file: a single line of comma-separated floats, reshaped to
    # (num_anchors, 2) pairs.
    with open(anchors_path) as f:
        anchors = f.readline()
        print(" ancharos ", anchors)
        if len(anchors) > 0:
            anchors = [float(x) for x in anchors.split(',')]
            anchors = np.array(anchors).reshape(-1, 2)
    yolo_model = load_model(model_path)
    # Verify model, anchors, and classes are compatible
    num_classes = len(class_names)
    num_anchors = len(anchors)
    # TODO: Assumes dim ordering is channel last
    model_output_channels = yolo_model.layers[-1].output_shape[-1]
    print(num_classes, num_anchors, model_output_channels)
    assert model_output_channels == num_anchors * (num_classes + 5), \
        'Mismatch between model and given anchor and class sizes. ' \
        'Specify matching anchors and classes with --anchors_path and ' \
        '--classes_path flags.'
    print('{} model, anchors, and classes loaded.'.format(model_path))
    # Check if model is fully convolutional, assuming channel last order.
    model_image_size = yolo_model.layers[0].input_shape[1:3]
    is_fixed_size = model_image_size != (None, None)
    # Generate colors for drawing bounding boxes: evenly spaced hues,
    # shuffled with a fixed seed for stable colors across runs.
    hsv_tuples = [(x / len(class_names), 1., 1.)
                  for x in range(len(class_names))]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            colors))
    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    random.seed(None)  # Reset seed to default.
    # Generate output tensor targets for filtered bounding boxes.
    # TODO: Wrap these backend operations with Keras layers.
    yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    boxes, scores, classes = yolo_eval(yolo_outputs,
                                       input_image_shape,
                                       score_threshold=args.score_threshold,
                                       iou_threshold=args.iou_threshold)
    for image_file in os.listdir(test_path):
        # Skip anything imghdr does not recognise as an image.
        try:
            image_type = imghdr.what(os.path.join(test_path, image_file))
            if not image_type:
                continue
        except:
            continue
        image = Image.open(os.path.join(test_path, image_file))
        if is_fixed_size:
            # TODO: When resizing we can use minibatch input.
            resized_image = image.resize(tuple(reversed(model_image_size)),
                                         Image.BICUBIC)
            image_data = np.array(resized_image, dtype='float32')
        else:
            # Due to skip connection + max pooling in YOLO_v2, inputs must have
            # width and height as multiples of 32.
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            resized_image = image.resize(new_image_size, Image.BICUBIC)
            image_data = np.array(resized_image, dtype='float32')
            print(image_data.shape)
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
        out_boxes, out_scores, out_classes = sess.run(
            [boxes, scores, classes],
            feed_dict={
                yolo_model.input: image_data,
                input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })
        print('Found {} boxes for {}'.format(len(out_boxes), image_file))
        font = ImageFont.truetype(
            font='font/FiraMono-Medium.otf',
            size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300
        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            # Clamp box coordinates to the image bounds.
            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            print(label, (left, top), (right, bottom))
            # Draw the label above the box if there is room, otherwise inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])
            # My kingdom for a good redistributable image drawing library.
            # Simulate line thickness by drawing nested 1px rectangles.
            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        image.save(os.path.join(output_path, image_file), quality=90)
    sess.close()
def get_notes(files_found, report_folder, seeker):
    """Parse an iOS Notes SQLite store and emit an HTML/TSV/timeline report.

    For every .sqlite file in *files_found*, joins the note, folder, account
    and attachment rows of ZICCLOUDSYNCINGOBJECT; image attachments
    (jpeg/jpg/png) are saved as thumbnails under *report_folder* and embedded
    in the HTML report.
    """
    data_list = []
    for file_found in files_found:
        file_found = str(file_found)
        if file_found.endswith('.sqlite'):
            db = open_sqlite_db_readonly(file_found)
            cursor = db.cursor()
            # Core Data epoch is 2001-01-01: +978307200 converts to Unix time.
            cursor.execute('''
            SELECT
            DATETIME(TabA.ZCREATIONDATE1+978307200,'UNIXEPOCH'),
            TabA.ZTITLE1,
            TabA.ZSNIPPET,
            TabB.ZTITLE2,
            TabC.ZNAME,
            DATETIME(TabA.ZMODIFICATIONDATE1+978307200,'UNIXEPOCH'),
            case TabA.ZISPASSWORDPROTECTED when 0 then "No" when 1 then "Yes" end,
            TabA.ZPASSWORDHINT,
            case TabA.ZMARKEDFORDELETION when 0 then "No" when 1 then "Yes" end,
            case TabA.ZISPINNED when 0 then "No" when 1 then "Yes" end,
            TabE.ZFILENAME,
            TabE.ZIDENTIFIER,
            TabD.ZFILESIZE,
            TabD.ZTYPEUTI,
            DATETIME(TabD.ZCREATIONDATE+978307200,'UNIXEPOCH'),
            DATETIME(TabD.ZMODIFICATIONDATE+978307200,'UNIXEPOCH')
            FROM ZICCLOUDSYNCINGOBJECT TabA
            INNER JOIN ZICCLOUDSYNCINGOBJECT TabB on TabA.ZFOLDER = TabB.Z_PK
            INNER JOIN ZICCLOUDSYNCINGOBJECT TabC on TabA.ZACCOUNT3 = TabC.Z_PK
            LEFT JOIN ZICCLOUDSYNCINGOBJECT TabD on TabA.Z_PK = TabD.ZNOTE
            LEFT JOIN ZICCLOUDSYNCINGOBJECT TabE on TabD.Z_PK = TabE.ZATTACHMENT1
            WHERE TabA.ZTITLE1 <> ''
            ''')
            all_rows = cursor.fetchall()
            analyzed_file = file_found
            if len(all_rows) > 0:
                for row in all_rows:
                    # row[10]=attachment filename, row[11]=attachment UUID dir.
                    if row[10] is not None and row[11] is not None:
                        attachment_file = join(dirname(analyzed_file),
                                               'Accounts/LocalAccount/Media',
                                               row[11], row[10])
                        attachment_storage_path = dirname(attachment_file)
                        # Only embed thumbnails for formats imghdr recognises
                        # as jpeg/jpg/png.
                        if imghdr.what(attachment_file) == 'jpeg' or imghdr.what(
                                attachment_file) == 'jpg' or imghdr.what(
                                    attachment_file) == 'png':
                            thumbnail_path = join(report_folder,
                                                  'thumbnail_' + row[10])
                            save_original_attachment_as_thumbnail(
                                attachment_file, thumbnail_path)
                            thumbnail = '<img src="{}">'.format(thumbnail_path)
                        else:
                            thumbnail = 'File is not an image or the filetype is not supported yet.'
                    else:
                        thumbnail = ''
                        attachment_storage_path = ''
                    # Insert a '.' every three digits of the raw byte size
                    # for readability (e.g. 1234567 -> 123.456.7).
                    if row[12] is not None:
                        filesize = '.'.join(
                            str(row[12])[i:i + 3]
                            for i in range(0, len(str(row[12])), 3))
                    else:
                        filesize = ''
                    data_list.append(
                        (row[0], row[1], row[2], row[3], row[4], row[5],
                         row[6], row[7], row[8], row[9], thumbnail, row[10],
                         attachment_storage_path, filesize, row[13], row[14],
                         row[15]))
                report = ArtifactHtmlReport('Notes')
                report.start_artifact_report(report_folder, 'Notes')
                report.add_script()
                data_headers = ('Creation Date', 'Note', 'Snippet', 'Folder',
                                'Storage Place', 'Last Modified',
                                'Password Protected', 'Password Hint',
                                'Marked for Deletion', 'Pinned',
                                'Attachment Thumbnail',
                                'Attachment Original Filename',
                                'Attachment Storage Folder',
                                'Attachment Size in KB', 'Attachment Type',
                                'Attachment Creation Date',
                                'Attachment Last Modified')
                # html_no_escape keeps the <img> thumbnail markup renderable.
                report.write_artifact_data_table(
                    data_headers,
                    data_list,
                    analyzed_file,
                    html_no_escape=['Attachment Thumbnail'])
                report.end_artifact_report()
                tsvname = 'Notes'
                tsv(report_folder, data_headers, data_list, tsvname)
                tlactivity = 'Notes'
                timeline(report_folder, tlactivity, data_list, data_headers)
            else:
                logfunc('No Notes available')
            db.close()
    return
def load(filename, **kwargs):
    """Load an image from *filename* and return an Image instance.

    Handles three cases: 'atlas://' URLs (served from the kv.atlas /
    kv.texture caches), zip archives (delegated to ImageLoader.zip_loader,
    may yield an image sequence) and ordinary files (dispatched to the first
    registered loader whose extensions match; the real format is sniffed
    with imghdr so a wrong file extension does not matter).
    """
    # atlas ?
    if filename[:8] == 'atlas://':
        # remove the url
        rfn = filename[8:]
        # last field is the ID
        try:
            rfn, uid = rfn.rsplit('/', 1)
        except ValueError:
            raise ValueError('Image: Invalid %s name for atlas' % filename)

        # search if we already got the atlas loaded
        atlas = Cache.get('kv.atlas', rfn)

        # atlas already loaded, so reupload the missing texture in cache,
        # because when it's not in use, the texture can be removed from the
        # kv.texture cache.
        if atlas:
            texture = atlas[uid]
            fn = 'atlas://%s/%s' % (rfn, uid)
            cid = '{}|{:d}|{:d}'.format(fn, False, 0)
            Cache.append('kv.texture', cid, texture)
            return Image(texture)

        # search with resource
        afn = rfn
        if not afn.endswith('.atlas'):
            afn += '.atlas'
        afn = resource_find(afn)
        if not afn:
            raise Exception('Unable to found %r atlas' % afn)
        atlas = Atlas(afn)
        Cache.append('kv.atlas', rfn, atlas)
        # first time, fill our texture cache.
        for nid, texture in atlas.textures.items():
            fn = 'atlas://%s/%s' % (rfn, nid)
            cid = '{}|{:d}|{:d}'.format(fn, False, 0)
            Cache.append('kv.texture', cid, texture)
        return Image(atlas[uid])

    # extract extensions
    ext = filename.split('.')[-1].lower()

    # prevent url querystrings
    if filename.startswith((('http://', 'https://'))):
        ext = ext.split('?')[0]

    filename = resource_find(filename)

    # special case. When we are trying to load a "zip" file with image, we
    # will use the special zip_loader in ImageLoader. This might return a
    # sequence of images contained in the zip.
    if ext == 'zip':
        return ImageLoader.zip_loader(filename)
    else:
        im = None
        # Get actual image format instead of extension if possible
        ext = imghdr.what(filename) or ext
        for loader in ImageLoader.loaders:
            if ext not in loader.extensions():
                continue
            Logger.debug('Image%s: Load <%s>' %
                         (loader.__name__[11:], filename))
            im = loader(filename, **kwargs)
            break
        if im is None:
            raise Exception('Unknown <%s> type, no loader found.' % ext)
        return im
# Send each receiver a multipart email: plain text, an HTML alternative,
# and a JPEG attachment, delivered via Gmail over SMTP_SSL.
for receiver in receivers:
    msg = EmailMessage()
    msg['Subject'] = 'Look at me!'
    msg['From'] = EMAIL_ADDRESS
    msg['To'] = receiver
    msg.set_content('This is message text!')
    # HTML alternative shown by clients that prefer rich content.
    msg.add_alternative("""\
<!DOCTYPE html>
<html>
    <body>
        <h1 style="color:SlateGray;">This is message text, but in html!</h1>
    </body>
</html>
""", subtype='html')
    with open('./Resources/smiley.jpg', 'rb') as f:
        file_data = f.read()
        # Detect the MIME subtype from file contents rather than trusting
        # the extension.
        file_type = imghdr.what(f.name)
        file_name = f.name
    msg.add_attachment(file_data,
                       maintype='image',
                       subtype=file_type,
                       filename=file_name)
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
        smtp.login(EMAIL_ADDRESS, EMAIL_PASS)
        smtp.send_message(msg)
def readability_by_soup(self, article, url, opts=None, user=None):
    """Manually extract the main content of a web page with BeautifulSoup.

    Implemented as a generator (like Items) to keep per-image memory low:
    yields (mime, url, filename, content, None, thumbnail-flag) for each
    image, then one final (title, None, None, content, brief, thumbnail)
    tuple for the article itself.
    """
    content = self.preprocess(article)
    soup = BeautifulSoup(content, "lxml")
    try:
        title = soup.html.head.title.string
    except AttributeError:
        self.log.warn('object soup invalid!(%s)' % url)
        return
    if not title:
        self.log.warn('article has no title.[%s]' % url)
        return
    title = self.processtitle(title)
    soup.html.head.title.string = title
    # Keep only the whitelisted tags, rebuilding <body> from matches.
    if self.keep_only_tags:
        body = soup.new_tag('body')
        try:
            if isinstance(self.keep_only_tags, dict):
                keep_only_tags = [self.keep_only_tags]
            else:
                keep_only_tags = self.keep_only_tags
            for spec in keep_only_tags:
                for tag in soup.find('body').find_all(**spec):
                    body.insert(len(body.contents), tag)
            soup.find('body').replace_with(body)
        except AttributeError:  # soup has no body element
            pass
    # Trim everything after/before the configured marker tags.
    for spec in self.remove_tags_after:
        tag = soup.find(**spec)
        remove_beyond(tag, 'next_sibling')
    for spec in self.remove_tags_before:
        tag = soup.find(**spec)
        remove_beyond(tag, 'previous_sibling')
    remove_tags = self.insta_remove_tags + self.remove_tags
    remove_ids = self.insta_remove_ids + self.remove_ids
    remove_classes = self.insta_remove_classes + self.remove_classes
    remove_attrs = self.insta_remove_attrs + self.remove_attrs
    for tag in soup.find_all(remove_tags):
        tag.decompose()
    for id in remove_ids:
        for tag in soup.find_all(attrs={"id": id}):
            tag.decompose()
    for cls in remove_classes:
        for tag in soup.find_all(attrs={"class": cls}):
            tag.decompose()
    for attr in remove_attrs:
        for tag in soup.find_all(attrs={attr: True}):
            del tag[attr]
    for cmt in soup.find_all(text=lambda text: isinstance(text, Comment)):
        cmt.extract()
    if self.extra_css:
        sty = soup.new_tag('style', type="text/css")
        sty.string = self.extra_css
        soup.html.head.append(sty)
    self.soupbeforeimage(soup)
    has_imgs = False
    thumbnail = None
    if self.keep_image:
        opener = URLOpener(self.host, timeout=self.timeout)
        for img in soup.find_all('img'):
            # Many sites lazy-load images; try data-src style attributes
            # when 'src' is missing. If the alternative attribute does not
            # hold a real URL there is nothing more we can do.
            imgurl = img['src'] if 'src' in img.attrs else ''
            if not imgurl:
                for attr in img.attrs:
                    if attr != 'src' and 'src' in attr:  # e.g. data-src
                        imgurl = img[attr]
                        break
            if not imgurl:
                img.decompose()
                continue
            if not imgurl.startswith('http'):
                imgurl = self.urljoin(url, imgurl)
            if self.fetch_img_via_ssl and url.startswith('https://'):
                imgurl = imgurl.replace('http://', 'https://')
            if self.isfiltered(imgurl):
                self.log.warn('img filtered:%s' % imgurl)
                img.decompose()
                continue
            imgresult = opener.open(imgurl)
            imgcontent = self.process_image(
                imgresult.content,
                opts) if imgresult.status_code == 200 else None
            if imgcontent:
                if len(imgcontent
                       ) < self.img_min_size:  #rexdf too small image
                    img.decompose()
                    continue
                # Detect the real image type from the bytes, not the URL.
                imgtype = imghdr.what(None, imgcontent)
                if imgtype:
                    imgmime = r"image/" + imgtype
                    fnimg = "img%d.%s" % (self.imgindex, 'jpg'
                                          if imgtype == 'jpeg' else imgtype)
                    img['src'] = fnimg
                    # The first fetched image becomes the TOC thumbnail.
                    if not has_imgs:
                        has_imgs = True
                        thumbnail = imgurl
                        yield (imgmime, imgurl, fnimg, imgcontent, None, True)
                    else:
                        yield (imgmime, imgurl, fnimg, imgcontent, None, None)
                else:
                    img.decompose()
            else:
                self.log.warn('fetch img failed(err:%d):%s' %
                              (imgresult.status_code, imgurl))
                img.decompose()
        # Unwrap <a><img></a> so tapping an image does not open a browser.
        for img in soup.find_all('img'):
            if img.parent and img.parent.parent and \
               img.parent.name == 'a':
                img.parent.replace_with(img)
    else:
        for img in soup.find_all('img'):
            img.decompose()
    # If the article has no heading of its own, insert the title as an <h2>.
    body = soup.html.body
    t = body.find(['h1', 'h2'])
    if not t:
        t = soup.new_tag('h2')
        t.string = title
        body.insert(0, t)
    else:
        totallen = 0
        for ps in t.previous_siblings:
            totallen += len(string_of_tag(ps))
            if totallen > 40:
                # This h1/h2 appears mid-article, so it is not the title;
                # insert the real title at the top.
                t = soup.new_tag('h2')
                t.string = title
                body.insert(0, t)
                break
    # Strip every attribute from <body> so InsertToc can later match a bare
    # <body> with a regular expression.
    bodyattrs = [attr for attr in body.attrs]
    for attr in bodyattrs:
        del body[attr]
    # Downgrade HTML5 semantic tags to <div> for reader compatibility.
    for x in soup.find_all([
            'article', 'aside', 'header', 'footer', 'nav', 'figcaption',
            'figure', 'section', 'time'
    ]):
        x.name = 'div'
    self.soupprocessex(soup)
    # Append share links so the user can save/share from the reader.
    if user:
        self.AppendShareLinksToArticle(soup, user, url)
    content = unicode(soup)
    # Use the first part of the article text as the TOC description.
    brief = u''
    if GENERATE_TOC_DESC:
        for h in body.find_all(['h1', 'h2']):  # drop h1/h2: duplicate title
            h.decompose()
        for s in body.stripped_strings:
            brief += unicode(s) + u' '
            if len(brief) >= TOC_DESC_WORD_LIMIT:
                brief = brief[:TOC_DESC_WORD_LIMIT]
                break
    soup = None
    yield (title, None, None, content, brief, thumbnail)
def _allowed_file(file_obj, ext): import imghdr what = imghdr.what(file_obj) return imghdr.what(file_obj) in ext
def test(self):
    """The detector's captured frame must be a valid JPEG byte stream."""
    captured = self.detector.read_jpeg_bytes()
    detected = imghdr.what(None, captured)
    self.assertEqual(detected, 'jpeg')
def create_tf_example(labels, filename, annotations, debug=False):
    """Build a tf.train.Example for one image and its bounding boxes.

    Based on:
    https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md

    *annotations* is a list of dicts with 'class', 'x', 'y', 'width',
    'height' in pixel units; coordinates are normalised to [0, 1].
    Raises RuntimeError for any image that is not PNG or JPEG.
    """
    if debug:
        print(filename)
    width, height = getSize(filename)  # Image width and height
    encoded_image_data = loadImage(filename)  # Encoded image bytes
    # Determine format from file contents, not the extension.
    if imghdr.what(filename) == 'png':
        image_format = b'png'  # b'jpeg' or b'png'
    elif imghdr.what(filename) == 'jpeg':
        image_format = b'jpeg'
    else:
        raise RuntimeError("Only supports PNG or JPEG images")
    xmins = [
    ]  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [
    ]  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = [
    ]  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [
    ]  # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)
    for a in annotations:
        # Numeric and text class labels
        classes.append(mapLabel(labels, a['class']))
        classes_text.append(a['class'].encode())
        # Scaled min/maxes
        xmins.append(bounds(a['x'] / width))
        ymins.append(bounds(a['y'] / height))
        xmaxs.append(bounds((a['x'] + a['width']) / width))
        ymaxs.append(bounds((a['y'] + a['height']) / height))
        # We got errors: maximum box coordinate value is larger than 1.010000
        valid = lambda x: x >= 0 and x <= 1
        assert valid(xmins[-1]) and valid(ymins[-1]) and valid(xmaxs[-1]) and valid(ymaxs[-1]), \
            "Invalid values for "+filename+": "+ \
            str(xmins[-1])+","+str(ymins[-1])+","+str(xmaxs[-1])+","+str(ymaxs[-1])
    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode()),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode()),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def is_image(file_path):
    """Return True when imghdr recognises the file at *file_path* as an image."""
    detected = imghdr.what(file_path)
    return detected is not None
def getpath():
    """Interactively collect the original, modified and (optional) diff
    texture paths, validating DXT5 headers and matching dimensions, then
    hand them to process().

    The DXT5 check reads the FourCC at offset 0x54 and the width/height at
    offset 0x0C of the DDS header.
    """
    # Loop until a valid original DXT5 texture is supplied.
    while True:
        OgTex = input(
            'Enter original texture filepath or type \"exit()\" to exit the application\n'
        )
        if OgTex == 'exit()':
            sys.exit()
        elif os.path.isfile(OgTex):
            with open(OgTex, 'rb+') as f:
                f.seek(0x0054)
                check = f.read(4)
                if check == b'DXT5':
                    f.seek(0x000C)
                    # Width/height are little-endian 32-bit ints in the header.
                    ogw = int.from_bytes(f.read(4), "little")
                    ogh = int.from_bytes(f.read(4), "little")
                    break
                else:
                    print('Error: not a DXT5 texture file\n')
        else:
            print('Error: file not found\n')
    # Loop until a valid modified texture with matching dimensions is given.
    while True:
        ModTex = input(
            '\nEnter modified texture filepath or type \"exit()\" to exit the application\n'
        )
        if ModTex == 'exit()':
            sys.exit()
        elif ModTex == OgTex:
            print('Error: texture is the same')
        elif os.path.isfile(ModTex):
            with open(ModTex, 'rb+') as f:
                f.seek(0x0054)
                check = f.read(4)
                if check == b'DXT5':
                    f.seek(0x000C)
                    mdw = int.from_bytes(f.read(4), "little")
                    mdh = int.from_bytes(f.read(4), "little")
                    if mdw == ogw and mdh == ogh:
                        break
                    else:
                        print(
                            'Error: modified texture dimensions do not match original texture dimensions'
                        )
                else:
                    print('Error: not a DXT5 texture file')
        else:
            print('Error: file not found')
    option = input(
        '\nDo you want to supply a diff texture (Y/N)? (Must be equal or 1/4 the size of the supplied textures)\n'
    )
    # Re-prompt until a recognised Y/N answer is given.
    while True:
        if not option.upper() in ['Y', 'N', 'YES', 'NO']:
            print('Error: invalid option')
        else:
            break
        option = input()
    if option.upper() in ['Y', 'YES']:
        while True:
            DiffTex = input('\nEnter diff texture filepath\n')
            if DiffTex == OgTex or DiffTex == ModTex:
                print('Error: texture is the same')
            elif os.path.isfile(DiffTex):
                # imghdr sniffs the diff file; presumably Image here is
                # wand.image.Image (ImageMagick) -- TODO confirm import.
                isimg = imghdr.what(DiffTex)
                if isimg != None:
                    with Image(filename=DiffTex) as dif:
                        difw, difh = dif.size
                        if (difw == ogw and difh == ogh) or (
                                difw == ogw / 4 and difh == ogh / 4):
                            break
                        else:
                            print(
                                'Error: diff texture size is not equal or 1/4 the size of the supplied textures'
                            )
                else:
                    print('Error: file is not a recognized image format')
            else:
                print('Error: file not found')
    else:
        gendiff(OgTex, ModTex)
    # NOTE(review): in the 'N' branch DiffTex is never assigned in this
    # function, so this call looks like a NameError unless gendiff() sets it
    # globally -- verify against gendiff's definition.
    process(OgTex, ModTex, DiffTex)
def up_photo():
    ''' For uploading photo into HDFS '''
    # Flask handler: saves the uploaded image locally, extracts its text
    # via OCR-like helper, then appends (path, features, base64) to a
    # parquet DataFrame on HDFS.
    img = request.files.get('photo')  # Get photo from webpage
    if (not img):
        return redirect('/')
    if not (img and allowed_file(img.filename)):
        return jsonify({
            "error": 1001,
            "msg": "Only support .png .PNG .jpg .JPG .bmp .gif"
        })
    ''' If not supported images, return error '''
    path = basedir + "/static/photo/"
    # Re-encode the filename to survive non-ASCII names.
    imgfilename = img.filename.encode("utf-8").decode("latin1")
    file_path = path + imgfilename
    img.save(file_path)
    ''' Save the image in local directory '''
    # Detect the image type from file contents for the data-URI prefix.
    imgType = imghdr.what(file_path)
    imagebase64 = base64.b64encode(open(file_path, 'rb').read())
    commonF = Common()
    x = commonF.readImageText(file_path, "all")
    ''' Get type, inner-text and base64 of that image '''
    # Normalise extracted text: drop whitespace and separators, lowercase.
    x = re.sub('\s', '', x)
    x = x.replace('\n', '').replace(' ', '').replace('|', '')
    x = ("NoTag") if x == "" else (x.lower())
    sstring = img.filename + "|" + x + "|data:image/" + imgType + \
        ";base64," + str(imagebase64, 'utf-8')
    nowstring = sstring.encode("utf-8").decode("latin1")
    conf = SparkConf()  #.setAppName("Upload One Image to HDFS").setMaster("yarn")
    #sc = SparkContext(conf=conf)
    sc = SparkContext.getOrCreate(conf=conf)
    sqlContext = SQLContext(sc)
    uploadedDF = sc.parallelize([
        (img.filename, x,
         "data:image/" + imgType + ";base64," + str(imagebase64, 'utf-8'))
    ]).toDF(["path", "features", "binary"])
    uploadedDF.write.mode('append').parquet(
        dataFrameUrl)  #("hdfs://gpu3:9000/dataFrames/final8")
    ''' Save it into HDFS '''
    print(nowstring)
    return redirect('/')
def extract_images(resultsDir, pcapFilePath, pathtoBro):
    """Run Bro/Zeek over *pcapFilePath* and copy every extracted file that
    imghdr recognises as an image into <resultsDir>/extractedImages, mirroring
    the two top directory levels of the extraction tree.

    Returns a (success, status) tuple: (True, "Success") on completion,
    (False, 'DependencyError') if the Bro binary is missing, or
    (False, "PermissionError") if a file cannot be read/copied.
    """
    # imghdr type names we keep.
    imageTypes = [
        'rgb', 'gif', 'pbm', 'pgm', 'ppm', 'tiff', 'rast', 'xbm', 'jpeg',
        'bmp', 'png', 'webp', 'exr'
    ]
    broScript = './apps/Bro/bro/extract-all-images.bro'
    if not os.path.exists(resultsDir):
        os.makedirs(resultsDir)
    if not os.path.exists(pathtoBro):
        return False, 'DependencyError'
    args = [pathtoBro, '-C', '-r', pcapFilePath, broScript]
    p = subprocess.Popen(args)
    # BUGFIX: the original busy-polled p.poll() in a tight loop, burning CPU
    # until Bro exited; wait() blocks in the OS instead.
    p.wait()
    extractedImagesDir = 'extractedImages'
    extractedFilesDir = 'extract_files'
    if not os.path.exists(os.path.join(resultsDir, extractedImagesDir)):
        os.makedirs(os.path.join(resultsDir, extractedImagesDir))
    for root, dirs, files in os.walk(extractedFilesDir):
        path = os.path.split(root)
        for fil in files:
            try:
                filesDirPath = os.path.join(root, fil)
                print(len(path) * '---', filesDirPath)
                # Path of the file relative to the extraction root.
                imagesDirPath = filesDirPath[len(extractedFilesDir) + 1:]
                imageType = imghdr.what(os.path.join(root, fil))
                if imageType in imageTypes:
                    # Recreate the first two directory levels under the
                    # results tree before copying.
                    imagesDirs = imagesDirPath.split("/")
                    if not os.path.exists(resultsDir + "/" +
                                          extractedImagesDir + "/" +
                                          imagesDirs[0]):
                        os.makedirs(resultsDir + "/" + extractedImagesDir +
                                    "/" + imagesDirs[0])
                    if not os.path.exists(resultsDir + "/" +
                                          extractedImagesDir + "/" +
                                          imagesDirs[0] + "/" +
                                          imagesDirs[1]):
                        os.makedirs(resultsDir + "/" + extractedImagesDir +
                                    "/" + imagesDirs[0] + "/" + imagesDirs[1])
                    print(fil, 'is:', imageType)
                    try:
                        shutil.copyfile(
                            filesDirPath, resultsDir + "/" +
                            extractedImagesDir + "/" + imagesDirPath)
                    except Exception as e:
                        # Best-effort copy: log and continue with other files.
                        print(e)
            except PermissionError as e:
                print(e)
                return False, "PermissionError"
            except FileNotFoundError as e:
                print(e)
    # Remove Bro's raw extraction directory once images are copied out.
    shutil.rmtree(extractedFilesDir)
    return True, "Success"
def path_is_image(p: Path) -> bool:
    """True when *p* points at a file imghdr can identify as an image."""
    detected_format = imghdr.what(p)
    return detected_format is not None
def try_use(f):
    """Return the first entry of *f* when it names a recognisable image file.

    Returns None for an empty/None *f* or when imghdr cannot identify the
    file type of the first entry.
    """
    if not f:
        return None
    candidate = f[0]
    # imghdr returns None for unrecognised file contents.
    return candidate if imghdr.what(candidate) else None
def generateCountsGraph(
    self,
    counts,
    sitename,
    widthpx=648,
    resol=72,
):
    '''
    Static function to generate graph file via R.
    Graphs *all* of the counts records contained in counts List
    Returns an open file handle on the rendered PNG (or on the configured
    "no data" image when counts is empty).
    '''
    log = logging.getLogger()
    from rpy import r as robj
    log.info('Generating graph for %d counts from site %s' %
             (len(counts), sitename))
    # Calculate graph image information
    widthpx = int(widthpx)
    imgwidth = int(float(widthpx) / float(resol))
    ratio = float(self.config.get('data', 'graphratio'))
    # BUGFIX: a trailing comma previously made imgheight a 1-tuple, which was
    # then passed as robj.bitmap(height=...); generateCountsGraph2 computes
    # the same value without the comma.
    imgheight = int((float(widthpx) * ratio) / float(resol))
    counts_data = {"datetime": [], "c1": []}
    (fd, tmpgraphfile) = mkstemp()
    # NOTE(review): the mkstemp fd is never closed here (nor in
    # generateCountsGraph2) -- confirm whether os.close(fd) is safe to add.
    log.debug("Temp graph filename = %s" % tmpgraphfile)
    # Unpack CountsRecords into epoch-second timestamps and count values.
    for cr in counts:
        epochsecs = time.mktime(cr.datetime.timetuple())
        counts_data["datetime"].append(epochsecs)
        counts_data["c1"].append(cr.c1)
    cts = counts_data['c1']
    ctm = counts_data['datetime']
    if len(cts) > 0:
        robj.bitmap(
            tmpgraphfile,
            type="png256",
            width=imgwidth,
            height=imgheight,
            res=resol,
        )
        ymin = int(self.config.get('data', 'counts.graph.ylim.min'))
        ymax = int(self.config.get('data', 'counts.graph.ylim.max'))
        robj.plot(ctm,
                  cts,
                  col="black",
                  main="Counts: %s" % sitename,
                  xlab="Time: (secs since 1970)",
                  ylab="Counts/min",
                  type="l",
                  ylim=(ymin, ymax))
        robj.dev_off()
        import imghdr
        imgtype = imghdr.what(tmpgraphfile)
        log.debug(
            "DataHandler.generateCountsGraph(): OK: What is our tempfile? = %s"
            % tmpgraphfile)
        f = open(tmpgraphfile)
    else:
        log.debug(
            "DataHandler.generateCountsGraph(): No data. Generating proper error image..."
        )
        f = open(self.config.get('data', 'nodatapng'))
    return f
# Download the product's main image when the product belongs to a tracked
# brand, detect its real format with imghdr, and record the final image
# path + content hash on the product record. (Runs inside the scraper's
# per-product context: pdt, url, products, shop_id etc. are defined by the
# surrounding code.)
if 'pdct_img_main_url' in pdt and pdt[
        'pdct_img_main_url'] and brm.find_brand(
            pdt['pdct_name_on_eretailer'])['brand'] in mh_brands:
    print(pdt['pdct_name_on_eretailer'] + "." +
          pdt['pdct_img_main_url'].split('.')[-1])
    response = requests.get(pdt['pdct_img_main_url'],
                            stream=True,
                            verify=False,
                            headers=headers)
    # response.raw.decode_content = True
    # Temp path keyed on the URL hash so repeated runs reuse one slot.
    tmp_file_path = '/tmp/' + shop_id + 'mhers_tmp_{}.imgtype'.format(
        abs(hash(pdt['pdct_img_main_url'])))
    img_path = img_path_namer(shop_id, pdt['pdct_name_on_eretailer'])
    with open(tmp_file_path, 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)
    # Only keep the download if imghdr recognises it as an image; replace
    # the placeholder extension with the detected type.
    if imghdr.what(tmp_file_path) is not None:
        img_path = img_path.split('.')[0] + '.' + imghdr.what(
            '/tmp/' + shop_id + 'mhers_tmp_{}.imgtype'.format(
                abs(hash(pdt['pdct_img_main_url']))))
        shutil.copyfile(
            '/tmp/' + shop_id + 'mhers_tmp_{}.imgtype'.format(
                abs(hash(pdt['pdct_img_main_url']))), img_path)
        products[url].update({
            'img_path': img_path,
            'img_hash': file_hash(img_path)
        })
# Persist everything collected during this run and shut the browser down.
create_csvs(products, categories, searches, shop_id,
            fpath_namer(shop_id, 'raw_csv'), COLLECTION_DATE)
validate_raw_files(fpath_namer(shop_id, 'raw_csv'))
driver.quit()
def generateCountsGraph2(
    self,
    counts,
    sitename,
    widthpx=648,
    resol=72,
):
    '''Generate a PNG counts graph for *sitename* via R (rpy).

    Variant of generateCountsGraph that pushes date/time handling into
    R itself (Rpy does not convert timestamps automatically): the
    timestamps are shipped to R as strings and parsed there with
    strptime/as.POSIXct.

    Parameters:
        counts   -- iterable of records exposing ``.datetime`` and ``.c1``
        sitename -- label used in the graph title
        widthpx  -- image width in pixels
        resol    -- image resolution (dpi)

    Returns an open binary file object with the PNG data (or the
    configured "no data" placeholder when *counts* is empty); the
    caller must close it.
    '''
    log = logging.getLogger()
    log.info('Generating graph for %d counts from site %s'
             % (len(counts), sitename))

    # Unpack CountsRecords into counts and stringified timestamps.
    cts = []
    ctm = []
    for cr in counts:
        # cr.datetime = "2008-02-11 12:07:08.112117", cr.c1 = 5440
        cts.append(cr.c1)
        ctm.append(str(cr.datetime))
    log.debug("Got list of %d counts." % len(cts))

    if not cts:
        # No data: return the pre-rendered placeholder without
        # touching R or creating a temp file.
        log.debug("No data. Generating proper error image...")
        return open(self.config.get('data', 'nodatapng'), 'rb')

    # Import R lazily so the no-data path works without rpy installed.
    from rpy import r as robj

    # Calculate graph image information (inches for R's bitmap()).
    ratio = float(self.config.get('data', 'graphratio'))
    widthpx = int(widthpx)
    resol = int(resol)
    imgwidth = int(float(widthpx) / float(resol))
    imgheight = int(((float(widthpx) * ratio) / float(resol)))

    # Get unused file/name to put image data into...  Close the raw fd
    # returned by mkstemp (R writes to the path); otherwise it leaks.
    (fd, tmpgraphfile) = mkstemp()
    os.close(fd)
    log.debug("Temp graph filename = %s" % tmpgraphfile)

    # Push the data into the R session.
    robj.assign('rcts', cts)
    robj.assign('rctm', ctm)
    # Convert timestamps to POSIXct objects within R:
    # datpt <- as.POSIXct(strptime(dat,format="%Y-%m-%d %H:%M:%S"))
    robj(
        '''rctmpct <- as.POSIXct(strptime(rctm, format="%Y-%m-%d %H:%M:%S"))'''
    )
    cmdstring = 'bitmap( "%s", type="png256", width=%s, height=%s, res=%s)' % (
        tmpgraphfile, imgwidth, imgheight, resol)
    log.debug("R cmdstring is %s" % cmdstring)
    robj(cmdstring)
    log.debug("Completed R command string %s" % cmdstring)

    ymin = int(self.config.get('data', 'counts.graph.ylim.min'))
    ymax = int(self.config.get('data', 'counts.graph.ylim.max'))
    # X axis label spans first..last timestamp.
    xlabel = " %s -- %s " % (ctm[0], ctm[len(ctm) - 1])
    cmdstring = 'plot( rctmpct, rcts, col="black",main="Counts: %s", xlab="Dates: %s",ylab="Counts/min",type="l", ylim=c(%d,%d) )' % (
        sitename, xlabel, ymin, ymax)
    log.debug("R cmdstring is %s" % cmdstring)
    robj(cmdstring)
    log.debug("Completed R command string %s" % cmdstring)
    robj.dev_off()

    # Pull written image and return to caller (binary mode: raw PNG).
    import imghdr
    imgtype = imghdr.what(tmpgraphfile)
    log.debug("OK: What is our tempfile? = %s" % tmpgraphfile)
    return open(tmpgraphfile, 'rb')
def getTile(self, laykey, col, row, zoom, toDstGrid=True, useCache=True):
    """
    Return bytes data of requested tile or None if unable to get valid data.
    Tile is downloaded from map service or directly picked up from the cache
    database if useCache option is True.

    Parameters:
        laykey    -- layer key identifying the map service layer
        col, row  -- tile column / row in the tile matrix
        zoom      -- zoom level of the requested tile
        toDstGrid -- if True, address the destination tile matrix set
                     (reprojecting source tiles as needed)
        useCache  -- if True, consult/populate the tile cache database

    Raises ValueError when toDstGrid is requested but no destination
    grid is defined.
    """
    # Select tile matrix set
    if toDstGrid:
        if self.dstGridKey is not None:
            tm = self.dstTms
        else:
            raise ValueError('No destination grid defined')
    else:
        tm = self.srcTms

    # Don't try to get tiles out of map bounds
    x, y = tm.getTileCoords(col, row, zoom)  # top left corner
    if row < 0 or col < 0:
        return None
    elif not tm.xmin <= x < tm.xmax or not tm.ymin < y <= tm.ymax:
        return None

    if useCache:
        # Check if tile already exists in cache...
        cache = self.getCache(laykey, toDstGrid)
        data = cache.getTile(col, row, zoom)
        # ...and if so, that it is a valid image (corrupted entries are
        # silently refetched below).  `fmt` avoids shadowing builtin format.
        if data is not None:
            fmt = imghdr.what(None, data)
            if fmt is not None:
                return data

    # Tile missing from cache or corrupted: fetch from the map service.
    if not toDstGrid:
        data = self.downloadTile(laykey, col, row, zoom)
    else:
        # Build a reprojected tile.
        # Get tile bbox in destination grid coordinates.
        bbox = self.dstTms.getTileBbox(col, row, zoom)
        xmin, ymin, xmax, ymax = bbox
        # Find the closest source zoom level, converting the resolution
        # between degrees and meters when the two grids use different units.
        res = self.dstTms.getRes(zoom)
        if self.dstTms.units == 'degrees' and self.srcTms.units == 'meters':
            res2 = dd2meters(res)
        elif self.srcTms.units == 'degrees' and self.dstTms.units == 'meters':
            res2 = meters2dd(res)
        else:
            res2 = res
        _zoom = self.srcTms.getNearestZoom(res2)
        _res = self.srcTms.getRes(_zoom)
        # Reproject the bbox into the source CRS.
        crs1, crs2 = self.srcTms.CRS, self.dstTms.CRS
        try:
            _bbox = reprojBbox(crs2, crs1, bbox)
        except Exception as e:
            print('WARN : cannot reproj tile bbox - ' + str(e))
            return None
        # List, download and merge the tiles required to build this one
        # (recursive call through getImage).
        mosaic = self.getImage(laykey, _bbox, _zoom, toDstGrid=False,
                               useCache=True, nbThread=4, cpt=False,
                               allowEmptyTile=False)
        if mosaic is None:
            return None
        tileSize = self.dstTms.tileSize
        img = reprojImg(crs1, crs2, mosaic, out_ul=(xmin, ymax),
                        out_size=(tileSize, tileSize), out_res=res,
                        resamplAlg=self.RESAMP_ALG)
        # Get BLOB: convert the PIL image to PNG bytes.
        b = io.BytesIO()
        img.save(b, format='PNG')
        data = b.getvalue()  # convert bytesio to bytes

    # Put the tile in cache database.
    # BUG FIX: the original stored with `self.zoom` instead of the
    # `zoom` parameter used everywhere else, caching under the wrong key.
    if useCache and data is not None:
        cache.putTile(col, row, zoom, data)

    return data
def main(): ################################ ''' Makes sure that the path to the URL text file is provided. If not, an error message is displayed. ''' if len(sys.argv) < 2: print 'Please run the code with a valid path to the URL text file as its argument.' sys.exit(2) ################################ ''' Storing the path to the file. ''' filename = sys.argv[1] ################################ ''' Tries to open the file specified, and store its content in a list, and if the procedure fails, an error message is displayed. ''' try: with open(filename) as f: list_of_urls = f.readlines() except IOError: print 'Path or filename provided is not valid.' sys.exit(2) ################################ ''' Strips the list of urls of blank lines ''' list_of_urls = map(lambda s: s.strip(), list_of_urls) list_of_urls = filter(None, list_of_urls) ################################ ''' If it does not yet exist, creates a local directory for saving the images. ''' path_images = './IMAGES' if not os.path.exists(path_images): os.makedirs(path_images) print 'Saving images in {}'.format(path_images) ################################ ''' If it does not yet exist, creates a directory for storing the error log. Also opens the error log file.''' path_error_log = './ERROR_LOG' if not os.path.exists(path_error_log): os.makedirs(path_error_log) error_log_file = open("{}/error_log.txt".format(path_error_log), "w") print 'Saving error log in {}'.format(path_error_log) ################################ ''' Creates two counter indices to count the number of failures, either in the validity of the image files or the urls. ''' index_image_fail = 0 index_url_fail = 0 ################################ ''' For-loop going through each of the urls and downloading the images. ''' for i in range(0, len(list_of_urls)): # Tries connecting to the URL and retrieving the image file. try: # Path for saving the image specified. image = "{}/image_{}.jpg".format(path_images, i) # Retrieves the image from the URL. 
urllib.urlretrieve(list_of_urls[i], image) # If the image retrieved is a corrupted/not valid file type, it deletes it. if imghdr.what(image) == None: os.remove(image) # If it is the first image file failure, creates a list and stores the URL of the file in it. # Then keeps adding subsequent failures to the list if index_image_fail == 0: list_image_fail = [list_of_urls[i]] else: list_image_fail.append(list_of_urls[i]) # Counts the number of image file failures index_image_fail += 1 # If retrieving the file from the URL fails, stores the url failures in a separate failure list and counts the instances. except IOError: if index_url_fail == 0: list_url_fail = [list_of_urls[i]] else: list_url_fail.append(list_of_urls[i]) index_url_fail += 1 ################################ ''' ERROR LOG FILE ''' # If the list of image file failures is not empty, stores the URLs matching the error in the error log file. try: length = len(list_image_fail) error_log_file.write( 'The image in the following URLs is not a valid image:\n') for i in range(0, length): error_log_file.write('{}\n'.format(list_image_fail[i])) # If the list is empty, says all files are valid in the error log file. except NameError: error_log_file.write('All files downloaded were valid images.\n') # If the list of url failures is not empty, stores the URLs matching the error in the error log file try: length = len(list_url_fail) error_log_file.write('The following URLs could not be reached:\n') for i in range(0, length): error_log_file.write('{}\n'.format(list_url_fail[i])) # If the list is empty, says all files are valid in the error log file. except NameError: error_log_file.write('All URLs were valid.\n') # Closes error log file. error_log_file.close() ################################ return None