def download_images(image_list, url, output_path):
    """Download every image URL in *image_list* that belongs to *url*'s domain.

    Deduplicates downloads by SHA-1 of the content, saves each unique image
    into *output_path* and returns a list of exiftool metadata dicts (one per
    saved image), each augmented with an ``ImageHash`` key.

    Args:
        image_list: iterable of image URLs.
        url: target domain substring; URLs not containing it are skipped.
        output_path: existing directory to save images into.

    Returns:
        list of dicts with EXIF data plus 'ImageHash'.
    """
    image_results = []
    seen_hashes = set()  # set: O(1) membership test per download

    for image in image_list:
        # this filters out images not from our target domain
        if url not in image:
            continue
        try:
            print("[v] Downloading %s" % image)
            # Timeout added so a single stalled server cannot hang the crawl.
            response = requests.get(image, timeout=30)
        except requests.exceptions.RequestException:
            # Narrowed from a bare except: only network errors are expected here.
            print("[!] Failed to download: %s" % image)
            continue

        # .get() avoids a KeyError when the server omits the header.
        if "image" not in response.headers.get('content-type', ''):
            continue

        sha1 = hashlib.sha1(response.content).hexdigest()
        if sha1 in seen_hashes:
            continue
        seen_hashes.add(sha1)

        image_path = os.path.join(
            output_path, "%s-%s" % (sha1, image.split("/")[-1]))
        with open(image_path, "wb") as fd:
            fd.write(response.content)
        print("[*] Saved %s" % image)

        info = pyexifinfo.get_json(image_path)
        info[0]['ImageHash'] = sha1
        image_results.append(info[0])

    return image_results
def exif_pdf(self, filename):
    """Extract citation metadata from a PDF's EXIF and render it as BibTeX.

    Matches exiftool keys against known BibTeX field names, builds a single
    'article' entry (ID = first two title words + author surname) and shows
    it in the text view. On any failure an error dialog is shown instead.
    """
    # Recognized BibTeX-ish field names.
    # NOTE(review): "Crossred" looks like a typo for "Crossref" — confirm
    # against the BibTeX field list before changing (it alters matching).
    fields = ["Author", "Year", "Journal", "Title", "Publisher", "Page",
              "Address", "Annote", "Booktitle", "Chapter", "Crossred",
              "Edition", "Editor", "HowPublished", "Institution", "Month",
              "Note", "Number", "Organization", "Pages", "School", "Series",
              "Type", "Url", "Volume", "Doi", "File"]
    op = pexif.get_json(filename)
    try:
        # Keep the EXIF values whose key contains a known field name.
        new_op = {
            field: str(value)
            for field in fields
            for key, value in op[0].items()
            if field.lower() in key.lower()
        }
        if 'Author' not in new_op:
            new_op['Author'] = 'Unknown'
        # Citation key: first two words of the title + author's last name.
        id_auth = new_op["Author"].split()[-1]
        id_tit = (new_op["Title"].split()[:2])
        id_tit.append(id_auth)
        id_val = "_".join(id_tit)
        new_op["ID"] = str(id_val)
        new_op["ENTRYTYPE"] = "article"
        op[0] = new_op
        db = BibDatabase()
        db.entries = op
        writer = BibTexWriter()
        pdf_buff = (writer.write(db))
        self.create_textview(pdf_buff)
    except:
        # Broad on purpose: e.g. a PDF with no Title raises KeyError above;
        # every failure mode falls back to the same user-facing dialog.
        self.Messages.on_error_clicked("Can't extract data from this pdf file", "Try other methods")
def build_json_preview(
    self,
    file_path: str,
    preview_name: str,
    cache_path: str,
    page_id: int = 0,
    extension: str = ".json",
) -> None:
    """Write the file's ExifTool metadata as a JSON preview file.

    Default implementation based on ExifTool: the first metadata record
    returned for *file_path* is dumped to cache_path + preview_name +
    extension.
    """
    target = "{}{}{}".format(cache_path, preview_name, extension)
    exif_records = pyexifinfo.get_json(file_path)
    with open(target, "w") as out_handle:
        json.dump(exif_records[0], out_handle)
def run(analyzer_name, job_id, filepath, filename, md5, additional_config_params):
    """File-info analyzer job: file type, exiftool metadata and hashes.

    Fills report['report'] with magic / mimetype / filetype / exiftool /
    md5 / sha1 / sha256 / ssdeep results and marks success; on error the
    message is appended to report['errors'] and success is set False.
    Always persists the report via general.set_report_and_cleanup and
    returns it.
    """
    logger.info("started analyzer {} job_id {}"
                "".format(analyzer_name, job_id))
    report = general.get_basic_report_template(analyzer_name)
    try:
        results = {}
        results['magic'] = magic.from_file(filepath)
        results['mimetype'] = magic.from_file(filepath, mime=True)
        results['filetype'] = pyexifinfo.fileType(filepath)
        exif_report = pyexifinfo.get_json(filepath)
        if exif_report:
            # Drop file-system-level keys; only embedded metadata is useful.
            exif_report_cleaned = {
                key: value
                for key, value in exif_report[0].items()
                if not (key.startswith("File") or key.startswith("SourceFile"))
            }
            results['exiftool'] = exif_report_cleaned
        binary = general.get_binary(job_id, logger)
        results['md5'] = hashlib.md5(binary).hexdigest()
        results['sha1'] = hashlib.sha1(binary).hexdigest()
        results['sha256'] = hashlib.sha256(binary).hexdigest()
        results['ssdeep'] = pydeep.hash_file(filepath).decode()
        report['report'] = results
    except AnalyzerRunException as e:
        # Expected analyzer failure: log as plain error, keep the job alive.
        error_message = "job_id:{} analyzer:{} md5:{} filename: {} Analyzer Error {}" \
                        "".format(job_id, analyzer_name, md5, filename, e)
        logger.error(error_message)
        report['errors'].append(error_message)
        report['success'] = False
    except Exception as e:
        # Unexpected failure: keep the traceback for debugging.
        traceback.print_exc()
        error_message = "job_id:{} analyzer:{} md5:{} filename: {} Unexpected Error {}" \
                        "".format(job_id, analyzer_name, md5, filename, e)
        logger.exception(error_message)
        report['errors'].append(str(e))
        report['success'] = False
    else:
        report['success'] = True
    general.set_report_and_cleanup(job_id, report, logger)
    logger.info("ended analyzer {} job_id {}"
                "".format(analyzer_name, job_id))
    return report
def get_exif(self, photo_file: str) -> Dict[str, str]:
    """ Returns a dictionary with interesting features from image EXIF.

    Args:
        photo_file: A `pathlib.Path` object with the absolute path to the file
            location.
            NOTE(review): annotation says ``str`` while the docstring says
            `pathlib.Path` — confirm which callers actually pass.

    Returns:
        A dictionary with the exif fields and value for the specific file.
    """
    logger.trace(f"Extracting exif for file {photo_file}")
    # key[5:] drops a 5-character group prefix such as "EXIF:".
    # NOTE(review): groups with other prefix lengths ("XMP:", "Composite:")
    # would be cut incorrectly — presumably interesting_features only covers
    # 5-character groups; verify.
    return {
        key[5:]: value
        for key, value in pyexif.get_json(photo_file)[0].items()
        if key[5:] in self.interesting_features
    }
def findBestCreationDate(mediaPath): # Dig through the metadata in the media file to find a suitable creation date jsonExifData = p.get_json(mediaPath) media_date = checkForDateInJson(jsonExifData, "EXIF:DateTimeOriginal") if media_date is not None: print "using EXIF:DateTimeOriginal of " + str(media_date) + " for value of media date" return media_date media_date = checkForDateInJson(jsonExifData, "RIFF:DateTimeOriginal") if media_date is not None: print "using RIFF:DateTimeOriginal of " + str(media_date) + " for value of media date" return media_date media_date = checkForDateInJson(jsonExifData, "QuickTime:CreateDate") if media_date is not None: print "using QuickTime:CreateDate of " + str(media_date) + " for value of media date" return media_date media_date = checkForDateInJson(jsonExifData, "Composite:GPSDateTime") if media_date is not None: print "using Composite:GPSDateTime of " + str(media_date) + " for value of media date" return media_date media_date = checkForDateInJson(jsonExifData, "EXIF:CreateDate") if media_date is not None: print "using EXIF:CreateDate of " + str(media_date) + " for value of media date" return media_date # No useful information in the EXIF, next check the filename # e.g. IMG_20140830_163939.JPG filenameDate = None try: root_dir, filename = os.path.split(mediaPath) filenameDate = datetime.strptime(filename[:19], 'IMG_%Y%m%d_%H%M%S') print "using date from filename of " + str(filenameDate) + " for value of media date" return filenameDate except Exception: pass # We were unable to find anything useful in the EXIF data or filename # As a last resort, let's take a date from the folder where this image is found root_dir, last_dir = os.path.split(os.path.dirname(mediaPath)) media_date = datetime.strptime(last_dir, '%Y-%m-%d') if media_date is not None: print "using date from containing folder of " + str(media_date) + " for value of media date" return media_date return None
def __init__(self, path):
    """Load *path*, compute its md5 and extract a creation timestamp.

    Sets self.path, self.md5, self.tags and self.dt (None when no parsable
    date is present). Exits the process on unexpected errors, mirroring the
    original fail-fast behaviour.
    """
    self.path = path
    self.md5 = None
    inputfile = None
    try:
        # Bug fix: open in binary mode ('rb', was 'r'). The content is
        # hashed and fed to exifread as raw bytes; text mode breaks the
        # md5 on Python 3 (UnicodeDecodeError) and mangles bytes on Windows.
        inputfile = open(path, 'rb')
        # Take md5 of file for comparison with others
        self.md5 = hashlib.md5(inputfile.read()).hexdigest()
        log.debug("md5sum: %s", self.md5)
        # Use exifread on image files, it's faster.
        if self.path.endswith(('.jpg', '.jpeg')):
            inputfile.seek(0)
            self.tags = exifread.process_file(inputfile, details=False,
                                              stop_tag=datetag_exifread)
        else:
            self.tags = pyexifinfo.get_json(self.path)[0]
        datestring = None
        if datetag_exiftool in self.tags:
            datestring = self.tags[datetag_exiftool].strip()
        elif datetag_exifread in self.tags:
            datestring = self.tags[datetag_exifread].printable.strip()
        self.dt = None
        if datestring:
            log.debug("date in exif looks like this: '%s'", datestring)
            # Try two date formats; an unrecognized format now leaves
            # self.dt as None instead of aborting the whole program.
            for fmt in ("%Y:%m:%d %H:%M:%S", "%Y-%m-%d %H:%M:%S"):
                try:
                    self.dt = datetime.datetime.strptime(datestring, fmt)
                    break
                except ValueError:
                    log.debug("date format %r did not match", fmt)
    except Exception as err:
        log.exception("Error processing %s: %s\n", path, str(err))
        sys.exit(1)
    finally:
        if inputfile is not None:
            inputfile.close()
def get_video_metadata(file_path):
    """Return GPS coordinates, elevation and creation time of a video.

    Reads the QuickTime tags via exiftool. Raises KeyError when the video
    has no GPS/creation metadata, ValueError when the date cannot parse.
    """
    exifinfo = pyexifinfo.get_json(file_path)[0]
    gps = exifinfo['QuickTime:GPSCoordinates']
    mktime = exifinfo['QuickTime:CreationDate']
    args = gps.split(',')
    # Bug fix: the original used range() with no argument, which raises
    # TypeError unconditionally. GPSCoordinates is "lat, lon, elev".
    lat, lon, elev = args[:3]
    utcmktime = datetime.datetime.strptime(
        mktime, '%Y:%m:%d %H:%M:%S%z').strftime("%a %b %d %Y %H:%M:%S %Z")
    return {
        'latitude': lat,
        'longitude': lon,
        'elevation': elev,
        'creationtime': utcmktime,
    }
def extract_wd_from_exif(filepaths):
    """
    Extracts working distance from EXIFs of a series of images, specified by
    their file paths.

    Working distance is defined as the distance from the aperture to the
    closest surface of the object in sharp focus.

    Args:
        filepaths: List of paths for images to extract working distances.

    Returns:
        wds: A list of floating point numbers of working distances, in mm.
    """
    exifs = [pei.get_json(filepath)[0] for filepath in filepaths]
    # FocusDistance reads like "1.23 m": [:-2] drops the unit suffix and
    # *1000 converts metres to millimetres.
    # NOTE(review): assumes a fixed two-character " m" suffix — confirm the
    # exiftool output format for the cameras in use.
    wds = [
        float(exif['MakerNotes:FocusDistance'][:-2]) * 1000
        for exif in exifs
    ]
    return wds
def getPhotoAllTags (filename):
    """Return all EXIF/XMP/IPTC metadata of *filename* as a single dict.

    Exits the process when Python < 2.7 or the file does not exist.
    """
    # Check for python v2.7 or better
    if sys.version_info < (2, 7):
        print "\n Must use python 2.7 or greater, exiting...\n\n"
        sys.exit()
    # Verify that given filename exists
    if not (os.path.isfile(filename)):
        print '\n\n File does NOT exist: {0}\n Exiting...\n\n'.format(filename)
        sys.exit()
    # Extract all EXIF/XMP/IPTC from picture, seems to return a JSON structure as the first element of a list
    datalist = pyexifinfo.get_json(filename)
    # Get the first element of the list, which ends up being a JSON structure
    jsonExif = datalist[0]
    # Return the entire JSON data structure (for the photo metatags) to the caller
    return jsonExif
def getPhotoAllTags(filename): # Check for python v2.7 or better if sys.version_info < (2, 7): print "\n Must use python 2.7 or greater, exiting...\n\n" sys.exit() # Verify that given filename exists if not (os.path.isfile(filename)): print '\n\n File does NOT exist: {0}\n Exiting...\n\n'.format( filename) sys.exit() # Extract all EXIF/XMP/IPTC from picture, seems to return a JSON structure as the first element of a list datalist = pyexifinfo.get_json(filename) # Get the first element of the list, which ends up being a JSON structure jsonExif = datalist[0] # Return the entire JSON data structure (for the photo metatags) to the caller return jsonExif
def download_images(image_list, url): image_results = [] image_hashes = [] for image in image_list: # this filters out images not from our target domain if url not in image: continue try: print "[v] Downloading %s" % image response = requests.get(image) except: print "[!] Failed to download: %s" % image continue if "image" in response.headers['content-type']: sha1 = hashlib.sha1(response.content).hexdigest() if sha1 not in image_hashes: image_hashes.append(sha1) image_path = "waybackimages/%s-%s" % (sha1, image.split("/")[-1]) with open(image_path, "wb") as fd: fd.write(response.content) print "[*] Saved %s" % image info = pyexifinfo.get_json(image_path) info[0]['ImageHash'] = sha1 image_results.append(info[0]) return image_results
def check(base):
    """Classify a camera-trap image and extract its ambient temperature.

    Returns [temp, [width, height], exif_json] where temp is:
      * the Fahrenheit reading (int) when the MakerNotes tag is present,
      * -9999 when the image is not from a known camera / resolution combo,
        or comes from the no-temperature ScoutGuard model,
      * None when the camera is known but carries no temperature tag.
    """
    im = Image.open(base)
    width, height = im.size
    # Known camera resolutions; width and height must pair up by index.
    widths = [3264, 1920, 3776, 2688]
    heights = [2448, 1080, 2124, 1512]
    exif_json = exif.get_json(base)
    try:
        temp = exif_json[0]['MakerNotes:AmbientTemperatureFahrenheit']
        # Drop the two-character unit suffix (e.g. " F") before parsing.
        return [int(temp[:-2]), [width, height], exif_json]
    except (KeyError, ValueError, TypeError):
        # Narrowed from a bare except: tag absent or unparsable value.
        pass
    if (width not in widths) or (height not in heights):
        # If the image is not from one of the cameras.
        return [int(-9999), [width, height], exif_json]
    elif widths.index(width) != heights.index(height):
        # If it's not the right w x h combination.
        return [int(-9999), [width, height], exif_json]
    try:
        model = exif_json[0]['EXIF:Model']
        if model == "SG565FV-8M":
            # HCO ScoutGuard (no temp)
            return [int(-9999), [width, height], exif_json]
    except KeyError:
        # Narrowed from a bare except: model tag simply missing.
        pass
    return [None, [width, height], exif_json]
def run(self):
    """Collect file-info results for the job's file.

    Returns a dict with magic / mimetype / filetype, cleaned exiftool
    metadata and md5 / sha1 / sha256 / ssdeep hashes of the job's binary.
    """
    results = {}
    results["magic"] = magic.from_file(self.filepath)
    results["mimetype"] = magic.from_file(self.filepath, mime=True)
    results["filetype"] = pyexifinfo.fileType(self.filepath)
    exif_report = pyexifinfo.get_json(self.filepath)
    if exif_report:
        # Drop file-system-level keys; keep only embedded metadata.
        exif_report_cleaned = {
            key: value
            for key, value in exif_report[0].items()
            if not (key.startswith("File") or key.startswith("SourceFile"))
        }
        results["exiftool"] = exif_report_cleaned
    binary = get_binary(self.job_id)
    results["md5"] = hashlib.md5(binary).hexdigest()
    results["sha1"] = hashlib.sha1(binary).hexdigest()
    results["sha256"] = hashlib.sha256(binary).hexdigest()
    results["ssdeep"] = pydeep.hash_file(self.filepath).decode()
    return results
def main():
    """Debug utility: dump EXIF for one JPG via exifread, PIL and exiftool."""
    global DEBUG
    # NOTE(review): the description below looks copy-pasted from another
    # tool; this function only reads and prints metadata.
    parser = argparse.ArgumentParser(
        description='Download a file as a thumbnail from box')
    parser.add_argument('fname', action='store', help='JPG file name')
    args = parser.parse_args()
    fname = args.fname
    f = open(fname, 'rb')
    # Pass 1: exifread on the raw stream.
    tags = exifread.process_file(f)
    print 'EXIFREAD'
    print tags
    #for tag in tags:
    #print '{0}:{1}'.format(tag,tags[tag])
    # Pass 2: PIL's view of the same file.
    print 'PIL'
    img = Image.open(fname)
    #print img.__dict__
    #tags = img._getexif()
    #print tags
    #for tag in tags:
    #print '{0}:{1}'.format(tag,tags[tag])
    img.verify()
    print img.format
    if img.format in ['JPEG', 'JPG', 'TIFF']:
        ex = img._getexif()
        print 'ex: {0}'.format(ex)
        exif1 = convert_exif_to_dict(ex)
        #if exif is not None:
        #print exif['DateTime']
        print exif1
        print img.info
    # Pass 3: exiftool JSON output.
    exif_json = pyexif.get_json(fname)
    print exif_json
def exifJSON():
    """Dump exiftool JSON for every file in media/ into exifdata/json/.

    One <name>.json file is written per media file, with a progress bar.
    """
    print("Running exiftool to JSON")
    media_dir = os.path.join(ROOT_DIR, "media")
    json_dir = os.path.join(ROOT_DIR, "exifdata", "json")
    os.chdir(media_dir)
    mediafiles = os.listdir(".")
    jsonbar = Bar('Processing', max=len(mediafiles))
    # Bug fix: the original re-listed the directory on every pass and broke
    # out of the inner loop after the first entry, so it exported the SAME
    # first file N times (and leaked a file handle per iteration via
    # print(..., file=open(...))). Iterate the listing once instead.
    for filename in mediafiles:
        exifoutputjson = exif.get_json(filename)
        out_path = os.path.join(json_dir, filename + ".json")
        with open(out_path, "w") as out:
            json.dump(exifoutputjson, out, sort_keys=True, indent=0,
                      separators=(',', ': '))
        jsonbar.next()
    jsonbar.finish()
def main():
    """Tidy a messy photo dump into destination/<model>/DCIM/<instance>/.

    Walks the source tree, keeps only known raw/image extensions (optionally
    restricted to CR2 or DNG), reads DateTimeOriginal, camera model and the
    DCIM directory index from exiftool metadata, and hands each file to
    attempt_transfer(). Files lacking required metadata are skipped with a
    printed warning.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--mess", help="source directory (processed recursively)", required=True)
    parser.add_argument("-t", "--tidy", help="target directory", required=True)
    parser.add_argument("-D", "--debug", help="increase verbosity", action="store_true")
    parser.add_argument("--override", help="override DCIM instance directory")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-c", "--cr2", help="filter only cr2 extension", action="store_true")
    group.add_argument("-d", "--dng", help="filter only dng extension", action="store_true")
    args = parser.parse_args()
    top = args.mess
    destination = args.tidy
    # Start with a clean duplicate log; a missing file is fine.
    try:
        os.remove(DUPLICATE_LIST)
    except OSError:
        pass
    for root, dirs, filenames in os.walk(top):
        for filename in filenames:
            full_path = os.path.join(root, filename)
            # Optional single-extension filters (mutually exclusive flags).
            if args.cr2:
                if not filename.upper().endswith(CR2_RAW):
                    continue
            elif args.dng:
                if not filename.upper().endswith(DNG_RAW):
                    continue
            if (filename.upper().endswith(CR2_RAW) or
                    filename.upper().endswith(DNG_RAW) or
                    filename.upper().endswith(PSD_PSD) or
                    re.search(JPG_RGX, filename) or
                    re.search(TIF_RGX, filename)):
                full_path_info = pyexifinfo.get_json(full_path)[0]  # returned as list, so grab only 1st value
                date_time_original = full_path_info.get(DATETIME_ORIG)
                if not date_time_original:
                    print("Warning: Skipping date-time-less file: {0}".format(full_path))
                    continue
                # Vendor = first word of the camera model, upper-cased.
                full_path_vendor_grab = full_path_info.get(EXIF_MODEL)
                if full_path_vendor_grab:
                    full_path_vendor = full_path_vendor_grab.split()[0].upper()
                else:
                    print("Warning: Skipping camera-less file: {0}".format(full_path))
                    continue
                full_path_model = full_path_info[EXIF_MODEL].lower().replace(" ", "_")
                # DCIM instance: explicit override > EXIF directory index >
                # a guess from the file's path; otherwise skip the file.
                dcim_dirindex = full_path_info.get(DIR_INDEX)
                dcim_dirindex_guess = re.search(INSTANCE, full_path)
                if args.override:
                    full_path_dcim = args.override
                elif dcim_dirindex:
                    full_path_dcim = str(dcim_dirindex) + full_path_vendor
                elif dcim_dirindex_guess:
                    full_path_dcim = dcim_dirindex_guess.group(0)
                    print("Warning: Guessing directory-index from path of file: {0}".format(full_path))
                else:
                    print("Warning: Skipping directory-index-less file: {0}".format(full_path))
                    continue
                full_path_destination_dir = os.path.join(destination, full_path_model, DCIM, full_path_dcim)
                if args.debug:
                    pprint.pprint(pyexifinfo.get_json(full_path))
                    print(full_path)
                    print(date_time_original)
                    print(full_path_vendor)
                    print(full_path_model)
                    print(dcim_dirindex)
                    print(dcim_dirindex_guess)
                    print(full_path_dcim)
                    print(full_path_destination_dir)
                attempt_transfer(filename, root, date_time_original, full_path_destination_dir)
                # return
            else:
                print("Warning: Skipping unlisted file: {0}".format(full_path))
def AjustaTomoDef(self, context):
    """Normalize a DICOM CT-scan directory for the Blender add-on.

    Copies scn.my_tool.path into a temp dir, renames every slice after its
    DICOM InstanceNumber, groups the slices into per-series folders
    (exiftool JSON on macOS/Linux, pydicom on Windows) and finally points
    scn.my_tool.path at the prepared temp directory.
    """
    scn = context.scene
    # Create temporary directories and copy the source content into one.
    #os.chdir('DEL')
    tmpdirCopy = tempfile.mkdtemp()
    tmpdirTomo = tempfile.mkdtemp()
    shutil.copytree(scn.my_tool.path, tmpdirCopy+'COPY')
    # List the files into a text file; remove a stale listing first.
    if os.path.exists('ListaArquivos.txt'):
        os.remove('ListaArquivos.txt')
    #for dirname, dirnames, filenames in os.walk('.'):
    for dirname, dirnames, filenames in os.walk(tmpdirCopy+'COPY'):
        # print path to all subdirectories first.
        # for subdirname in dirnames:
        # print(os.path.join(dirname, subdirname))
        for filename in filenames:
            #print(os.path.join(dirname, filename))
            ArquivosListados = os.path.join(dirname, filename)+'\n'
            with open("ListaArquivos.txt", "a") as arq:
                arq.write(ArquivosListados)
                arq.close()  # redundant inside 'with', kept as-is

    # Count the lines of the listing file.
    def obter_n_linhas (nomeDoArquivo):
        arquivo = open(nomeDoArquivo, "r")
        n_linhas = sum(1 for linha in arquivo)
        arquivo.close()
        return n_linhas

    # Read the DICOM InstanceNumber of a file; returns "Error" on failure.
    def InstanceNumber(Arquivo):
        try:
            ds = dicom.dcmread(Arquivo, force=True)  # directory and file concatenated
            #ds = dicom.dcmread(Arquivo, force=True)  # directory and file concatenated
            instance_number = ds.data_element("InstanceNumber")
            # Parse the number out of pydicom's string representation.
            instanceLimpa1 = str(instance_number).split('IS: ')
            instanceLimpa2 = str(instanceLimpa1[1]).strip('"')
        except:
            print("Não rolou leitura do DICOM!")
            instanceLimpa2 = "Error"
        return instanceLimpa2

    NumeroLinhas = obter_n_linhas ('ListaArquivos.txt')
    #-----------------------------------------------------------
    # Read the listing and copy files under sortable names.
    #ContadorLinhas = 0
    with open('ListaArquivos.txt','r') as f:
        ListaArquivos=f.readlines()
    print("Criado Lista Arquivo1")
    DCMNum = 0
    for x in range(NumeroLinhas):
        ArquivoAtual = ListaArquivos[x].strip('\n')  # line without the trailing newline
        # print(ArquivoAtual)
        DCMInstanceNumber = InstanceNumber(ArquivoAtual)
        os.chdir(tmpdirTomo)
        # Sortable name: zero-padded instance number + running sequence.
        shutil.copy(ArquivoAtual, "Copy-"+DCMInstanceNumber.zfill(5)+"-"+str(DCMNum))
        print("Copiado de: ", ArquivoAtual, " Para: ", "Copy-"+DCMInstanceNumber+"-"+str(DCMNum))
        # shutil.copy(ArquivoAtual, str(datetime.now()).replace(":","").replace(".","").replace(" ","").replace("-",""))
        # print("Copiado de: ", ArquivoAtual, " Para: ", str(datetime.now()).replace(":","").replace(".","").replace(" ","").replace("-",""))
        # os.chdir('..')
        DCMNum += 1

    # Re-list the renamed files into the text file.
    if os.path.exists('ListaArquivos.txt'):
        os.remove('ListaArquivos.txt')
        print("Apagado ListaArquivo")
    #for dirname, dirnames, filenames in os.walk('.'):
    for dirname, dirnames, filenames in os.walk(tmpdirTomo):
        # print path to all subdirectories first.
        # for subdirname in dirnames:
        # print(os.path.join(dirname, subdirname))
        for filename in filenames:
            #print(os.path.join(dirname, filename))
            ArquivosListados = os.path.join(dirname, filename)+'\n'
            with open('ListaArquivos.txt', "a") as arq:
                print("Criado ListaArquivo 2")
                arq.write(ArquivosListados)
                arq.close()

    # Count the lines of the new listing (shadows the earlier helper).
    def obter_n_linhas (nomeDoArquivo):
        arquivo = open(nomeDoArquivo, "r")
        n_linhas = sum(1 for linha in arquivo)
        arquivo.close()
        return n_linhas

    NumeroLinhas = obter_n_linhas ('ListaArquivos.txt')
    # Read the listing and create per-series folders.
    #ContadorLinhas = 0
    with open('ListaArquivos.txt','r') as f:
        ListaArquivos=f.readlines()

    # PYEXIFINFO: on macOS/Linux read the SeriesNumber via exiftool JSON.
    if platform.system() == "Darwin" or platform.system() == "Linux":
        print("EH MAC E LIN")
        for x in range(NumeroLinhas):
            ArquivoAtual = ListaArquivos[x].strip('\n')
            # print(ArquivoAtual)
            data = p.get_json(ArquivoAtual)
            data2 = json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))
            # Scratch file scanned line-by-line for the SeriesNumber tag.
            with open("deletar.txt", "a") as arq:
                arq.write(data2)
                arq.close()
            palavra = "SeriesNumber"
            for line in open("deletar.txt"):
                if palavra in line:
                    SeriesRaw = line
                    SeriesLimpa1 = SeriesRaw.strip('"DICOM:SeriesNumber": "')
                    SeriesLimpa2 = SeriesLimpa1.strip('",'+'\n')
                    SeriesLimpo = SeriesLimpa2.strip(" ")
                    print("SERIES", SeriesLimpo)
                    if not os.path.exists(SeriesLimpo):
                        os.mkdir(SeriesLimpo)
                    shutil.copy(ArquivoAtual, SeriesLimpo)
                    print("Copiado de: ", ArquivoAtual, " Para: ", SeriesLimpo)
            os.remove(ArquivoAtual)
            os.remove('deletar.txt')

    # PYTHON DICOM: on Windows read the SeriesNumber via pydicom.
    if platform.system() == "Windows":
        print("EH WIN")
        for x in range(NumeroLinhas):
            ArquivoAtual = ListaArquivos[x].strip('\n')
            print("AQUIVO ATUAL: "+ArquivoAtual)
            try:
                ds = dicom.dcmread(ArquivoAtual)
                series_number = ds.data_element("SeriesNumber")
                # Strip pydicom's tag prefix from the string representation.
                SeriesLimpa1 = str(series_number).strip('(0020, 0011) Series Number IS:')
                SeriesLimpo2 = SeriesLimpa1.strip('"')
                SeriesLimpo = SeriesLimpo2.strip(" ")
            except:
                print("Não rolou leitura do DICOM!")
                SeriesLimpo = "Error"
            if not os.path.exists(SeriesLimpo):
                os.mkdir(SeriesLimpo)
                print("Diretorio "+SeriesLimpo+" criado")
            #os.chdir(tmpdirTomo)
            shutil.copy(ArquivoAtual, SeriesLimpo)
            print("Copiado de: ", ArquivoAtual, " Para: ", SeriesLimpo)
            os.remove(ArquivoAtual)
            #os.remove('deletar.txt')

    # os.remove('deletar.txt')
    # Clean up the working copy and hand the prepared dir to the add-on.
    shutil.rmtree(tmpdirCopy+'COPY')
    shutil.rmtree(tmpdirCopy)
    print("CT-SCAN ready!")
    # abrir_tomo(tmpdirTomo+'_CT-SCAN')
    abrir_diretorio(tmpdirTomo)
    scn.my_tool.path = tmpdirTomo
def test_get_json():
    """The async get_json should yield a record with a sane number of tags."""
    # asyncio.run replaces the deprecated get_event_loop()/run_until_complete
    # pattern (DeprecationWarning since 3.10, fails without a running loop in
    # 3.12) and always gives the test a fresh event loop.
    a = asyncio.run(p.get_json(image))
    assert len(a[0]) >= 25
def _extract_meta(self, filename, filepath):
    """Build the Mandala import metadata dict for one image from its Exif.

    Reads exiftool JSON for *filepath*, strips the group prefix from every
    key and maps/normalizes the fields the bulk importer expects. Caption
    data and the kmap/creator list fields end up as JSON-encoded strings.
    """
    today = date.today()
    exifinfo = pexi.get_json(filepath)[0]
    # Keys arrive as "Group:Tag"; keep only the tag part.
    meta = {key.split(':').pop(): value for key, value in exifinfo.items()}
    meta['Filename'] = meta.get('FileName', '')
    meta['Catalog'] = 'Exif'
    meta['Height'] = meta.get('ImageHeight', '')
    meta['Width'] = meta.get('ImageWidth', '')
    meta['ViewRotation'] = '0'
    meta['CollectionId'] = self.collection_id
    meta['Rights'] = meta.get('UsageTerms', '')
    meta['ISOSpeedRating'] = meta.get('ISO', '')
    meta['Latitude'] = meta.get('GPSLatitude')
    meta['Longitude'] = meta.get('GPSLongitude')
    meta['Altitude'] = meta.get('GPSAltitude')
    meta['SubjectReference'] = meta.get('SubjectCode')
    meta['Category'] = meta.get('SupplementalCategories')
    meta['Author'] = meta.get('Creator')
    meta['CaptureDate'] = meta.get('CreateDate')
    # NOTE: reads 'Model' after other assignments but before overwriting it.
    meta['Model'] = "{} {}".format(meta.get('Make'), meta.get('Model'))
    if meta.get('Instructions', False):
        meta['AdminNotes'] = meta.get('Instructions')
    meta['TechNotes'] = "<p>Imported by Mandala Bulk Image Importer reading " \
                        "image’s Exif metadata on {}.<p>".format(today.strftime("%B %d, %Y"))
    # Find Exif Headline field and use for "captions"
    meta['Title'] = None if 'Title' not in meta else meta['Title'].strip()
    meta['Caption'] = _process_desc_langs(meta.get('Headline', ''))
    if not meta['Title']:
        meta['Title'] = meta['Caption'][0]['text']
    # Get descriptions from description field and add to captions matched by language
    descs = _process_desc_langs(meta.get('Description'))
    for desc in descs:
        foundit = False
        for ind, cobj in enumerate(meta['Caption']):
            if cobj['lang'] == desc['lang']:
                meta['Caption'][ind]['description'] = desc['text']
                foundit = True
        if not foundit:
            # No caption in that language yet: create a placeholder entry.
            meta['Caption'].append({
                'lang': desc['lang'],
                'text': 'Untitled',
                'description': desc['text']
            })
    # Deal with when title is empty
    if not meta['Title'] or meta['Title'] == '':
        meta['Title'] = meta['Caption'][0]['text'] if meta['Caption'][0][
            'text'] else 'Untitled'
    # Convert caption metadata to valid json
    meta['Caption'] = json.dumps(meta['Caption'])
    # Deal with Kmaps (SubjectCode gets turned into an array from semicolons by exiftool but other kmap fields do not)
    if 'SubjectCode' in meta:
        meta['SubjectCode'] = json.dumps(meta['SubjectCode'])
    if 'SubjectReference' in meta:
        meta['SubjectReference'] = json.dumps(meta['SubjectReference'])
    if 'IntellectualGenre' in meta:
        meta['IntellectualGenre'] = json.dumps(
            meta['IntellectualGenre'].split(';'))
    # Location: Bridge tool shows just "Sub-location" but Exiftool exports both "Location" and "Sub-location"
    if 'Sub-location' in meta:
        meta['Sub-location'] = json.dumps(meta['Sub-location'].split(';'))
    if 'Location' in meta:
        meta['Location'] = json.dumps(meta['Location'].split(';'))
    elif 'Sub-location' in meta:
        meta['Location'] = meta['Sub-location']
    if 'Source' in meta:
        meta['Source'] = json.dumps(meta['Source'].split(';'))
    # Creator fields may already be lists (exiftool splits some of them);
    # only split when they arrive as semicolon-joined strings.
    if 'ImageCreatorID' in meta:
        meta['ImageCreatorID'] = meta['ImageCreatorID'].split(';') if isinstance(meta['ImageCreatorID'], str) \
            else meta['ImageCreatorID']
        meta['ImageCreatorID'] = json.dumps(meta['ImageCreatorID'])
    if 'ImageCreatorName' in meta:
        meta['ImageCreatorName'] = meta['ImageCreatorName'].split(';') if isinstance(meta['ImageCreatorName'], str) \
            else meta['ImageCreatorName']
        meta['ImageCreatorName'] = json.dumps(meta['ImageCreatorName'])
    return meta
def AjustaTomoDef(self, context):
    """Normalize a DICOM CT-scan directory for the Blender add-on (v2).

    Like the basic version: copies scn.my_tool.path into a temp dir, renames
    slices by their DICOM InstanceNumber and groups them into per-series
    folders (exiftool JSON on macOS/Linux, pydicom on Windows). In addition
    it ranks the series folders, guesses which one holds soft-tissue data
    (stored in the global diretorio_final_reconstruir_mole and written to
    AUTOEXPDIR.txt) and writes + opens a C-Scan_DATA.csv summary.
    """
    scn = context.scene
    # Create temporary directories and copy the content into one of them.
    #os.chdir('DEL')
    # scene = context.scene
    # rd = scene.render
    if scn.my_tool.path == "":
        # No source directory selected: ask the user and bail out.
        bpy.ops.object.dialog_operator_informe_dicom('INVOKE_DEFAULT')
        return {'FINISHED'}
    else:
        tmpdirCopy = tempfile.mkdtemp()
        tmpdirTomo = tempfile.mkdtemp()
        shutil.copytree(scn.my_tool.path, tmpdirCopy + 'COPY')
        # List the files into a text file; remove a stale listing first.
        if os.path.exists('ListaArquivos.txt'):
            os.remove('ListaArquivos.txt')
        #for dirname, dirnames, filenames in os.walk('.'):
        for dirname, dirnames, filenames in os.walk(tmpdirCopy + 'COPY'):
            # print path to all subdirectories first.
            # for subdirname in dirnames:
            # print(os.path.join(dirname, subdirname))
            for filename in filenames:
                #print(os.path.join(dirname, filename))
                ArquivosListados = os.path.join(dirname, filename) + '\n'
                with open("ListaArquivos.txt", "a") as arq:
                    arq.write(ArquivosListados)
                    arq.close()  # redundant inside 'with', kept as-is

        # Count the lines of the listing file.
        def obter_n_linhas(nomeDoArquivo):
            arquivo = open(nomeDoArquivo, "r")
            n_linhas = sum(1 for linha in arquivo)
            arquivo.close()
            return n_linhas

        # Read the DICOM InstanceNumber of a file; returns "Error" on failure.
        def InstanceNumber(Arquivo):
            try:
                ds = dicom.dcmread(
                    Arquivo, force=True)  # directory and file concatenated
                #ds = dicom.dcmread(Arquivo, force=True)  # directory and file concatenated
                instance_number = ds.data_element("InstanceNumber")
                # Parse the number out of pydicom's string representation.
                instanceLimpa1 = str(instance_number).split('IS: ')
                instanceLimpa2 = str(instanceLimpa1[1]).strip('"')
            except:
                print("Não rolou leitura do DICOM!")
                instanceLimpa2 = "Error"
            return instanceLimpa2

        NumeroLinhas = obter_n_linhas('ListaArquivos.txt')
        #-----------------------------------------------------------
        # Read the listing and copy files under sortable names.
        #ContadorLinhas = 0
        with open('ListaArquivos.txt', 'r') as f:
            ListaArquivos = f.readlines()
        print("Criado Lista Arquivo1")
        DCMNum = 0
        for x in range(NumeroLinhas):
            ArquivoAtual = ListaArquivos[x].strip(
                '\n')  # line without the trailing newline
            # print(ArquivoAtual)
            DCMInstanceNumber = InstanceNumber(ArquivoAtual)
            os.chdir(tmpdirTomo)
            # Sortable name: zero-padded instance number + running sequence.
            shutil.copy(
                ArquivoAtual,
                "Copy-" + DCMInstanceNumber.zfill(5) + "-" + str(DCMNum))
            print("Copiado de: ", ArquivoAtual, " Para: ",
                  "Copy-" + DCMInstanceNumber + "-" + str(DCMNum))
            # shutil.copy(ArquivoAtual, str(datetime.now()).replace(":","").replace(".","").replace(" ","").replace("-",""))
            # print("Copiado de: ", ArquivoAtual, " Para: ", str(datetime.now()).replace(":","").replace(".","").replace(" ","").replace("-",""))
            # os.chdir('..')
            DCMNum += 1

        # Re-list the renamed files into the text file.
        if os.path.exists('ListaArquivos.txt'):
            os.remove('ListaArquivos.txt')
            print("Apagado ListaArquivo")
        #for dirname, dirnames, filenames in os.walk('.'):
        for dirname, dirnames, filenames in os.walk(tmpdirTomo):
            # print path to all subdirectories first.
            # for subdirname in dirnames:
            # print(os.path.join(dirname, subdirname))
            for filename in filenames:
                #print(os.path.join(dirname, filename))
                ArquivosListados = os.path.join(dirname, filename) + '\n'
                with open('ListaArquivos.txt', "a") as arq:
                    print("Criado ListaArquivo 2")
                    arq.write(ArquivosListados)
                    arq.close()

        # Count the lines of the new listing (shadows the earlier helper).
        def obter_n_linhas(nomeDoArquivo):
            arquivo = open(nomeDoArquivo, "r")
            n_linhas = sum(1 for linha in arquivo)
            arquivo.close()
            return n_linhas

        NumeroLinhas = obter_n_linhas('ListaArquivos.txt')
        # Read the listing and create per-series folders.
        #ContadorLinhas = 0
        with open('ListaArquivos.txt', 'r') as f:
            ListaArquivos = f.readlines()

        # PYEXIFINFO: on macOS/Linux read the SeriesNumber via exiftool JSON.
        if platform.system() == "Darwin" or platform.system() == "Linux":
            print("EH MAC E LIN")
            for x in range(NumeroLinhas):
                ArquivoAtual = ListaArquivos[x].strip(
                    '\n')  # line without the trailing newline
                # print(ArquivoAtual)
                data = p.get_json(ArquivoAtual)
                data2 = json.dumps(data, sort_keys=True, indent=4,
                                   separators=(',', ': '))
                # Scratch file scanned line-by-line for the SeriesNumber tag.
                with open("deletar.txt", "a") as arq:
                    arq.write(data2)
                    arq.close()
                palavra = "SeriesNumber"
                for line in open("deletar.txt"):
                    if palavra in line:
                        SeriesRaw = line
                        SeriesLimpa1 = SeriesRaw.strip(
                            '"DICOM:SeriesNumber": "')
                        SeriesLimpa2 = SeriesLimpa1.strip('",' + '\n')
                        SeriesLimpo = SeriesLimpa2.strip(" ")
                        print("SERIES", SeriesLimpo)
                        if SeriesLimpo != '':
                            if not os.path.exists(SeriesLimpo):
                                os.mkdir(SeriesLimpo)
                        if SeriesLimpo != '':
                            shutil.copy(ArquivoAtual, SeriesLimpo)
                            print("Copiado de: ", ArquivoAtual, " Para: ",
                                  SeriesLimpo)
                os.remove(ArquivoAtual)
                os.remove('deletar.txt')

        # PYTHON DICOM: on Windows read the SeriesNumber via pydicom.
        if platform.system() == "Windows":
            print("EH WIN")
            for x in range(NumeroLinhas):
                ArquivoAtual = ListaArquivos[x].strip(
                    '\n')  # line without the trailing newline
                print("AQUIVO ATUAL: " + ArquivoAtual)
                try:
                    ds = dicom.dcmread(ArquivoAtual)
                    series_number = ds.data_element("SeriesNumber")
                    # Strip pydicom's tag prefix from the representation.
                    SeriesLimpa1 = str(series_number).strip(
                        '(0020, 0011) Series Number IS:')
                    SeriesLimpo2 = SeriesLimpa1.strip('"')
                    SeriesLimpo = SeriesLimpo2.strip(" ")
                except:
                    print("Não rolou leitura do DICOM!")
                    SeriesLimpo = "Error"
                if not os.path.exists(SeriesLimpo):
                    os.mkdir(SeriesLimpo)
                    print("Diretorio " + SeriesLimpo + " criado")
                #os.chdir(tmpdirTomo)
                shutil.copy(ArquivoAtual, SeriesLimpo)
                print("Copiado de: ", ArquivoAtual, " Para: ", SeriesLimpo)
                os.remove(ArquivoAtual)
                #os.remove('deletar.txt')

        # os.remove('deletar.txt')
        # Clean up the working copy.
        try:
            shutil.rmtree(tmpdirCopy + 'COPY')
            shutil.rmtree(tmpdirCopy)
        except:
            print("Erro de permissão ao apagar os diretório do TMP!")
        print("CT-SCAN ready!")

        # Rank the series directories and guess the soft-tissue one.
        try:
            tmpdirCSV = tempfile.mkdtemp()
            diretorio = tmpdirTomo + "/"
            lista_compara = []
            lista = [
                name for name in os.listdir(diretorio)
                if os.path.isdir(os.path.join(diretorio, name))
            ]
            #print(lista)
            try:
                for i in lista:
                    # print("\n")
                    # print("Directory:", i)
                    # print(os.listdir(diretorio+i)[0])
                    # print("Number of files:", len(os.listdir(diretorio+i)))
                    # Inspect the first slice of each series directory.
                    ArquivoAtual = os.listdir(diretorio + i)[0]
                    # print(diretorio+i+"/"+ArquivoAtual)
                    ds = dicom.dcmread(diretorio + i + "/" + ArquivoAtual,
                                       force=True)
                    if ds.data_element("SeriesDescription"):
                        SeriesDescription = ds.data_element(
                            "SeriesDescription")
                        SeriesDescriptionLimpa1 = str(SeriesDescription).split(
                            'LO: ')
                        SeriesDescriptionLimpa2 = str(
                            SeriesDescriptionLimpa1[1]).strip('"')
                        # print(SeriesDescriptionLimpa2)
                        # print("Directory:", i, "|| Number of files:", len(os.listdir(diretorio+i)), "||", SeriesDescriptionLimpa2, "\n")
                        # Rank entries: [file count, dir name, description],
                        # largest series first.
                        lista_compara.append([
                            len(os.listdir(diretorio + i)), i,
                            SeriesDescriptionLimpa2
                        ])
                        lista_compara.sort(reverse=True)
                        #print("LISTA COMPARA!!!")
                        #print(lista_compara)
                    if not ds.data_element("SeriesDescription"):
                        SeriesDescriptionLimpa2 = ("Erro O!")
                        # print(SeriesDescriptionLimpa2)
                        # print("Directory:", i, "|| Number of files:", len(os.listdir(diretorio+i)), "||", SeriesDescriptionLimpa2, "\n")
                        lista_compara.append([
                            len(os.listdir(diretorio + i)), i,
                            SeriesDescriptionLimpa2
                        ])
                        lista_compara.sort(reverse=True)
            except:
                print("Erro no SeriesDescription")
                try:
                    SeriesDescriptionLimpa2 = ("Erro 1!")
                    # print(SeriesDescriptionLimpa2)
                    # print("Directory:", i, "|| Number of files:", len(os.listdir(diretorio+i)), "||", SeriesDescriptionLimpa2, "\n")
                    lista_compara.append([
                        len(os.listdir(diretorio + i)), i,
                        SeriesDescriptionLimpa2
                    ])
                    lista_compara.sort(reverse=True)
                except:
                    print("Problema ao inserir o SeriesDescription!")
            lista_diretorios_mole = []
            print("LISTA COMPARA MOLE", lista_compara)
            try:
                print("lista compara:", lista_compara)
                # Match descriptions against known soft-tissue series names.
                for i in lista_compara:
                    print("Comecou a comparar!")
                    print("i Atual:", i)
                    if fnmatch.fnmatchcase(str(
                            i[2]), "*PM*") or fnmatch.fnmatchcase(
                            str(i[2]), "*P/M*") or fnmatch.fnmatchcase(
                            str(i[2]), "*Soft*") or fnmatch.fnmatchcase(
                            str(i[2]), "*Sft Tissue*") or fnmatch.fnmatchcase(
                            str(i[2]), "*STD*") or fnmatch.fnmatchcase(
                            str(i[2]), "*PARTES MOLES*") or fnmatch.fnmatchcase(
                            str(i[2]), "*Head*") or fnmatch.fnmatchcase(
                            str(i[2]), "*SEM CONTRASTE*") or fnmatch.fnmatchcase(
                            str(i[2]), "*FACE*") or fnmatch.fnmatchcase(
                            str(i[2]), "*Recon 3*") or fnmatch.fnmatchcase(
                            str(i[2]), "*ARQUIVO*") or fnmatch.fnmatchcase(
                            str(i[2]), "*RECON*") or fnmatch.fnmatchcase(
                            str(i[2]), "*Cranio*") or fnmatch.fnmatchcase(
                            str(i[2]), "*VOLUME STD*") or fnmatch.fnmatchcase(
                            str(i[2]), "*VOL*") or fnmatch.fnmatchcase(
                            str(i[2]), "*Imagens Processadas*"):
                        print("Encontrou!")
                        lista_diretorios_mole.append(i[1])
                    else:
                        print("Não encontrou!")
                global diretorio_final_reconstruir_mole
                try:
                    print("Diretorio final:", lista_diretorios_mole[0])
                    print("Comparou!")
                    diretorio_final_reconstruir_mole = lista_diretorios_mole[
                        0]
                except:
                    # Empty match list: fall back to the list itself.
                    print("Diretorio final:", lista_diretorios_mole)
                    print("Comparou!")
                    diretorio_final_reconstruir_mole = lista_diretorios_mole
            except:
                print(
                    "Problema para encontrar diretório com tecido mole na tomo!"
                )
            # Write the ranked series overview as CSV.
            with open(tmpdirCSV + '/C-Scan_DATA.csv',
                      mode='w') as centroid_file:
                report_writer = csv.writer(centroid_file,
                                           delimiter=',',
                                           quotechar='"',
                                           quoting=csv.QUOTE_MINIMAL)
                report_writer.writerow(
                    ['DIRECTORY', 'NUMBER OF FILES', 'DESCRIPTION'])
                if lista_compara == []:
                    report_writer.writerow(["No data", "No data", "No data"])
                if lista_compara != []:
                    for linha in lista_compara:
                        report_writer.writerow([linha[1], linha[0], linha[2]])
                        print("Directory:", linha[1], "|| Number of files",
                              linha[0], "|| Description:", linha[2])
            # Best-effort: open the CSV with the platform's LibreOffice.
            try:
                if platform.system() == "Linux":
                    # abrir_diretorio(tmpdir)
                    subprocess.Popen("libreoffice " + tmpdirCSV +
                                     "/C-Scan_DATA.csv",
                                     shell=True)
                if platform.system() == "Windows":
                    # abrir_diretorio(tmpdir)
                    subprocess.Popen(
                        'cd "C:/Program Files/LibreOffice/program/" & dir & soffice.bin '
                        + tmpdirCSV + "/C-Scan_DATA.csv",
                        shell=True)
                if platform.system() == "Darwin":
                    # abrir_diretorio(tmpdir)
                    subprocess.Popen(
                        '/Applications/LibreOffice.app/Contents/MacOS/soffice '
                        + tmpdirCSV + "/C-Scan_DATA.csv",
                        shell=True)
            except:
                print("Não há programa atribuído ao CSV!")
        except:
            print(
                "Algum problema aconteceu com a leitura dos dados do tomógrafo."
            )
            # Fallback: still try to write and open the CSV report.
            try:
                with open(tmpdirCSV + '/C-Scan_DATA.csv',
                          mode='w') as centroid_file:
                    report_writer = csv.writer(centroid_file,
                                               delimiter=',',
                                               quotechar='"',
                                               quoting=csv.QUOTE_MINIMAL)
                    report_writer.writerow(
                        ['DIRECTORY', 'NUMBER OF FILES', 'DESCRIPTION'])
                    if lista_compara == []:
                        report_writer.writerow(
                            ["No data", "No data", "No data"])
                    if lista_compara != []:
                        for linha in lista_compara:
                            report_writer.writerow(
                                [linha[1], linha[0], linha[2]])
                            print("Directory:", linha[1], "|| Number of files",
                                  linha[0], "|| Description:", linha[2])
                try:
                    if platform.system() == "Linux":
                        # abrir_diretorio(tmpdir)
                        subprocess.Popen("libreoffice " + tmpdirCSV +
                                         "/C-Scan_DATA.csv",
                                         shell=True)
                    if platform.system() == "Windows":
                        # abrir_diretorio(tmpdir)
                        subprocess.Popen(
                            'cd "C:/Program Files/LibreOffice/program/" & dir & soffice.bin '
                            + tmpdirCSV + "/C-Scan_DATA.csv",
                            shell=True)
                    if platform.system() == "Darwin":
                        # abrir_diretorio(tmpdir)
                        subprocess.Popen(
                            '/Applications/LibreOffice.app/Contents/MacOS/soffice '
                            + tmpdirCSV + "/C-Scan_DATA.csv",
                            shell=True)
                except:
                    print("Não há programa atribuído ao CSV!")
            except:
                print("Problemas com o CSV!")
        # Update the add-on path to the prepared directory.
        # abrir_diretorio(tmpdirTomo)
        scn.my_tool.path = tmpdirTomo + "/"
        # Record the guessed soft-tissue directory for the auto-export step.
        try:
            with open(tmpdirTomo + '/AUTOEXPDIR.txt', "a") as arq:
                if diretorio_final_reconstruir_mole:
                    arq.write(tmpdirTomo + "/" +
                              diretorio_final_reconstruir_mole)
                if not diretorio_final_reconstruir_mole:
                    arq.write(tmpdirTomo + "/")
                arq.close()  # redundant inside 'with', kept as-is
        except:
            print("Algum problema com a variável global do tecido mole!")
def get_file_exif(self):
    """Extract a normalized EXIF payload for ``self.file_path`` via ExifTool.

    Returns:
        dict: camera/image attributes. Missing string-valued tags default
        to a single-space placeholder, numeric tags to 0. On any
        extraction error an empty dict is returned (error is logged).
    """
    exif = pyexifinfo.get_json(str(self.file_path))[0]
    unknown = ' '
    payload = {}

    def _str(key, default=unknown):
        # Stringify an ExifTool tag, falling back when the tag is absent.
        return str(exif[key]) if key in exif else default

    def _int(key):
        # Integer tag with 0 as the "missing" sentinel (original behavior).
        return int(exif[key]) if key in exif else 0

    try:
        payload = {
            'iso': _int('EXIF:ISO'),
            'width': _int('EXIF:ImageWidth'),
            # BUG FIX: original tested `'EXIF:Flash' in exif['EXIF:Flash']`,
            # which raised KeyError whenever the tag was missing and aborted
            # the whole payload via the except below. Test membership in
            # `exif` itself.
            'flash': 'EXIF:Flash' in exif and 'on' in exif['EXIF:Flash'].lower(),
            'camera_serial': _str('MakerNotes:SerialNumber'),
            'aperture': _str('EXIF:ApertureValue'),
            'focal_length': _str('EXIF:FocalLength'),
            'camera_firmware': _str('MakerNotes:FirmwareVersion'),
            'shooting_mode': _str('Composite:ShootingMode'),
            'max_focal_length': _str('MakerNotes:MaxFocalLength'),
            'lens_type': _str('MakerNotes:LensType'),
            'height': _int('MakerNotes:OriginalImageHeight'),
            'white_balance': _str('EXIF:WhiteBalance'),
            'megapixels': _str('Composite:Megapixels', 0),
            'created_datetime': _str('EXIF:CreateDate'),
            'quality': _str('MakerNotes:Quality'),
            'file_type': _str('File:FileType'),
            'continuous_drive': _str('MakerNotes:ContinuousDrive'),
            'file_size': _str('File:FileSize'),
            'orientation': _str('EXIF:Orientation'),
            'camera_manufacture': _str('EXIF:Make'),
            # BUG FIX: 'shutter_speed' appeared twice in the original dict
            # literal; the later EXIF:ShutterSpeedValue entry silently won,
            # so only it is kept (identical effective behavior).
            'shutter_speed': _str('EXIF:ShutterSpeedValue'),
            'self_timer': 'MakerNotes:SelfTimer' in exif
                          and 'on' in exif['MakerNotes:SelfTimer'].lower(),
        }
    except Exception as e:
        log.error(e)
    return payload
def exif(self, path): # Exif info exifreport = pyexifinfo.get_json(path) result = dict((key, value) for key, value in exifreport[0].items() if not (key.startswith("File") or key.startswith("SourceFile"))) return result
def test_get_json(): a = p.get_json(image) assert len(a[0]) == 25
print (">>>>>>>>>>>>>>>>>>>>>>>>>>>>") ## Run suffix check and filetype check, rename if so wished print (">>> FILE: ", filename) magik = magic.from_file(filename) print (">>> FILE MAGIC: ", magik) filetype = get_file_type(filename) print (">>> CORRECT FILE SUFFIX: ", filetype) if filetype is not None and not filename.lower().endswith(filetype): # case insensitive print("BLIMEY, THERE'S NO SUFFIX.....") if (rename): print("ADDING SUFFIX TO FILE!") os.rename(filename, filename + "." + filetype) # ## Get ALL the metadata from the file data = p.get_json(filename) print( json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')) ) # # ## DUMP again with piexif # exif_dict = piexif.load(filename) # for ifd in ("0th", "Exif", "GPS", "1st"): # for tag in exif_dict[ifd]: # print(piexif.TAGS[ifd][tag]["name"], exif_dict[ifd][tag]) # Reading any of: # VIDEO: 3ga, 3gp, avi, dv, flv, m4v, mkv, mov, mp4, mpg, webm, wmv # AUDIO: amr, m4a, mp3 # IMAGE: bmp, gif, jpg, png, pnm, tif # do roughly as in https://github.com/danryu/misc-utils/blob/master/metaDateFix.sh
def process(self, file):
    """Ingest one uploaded media file and create a bulletin from its metadata.

    Mode 2 (server-side): copies the file from its source path into the
    media directory, computing an md5 etag first for de-duplication.
    Mode 1: the file is already in the media directory with a known etag.
    In both modes, videos (by extension) get their duration probed via
    ffprobe and are optionally transcoded to mp4 via ffmpeg.
    Returns the result of ``self.create_bulletin(info)`` or an early-exit
    value when the file is a duplicate.
    """
    # handle file uploads based on mode of ETL
    print(file)
    duration = None
    if self.meta.get('mode') == 2:
        self.summary += '------------------------------------------------------------------------ \n'
        self.summary += now() + 'file: {}'.format(
            file.get('file').get('name')) + '\n'
        # check here for duplicate to skip unnecessary code execution
        old_path = file.get('file').get('path')
        # get md5 hash directly from the source file (before copying it)
        # NOTE(review): file handle is never closed (no `with`); relies on GC.
        f = open(old_path, 'rb').read()
        etag = hashlib.md5(f).hexdigest()
        exists = Media.query.filter(Media.etag == etag).first()
        if exists:
            # Duplicate upload: log and bail out without copying anything.
            self.summary += now() + ' File already exists: {} \n'.format(
                old_path)
            print('File already exists: {} \n'.format(old_path))
            self.summary += '------------------------------------------------------------------------\n\n'
            self.log.write(self.summary)
            return
        #server side mode, need to copy files and generate etags
        old_filename = file.get('file').get('name')
        title, ext = os.path.splitext(old_filename)
        filename = Media.generate_file_name(old_filename)
        filepath = (Media.media_dir / filename).as_posix()
        # check if file is video (accepted extension)
        if ext[1:].lower() in cfg.ETL_VID_EXT:
            try:
                # get video duration via ffprobe
                # NOTE(review): path is interpolated into a shell string
                # (shell=True) — a quote in the filename breaks/injects.
                cmd = 'ffprobe -i "{}" -show_entries format=duration -v quiet -of csv="p=0"'.format(
                    old_path)
                duration = subprocess.check_output(
                    cmd, shell=True).strip().decode('utf-8')
            except Exception as e:
                print('failed to get video duration')
                print(e)
            if self.meta.get('optimize'):
                #process video
                try:
                    # Transcode straight into the media dir as .mp4.
                    filepath = '{}.mp4'.format(
                        os.path.splitext(filepath)[0])
                    command = 'ffmpeg -i "{}" -vcodec libx264 -acodec aac -strict -2 "{}"'.format(
                        old_path, filepath)
                    subprocess.call(command, shell=True)
                    #if conversion is successful / also update the filename passed to media creation code
                    filename = os.path.basename(filepath)
                except Exception as e:
                    print(
                        'An exception occurred while transcoding file {}'.format(e))
                    #copy the file as is instead
                    shutil.copy(old_path, filepath)
            else:
                # No transcode requested: plain copy into the media dir.
                # NOTE(review): as reconstructed, non-video files appear to
                # never be copied in mode 2 — verify against the original
                # source's indentation.
                shutil.copy(old_path, filepath)
        self.summary += now() + ' File saved as {}'.format(filename) + '\n'
    elif self.meta.get('mode') == 1:
        self.summary += now(
        ) + ' ------ Processing file: {} ------'.format(
            file.get('filename')) + '\n'
        # we already have the file and the etag
        filename = file.get('filename')
        n, ext = os.path.splitext(filename)
        title, ex = os.path.splitext(file.get('name'))
        filepath = (Media.media_dir / filename).as_posix()
        etag = file.get('etag')
        # check here for duplicate to skip unnecessary code execution
        exists = Media.query.filter(Media.etag == etag).first()
        #print (exists)
        if exists:
            # Duplicate: remove the already-uploaded copy and bail out.
            self.summary += now() + ' File already exists: {} \n'.format(
                filepath)
            try:
                os.remove(filepath)
                print('duplicate file cleaned ')
            except OSError:
                pass
            print('File already exists: {} \n'.format(filepath))
            self.summary += '------------------------------------------------------------------------\n\n'
            self.log.write(self.summary)
            return "This file already exists"
        # else check if video processing is enabled
        if ext[1:].lower() in cfg.ETL_VID_EXT:
            try:
                # get video duration via ffprobe
                cmd = 'ffprobe -i "{}" -show_entries format=duration -v quiet -of csv="p=0"'.format(
                    filepath)
                duration = subprocess.check_output(
                    cmd, shell=True).strip().decode('utf-8')
            except Exception as e:
                print('failed to get video duration')
                print(e)
            if self.meta.get('optimize'):
                # process videos in the media
                try:
                    # NOTE(review): '{}*.mp4' embeds a literal '*' in the
                    # output filename — looks like a typo for '{}.mp4'
                    # (compare the mode-2 branch above); confirm intent
                    # before relying on the produced name.
                    new_filepath = '{}*.mp4'.format(
                        os.path.splitext(filepath)[0])
                    command = 'ffmpeg -i "{}" -vcodec libx264 -acodec aac -strict -2 "{}"'.format(
                        filepath, new_filepath)
                    subprocess.call(command, shell=True)
                    #if conversion is successful / also update the filename passed to media creation code
                    filename = os.path.basename(new_filepath)
                    #clean up old file
                    os.remove(filepath)
                    #if op is successful update filepath
                    filepath = new_filepath
                except Exception as e:
                    print(
                        'An exception occurred while transcoding file {}'.format(e))
                    # do nothing
    # get mime type
    # mime = magic.Magic(mime=True)
    # mime_type = mime.from_file(filepath)
    #print('Hash generated :: {}'.format(etag))
    # Extract full metadata via ExifTool and bundle bookkeeping fields.
    info = exiflib.get_json(filepath)[0]
    #print(info.get('EXIF:CreateDate'))
    # bundle title with json info
    info['bulletinTitle'] = title
    info['filename'] = filename
    # pass filepath for cleanup purposes
    info['filepath'] = filepath
    info['etag'] = etag
    if self.meta.get('mode') == 2:
        info['old_path'] = old_path
    # pass duration
    if duration:
        info['vduration'] = duration
    print('=====================')
    print('Meta data parse success')
    result = self.create_bulletin(info)
    return result
def test(image): e = exif.get_xml(image) json = exif.get_json(image) print json[0]
def main(): for name in os.listdir("."): if os.path.isdir(name): print("ignoring", name, "because it's a directory") continue else: lower_name = name.lower() if not lower_name.endswith( ('.jpg', '.png', '.gif', '.bmp', '.tif', '.heic', '.xmp', '.mov', '.mp4', '.jpeg', '.aifc', '.pdf', '.m4v', '.ico', '.m4a')): print("ignoring \"" + name + "\" because it's not a recognized image or video format") continue try: info = pyexifinfo.get_json(name)[0] possible_keys = [ 'EXIF:DateTimeOriginal', 'XMP:DateCreated', 'QuickTime:CreationDate', 'File:FileModifyDate' ] modified_key_only = False key = 'key not found' for try_key in possible_keys: if try_key in info: key = try_key if key == possible_keys[-1]: print( "WARNING:", name, "only has file *modified* key, so may be incorrect date" ) modified_key_only = True break if key == 'key not found': print(name, "has no known DateCreated key") continue date = str(info[key]) arr = arrow.get(date, ['YYYY:MM:DD HH:mm:ssZZ', 'YYYY:MM:DD HH:mm:ss']) except arrow.parser.ParserError: print("Ignoring", name, "since it contains no key with viable creation date.") else: # NOTE: I actually prefer just having names using local time. For example, the local EST # might be 12:28:12, EST is -04:00, so the utc time is 16:28:12. To me it's more # intuitive to just use the local time like all the other files use by default. #newNameStart=arr.to('utc').format('YYYY:MM:DD_HH:mm:ss-') newNameStart = arr.format('YYYY.MM.DD_HH.mm.ss-') if modified_key_only: newNameStart = "last_modified-" + newNameStart if not name.startswith(newNameStart): newName = newNameStart + name print("renaming", name, "to", newName) #os.rename(name,newName) else: print( "ignoring", name, "since it already seems to be correctly prefixed with creation date" )
def main():
    # Batch-export photos: walk a source tree, derive a YYYY-MM-DD folder and
    # timestamp prefix from each file's EXIF DateTimeOriginal, then convert
    # RAW files via darktable-cli and resize JPGs via ImageMagick `convert`
    # into the destination tree. Python 2 syntax (bare `print` statements).
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--source", help="source directory (processed recursively)", required=True)
    parser.add_argument("-d", "--destination", help="destination directory", required=True)
    parser.add_argument("-D", "--debug", help="increase verbosity", action="store_true")
    parser.add_argument("--height", help="max. height of the exported file", type=int)
    parser.add_argument("--width", help="max. width of the exported file", type=int)
    args = parser.parse_args()
    top = args.source
    bottom = args.destination
    # Default export bounds when not given on the command line.
    if args.height:
        height = args.height
    else:
        height = 2048
    if args.width:
        width = args.width
    else:
        width = 2048
    for root, dirs, filenames in os.walk(top):
        for filename in filenames:
            full_path = os.path.join(root, filename)
            if filename.upper().endswith(CR2_RAW) or filename.upper().endswith(DNG_RAW) or re.search(JPG_RGX, filename):
                full_path_info = pyexifinfo.get_json(full_path)[0]  # returned as list, so grab only 1st value
                date_time_original = full_path_info.get(DATETIME_ORIG)
                if not date_time_original:
                    print("Warning: Skipping date-time-less file: {0}".format(full_path))
                    continue
                # 'YYYY:MM:DD HH:MM:SS' -> directory 'YYYY-MM-DD' and a
                # compact 'YYYYMMDDHHMM' filename prefix.
                date_time_directory = str(date_time_original).split(' ')[0].replace(':','-')
                date_time_genprefix = str(date_time_original).rsplit(':', 1)[0].replace(':', '').replace(' ','')
                new_filename = filename.rsplit('.', 1)[0] + '.jpg'
                full_path_destination_dir = os.path.join(bottom, date_time_directory)
                full_path_destination_path = os.path.join(full_path_destination_dir, str(date_time_genprefix + '_' + new_filename))
                if filename.upper().endswith(CR2_RAW) or filename.upper().endswith(DNG_RAW):
                    # NOTE(review): this branch never appends the input or
                    # output path to command_line — as written, darktable-cli
                    # is invoked with size options only. Verify against the
                    # original source; likely truncated.
                    command_line = DARKTABLECLI_PATH
                    command_line += " " + DARKTABLECLI_HEIGHT + " " + str(height)
                    command_line += " " + DARKTABLECLI_WIDTH + " " + str(width)
                elif re.search(JPG_RGX, filename):
                    # '\>' = ImageMagick's "only shrink, never enlarge" flag,
                    # escaped for the shell.
                    command_line = CONVERT_PATH
                    command_line += " " + CONVERT_WH + " " + str(height) + "x" + str(width) + "\>"
                    command_line += " '" + full_path + "' '" + full_path_destination_path + "' "
                if args.debug:
                    # pprint.pprint(pyexifinfo.get_json(full_path))
                    print
                    print('#DEBUG START#')
                    print(full_path)
                    print(date_time_original)
                    print(date_time_directory)
                    print(date_time_genprefix)
                    print(new_filename)
                    print(full_path_destination_dir)
                    print(full_path_destination_path)
                    print(command_line)
                    print('#DEBUG END#')
                if not os.path.exists(full_path_destination_dir):
                    print 'Creating directory: ' + full_path_destination_dir
                    os.makedirs(full_path_destination_dir)
                # Skip files that were already exported on a previous run.
                if not os.path.exists(full_path_destination_path):
                    print 'Generating: ' + full_path_destination_path
                    os.system(command_line)
                else:
                    print 'Skipping: ' + full_path_destination_path
            else:
                print("Warning: Skipping undesired file: {0}".format(full_path))
os.system("convert "+image_name+" "+png_image_name) bw_image_name = directory+"/"+filename + "_bw"+extension Image.open(image_name).convert('1').save(bw_image_name) jpg_text = pytesseract.image_to_string(Image.open(jpg_image_name)) orig_text = pytesseract.image_to_string(Image.open(image_name)) png_text = pytesseract.image_to_string(Image.open(png_image_name)) bw_text = pytesseract.image_to_string(Image.open(bw_image_name)) print "-------"*7 print "OCR results: \n" print " - Original: %s \n - JPG: %s \n - PNG: %s \n - B/W: %s" % (orig_text, jpg_text, png_text, bw_text) if metadata_check: print "-------"*7 print "Metadata\n" info = pyexifinfo.get_json(image_name) for i in info[0]: print "%s: %s" % (i, info[0].get(i)) if png_8bit and extension == ".png": print "-------"*7 print "8-bit image - Changing pallette. Extracting to %s" % (directory_pallette) # for i in {0..255}; do ./change_palette.py doge_stege.png "single-color-${i}.png" "${i}" for color_n in range(0,255): palette_image = directory_pallette + "/" + filename+"_"+str(color_n)+extension fn.palette_func(image_name, palette_image, color_n) if list_colors: csv_images_file = directory + "/" + image_name+"pixels.csv"
def file_analysis(filepath): p.ver() py = p.get_json(filepath)[0] try: pe = pefile.PE(filepath) except pefile.PEFormatError: pass pejson = {} try: pejson["Filename"] = py["File:FileName"] except: pass try: pejson["FileType"] = py["File:FileType"] except: pass try: pejson["FileSize"] = py["File:FileSize"] except: pass try: pejson["MIMEType"] = py["File:MIMEType"] except: pass try: pejson["Entropy"] = pe.OPTIONAL_HEADER.AddressOfEntryPoint except: pass try: pejson["ImageVersion"] = py["EXE:ImageVersion"] except: pass try: pejson["LinkerVersion"] = py["EXE:LinkerVersion"] except: pass try: pejson["MachineType"] = py["EXE:MachineType"] except: pass try: pejson["CPUArchitecture"] = py["EXE:CPUArchitecture"] except: pass try: pejson["CPUByteOrder"] = py["EXE:CPUByteOrder"] except: pass try: pejson["CPUType"] = py["EXE:CPUType"] except: pass try: pejson["ObjectFileType"] = py["ObjectFileType"] except: pass try: pejson["PEType"] = py["EXE:PEType"] except: pass try: pejson["Subsystem"] = py["EXE:Subsystem"] except: pass try: pejson["TimeStamp"] = py["EXE:TimeStamp"] except: pass #pejson["Trid"] = trid(filepath) return pejson
def rotate_jpeg(filename, manifest_mode=False):
    """Normalize a JPEG's EXIF orientation tag and gather file metadata.

    Despite the name, the actual pixel rotation is disabled (see the HACK
    block below): when an Orientation tag exists, the tag is stripped and
    the image re-saved to /tmp; the creation timestamp is extracted from
    EXIF tag 36867 (DateTimeOriginal) or falls back to the file mtime.

    Returns:
        dict with keys: target_file, exif_dict, datetime_str,
        datetime_epoch, album_tags.

    Side effects: may rebind the module-level global ``image_dir`` when
    manifest_mode is True; may write a file under /tmp.
    """
    global image_dir
    uploads_logger = logging.getLogger('media_downloader.rotate_jpeg')
    #print(filename)
    #print(image_dir)
    exif_dict = {}
    dt_str = ""
    dt_epoch = None
    target_file = filename
    uploads_logger.info("Opening image: %s" % filename)
    img = Image.open(filename)
    if "exif" in img.info:
        uploads_logger.debug("exif exists")
        exif_dict = piexif.load(img.info["exif"])
        #print(exif_dict)
        # extract the datetime from the tags, if available
        # 36867 == EXIF DateTimeOriginal tag id.
        if 36867 in exif_dict['Exif'].keys():
            uploads_logger.debug(exif_dict['Exif'][36867])
            exif_dt_str = exif_dict['Exif'][36867].decode("utf-8")
            uploads_logger.debug(exif_dt_str)
            # 'YYYY:MM:DD HH:MM:SS' -> ISO-ish 'YYYY-MM-DDTHH:MM:SS'.
            date_str = exif_dt_str.split()[0].replace(":", "-")
            uploads_logger.debug(date_str)
            dt_str = "%sT%s" % (date_str, exif_dt_str.split()[1])
            uploads_logger.debug(dt_str)
            #check for valid format
            dt_str_tokens = dt_str.split("-")
            if len(dt_str_tokens) == 3:
                if (int(dt_str_tokens[0]) > 0) and (int(dt_str_tokens[1]) > 0):
                    uploads_logger.info("Validated Timestamp ...")
                    # NOTE(review): treats the EXIF timestamp as UTC when
                    # computing the epoch — confirm that assumption.
                    utc_time = datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S")
                    uploads_logger.info(utc_time)
                    epoch_time = (utc_time - datetime(1970, 1, 1)).total_seconds()
                    dt_epoch = epoch_time
        else:
            # No DateTimeOriginal tag: fall back to file modification time.
            file_stats = os.stat(filename)
            uploads_logger.debug(file_stats)
            file_mod_epoch = "%d" % file_stats.st_mtime
            dt_str = time.strftime('%Y-%m-%dT%H:%M:%S',
                                   time.localtime(file_stats.st_mtime))
            dt_epoch = file_stats.st_mtime
            uploads_logger.debug(dt_str)
        # Drop the embedded thumbnail before re-dumping the EXIF blob.
        exif_dict.pop("thumbnail")
        uploads_logger.info("Now checking if the orientation exists ...")
        if piexif.ImageIFD.Orientation in exif_dict["0th"]:
            uploads_logger.debug("orientation attributes exists")
            # pop() removes the Orientation tag so the saved copy has none.
            orientation = exif_dict["0th"].pop(piexif.ImageIFD.Orientation)
            # HACK to avoid the error due to a misformatted exif tag
            if 41729 in exif_dict['Exif'].keys():
                exif_dict['Exif'].pop(41729)
            exif_bytes = piexif.dump(exif_dict)
            uploads_logger.debug(orientation)
            # HACK on 07/19/2019 to avoid the rotate since its messing things up!!
            '''
            if orientation == 1:
                uploads_logger.debug("detected orientation 1 ... rotating now!")
                img = img.rotate(-90, expand=True)
            elif orientation == 2:
                img = img.transpose(Image.FLIP_LEFT_RIGHT)
            elif orientation == 3:
                img = img.rotate(180)
            elif orientation == 4:
                img = img.rotate(180).transpose(Image.FLIP_LEFT_RIGHT)
            elif orientation == 5:
                img = img.rotate(-90, expand=True).transpose(Image.FLIP_LEFT_RIGHT)
            elif orientation == 6:
                img = img.rotate(-90, expand=True)
            elif orientation == 7:
                img = img.rotate(90, expand=True).transpose(Image.FLIP_LEFT_RIGHT)
            elif orientation == 8:
                img = img.rotate(90, expand=True)
            '''
            uploads_logger.debug("just before saving ... ")
            # Re-save (orientation tag stripped) to /tmp, not in place.
            target_file = "/tmp/%s" % os.path.basename(filename)
            #img.save(target_file)
            img.save(target_file, exif=exif_bytes)
            uploads_logger.info("Saved new file ...")
        else:
            uploads_logger.debug("no orientation attributes")
    else:
        uploads_logger.info("... fallback calc of file datetime")
        file_stats = os.stat(filename)
        uploads_logger.debug(file_stats)
        file_mod_epoch = "%d" % file_stats.st_mtime
        dt_str = time.strftime('%Y-%m-%dT%H:%M:%S',
                               time.localtime(file_stats.st_mtime))
        dt_epoch = file_stats.st_mtime
        uploads_logger.debug(dt_str)
    uploads_logger.debug(dt_epoch)
    # now extract the exif_dict using the alt method
    exif_dict = p.get_json(filename)[0]
    #print(exif_dict)
    file_info = {}
    file_info["target_file"] = target_file
    file_info["exif_dict"] = exif_dict
    # create an epoch date
    epoch_dt_str = datetime.utcfromtimestamp(0).strftime("%m-%d-%YT%H:%M:%S")
    epoch_val = 0
    if not dt_str:
        dt_str = epoch_dt_str
    uploads_logger.debug(dt_str)
    file_info["datetime_str"] = dt_str
    if not dt_epoch:
        dt_epoch = epoch_val
    uploads_logger.debug(dt_epoch)
    file_info["datetime_epoch"] = dt_epoch
    # now setup the album tags
    uploads_logger.info("Now figuring out album tags ...")
    album_tags = {}
    #print(filename)
    uploads_logger.debug(manifest_mode)
    uploads_logger.debug(image_dir)
    if manifest_mode:
        # generate a fake imagedir for the file
        image_dir = os.path.dirname(filename)
    # Each extensionless path component under image_dir becomes an album tag.
    path_tokens = filename.split("%s/" % image_dir)[1]
    #print(path_tokens.split("/"))
    for index, token in enumerate(path_tokens.split("/")):
        if not Path(token).suffix:
            #print("this is an album tag: %s" % token)
            album_tag = "album_tag_%02d" % (index + 1)
            album_tags[album_tag] = token
    #print(album_tags)
    file_info["album_tags"] = album_tags
    #print(file_info.keys())
    return file_info