def metadataCheck(filename, md5):
    """Extract a file's metadata with exiftool and flag known-bad values.

    :param filename: path of the file to inspect
    :param md5: MD5 of the file, included in alert log lines
    :return: the metadata dict with noise keys removed and a
        'Metadata_Alerts' key added ('None' when nothing matched)
    """
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata(filename)
    # Drop uninteresting keys; pop() tolerates keys absent from this file
    # (the original del raised KeyError in that case).
    for key in uselessexifkey:
        metadata.pop(key, None)
    hits = []
    # iteritems() is Python-2-only; items() works on both interpreters.
    for key, value in metadata.items():
        for sig in badmetalist:
            if sig == value:
                logging.critical(timestamp() + ": Bad Metadata Alert: " + key + ":" + value + " MD5:" + md5)
                hits.append("Bad_Meta:" + key + value)
    if hits:
        metadata[u'Metadata_Alerts'] = str(hits).replace("u'", "").replace("'", '')
    else:
        metadata[u'Metadata_Alerts'] = 'None'
    return metadata
def name_from_exif(file_path, exif_mime_type_key):
    """Build a timestamp-based filename from an EXIF date tag.

    :param file_path: path of the media file
    :param exif_mime_type_key: exiftool key holding the date string
    :return: (new_name, year, month); falls back to the file's MD5 plus
        original extension (with year/month None) when the tag is absent
    """
    # Only the extension is needed; the unused `filename` and the dead
    # `exif_Executable` constant from the original were removed.
    _, file_extension = os.path.splitext(file_path)
    with exiftool.ExifTool() as et:
        exif = et.get_metadata(file_path)
    if exif_mime_type_key in exif:
        # exiftool dates look like 'YYYY:MM:DD HH:MM:SS'
        parsed_date = datetime.datetime.strptime(exif[exif_mime_type_key], '%Y:%m:%d %H:%M:%S')
        new_name = parsed_date.strftime("%Y-%m-%d-at-%Hh-%Mm-%S") + file_extension
        year = parsed_date.strftime("%Y")
        month = parsed_date.strftime("%m")
        return new_name, year, month
    return md5sum(file_path) + file_extension, None, None
def make_temp_image(basename, png):
    """Convert a FLIR raw PNG to temperature using the Planck constants
    stored in the matching JPEG's EXIF data."""
    # load exif data from the sibling .jpg
    with exiftool.ExifTool() as et:
        tags = et.get_metadata(basename + '.jpg')
    planck_keys = (u'APP1:PlanckR1', u'APP1:PlanckR2', u'APP1:PlanckB',
                   u'APP1:PlanckO', u'APP1:PlanckF')
    r1, r2, b, o, f = (tags[k] for k in planck_keys)
    # sometimes there are 0 values in a row; fix them by setting to min
    if png.min() < -o:
        # problem with png file: clamp near-zero pixels to the smallest real value
        floor = png[png > 10].min()
        png[png <= 10] = floor
    to_temperature = np.vectorize(lambda raw: calc_temp(raw, r1, r2, b, o, f))
    return to_temperature(png)
def get_meta_data(filein):
    """
    catch all for gathering meta data, tested on: R3D, RAF, JPG, MOV
    :param filein: a single path or a list of paths
    :return: metadata dict for the first (or only) file
    """
    files = []
    if filein.endswith('R3D'):
        return get_red_data(filein)
    # File types tested: RAF, JPG
    # BUG FIX: `basestring` does not exist in Python 3; `str` covers the
    # single-path case on both interpreters for text paths.
    if isinstance(filein, str):
        files = [filein]
    elif isinstance(filein, list):
        files = filein
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata_batch(files)
    return metadata[0]
def probedata(filepath, group="ALL", uuid=None, showgroups=False):
    """Uses ExifTool to probe metadata of a media file and return a python
    dict with those metadata.

    Args:
        filepath (str): File object of the main data file (e.g. foo.avi).
        group (str): (optional) fetch ALL,EXIF,XMP... tags.
        uuid (str): (optional) if uuid is defined and the group is ALL or
            Custom then fetch tags from the sqlite database as well.
        showgroups (optional): will return results as a dictionary of
            dictionaries for all the groups (useful for separating tag groups).
    Raises:
        ExifException, DBException
    """
    et = exiftool.ExifTool(filepath)
    res = et.probe(group)
    # BUG FIX: compare to None with `is`, not `==`
    if res is None:
        raise ExifException("Could not recognise file format")
    if uuid and group in ("ALL", "Custom"):
        res.update(db.loadtags(uuid))
    return res
def exifread(image_loc):
    # EXIF READ: Read and store the specific EXIF tags of an image.
    # Returns a 10-tuple: (width, height, cam_yaw, cam_pitch, cam_roll,
    # flight_yaw, rel_alt, foc_len, lat, lon); all zeros on any read failure.
    with exiftool.ExifTool() as et:
        try:
            img_width = float(et.get_tag('EXIF:ExifImageWidth', image_loc))
            img_height = float(et.get_tag('EXIF:ExifImageHeight', image_loc))
            cam_yaw = float(str(et.get_tag('XMP:GimbalYawDegree', image_loc)))
            cam_pitch = float(str(et.get_tag('XMP:GimbalPitchDegree', image_loc)))
            cam_roll = float(str(et.get_tag('XMP:GimbalRollDegree', image_loc)))
            flight_yaw = float(str(et.get_tag('XMP:FlightYawDegree', image_loc)))
            rel_alt = float(str(et.get_tag('XMP:RelativeAltitude', image_loc)))
            foc_len = float(et.get_tag('EXIF:FocalLength', image_loc))
            lat = (et.get_tag('EXIF:GPSLatitude', image_loc))
            lon = (et.get_tag('EXIF:GPSLongitude', image_loc))
            return img_width, img_height, cam_yaw, cam_pitch, cam_roll, flight_yaw, rel_alt, \
                foc_len, lat, lon
        # BUG FIX: the bare except also swallowed KeyboardInterrupt, and its
        # fallback returned ELEVEN zeros while the success path returns TEN
        # values, so unpacking callers broke on failure.
        except Exception:
            return 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
def __import_original__(self, original_path):
    """
    safe-copy an image from original_path into the package and set all metadata

    Side effects: copies the file into self.path, records two manifest
    entries ('original.<ext>' and 'original-exif.json'), appends audit
    events, and writes the exiftool dump as JSON next to the copy.
    """
    logger = logging.getLogger(sys._getframe().f_code.co_name)
    # verify and copy the original image
    real_path = validate_path(original_path, 'file')
    filename, extension = os.path.splitext(real_path)
    # normalize the extension through the project's EXTENSIONS map
    self.original = '.'.join(
        ('original', EXTENSIONS[extension[1:].lower()]))
    dest_path = os.path.join(self.path, self.original)
    # fix up filename extensions
    hash_orig = safe_copy(real_path, dest_path)
    self.__append_event__(
        'copied original file from {src} to {dest}'.format(src=real_path, dest=dest_path))
    self.manifest.set(self.original, hash_orig)
    # capture and store metadata from the original file using exiftool
    # note this could be optimized by re-using a single exiftool instance, but code refactoring will have to happen
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata_batch([
            dest_path,
        ])
    exif_path = os.path.join(self.path, 'original-exif.json')
    with open(exif_path, 'w') as exif_file:
        # batch was called with one path, so this loop runs once
        for d in metadata:
            json.dump(d, exif_file, sort_keys=True, indent=4)
            logger.debug(
                'wrote exiftool metadata for original file on {exif_path}'.
                format(exif_path=exif_path))
            for k in d.keys():
                logger.debug("exiftool found: {key}='{value}'".format(
                    key=k, value=d[k]))
    hash_exif = hash_of_file(exif_path)
    self.__append_event__(
        'wrote exif extracted from original file in json format on {exif_path}'
        .format(exif_path=exif_path))
    self.manifest.set('original-exif.json', hash_exif)
    # capture and store technical metadata using jhove (TBD)
    logger.warning('no jhove metadata is created')
def get_comment(img_path):
    """Return the caption stored in an image's EXIF or IPTC tags.

    Returns None for a missing file, when no caption exists, or when
    both tags carry a caption (deliberately unhandled: pauses for the
    operator via input()).
    """
    if not os.path.isfile(img_path):
        print("Not a valid image path.")
        return None
    with exiftool.ExifTool() as et:
        tags = et.get_metadata(img_path)
    description = tags.get("EXIF:ImageDescription")
    abstract = tags.get("IPTC:Caption-Abstract")
    if description and abstract:
        # block for operator attention, then fall through to None
        input(
            "Found caption in multiple EXIF tags for %s. Unhandled case." %
            os.path.basename(img_path))
        return None
    return description or abstract or None
def search_at_mapillary(self, photo):
    """Look up a photo on Mapillary by its capture timestamp.

    :param photo: path of the photo to read the timestamp from
    :return: result of mapillary().search_mapillary(timestamp)
    """
    exiftool.executable = self.exiftool_path
    # convert values from exiftool to webservice format
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata(photo)
    datetimestring = self.get_photo_timestamp(metadata)
    # EXIF dates are 'YYYY:MM:DD ...'; swap the date colons for dashes
    chars = list(datetimestring)
    chars[4] = '-'
    chars[7] = '-'
    datetimestring = ''.join(chars)
    dt = dateparser.parse(datetimestring)
    # BUG FIX: removed dead `timestamp = int(time.mktime(dt.timetuple()))`,
    # which was immediately overwritten by the strftime below.
    timestamp = dt.strftime('%Y-%m-%dT%H:%M:%S')
    mapillry_instance = mapillary()
    result = mapillry_instance.search_mapillary(timestamp)
    return result
def read_exif(files):
    """
    Read EXIF metadata for each file, format it, and write it to text.

    :param files: list of file paths to scan
    :return: None (output goes through writeOutputtoText)
    """
    # NOTE(review): `file_name` is not a parameter — it must be a module-level
    # global set by the caller; confirm before relying on this.
    filename = file_name
    bar = Bar('Reading EXIF Data', max=len(files))
    exif_array = []
    with exiftool.ExifTool() as et:
        # get_metadata_batch already returns a list; the original iter()
        # wrapper added nothing.
        for d in et.get_metadata_batch(files):
            exif_array.append(d)
            bar.next()
    bar.finish()
    formatted = format_data(exif_array)
    writeOutputtoText(filename, formatted)
    print(color.GREEN + "Process Complete." + color.END)
def filter_gps_metadata(paths):
    """Filter out metadata records that don't have GPS information.

    :param paths: Picture filenames to get metadata from
    :type paths: list(str)
    :returns: Picture files with GPS data
    :rtype: list(dict(str))
    """
    with exiftool.ExifTool() as tool:
        records = tool.get_tags_batch(TAGS, paths)
    return [record for record in records if validate_gps_metadata(record)]
def assign_uuid(filepath, overwrite=False):
    """Reads EXIF:ImageUniqueID tag for valid uuid. If no uuid exists,
    write mint_uuid() to EXIF:ImageUniqueID tag."""
    with exiftool.ExifTool() as et:
        uuid = et.get_tag('ImageUniqueID', filepath)
        # A file without the tag yields uuid == None.
        if (uuid is not None) and (overwrite is False):
            print("Tag EXIF:ImageUniqueID={} already exists in file {}.".format(uuid, filepath))
        else:
            # No uuid was read, or the caller asked to overwrite: mint one.
            et.execute('-ImageUniqueID={}'.format(mint_uuid()).encode(), filepath.encode())
            # exiftool leaves a "<name>_original" backup; discard it.
            os.remove(filepath + "_original")
            # Re-read so we only report what actually landed in the file.
            uuid = et.get_tag('ImageUniqueID', filepath)
            if uuid is not None:
                print("Wrote tag EXIF:ImageUniqueID={} to file {}.".format(uuid, filepath))
        return uuid
def __init__(
        self, options, title='Select', log=False, arrow="-->",
        footer=mainFooter, more="...", border="||--++++",
        c_selected="[X]", c_empty="[ ]"):
    """Initialize the selector UI state and index every option by position."""
    self.title = title
    self.arrow = arrow
    self.footer = footer
    self.more = more
    self.border = border
    self.c_selected = c_selected
    self.c_empty = c_empty
    self.filterMsg = ""
    self.showAction = False
    self.selectIdx = False
    self.options = options
    self.all_options = []
    self.byIdx = {}
    self.et = exiftool.ExifTool()
    self.logEn = log
    self.showMetaData = False
    if self.logEn:
        self.initLog()
    # One record per option, shared between the index and the ordered list.
    for position, label in enumerate(options):
        record = {
            "label": label,
            "selected": False,
            "idx": position,
            "metadata": MediaInfo(),
            "offset": 0,
        }
        self.byIdx[position] = record
        self.all_options.append(record)
    self.length = len(self.all_options)
def from_directory(cls, directory, progress_callback=None, exiftool_path=None):
    """
    Create an ImageSet recursively from the files in a directory.

    :param directory: root directory to walk for *.tif files
    :param progress_callback: optional callable receiving a float in [0, 1]
    :param exiftool_path: optional exiftool binary path; falls back to the
        'exiftoolpath' environment variable
    :return: a new ImageSet built from the grouped captures
    """
    cls.basedir = directory
    matches = []
    for root, dirnames, filenames in os.walk(directory):
        for filename in fnmatch.filter(filenames, '*.tif'):
            matches.append(os.path.join(root, filename))
    images = []
    if exiftool_path is None and os.environ.get('exiftoolpath') is not None:
        exiftool_path = os.path.normpath(os.environ.get('exiftoolpath'))
    with exiftool.ExifTool(exiftool_path) as exift:
        total = len(matches)  # hoisted out of the loop
        for i, path in enumerate(matches):
            images.append(image.Image(path, exiftool_obj=exift))
            if progress_callback is not None:
                progress_callback(float(i) / float(total))
    # Group images into captures by capture_id:
    # { "capture_id": [img1, img2, ...] }
    captures_index = {}
    for img in images:
        captures_index.setdefault(img.capture_id, []).append(img)
    # Iterate values directly instead of re-indexing by key.
    captures = [capture.Capture(imgs) for imgs in captures_index.values()]
    if progress_callback is not None:
        progress_callback(1.0)
    return cls(captures)
def ready_for_mapillary(self, photo, skip_words=None):
    """Return True when the photo carries the GPS direction and latitude
    tags needed for a Mapillary upload and its name contains no skip word."""
    exiftool.executable = self.exiftool_path
    # convert values from exiftool to webservice format
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata(photo)
    if self._get_if_exist(metadata, "EXIF:GPSImgDirection") is None:
        return False
    if self._get_if_exist(metadata, "EXIF:GPSLatitude") is None:
        return False
    if skip_words is not None:
        photo_name = os.path.basename(str(photo))
        if any(word in photo_name for word in skip_words):
            return False
    return True
def get_metadata(malware_path):
    """Return a tab-indented, newline-separated string of a sample's
    metadata, minus bookkeeping keys exiftool always emits.

    :param malware_path: path of the sample to inspect
    :return: formatted "key: value" lines as one string
    """
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata(malware_path)
    # BUG FIX: the original unguarded `del`s raised KeyError when a key was
    # absent; pop(None) skips missing keys silently.
    for noise_key in (u'SourceFile', u'File:FilePermissions',
                      u'File:Directory', u'ExifTool:ExifToolVersion'):
        metadata.pop(noise_key, None)
    try:
        del metadata[u'File:MIMEType']
    # Narrowed from `except Exception`: only a missing key is expected here,
    # and the warning print for that case is preserved.
    except KeyError as e:
        print("[WARNING] ", e)
    metadataString = '\n'
    for tag, value in metadata.items():
        try:
            metadataString += '\t\t' + str(tag).split(
                ':', 1)[-1] + ": " + str(value) + "\n"
        except Exception:
            continue
    return metadataString
def contextMenuHandler(action):
    """Dispatch a slideshow context-menu action.

    Reads/writes module globals: isSuspended, newWin, window,
    diaporamaGenerator, name. `action` is a Qt-style action whose text()
    selects the behavior.
    """
    global isSuspended
    if action.text() == 'Pause':
        isSuspended = True
        # quit full screen mode
        newWin.showMaximized()
    elif action.text() == 'Full Screen':
        if action.isChecked():
            newWin.showFullScreen()
        else:
            newWin.showMaximized()
    elif action.text() == 'Resume':
        newWin.close()
        isSuspended = False
        playDiaporama(diaporamaGenerator, parent=window)
    # rating : the tag is written into the .mie file; the file is
    # created if needed.
    elif action.text() in ['0', '1', '2', '3', '4', '5']:
        with exiftool.ExifTool() as e:
            e.writeXMPTag(name, 'XMP:rating', int(action.text()))
def task2():
    """ How many files contain Version: 1.1 in their metadata?
    Note: move this scrip inside ./extracted

    lookingfor = {'SourceFile': '4jGg.txt', 'ExifTool:ExifToolVersion': 11.94, 'File:FileName': '4jGg.txt', 'File:Directory': '.', 'File:FileSize': 2844, 'File:FileModifyDate': '2020:05:13 22:02:50-04:00', 'File:FileAccessDate': '2020:05:13 22:39:53-04:00', 'File:FileInodeChangeDate': '2020:05:13 22:02:50-04:00', 'File:FilePermissions': 644, 'File:FileType': 'MIE', 'File:FileTypeExtension': 'MIE', 'File:MIMEType': 'application/x-mie', 'XMP:XMPToolkit': 'Image::ExifTool 10.80', 'XMP:Version': 1.1}
    """
    files = os.listdir('./')  # every file in the current directory
    with exiftool.ExifTool() as et:
        files_metadata = et.get_metadata_batch(files)
    # count the records carrying an 'XMP:Version' tag
    count = sum(1 for metadata in files_metadata if 'XMP:Version' in metadata)
    print('Total Version:1.1 files : %s' %count)
def getDngProfileDict(filename):
    """
    Read profile related tags from a dng or dcp file.
    Return a dictionary of (str) decoded {tagname : tagvalue} pairs.
    @param filename:
    @type filename: str
    @return: dictionary
    @rtype: dict
    """
    profile_tags = [
        'LinearizationTable',
        'ProfileLookTableData',
        'ProfileLookTableDims',
        'ProfileLookTableEncoding',
        'ProfileToneCurve',
        'CalibrationIlluminant1',
        'CalibrationIlluminant2',
        'ColorMatrix1',
        'ColorMatrix2',
        'CameraCalibration1',
        'CameraCalibration2',
        'ForwardMatrix1',
        'ForwardMatrix2',
        'AnalogBalance',
    ]
    with exiftool.ExifTool() as e:
        return e.readBinaryDataAsDict(filename, taglist=profile_tags)
def GetDateFromFileData(currentFile):
    """Derive a 'YYYY/MonthName' destination path from a file's embedded dates.

    Reads CreateDate and FileModifyDate via exiftool, picks the older one
    (GetOldestDate), and maps the month through the module-level `months`
    list. Uses the module-level `pwd` as the source directory.
    """
    print("Checking file data for date...")
    sourcePath = pwd + "/" + currentFile
    createDateTag = "CreateDate"
    fileModifyDateTag = "FileModifyDate"
    with exiftool.ExifTool() as et:
        createDate = et.get_tag(createDateTag, sourcePath)
        fileModifyDate = et.get_tag(fileModifyDateTag, sourcePath)
    oldestDate = GetOldestDate(createDate, fileModifyDate)
    # exiftool dates are 'YYYY:MM:DD HH:MM:SS'; slice out the components
    year = oldestDate[0:4]
    month = oldestDate[5:7]
    day = oldestDate[8:10]
    print("Year: " + year)
    print("Month: " + month)
    print("Day: " + day)
    # Removed the dead `destinationPath = -1` placeholder: the value was
    # always overwritten before use.
    destinationPath = (str(year) + "/" + months[int(month) - 1])
    return destinationPath
def with_datetime(movie_files):
    """Return a deep copy of movie_files with a parsed 'datetime' added.

    For each entry, tries the DATETIME_TAGS in order and uses the first
    tag whose name matches a key in the file's metadata.

    :param movie_files: list of dicts, each with a 'file_path' key
    :raises ValueError: when no datetime tag can be found for a file
    """
    movie_files = copy.deepcopy(movie_files)
    with exiftool.ExifTool() as et:
        for movie_file in movie_files:
            metadata = et.get_tags(DATETIME_TAGS, movie_file['file_path'])
            found = None
            for tag in DATETIME_TAGS:
                for full_tag, value in metadata.items():
                    if tag in full_tag:
                        found = value
                if found is not None:
                    break
            if found is None:
                # BUG FIX: the message lacked the f prefix, so the
                # placeholder was printed literally instead of the path.
                raise ValueError(
                    f'Couldn\'t get a datetime for {movie_file["file_path"]}')
            movie_file['datetime'] = parser.parse(found)
    return movie_files
def calculate_ruler(filename):
    """Compute the vertical height of the photographed frame in mm from the
    image's EXIF and MakerNotes tags."""
    # Use exiftool to extract EXIF and MakerNotes tags from the input image
    with exiftool.ExifTool() as et:
        tags = et.get_metadata_batch([filename])[0]
    # Focus Distance in mm
    focus_distance = float(tags['MakerNotes:FocusDistance']) * 1000
    # Focal length in mm
    focal_length = float(tags['EXIF:FocalLength'])
    # Sensor height: Compute from the ratio between Focal Length and
    # 35mm-Equivalent Focal Length tags in mm
    sensor_height = 24 * focal_length / float(tags['EXIF:FocalLengthIn35mmFormat'])
    # Vertical height of the frame in mm
    # See: https://photo.stackexchange.com/questions/12434/how-do-i-calculate-the-distance-of-an-object-in-a-photo
    return (focus_distance * sensor_height) / focal_length
def get_gps_data(videofile, sample_length=8, dtype=np.uint8):
    '''
    Returns the GPS data contained within the videofile.

    :param videofile: path of the video to read the Unknown_gps tag from
    :param sample_length: bytes per GPS sample (row width before dtype scaling)
    :param dtype: numpy dtype (or convertible) used to interpret the payload
    :return: 2-D numpy array, one GPS sample per row
    '''
    # make sure dtype is numpy dtype object
    dtype = np.dtype(dtype)
    # use exiftool to load the gps tag
    with exiftool.ExifTool() as et:
        # BUG FIX: previously read the global `args.video` instead of the
        # `videofile` parameter, ignoring the caller's argument.
        data = et.get_tag('Unknown_gps', videofile)
    # decode the base64 data to a byte-string
    assert data.startswith('base64:')
    data = base64.b64decode(data[len('base64:'):])
    # convert the byte string to a numpy array
    data = np.frombuffer(data, dtype=dtype)
    # and reshape according to sample_length (in bytes)
    return data.reshape((-1, sample_length // dtype.itemsize))
def rename_image(image_path):
    """Take in image path and rename image by site, camera, and datetime."""
    # Read the capture metadata.
    with exiftool.ExifTool() as et:
        tags = et.get_metadata(image_path)
    # Keep only the digits of the original timestamp.
    datetime_string = re.sub('[^0-9]+', '', tags['EXIF:DateTimeOriginal'])
    if tags["EXIF:Make"] == "RECONYX":
        # RECONYX stores a burst sequence number; it may be 1 or 2 chars wide.
        sequence = tags['MakerNotes:Sequence']
        if sequence[1] != " ":
            sequence_number_string = str(sequence[0:2])
        else:
            sequence_number_string = str(sequence[0])
        name = "./data/s1c1_" + datetime_string + "_" + sequence_number_string + ".jpg"
        os.rename(image_path, name)
    else:
        # No sequence tag: probe for the first free suffix.
        i = 0
        name = "./data/s1c2_" + datetime_string + "_" + str(i) + ".jpg"
        while os.path.exists(name):
            i += 1
            name = "./data/s1c2_" + datetime_string + "_" + str(i) + ".jpg"
        os.rename(image_path, name)
    print(f"{image_path[7::]} renamed to {name}!")
def extract_exif_data_from_dir(path, collection_id, output):
    """Write one CSV of EXIF data for all .tif files found in `path`.

    :param path: directory to scan for .tif files
    :param collection_id: collection identifier passed through to generate_row
    :param output: directory to write the generated CSV into
    :return: "" when no .tif files are found; otherwise None
    """
    tif_files = [
        f for f in listdir(path)
        if isfile(join(path, f)) and f.lower().endswith('.tif')
    ]
    if not tif_files:
        print(
            "Filter.py did not find any .tif files in the directory given, " +
            path)
        return ""
    # BUG FIX: the open file handle used to shadow the `output` directory
    # parameter inside the with-block; renamed to keep both usable.
    with open(join(output, generate_filename(path)), "w") as output_file:
        with exiftool.ExifTool() as exif_extractor:
            csv_writer = csv.writer(output_file)
            # first row also writes the header (True flag)
            generate_row(path, tif_files[0], exif_extractor, csv_writer,
                         collection_id, True)
            for file in tif_files[1:]:
                generate_row(path, file, exif_extractor, csv_writer,
                             collection_id)
def setUp(self):
    """Start a persistent exiftool process and prepare a tag-stripped
    working copy of the sample image for each test."""
    # Prepare exiftool.
    self.exiftool = exiftool.ExifTool()
    self.exiftool.start()
    # Temporary directory for the working copy.
    workdir = tempfile.mkdtemp(prefix='exiftool-test-')
    # Locate the example image next to this test module.
    here = os.path.dirname(__file__)
    self.tag_source = os.path.join(here, 'rose.jpg')
    self.tag_target = os.path.join(workdir, 'copy.jpg')
    shutil.copyfile(self.tag_source, self.tag_target)
    # Strip every tag from the copy so tests start from a clean slate.
    params = ['-overwrite_original', '-all=', self.tag_target]
    self.exiftool.execute(*(p.encode('utf-8') for p in params))
def createPSImageList(image_path, interval):
    '''Creates a list of images to be processed by PS.

    Writes <image_path>/list.csv ("path,seconds" per .jpg) and then
    <image_path>/list.txt containing the images sampled at least
    `interval` seconds apart.
    '''
    logging.info(
        "######## Creating Lists for processing Started ############")
    file_list_sorted = os.listdir(image_path)
    file_list_sorted.sort()
    img_count = 0
    # Context managers replace the original manual open()/close() pairs.
    with open(image_path + '/list.csv', 'w') as list_file, \
            exiftool.ExifTool() as et:
        for file in file_list_sorted:
            if file.endswith(".jpg"):
                file_path = os.path.join(image_path, file)
                dateTimeString = et.get_tag("DateTimeOriginal", file_path)
                if dateTimeString:
                    hh, mm, ss = dateTimeString.split(' ')[1].split(':')
                    seconds = float(hh) * 3600 + float(mm) * 60 + float(ss)
                    list_file.write(file_path + ',' + str(seconds) + '\n')
                else:
                    # A fall-back option for images with no time stamp.
                    img_count = img_count + 1
                    list_file.write(file_path + ',' + str(img_count) + '\n')
    with open(image_path + '/list.csv') as csv_in:
        data = csv.reader(csv_in, delimiter=',')
        # BUG FIX: itemgetter(1) compared the seconds column as strings
        # (so "100.0" sorted before "20.0"); sort numerically instead.
        sortedlist = sorted(data, key=lambda row: float(row[1]))
    initTime = float(sortedlist[0][1])
    with open(image_path + '/list.txt', 'w') as final_list_file:
        final_list_file.write(sortedlist[0][0] + '\n')
        for element in sortedlist:
            second = float(element[1])
            if second - initTime >= float(interval):
                final_list_file.write(element[0] + '\n')
                initTime = second
    logging.info("Total Images in directory: " + image_path + " are: " +
                 str(len(file_list_sorted)))
    logging.info(
        "######## Creating Lists for processing Completed ############")
def index():
    """Flask route: on POST, save the uploaded file, extract its metadata
    with exiftool, score it with a pickled scikit-learn pipeline
    (scale -> pca -> clf), and render the verdict; on GET, show the
    upload form."""
    if request.method == "POST":
        file = request.files["file"]
        file.save(os.path.join("app/uploads", file.filename))
        files = "app/uploads/" + file.filename
        with exiftool.ExifTool() as et:
            metadata = et.get_metadata(files)
        keys = metadata.keys()
        values = metadata.values()
        # data = {}
        # for keys, values in metadata.items():
        #     data [keys] = values
        # output = json.dumps(data)
        # Build a one-row feature frame from the PE headers.
        p_json = c_json(files, pefile.PE(files, fast_load=True), 0)
        fieldnames = p_json.keys()
        test = pd.DataFrame([p_json], columns=fieldnames)
        X_to_push = test
        X_testing = test.drop(['Name'], axis=1)
        # NOTE(review): absolute Windows path — breaks on any other machine.
        clf = joblib.load(
            'D:/Final_Year_Project/APP/app/MLmodels/RFC_model.pkl')
        X_testing_scaled = clf.named_steps['scale'].transform(X_testing)
        X_testing_pca = clf.named_steps['pca'].transform(X_testing_scaled)
        y_testing_pred = clf.named_steps['clf'].predict_proba(X_testing_pca)
        # probability of class 0, as a percentage
        y_pred = y_testing_pred[0][0] * 100
        if y_pred >= 40:
            pred_text = "Not Malicious"
        else:
            pred_text = "Malicious"
        return render_template("details.html", colnames=keys, records=values,
                               predict=y_pred, predict_text=pred_text)
    return render_template("upload.html", message="Upload")
def update_meta_df(photo_directories, column_dictionary, file_types,
                   meta_df_path, exif_tool_executable=None, append=True):
    """Scan photo directories for files not yet in the metadata DataFrame,
    read their exiftool metadata, and append the rows to the pickled frame.

    :param photo_directories: directories to scan (via get_filepaths)
    :param column_dictionary: dict of column-name -> list accumulators
    :param file_types: file extensions passed to get_filepaths
    :param meta_df_path: pickle path of the metadata DataFrame
    :param exif_tool_executable: optional exiftool binary path
    :param append: when False, start from a fresh frame instead of loading
    """
    if not append:
        meta_df = init_meta_df(column_dictionary)
    else:
        meta_df = load_meta_df(meta_df_path)
    filepaths = get_filepaths(photo_directories, file_types)
    new_files = set(filepaths) - set(meta_df['filepath'])
    if not new_files:
        print("no new files")
        return
    print(f'getting metadata on {len(new_files)} new files')
    ph_exif = exiftool.ExifTool(executable_=exif_tool_executable)
    ph_exif.start()
    # BUG FIX: the exiftool subprocess was started but never terminated;
    # the try/finally guarantees shutdown even when a read raises.
    try:
        # TODO make parallel
        for i, filepath in enumerate(new_files):
            print(f'{i}: {filepath}')
            metadata_dict = ph_exif.get_metadata(filepath)
            n_rows = len(metadata_dict)
            column_dictionary['metadata_key'].extend(metadata_dict.keys())
            column_dictionary['metadata_value'].extend(metadata_dict.values())
            time_id = get_time_id(metadata_dict, dt_keys)
            column_dictionary['time_id'].extend([time_id] * n_rows)
            column_dictionary['filepath'].extend([filepath] * n_rows)
            column_dictionary['file_suffix'].extend(
                [filepath.split('.')[-1]] * n_rows)
            # Periodic checkpoint: flush accumulated rows every 10k files.
            if (i != 0 and i % 10000 == 0):
                new_meta_df = dictionary_to_meta_df(column_dictionary)
                meta_df = pd.concat([meta_df, new_meta_df])
                meta_df.to_pickle(meta_df_path)
                for value in column_dictionary.values():
                    value.clear()
        new_meta_df = dictionary_to_meta_df(column_dictionary)
        meta_df = pd.concat([meta_df, new_meta_df])
        meta_df.to_pickle(meta_df_path)
    finally:
        ph_exif.terminate()
    return
def read_metadata(filename, dataType, dataTypes, keyWords, config):
    """Read metadata tags for every key of a data type and store the first
    usable value into keyWords[key]["value"].

    For each key, the configured tags are tried in order; the first tag
    that yields a value (optionally filtered through the key's regular
    expression) wins. Errors are logged, never raised (boundary function).
    """
    import logging
    import re
    try:
        import exiftool
        # Optional explicit exiftool binary path from the config.
        exiftoolPath = None
        if (config.has_option("General", "exiftoolPath")
                and (config.get("General", "exiftoolPath") != "")):
            exiftoolPath = config.get("General", "exiftoolPath")
        with exiftool.ExifTool(exiftoolPath) as et:
            for key in dataTypes[dataType]["keys"]:
                if (keyWords[key]["readMetadata"] == []):
                    continue
                regExpr = keyWords[key]["RegExprPattern"]
                tags = keyWords[key]["readMetadata"]
                for tag in tags:
                    logging.debug(
                        "Read Tag %s: execute: et.get_tag(\'-%s\', \'%s\')"
                        % (tag, tag, filename))
                    value = et.get_tag(tag, filename)
                    # BUG FIX: None comparisons now use identity (is/is not)
                    # instead of ==/!=.
                    if value is None:
                        logging.debug("Tag was empty")
                        continue
                    logging.info("Tag value is: %s" % value)
                    if regExpr is not None:
                        value = re.search(regExpr, value)
                        value = value.group(0)
                        logging.debug(
                            "Executing regular expression \'%s\' results: %s"
                            % (regExpr, value))
                    if value is not None:
                        value = remove_forbidden_characters(value)
                        keyWords[key]["value"] = value
                        logging.info("Reading metadata \'%s\' = %s"
                                     % (keyWords[key]["name"], value))
                        # stop at the first tag that produced a value
                        break
    except:
        # deliberate broad catch: metadata reading is best-effort
        logging.error("Error while reading metadata with exiftool",
                      exc_info=True)