예제 #1
0
def metadataCheck(filename, md5):
    """Extract metadata via exiftool, strip uninteresting keys, and flag
    values matching a known-bad signature.

    :param filename: path of the file to inspect
    :param md5: MD5 hex digest of the file (used only in the log message)
    :return: metadata dict with a 'Metadata_Alerts' entry added
    """
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata(filename)
    # Drop keys we never care about; pop() avoids the KeyError the original
    # `del` raised when a key was absent for this particular file type.
    for key in uselessexifkey:
        metadata.pop(key, None)
    hits = []
    # items() replaces Python-2-only iteritems(); works on both versions.
    for key, value in metadata.items():
        for sig in badmetalist:
            if sig == value:
                logging.critical(timestamp() + ": Bad Metadata Alert: " + key +
                                 ":" + value + " MD5:" + md5)
                hits.append("Bad_Meta:" + key + value)
    if hits:
        metadata[u'Metadata_Alerts'] = str(hits).replace("u'",
                                                         "").replace("'", '')
    else:
        metadata[u'Metadata_Alerts'] = 'None'
    return metadata
예제 #2
0
def name_from_exif(file_path, exif_mime_type_key):
    """Derive a new filename (plus year and month) from an EXIF date tag.

    :param file_path: path of the image to inspect
    :param exif_mime_type_key: metadata key holding a 'Y:m:d H:M:S' date
    :return: (new_name, year, month); when the tag is missing, the name
             falls back to the file's md5 digest and year/month are None
    """
    # Only the extension is needed; the original also bound an unused
    # `filename` local and a dead `exif_Executable` constant.
    _, file_extension = os.path.splitext(file_path)

    with exiftool.ExifTool() as et:
        exif = et.get_metadata(file_path)
        if exif_mime_type_key in exif:
            date_and_time = exif[exif_mime_type_key]
            parsed_date = datetime.datetime.strptime(date_and_time,
                                                     '%Y:%m:%d %H:%M:%S')
            new_name = parsed_date.strftime(
                "%Y-%m-%d-at-%Hh-%Mm-%S") + file_extension
            year = parsed_date.strftime("%Y")
            month = parsed_date.strftime("%m")
            return new_name, year, month
        else:
            # No usable date: use a content hash so the name is still unique.
            return md5sum(file_path) + file_extension, None, None
예제 #3
0
def make_temp_image(basename, png):
    """Convert a raw FLIR PNG array into temperatures using the Planck
    calibration constants stored in the matching JPEG's EXIF data."""
    with exiftool.ExifTool() as et:
        meta = et.get_metadata(basename + '.jpg')

    r1 = meta[u'APP1:PlanckR1']
    r2 = meta[u'APP1:PlanckR2']
    b = meta[u'APP1:PlanckB']
    o = meta[u'APP1:PlanckO']
    f = meta[u'APP1:PlanckF']

    # Occasionally the PNG contains bogus near-zero rows; clamp anything
    # at or below 10 up to the smallest plausible raw value.
    if png.min() < -o:  # problem with png file
        floor = png[png > 10].min()
        png[png <= 10] = floor

    to_temp = np.vectorize(
        lambda raw: calc_temp(raw, r1, r2, b, o, f))
    return to_temp(png)
예제 #4
0
def get_meta_data(filein):
    """
    catch all for gathering meta data, tested on:
    R3D, RAF, JPG, MOV
    :param filein: a single file path (str) or a list of paths
    :return: metadata dict of the first (or only) file
    """
    # Normalize the input first.  The original called filein.endswith()
    # before the isinstance checks, which raised AttributeError for list
    # input, and silently produced an IndexError for any other type.
    if isinstance(filein, basestring):
        # RED raw files have a dedicated reader.
        if filein.endswith('R3D'):
            return get_red_data(filein)
        files = [filein]
    elif isinstance(filein, list):
        files = filein
    else:
        raise TypeError('filein must be a str or list, got %r' % type(filein))
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata_batch(files)
    return metadata[0]
예제 #5
0
파일: helper.py 프로젝트: aranguren/tagger
def probedata(filepath, group="ALL", uuid=None, showgroups=False):
    """Uses ExifTool to probe metadata of a media file and return a python dict with those metadata.

    Args:
        filepath (str): File object of the main data file (e.g. foo.avi).
        group (str): (optional) fetch ALL,EXIF,XMP... tags.
        uuid (str): (optional) if uuid is defined and the group is ALL or Custom then fetch tags from the sqlite database as well.
        showgroups (optional): will return results as a dictionary of dictionaries for all the groups (useful for separating tag groups).

    Raises: 
        ExifException, DBException
    """
    et = exiftool.ExifTool(filepath)
    res = et.probe(group)
    # `is None` instead of `== None` (identity check for the singleton).
    if res is None:
        raise ExifException("Could not recognise file format")
    if uuid and (group == "ALL" or group == "Custom"):
        # Merge user-defined tags stored in the sqlite database.
        res.update(db.loadtags(uuid))
    return res
예제 #6
0
def exifread(image_loc):
    """Read the drone-image EXIF/XMP tags needed for geolocation.

    :param image_loc: path of the image to read
    :return: 10-tuple (img_width, img_height, cam_yaw, cam_pitch, cam_roll,
             flight_yaw, rel_alt, foc_len, lat, lon); a tuple of ten zeros
             on any read/parse failure.
    """
    with exiftool.ExifTool() as et:
        try:
            img_width = float(et.get_tag('EXIF:ExifImageWidth', image_loc))
            img_height = float(et.get_tag('EXIF:ExifImageHeight', image_loc))
            cam_yaw = float(str(et.get_tag('XMP:GimbalYawDegree', image_loc)))
            cam_pitch = float(str(et.get_tag('XMP:GimbalPitchDegree', image_loc)))
            cam_roll = float(str(et.get_tag('XMP:GimbalRollDegree', image_loc)))
            flight_yaw = float(str(et.get_tag('XMP:FlightYawDegree', image_loc)))
            rel_alt = float(str(et.get_tag('XMP:RelativeAltitude', image_loc)))
            foc_len = float(et.get_tag('EXIF:FocalLength', image_loc))
            lat = (et.get_tag('EXIF:GPSLatitude', image_loc))
            lon = (et.get_tag('EXIF:GPSLongitude', image_loc))
            return img_width, img_height, cam_yaw, cam_pitch, cam_roll, flight_yaw, rel_alt, \
                foc_len, lat, lon
        except Exception:
            # BUG FIX: the original returned ELEVEN zeros here while the
            # success path returns ten values, breaking tuple unpacking in
            # callers.  Also narrowed the bare `except:`.
            return 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
예제 #7
0
    def __import_original__(self, original_path):
        """
        safe-copy an image from original_path into the package and set all metadata

        Steps: validate the source path, copy it into the package as
        'original.<ext>', record its hash in the manifest, dump exiftool
        metadata to original-exif.json, and record that file's hash too.
        Each step is recorded via self.__append_event__.
        """
        logger = logging.getLogger(sys._getframe().f_code.co_name)

        # verify and copy the original image
        real_path = validate_path(original_path, 'file')
        filename, extension = os.path.splitext(real_path)
        # EXTENSIONS presumably maps extension aliases to a canonical form
        # (e.g. 'jpeg' -> 'jpg') -- confirm against its definition.
        self.original = '.'.join(
            ('original', EXTENSIONS[extension[1:].lower()]))
        dest_path = os.path.join(self.path,
                                 self.original)  # fix up filename extensions
        hash_orig = safe_copy(real_path, dest_path)
        self.__append_event__(
            'copied original file from {src} to {dest}'.format(src=real_path,
                                                               dest=dest_path))
        self.manifest.set(self.original, hash_orig)

        # capture and store metadata from the original file using exiftool
        # note this could be optimized by re-using a single exiftool instance, but code refactoring will have to happen
        with exiftool.ExifTool() as et:
            metadata = et.get_metadata_batch([
                dest_path,
            ])
        exif_path = os.path.join(self.path, 'original-exif.json')
        with open(exif_path, 'w') as exif_file:
            # get_metadata_batch was called with a single path, so this
            # loop normally runs exactly once.
            for d in metadata:
                json.dump(d, exif_file, sort_keys=True, indent=4)
                logger.debug(
                    'wrote exiftool metadata for original file on {exif_path}'.
                    format(exif_path=exif_path))
                for k in d.keys():
                    logger.debug("exiftool found: {key}='{value}'".format(
                        key=k, value=d[k]))
        hash_exif = hash_of_file(exif_path)
        self.__append_event__(
            'wrote exif extracted from original file in json format on {exif_path}'
            .format(exif_path=exif_path))
        self.manifest.set('original-exif.json', hash_exif)

        # capture and store technical metadata using jhove (TBD)
        logger.warning('no jhove metadata is created')
예제 #8
0
def get_comment(img_path):
    """Return the caption stored in the image's EXIF or IPTC tags, or None."""
    if not os.path.isfile(img_path):
        print("Not a valid image path.")
        return None

    with exiftool.ExifTool() as et:
        meta = et.get_metadata(img_path)

    desc = meta.get("EXIF:ImageDescription")
    abstract = meta.get("IPTC:Caption-Abstract")

    if desc and abstract:
        # Conflicting captions: pause for a human, then fall through to None.
        input(
            "Found caption in multiple EXIF tags for %s. Unhandled case." %
            os.path.basename(img_path))
        return None
    if desc:
        return desc
    if abstract:
        return abstract
    return None
예제 #9
0
    def search_at_mapillary(self, photo):
        """Search mapillary for photos taken at the same moment as *photo*.

        Reads the photo's timestamp via exiftool, reformats it to an ISO
        string, and queries the mapillary web service with it.
        """
        exiftool.executable = self.exiftool_path
        # convert values from exiftool to webservice format
        with exiftool.ExifTool() as et:
            metadata = et.get_metadata(photo)
            datetimestring = self.get_photo_timestamp(metadata)
            # EXIF dates look like 'YYYY:MM:DD hh:mm:ss'; swap the first
            # two colons for dashes so dateparser can handle the string.
            chars = list(datetimestring)
            chars[4] = '-'
            chars[7] = '-'
            datetimestring = ''.join(chars)

            dt = dateparser.parse(datetimestring)

            # The original computed int(time.mktime(dt.timetuple())) here
            # and immediately overwrote it -- dead code, removed.
            timestamp = dt.strftime('%Y-%m-%dT%H:%M:%S')

            mapillry_instance = mapillary()
            result = mapillry_instance.search_mapillary(timestamp)
        return result
예제 #10
0
def read_exif(files):
    """Read EXIF metadata for *files*, format it, and write it to a text file.

    :param files: list of image paths to scan
    :return: None (output is written via writeOutputtoText)
    """
    exif_array = []
    # NOTE(review): `file_name` is not a parameter -- it must be a
    # module-level global; confirm it is set before this is called.
    filename = file_name
    bar = Bar('Reading EXIF Data', max=len(files))
    with exiftool.ExifTool() as et:
        # get_metadata_batch already returns an iterable list; the
        # original's iter() wrapper added nothing.
        metadata = et.get_metadata_batch(files)
    for d in metadata:
        exif_array.append(d)
        bar.next()
    bar.finish()
    formatted = format_data(exif_array)
    writeOutputtoText(filename, formatted)
    print(color.GREEN + "Process Complete." + color.END)
예제 #11
0
파일: gps.py 프로젝트: ximhotep/pic2map
def filter_gps_metadata(paths):
    """Filter out metadata records that don't have GPS information.

    :param paths: Picture filenames to get metadata from
    :type paths: list(str)
    :returns: Picture files with GPS data
    :rtype: list(dict(str))

    """
    with exiftool.ExifTool() as tool:
        records = tool.get_tags_batch(TAGS, paths)

    # Keep only the records the validator accepts.
    return [record for record in records if validate_gps_metadata(record)]
예제 #12
0
def assign_uuid(filepath, overwrite=False):
    """Reads EXIF:ImageUniqueID tag for valid uuid. If no uuid exists, write mint_uuid() to EXIF:ImageUniqueID tag."""
    with exiftool.ExifTool() as et:
        uuid = et.get_tag('ImageUniqueID', filepath)

        # By default, if the `EXIF:ImageUniqueID` tag is empty, uuid is assigned to None.
        if (uuid is not None) and (overwrite is False):
            print("Tag EXIF:ImageUniqueID={} already exists in file {}.".format(uuid, filepath))

        # Else, no uuid was read by exiftool, or overwrite has been set to True.
        else:
            et.execute('-ImageUniqueID={}'.format(mint_uuid()).encode(), filepath.encode())
            # exiftool normally leaves a "<file>_original" backup; remove it
            # only if present (it is absent when -overwrite_original is in
            # the user's exiftool config, where the original raised OSError).
            backup = filepath + "_original"
            if os.path.exists(backup):
                os.remove(backup)

            # Here, we only report back if the image file's uuid was updated.
            uuid = et.get_tag('ImageUniqueID', filepath)
            if uuid is not None:
                print("Wrote tag EXIF:ImageUniqueID={} to file {}.".format(uuid, filepath))
    return uuid
예제 #13
0
파일: picker.py 프로젝트: leelouch/dupphoto
	def __init__(
		self,
		options,
		title='Select',
		log  = False,
		arrow="-->",
		footer=mainFooter,
		more="...",
		border="||--++++",
		c_selected="[X]",
		c_empty="[ ]"
	):
		"""Build the picker state for *options*.

		Each option string is wrapped in a dict recording its label,
		selection state, index, metadata placeholder and scroll offset;
		the dicts are kept both in a list and in an index lookup table.
		"""
		self.title = title
		self.arrow = arrow
		self.footer = footer
		self.more = more
		self.border = border
		self.c_selected = c_selected
		self.c_empty = c_empty
		self.filterMsg  = ""
		self.showAction = False
		self.selectIdx  = False
		self.options    = options
		self.all_options = []
		self.byIdx       = {}
		# Persistent exiftool process used later to read media metadata.
		self.et          = exiftool.ExifTool()
		self.logEn       = log
		self.showMetaData= False
		if self.logEn:
			self.initLog()

		for idx, option in enumerate(options):
			d = {
				"label"   : option
			,	"selected": False
			,	"idx"     : idx
			,	"metadata": MediaInfo()
			,	"offset"  : 0
			}
			self.byIdx[idx] = d
			self.all_options.append(d)
			# NOTE(review): re-assigned on every iteration; the final
			# value is simply len(options).
			self.length = len(self.all_options)
예제 #14
0
    def from_directory(cls, directory, progress_callback=None, exiftool_path=None):
        """
        Create an ImageSet recursively from the files in a directory.

        :param directory: root directory to walk for *.tif images
        :param progress_callback: optional callable taking a float in [0, 1]
        :param exiftool_path: optional explicit exiftool binary; falls back
            to the `exiftoolpath` environment variable when None
        :return: a new instance built from the grouped captures
        """
        cls.basedir = directory
        matches = []
        for root, dirnames, filenames in os.walk(directory):
            for filename in fnmatch.filter(filenames, '*.tif'):
                matches.append(os.path.join(root, filename))

        images = []

        if exiftool_path is None and os.environ.get('exiftoolpath') is not None:
            exiftool_path = os.path.normpath(os.environ.get('exiftoolpath'))

        total = len(matches)
        with exiftool.ExifTool(exiftool_path) as exift:
            for i, path in enumerate(matches):
                images.append(image.Image(path, exiftool_obj=exift))
                if progress_callback is not None:
                    progress_callback(float(i) / float(total))

        # Group images by capture id: {"capture_id": [img1, img2, ...]}.
        # setdefault replaces the original get/None-check dance.
        captures_index = {}
        for img in images:
            captures_index.setdefault(img.capture_id, []).append(img)

        # Iterate values directly instead of keys-then-index.
        captures = [capture.Capture(imgs) for imgs in captures_index.values()]
        if progress_callback is not None:
            progress_callback(1.0)
        return cls(captures)
예제 #15
0
    def ready_for_mapillary(self, photo, skip_words=None):
        """Return True when *photo* carries the GPS tags mapillary needs
        and its filename contains none of *skip_words*."""
        exiftool.executable = self.exiftool_path
        # Read all metadata once up front.
        with exiftool.ExifTool() as et:
            metadata = et.get_metadata(photo)

        # Both direction and latitude must be present.
        if self._get_if_exist(metadata, "EXIF:GPSImgDirection") is None:
            return False
        if self._get_if_exist(metadata, "EXIF:GPSLatitude") is None:
            return False

        basename = os.path.basename(str(photo))
        for word in (skip_words or []):
            if word in basename:
                return False
        return True
예제 #16
0
def get_metadata(malware_path):
    """Return a tab-indented, human-readable string of a sample's metadata.

    Keys with no analytic value (source path, permissions, directory,
    tool version, MIME type) are stripped before formatting.
    """
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata(malware_path)

    # pop(..., None) tolerates keys absent for this file type; the original
    # unconditionally del'd four of them (possible KeyError) and special-
    # cased only MIMEType.
    for key in (u'SourceFile', u'File:FilePermissions', u'File:Directory',
                u'ExifTool:ExifToolVersion', u'File:MIMEType'):
        metadata.pop(key, None)

    metadataString = '\n'
    for tag, value in metadata.items():
        try:
            # Drop the group prefix ('File:FileSize' -> 'FileSize').
            metadataString += '\t\t' + str(tag).split(
                ':', 1)[-1] + ": " + str(value) + "\n"
        except Exception:
            continue

    return metadataString
예제 #17
0
 def contextMenuHandler(action):
     """Dispatch the diaporama context-menu actions: pause/resume, the
     full-screen toggle, and writing a 0-5 star rating tag."""
     global isSuspended
     if action.text() == 'Pause':
         isSuspended = True
         # quit full screen mode
         newWin.showMaximized()
     elif action.text() == 'Full Screen':
         if action.isChecked():
             newWin.showFullScreen()
         else:
             newWin.showMaximized()
     elif action.text() == 'Resume':
         newWin.close()
         isSuspended = False
         playDiaporama(diaporamaGenerator, parent=window)
     # rating : the tag is written into the .mie file; the file is
     # created if needed.
     elif action.text() in ['0', '1', '2', '3', '4', '5']:
         with exiftool.ExifTool() as e:
             # NOTE(review): `name` comes from the enclosing scope --
             # presumably the current image's path; confirm.
             e.writeXMPTag(name, 'XMP:rating', int(action.text()))
예제 #18
0
def task2():
    """Count files in the current directory whose exif metadata contains
    an 'XMP:Version' tag (i.e. "Version: 1.1" per the task) and print
    the total.  Note: run this script inside ./extracted.
    """
    # Fetch metadata for every file in one exiftool batch call.
    with exiftool.ExifTool() as et:
        all_metadata = et.get_metadata_batch(os.listdir('./'))

    count = sum(1 for metadata in all_metadata if 'XMP:Version' in metadata)

    print('Total Version:1.1 files : %s' % count)
예제 #19
0
def getDngProfileDict(filename):
    """
    Read profile related tags from a dng or dcp file.
    Return a dictionary of (str) decoded {tagname : tagvalue} pairs.
    @param filename:
    @type filename: str
    @return: dictionary
    @rtype: dict
    """
    # Tags describing the camera profile / color calibration.
    profile_tags = [
        'LinearizationTable', 'ProfileLookTableData',
        'ProfileLookTableDims', 'ProfileLookTableEncoding',
        'ProfileToneCurve', 'CalibrationIlluminant1',
        'CalibrationIlluminant2', 'ColorMatrix1', 'ColorMatrix2',
        'CameraCalibration1', 'CameraCalibration2', 'ForwardMatrix1',
        'ForwardMatrix2', 'AnalogBalance'
    ]
    with exiftool.ExifTool() as et:
        return et.readBinaryDataAsDict(filename, taglist=profile_tags)
예제 #20
0
def GetDateFromFileData(currentFile):
    """Read CreateDate/FileModifyDate tags from the file and build a
    'YYYY/<month name>' destination path from the older of the two."""
    print("Checking file data for date...")
    source_path = pwd + "/" + currentFile

    with exiftool.ExifTool() as et:
        create_date = et.get_tag("CreateDate", source_path)
        modify_date = et.get_tag("FileModifyDate", source_path)

    # Prefer the older of the two candidate dates.
    oldest = GetOldestDate(create_date, modify_date)

    # Dates are 'YYYY:MM:DD ...' strings; slice out the components.
    year, month, day = oldest[0:4], oldest[5:7], oldest[8:10]
    print("Year: " + year)
    print("Month: " + month)
    print("Day: " + day)

    return str(year) + "/" + months[int(month) - 1]
예제 #21
0
def with_datetime(movie_files):
    """Return a deep copy of *movie_files* with a parsed 'datetime' value
    added to each entry, read from the first matching DATETIME_TAGS tag.

    :param movie_files: list of dicts each carrying a 'file_path' key
    :raises ValueError: when no datetime tag can be found for a file
    """
    movie_files = copy.deepcopy(movie_files)

    with exiftool.ExifTool() as et:
        for movie_file in movie_files:
            metadata = et.get_tags(DATETIME_TAGS, movie_file['file_path'])

            # DATETIME_TAGS is in preference order; take the first tag that
            # appears (possibly group-prefixed) in the returned metadata.
            # Renamed from `datetime` to avoid shadowing the stdlib module.
            found = None
            for tag in DATETIME_TAGS:
                for full_tag, value in metadata.items():
                    if tag in full_tag:
                        found = value
                if found is not None:
                    break

            if found is None:
                # BUG FIX: the original message was not an f-string, so the
                # {movie_file["file_path"]} placeholder printed literally.
                raise ValueError(
                    f'Couldn\'t get a datetime for {movie_file["file_path"]}')

            movie_file['datetime'] = parser.parse(found)
    return movie_files
예제 #22
0
def calculate_ruler(filename):
    """Estimate the real-world vertical height (mm) of the camera frame
    from EXIF and MakerNotes tags embedded in *filename*."""
    # Use exiftool to extract EXIF and MakerNotes tags from the input image
    with exiftool.ExifTool() as et:
        meta = et.get_metadata_batch([filename])[0]

    # Focus distance (m -> mm) and focal length (mm).
    focus_distance = float(meta['MakerNotes:FocusDistance']) * 1000
    focal_length = float(meta['EXIF:FocalLength'])

    # Sensor height from the 35mm-equivalent crop ratio; 24 mm is the
    # full-frame sensor height.
    sensor_height = 24 * focal_length / float(
        meta['EXIF:FocalLengthIn35mmFormat'])

    # Vertical height of the frame in mm.
    # See: https://photo.stackexchange.com/questions/12434/how-do-i-calculate-the-distance-of-an-object-in-a-photo
    return (focus_distance * sensor_height) / focal_length
예제 #23
0
def get_gps_data(videofile, sample_length=8, dtype=np.uint8):
    '''
    Returns the GPS data contained within the videofile.

    :param videofile: path of the video whose 'Unknown_gps' tag to read
    :param sample_length: length in bytes of one GPS sample row
    :param dtype: numpy dtype used to interpret the raw bytes
    :return: 2-D numpy array, one GPS sample per row
    '''

    # make sure dtype is numpy dtype object
    dtype = np.dtype(dtype)

    # use exiftool to load the gps tag
    # BUG FIX: the original read `args.video` (a CLI-parsing global)
    # instead of the `videofile` parameter, ignoring the argument.
    with exiftool.ExifTool() as et:
        data = et.get_tag('Unknown_gps', videofile)

    # decode the base64 data to a byte-string
    assert data.startswith('base64:')
    data = base64.b64decode(data[len('base64:'):])

    # convert the byte string to a numpy array
    data = np.frombuffer(data, dtype=dtype)

    # and reshape according to sample_length (in bytes)
    return data.reshape((-1, sample_length//dtype.itemsize))
예제 #24
0
def rename_image(image_path):
    """Take in image path and rename image by site, camera, and datetime."""
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata(image_path)

    # Strip non-digits: 'YYYY:MM:DD hh:mm:ss' -> 'YYYYMMDDhhmmss'.
    datetime_string = re.sub('[^0-9]+', '', metadata['EXIF:DateTimeOriginal'])

    if metadata["EXIF:Make"] == "RECONYX":
        # RECONYX frames carry a burst sequence number of one or two digits
        # (a space in position 1 means it is single-digit).
        sequence = metadata['MakerNotes:Sequence']
        if sequence[1] != " ":
            sequence_number_string = str(sequence[0:2])
        else:
            sequence_number_string = str(sequence[0])
        name = "./data/s1c1_" + datetime_string + "_" + sequence_number_string + ".jpg"
        os.rename(image_path, name)
    else:
        # No sequence info: probe upward for the first free numeric suffix.
        i = 0
        while True:
            name = "./data/s1c2_" + datetime_string + "_" + str(i) + ".jpg"
            if not os.path.exists(name):
                break
            i += 1
        os.rename(image_path, name)
    print(f"{image_path[7::]} renamed to {name}!")
예제 #25
0
def extract_exif_data_from_dir(path, collection_id, output):
    """Write a CSV of exif data for every .tif file directly inside *path*.

    The first file also emits the CSV header row (generate_row's True
    flag).  Returns "" when no .tif files are found.
    """
    tif_files = [
        entry for entry in listdir(path)
        if isfile(join(path, entry)) and entry.lower().endswith(('.tif'))
    ]
    if not tif_files:
        print(
            "Filter.py did not find any .tif files in the directory given, " +
            path)
        return ""

    # Local renamed from `output` so it no longer shadows the parameter.
    with open(join(output, generate_filename(path)), "w") as out_file:
        with exiftool.ExifTool() as exif_extractor:
            writer = csv.writer(out_file)

            # First file also writes the header row.
            generate_row(path, tif_files[0], exif_extractor, writer,
                         collection_id, True)
            for remaining in tif_files[1:]:
                generate_row(path, remaining, exif_extractor, writer,
                             collection_id)
예제 #26
0
    def setUp(self):
        """Start a persistent exiftool process and prepare a tag-stripped
        copy of the bundled rose.jpg test image."""
        self.exiftool = exiftool.ExifTool()
        self.exiftool.start()

        # Temporary directory to hold the working copy.
        tmp_dir = tempfile.mkdtemp(prefix='exiftool-test-')

        # Source image ships next to this test module.
        here = os.path.dirname(__file__)
        self.tag_source = os.path.join(here, 'rose.jpg')
        self.tag_target = os.path.join(tmp_dir, 'copy.jpg')

        shutil.copyfile(self.tag_source, self.tag_target)

        # Wipe all tags from the copy so every test starts clean.
        args = ['-overwrite_original', '-all=', self.tag_target]
        self.exiftool.execute(*[a.encode('utf-8') for a in args])
예제 #27
0
def createPSImageList(image_path, interval):
    '''Creates a list of images to be processed by PS.

    Writes <image_path>/list.csv ("file,seconds" pairs, seconds taken from
    DateTimeOriginal or a running counter) and then <image_path>/list.txt
    containing one image per `interval` seconds.
    '''
    logging.info(
        "######## Creating Lists for processing Started  ############")
    file_list_sorted = os.listdir(image_path)
    file_list_sorted.sort()
    img_count = 0
    # with-blocks guarantee both handles are closed; the original leaked
    # the file opened inline for csv.reader below.
    with open(image_path + '/list.csv', 'w') as list_file, \
            exiftool.ExifTool() as et:
        for file in file_list_sorted:
            if file.endswith(".jpg"):
                file_path = os.path.join(image_path, file)
                dateTimeString = et.get_tag("DateTimeOriginal", file_path)
                if dateTimeString:
                    # 'YYYY:MM:DD hh:mm:ss' -> seconds since midnight.
                    hh, mm, ss = dateTimeString.split(' ')[1].split(':')
                    seconds = float(hh) * 3600 + float(mm) * 60 + float(ss)
                    list_file.write(file_path + ',' + str(seconds) + '\n')
                else:  # A fall-back option for images with no time stamp.
                    img_count = img_count + 1
                    list_file.write(file_path + ',' + str(img_count) + '\n')

    with open(image_path + '/list.csv') as csv_in:
        data = csv.reader(csv_in, delimiter=',')
        # BUG FIX: the original sorted on the string column, which orders
        # "100.0" before "20.0"; compare the numeric value instead.
        sortedlist = sorted(data, key=lambda row: float(row[1]))

    initTime = float(sortedlist[0][1])
    with open(image_path + '/list.txt', 'w') as final_list_file:
        final_list_file.write(sortedlist[0][0] + '\n')
        for element in sortedlist:
            second = float(element[1])
            if second - initTime >= float(interval):
                final_list_file.write(element[0] + '\n')
                initTime = second
    logging.info("Total Images in directory: " + image_path + "  are:  " +
                 str(len(file_list_sorted)))
    logging.info(
        "######## Creating Lists for processing  Completed ############")
예제 #28
0
def index():
    """Flask view: on POST, save the uploaded PE file, extract its exif
    metadata, run the pickled classifier pipeline on pefile-derived
    features, and render a verdict page; on GET, render the upload form."""

    if request.method == "POST":
        file = request.files["file"]
        file.save(os.path.join("app/uploads", file.filename))
        files = "app/uploads/" + file.filename
        with exiftool.ExifTool() as et:
            metadata = et.get_metadata(files)

        keys = metadata.keys()
        values = metadata.values()
        # data = {}
        # for keys, values in metadata.items():
        #     data [keys] = values
        # output = json.dumps(data)

        # c_json presumably builds the feature dict from the parsed PE;
        # fast_load skips section data for speed -- confirm against c_json.
        p_json = c_json(files, pefile.PE(files, fast_load=True), 0)
        fieldnames = p_json.keys()
        test = pd.DataFrame([p_json], columns=fieldnames)
        X_to_push = test
        X_testing = test.drop(['Name'], axis=1)
        # NOTE(review): hard-coded absolute model path -- works only on the
        # original dev machine.
        clf = joblib.load(
            'D:/Final_Year_Project/APP/app/MLmodels/RFC_model.pkl')
        # Apply the same scale -> PCA -> classifier steps the model was
        # trained with.
        X_testing_scaled = clf.named_steps['scale'].transform(X_testing)
        X_testing_pca = clf.named_steps['pca'].transform(X_testing_scaled)
        y_testing_pred = clf.named_steps['clf'].predict_proba(X_testing_pca)
        # Probability (percent) of the first class; >= 40 is treated as benign.
        y_pred = y_testing_pred[0][0] * 100
        if y_pred >= 40:
            pred_text = "Not Malicious"
        else:
            pred_text = "Malicious"

        return render_template("details.html",
                               colnames=keys,
                               records=values,
                               predict=y_pred,
                               predict_text=pred_text)

    return render_template("upload.html", message="Upload")
예제 #29
0
def update_meta_df(photo_directories, column_dictionary, file_types, meta_df_path, exif_tool_executable=None,
                   append=True):
    """Scan the photo directories for files missing from the pickled
    metadata DataFrame, read their exif metadata, and append it.

    :param photo_directories: directories to scan via get_filepaths
    :param column_dictionary: dict of column-name -> list, extended in place
    :param file_types: file extensions passed to get_filepaths
    :param meta_df_path: pickle path the DataFrame is loaded from / saved to
    :param exif_tool_executable: optional explicit exiftool binary
    :param append: when False, start from a fresh empty DataFrame
    """
    if not append:
        meta_df = init_meta_df(column_dictionary)
    else:
        meta_df = load_meta_df(meta_df_path)

    filepaths = get_filepaths(photo_directories, file_types)
    new_files = set(filepaths) - set(meta_df['filepath'])
    if not new_files:
        print("no new files")
        return

    print(f'getting metadata on {len(new_files)} new files')
    ph_exif = exiftool.ExifTool(executable_=exif_tool_executable)
    ph_exif.start()
    try:
        # TODO make parallel
        for i, filepath in enumerate(new_files):
            print(f'{i}: {filepath}')
            metadata_dict = ph_exif.get_metadata(filepath)
            column_dictionary['metadata_key'].extend(metadata_dict.keys())
            column_dictionary['metadata_value'].extend(metadata_dict.values())
            # NOTE(review): dt_keys must be a module-level global -- confirm.
            time_id = get_time_id(metadata_dict, dt_keys)
            n = len(metadata_dict)
            column_dictionary['time_id'].extend([time_id] * n)
            column_dictionary['filepath'].extend([filepath] * n)
            column_dictionary['file_suffix'].extend([filepath.split('.')[-1]] * n)
            # Flush to disk every 10000 files so a crash loses little work.
            if (i != 0 and i % 10000 == 0):
                new_meta_df = dictionary_to_meta_df(column_dictionary)
                meta_df = pd.concat([meta_df, new_meta_df])
                meta_df.to_pickle(meta_df_path)
                for value in column_dictionary.values():
                    value.clear()
    finally:
        # BUG FIX: the original never stopped the exiftool subprocess,
        # leaking one process per call.
        ph_exif.terminate()

    new_meta_df = dictionary_to_meta_df(column_dictionary)
    meta_df = pd.concat([meta_df, new_meta_df])
    meta_df.to_pickle(meta_df_path)
    return
예제 #30
0
def read_metadata(filename, dataType, dataTypes, keyWords, config):
    """For each keyword of *dataType*, read its configured exif tags from
    *filename* and store the first usable (optionally regex-extracted,
    sanitized) value in keyWords[key]["value"].

    Errors are logged with a traceback rather than raised.
    """
    import logging
    import re
    try:
        import exiftool
        exiftoolPath = None
        if (config.has_option("General", "exiftoolPath")
                and (config.get("General", "exiftoolPath") != "")):
            exiftoolPath = config.get("General", "exiftoolPath")
        with exiftool.ExifTool(exiftoolPath) as et:
            for key in dataTypes[dataType]["keys"]:
                if (keyWords[key]["readMetadata"] == []):
                    continue
                regExpr = keyWords[key]["RegExprPattern"]
                tags = keyWords[key]["readMetadata"]
                for tag in tags:
                    logging.debug(
                        "Read Tag %s: execute: et.get_tag(\'-%s\', \'%s\')" %
                        (tag, tag, filename))
                    value = et.get_tag(tag, filename)
                    # `is None` replaces the original `== None` checks.
                    if value is None:
                        logging.debug("Tag was empty")
                        continue
                    logging.info("Tag value is: %s" % value)
                    if regExpr is not None:
                        value = re.search(regExpr, value)
                        value = value.group(0)
                        logging.debug(
                            "Executing regular expression \'%s\' results: %s" %
                            (regExpr, value))
                    if value is not None:
                        value = remove_forbidden_characters(value)
                        keyWords[key]["value"] = value
                        logging.info("Reading metadata \'%s\' = %s" %
                                     (keyWords[key]["name"], value))
                        break
    except Exception:
        # Narrowed from a bare except so Ctrl-C / SystemExit still work.
        logging.error("Error while reading metadata with exiftool",
                      exc_info=True)