def __init__(
        self,
        full_file_name: str,
        et_process: exiftool.ExifTool,
        file_type: Optional[FileType] = None,
    ) -> None:
        """
        Set up ExifTool-backed metadata access for one photo or video.

        :param full_file_name: the file from which to get metadata
        :param et_process: instance of the ExifTool class, which allows
         calling ExifTool without it exiting after each call
        :param file_type: photo or video. When None (and a file name was
         given), it is looked up from the file name with
         fileformats.file_type_from_splitext()
        """

        super().__init__()

        has_file_name = full_file_name is not None

        self.full_file_name = full_file_name
        self.ext = (
            fileformats.extract_extension(full_file_name)
            if has_file_name else None
        )
        self.metadata = {}
        self.metadata_string_format = {}
        self.et_process = et_process

        if has_file_name and file_type is None:
            file_type = fileformats.file_type_from_splitext(
                file_name=full_file_name)
        # From here on a file type is mandatory
        assert file_type is not None
        self.file_type = file_type

        # Every preview image name we know about (others may exist), ordered
        # by index. Keep in sync with preview_smallest and preview256 below.
        previews_by_index = sorted(
            _index_preview.items(), key=lambda item: item[0])
        self.index_preview = OrderedDict(previews_by_index)

        # Preview indexes to use for the smallest preview, keyed by extension.
        # An extension missing from this dict means the file format always
        # contains a "ThumbnailImage".
        self.preview_smallest = {
            "crw": (2, ),
            "dng": (4, 3, 0),
            "fff": (3, ),
            "iiq": (4, ),
            "mrw": (0, ),
            "nef": (4, 3),
            "raw": (2, ),
            "3fr": (3, 4),
        }

        # Formats that might contain a thumbnail, but might not
        self.may_have_thumbnail = ("crw", "mrw", "orf", "raw", "x3f")

        # Preview images that are at least 256 pixels big, according to
        # self.index_preview
        self.preview256 = {
            "arw": (0, ),
            "cr2": (0, ),
            "cr3": (0, ),
            "crw": (2, ),
            "dng": (0, 3),
            "fff": (3, ),
            "iiq": (4, ),
            "mrw": (0, ),
            # along with DNG quite possibly the most inconsistent format
            "nef": (0, 4, 2, 3),
            "nrw": (0, 1),
            "orf": (0, ),
            "pef": (0, ),
            "raf": (0, ),
            "raw": (2, ),
            "rw2": (2, ),
            "sr2": (0, ),
            "srw": (0, ),
            "x3f": (0, 2),
            "3fr": (3, 4),
        }

        self.ignore_tiff_preview_256 = ("cr2", )
def scan(
    folder: str,
    disk_cach_cleared: bool,
    scan_types: List[str],
    errors: bool,
    outfile: str,
    keep_file_names: bool,
    analyze_previews: bool,
) -> Tuple[List[PhotoAttributes], List[VideoAttributes]]:
    """
    Walk a folder and collect attributes of the photos and videos in it.

    Phase 1 walks ``folder`` and selects the files whose extension is in
    ``scan_types``. Unless the disk cache is known to be cleared, files that
    vmtouch_output() reports as having cached bytes are set aside and
    reported to the user, who may then choose to exit.

    Phase 2 creates a VideoAttributes, ExifToolPhotoAttributes or
    PhotoAttributes instance for every selected file.

    :param folder: top level folder to walk
    :param disk_cach_cleared: if True, every matching file is analyzed
     without consulting vmtouch_output(). Forced to True when
     analyze_previews is set.
    :param scan_types: file extensions to include, in the form returned by
     extract_extension()
    :param errors: if True, run phase 2 under show_errors() and do not show
     a progress bar; otherwise stderr is redirected to os.devnull
    :param outfile: if not None, path of a file to which the tuple
     (photos, videos) is pickled
    :param keep_file_names: if False and outfile is given, the file_name
     attribute of every result is set to None before pickling (this also
     affects the returned objects)
    :param analyze_previews: passed on to the attribute classes and to
     their process() method
    :return: the photo attributes and the video attributes collected
    """

    global stop
    global kill

    # Exact file names to skip. This has to be a collection of names: with a
    # bare string, `filename in problematic_files` is a substring test, which
    # would also skip files called e.g. "M8.DNG".
    problematic_files = ("RAW_LEICA_M8.DNG", )

    stop = kill = False

    pbs = progress_bar_scanning()
    pbs.start()

    test_files = []
    not_tested = []

    # Phase 1
    # Determine which files are safe to test i.e. are not cached

    if analyze_previews:
        disk_cach_cleared = True

    try:
        for dir_name, _subdirs, filenames in walk(folder):
            for filename in filenames:
                if filename in problematic_files:
                    continue
                ext = extract_extension(filename)
                if ext not in scan_types:
                    continue

                full_file_name = os.path.join(dir_name, filename)

                if disk_cach_cleared:
                    test_files.append((full_file_name, ext.upper()))
                    continue

                bytes_cached, _total, _in_memory = vmtouch_output(
                    full_file_name)
                if bytes_cached == 0:
                    test_files.append((full_file_name, ext.upper()))
                else:
                    not_tested.append(full_file_name)
    finally:
        # Always signal the scanning progress indicator to finish, even if
        # walking the folder failed part way through
        stop = True
        pbs.join()

    if not_tested:
        print()
        if len(not_tested) > 20:
            for line in textwrap.wrap(
                    "WARNING: {:,} files will not be analyzed because they are already in the "
                    "kernel disk cache.".format(len(not_tested)),
                    width=80,
            ):
                print(line)
        else:
            print(
                "WARNING: these files will not be analyzed because they are already in the "
                "kernel disk cache:")
            for name in not_tested:
                print(name)
        print()
        for line in textwrap.wrap(
                "Run this script as super user and use command line option -c or --clear to safely "
                "clear the disk cache.",
                width=80,
        ):
            print(line)

        if confirm(prompt="\nDo you want to exit?", resp=True):
            sys.exit(0)

    photos = []
    videos = []

    # Only created when there is something to analyze and a progress bar is
    # both available and wanted
    bar = None

    if test_files:
        print("\nAnalyzing {:,} files:".format(len(test_files)))
        if have_progressbar and not errors:
            bar = pyprind.ProgBar(iterations=len(test_files),
                                  stream=1,
                                  track_time=False,
                                  width=80)
    else:
        print("\nNothing to analyze")

    # Phase 2
    # Get info from files

    if errors:
        context = show_errors()
    else:
        # Redirect stderr, hiding error output from exiv2
        context = stdchannel_redirected(sys.stderr, os.devnull)

    metadata_fail = []

    with context:
        with ExifTool() as exiftool_process:
            for full_file_name, ext in test_files:
                if ext.lower() in VIDEO_EXTENSIONS:
                    va = VideoAttributes(full_file_name, ext, exiftool_process)
                    videos.append(va)
                # TODO think about how to handle HEIF files!
                elif use_exiftool_on_photo(
                        ext.lower(), preview_extraction_irrelevant=False):
                    pa = ExifToolPhotoAttributes(full_file_name, ext,
                                                 exiftool_process,
                                                 analyze_previews)
                    pa.process(analyze_previews)
                    photos.append(pa)
                else:
                    try:
                        metadata = mp.MetaData(
                            full_file_name=full_file_name,
                            et_process=exiftool_process,
                        )
                    except Exception:
                        # Best effort: remember the failure and carry on with
                        # the remaining files. KeyboardInterrupt and
                        # SystemExit are deliberately not caught.
                        metadata_fail.append(full_file_name)
                    else:
                        pa = PhotoAttributes(full_file_name, ext,
                                             exiftool_process,
                                             analyze_previews)
                        pa.metadata = metadata
                        pa.process(analyze_previews)
                        photos.append(pa)

                if bar is not None:
                    bar.update()

    if metadata_fail:
        print()
        for full_file_name in metadata_fail:
            print("Could not read metadata from {}".format(full_file_name))

    if outfile is not None:
        if not keep_file_names:
            for pa in photos:
                pa.file_name = None
            for va in videos:
                va.file_name = None

        with open(outfile, "wb") as save_to:
            pickle.dump((photos, videos), save_to, pickle.HIGHEST_PROTOCOL)

    return photos, videos
# Example no. 3
# 0
    def __init__(self,
                 name: str,
                 path: str,
                 size: int,
                 prev_full_name: Optional[str],
                 prev_datetime: Optional[datetime],
                 device_timestamp_type: DeviceTimestampTZ,
                 mtime: float,
                 mdatatime: float,
                 thumbnail_cache_status: ThumbnailCacheDiskStatus,
                 thm_full_name: Optional[str],
                 audio_file_full_name: Optional[str],
                 xmp_file_full_name: Optional[str],
                 log_file_full_name: Optional[str],
                 scan_id: bytes,
                 from_camera: bool,
                 never_read_mdatatime: bool,
                 device_display_name: str,
                 device_uri: str,
                 camera_details: Optional[CameraDetails] = None,
                 camera_memory_card_identifiers: Optional[List[int]] = None,
                 raw_exif_bytes: Optional[bytes] = None,
                 exif_source: Optional[ExifSource] = None,
                 problem: Optional[Problem] = None) -> None:
        """
        Record everything known about a single photo or video file found
        on a device, and initialize the values that are generated later
        (download names, temporary file names, thumbnails, metadata).

        :param name: filename, including the extension, without its path
        :param path: path of the file
        :param size: file size. Asserted to be greater than zero.
        :param device_timestamp_type: the method with which the device
         records timestamps.
        :param mtime: file modification time
        :param mdatatime: file time recorded in metadata. Ignored (mtime is
         used in its place) when never_read_mdatatime is True.
        :param thumbnail_cache_status: whether there is an entry in the thumbnail
         cache or not
        :param prev_full_name: the name and path the file was
         previously downloaded with, else None
        :param prev_datetime: when the file was previously downloaded,
         else None
        :param thm_full_name: name and path of any associated thumbnail
         file
        :param audio_file_full_name: name and path of any associated
         audio file
        :param xmp_file_full_name: name and path of any associated XMP
         file
        :param log_file_full_name: name and path of any associated LOG
         file
        :param scan_id: id of the scan. Stored as int(scan_id).
        :param from_camera: whether the file is being downloaded from a
         camera
        :param never_read_mdatatime: whether to ignore the metadata
         date time when determining a photo or video's creation time,
         and rely only on the file modification time. Asserted to be used
         only with files whose extension is 'dng'.
        :param device_display_name: display name of the device the file was found on
        :param device_uri: the uri of the device the file was found on
        :param camera_details: details about the camera, such as model name,
         port, etc.
        :param camera_memory_card_identifiers: if downloaded from a
         camera, and the camera has more than one memory card, a list
         of numeric identifiers (i.e. 1 or 2) identifying which memory
         card the file came from
        :param raw_exif_bytes: excerpt of the file's metadata in bytes format
        :param exif_source: source of photo metadata
        :param problem: any problems encountered
        """

        self.from_camera = from_camera
        self.camera_details = camera_details

        self.device_display_name = device_display_name
        self.device_uri = device_uri

        # Copy the individual camera details onto this instance, or use
        # neutral values when no camera details were supplied
        if camera_details is not None:
            self.camera_model = camera_details.model
            self.camera_port = camera_details.port
            self.camera_display_name = camera_details.display_name
            # NOTE(review): presumably compared with True so that the result
            # is a plain bool even if is_mtp is None -- confirm
            self.is_mtp_device = camera_details.is_mtp == True
            self.camera_storage_descriptions = camera_details.storage_desc
        else:
            self.camera_model = self.camera_port = self.camera_display_name = None
            self.camera_storage_descriptions = None
            self.is_mtp_device = False

        self.path = path

        self.name = name

        self.prev_full_name = prev_full_name
        self.prev_datetime = prev_datetime
        # A previous download name is what marks the file as already downloaded
        self.previously_downloaded = prev_full_name is not None

        self.full_file_name = os.path.join(path, name)

        # Used in sample RPD files
        self.raw_exif_bytes = raw_exif_bytes
        self.exif_source = exif_source

        # Indicate whether file is a photo or video
        self._assign_file_type()

        # Remove the period from the extension and make it lower case
        self.extension = fileformats.extract_extension(name)
        # Classify file based on its type e.g. jpeg, raw or tiff etc.
        self.extension_type = fileformats.extension_type(self.extension)

        # Guessed from the file name only; None when the type cannot be guessed
        self.mime_type = mimetypes.guess_type(name)[0]

        assert size > 0
        self.size = size

        # Cached version of call to metadata.date_time()
        self._datetime = None  # type: Optional[datetime]

        ############################
        # self._no_datetime_metadata
        ############################
        # If True, tried to read the date time metadata, and failed
        # If None, haven't tried yet
        # If False, no problems encountered, got it (or it was assigned from mtime
        # when never_read_mdatatime is True)
        self._no_datetime_metadata = None  # type: Optional[bool]

        self.never_read_mdatatime = never_read_mdatatime
        if never_read_mdatatime:
            assert self.extension == 'dng'

        # NOTE(review): assigned before modification_time / mdatatime below;
        # if those are property setters (see the ctime comment) they may read
        # this value, so keep this ordering -- confirm
        self.device_timestamp_type = device_timestamp_type

        ###########
        # self.ctime
        ###########
        #
        # self.ctime is the photo or video's creation time. Its value depends
        # on the values in self.modification_time and self.mdatatime. Its value
        # is set by the setter functions below.
        #
        # Ideally the file's metadata contains the date/time that the file
        # was created. However the metadata may not have been read yet (it's a slow
        # operation), or it may not exist or be invalid. In that case, need to rely on
        # the file modification time as a proxy, as reported by the file system or device.
        #
        # However that can also be misleading. On my Canon DSLR, for instance, if I'm in the
        # timezone UTC + 5, and I take a photo at 5pm, then the time stamp on the memory card
        # shows the photo being taken at 10pm when I look at it on the computer. The timestamp
        # written to the memory card should with this camera be read as
        # datetime.utcfromtimestamp(mtime), which would return a time zone naive value of 5pm.
        # In other words, the timestamp on the memory card is written as if it were always in
        # UTC, regardless of which timezone the photo was taken in.
        #
        # Yet this is not the case with a cellphone, where the file modification time knows
        # nothing about UTC and just saves it as a naive local time.

        # Initialized before the two time values are assigned
        self.mdatatime_caused_ctime_change = False

        # file modification time
        self.modification_time = mtime
        # date time recorded in metadata
        if never_read_mdatatime:
            # The metadata date time is never to be read, so use the file
            # modification time in its place
            self.mdatatime = mtime
        else:
            self.mdatatime = mdatatime
        # Reset after the assignments above.
        # NOTE(review): this looks redundant with the earlier assignment, but
        # the ctime comment above refers to setter functions, which presumably
        # can change this flag -- confirm before removing either assignment
        self.mdatatime_caused_ctime_change = False

        # If a camera has more than one memory card, store a simple numeric
        # identifier to indicate which memory card it came from
        self.camera_memory_card_identifiers = camera_memory_card_identifiers

        # full path and name of thumbnail file that is associated with some
        # videos
        self.thm_full_name = thm_full_name

        # full path and name of audio file that is associated with some photos
        # and maybe one day videos, e.g. found with the Canon 1D series of
        # cameras
        self.audio_file_full_name = audio_file_full_name

        self.xmp_file_full_name = xmp_file_full_name
        # log files: see https://wiki.magiclantern.fm/userguide#movie_logging
        self.log_file_full_name = log_file_full_name

        # Every file starts out as not downloaded
        self.status = DownloadStatus.not_downloaded
        self.problem = problem

        # Stored as an int, whatever form the scan id was passed in
        self.scan_id = int(scan_id)
        # Randomly generated 16 byte identifier for this instance
        self.uid = uuid.uuid4().bytes

        self.job_code = None

        # freedesktop.org cache thumbnails
        # http://specifications.freedesktop.org/thumbnail-spec/thumbnail-spec-latest.html
        self.thumbnail_status = ThumbnailCacheStatus.not_ready  # type: ThumbnailCacheStatus
        self.fdo_thumbnail_128_name = ''
        self.fdo_thumbnail_256_name = ''
        # PNG data > 128x128 <= 256x256
        self.fdo_thumbnail_256 = None  # type: Optional[bytes]

        # The status of the file in the Rapid Photo Downloader thumbnail cache
        self.thumbnail_cache_status = thumbnail_cache_status

        # generated values

        self.cache_full_file_name = ''
        # temporary file used only for video metadata extraction:
        self.temp_sample_full_file_name = None  # type: Optional[str]
        # if True, the file is a complete copy of the original
        self.temp_sample_is_complete_file = False
        self.temp_full_file_name = ''
        self.temp_thm_full_name = ''
        self.temp_audio_full_name = ''
        self.temp_xmp_full_name = ''
        self.temp_log_full_name = ''
        self.temp_cache_full_file_chunk = ''

        self.download_start_time = None

        # Download destination names; all empty until they are generated
        self.download_folder = ''
        self.download_subfolder = ''
        self.download_path = ''  # os.path.join(download_folder, download_subfolder)
        self.download_name = ''
        self.download_full_file_name = ''  # filename with path
        self.download_full_base_name = ''  # filename with path but no extension
        self.download_thm_full_name = ''  # name of THM (thumbnail) file with path
        self.download_xmp_full_name = ''  # name of XMP sidecar with path
        self.download_log_full_name = ''  # name of LOG associate file with path
        self.download_audio_full_name = ''  # name of the WAV or MP3 audio file with path

        # Extensions of the associated files; empty until assigned
        self.thm_extension = ''
        self.audio_extension = ''
        self.xmp_extension = ''
        self.log_extension = ''

        # No metadata object is attached at construction time
        self.metadata = None  # type: Optional[Union[metadataphoto.MetaData, metadatavideo.MetaData, metadataexiftool.MetadataExiftool]]
        self.metadata_failure = False  # type: bool

        # User preference values used for name generation
        self.subfolder_pref_list = []  # type: List[str]
        self.name_pref_list = []  # type: List[str]
        self.generate_extension_case = ''  # type: str

        self.modified_via_daemon_process = False

        # If true, there was a name generation problem
        self.name_generation_problem = False