Example 1
def _generate_db(source):
    """Regenerate the hash.json database which contains all of the sha1 signatures of media files.
    """
    result = Result()
    source = os.path.abspath(os.path.expanduser(source))

    extensions = set()
    all_files = set()
    valid_files = set()

    if not os.path.isdir(source):
        log.error('Source is not a valid directory %s' % source)
        sys.exit(1)
        
    subclasses = get_all_subclasses(Base)
    for cls in subclasses:
        extensions.update(cls.extensions)

    all_files.update(FILESYSTEM.get_all_files(source, None))

    db = Db()
    db.backup_hash_db()
    db.reset_hash_db()

    for current_file in all_files:
        if os.path.splitext(current_file)[1][1:].lower() not in extensions:
            log.info('Skipping invalid file %s' % current_file)
            result.append((current_file, False))
            continue

        result.append((current_file, True))
        db.add_hash(db.checksum(current_file), current_file)
    
    db.update_hash_db()
    result.write()
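
All of these examples revolve around the project's Db class. As a reading aid, here is a minimal stand-in sketched only from the behaviour the snippets in this listing exercise; the JSON file location, the name of the write flag, and the hashing details are assumptions (the _generate_db docstring above mentions sha1), not the project's actual implementation.

import hashlib
import json
import os
import shutil


class Db(object):
    """Minimal stand-in for the hash database used by these examples.

    Reconstructed from observed behaviour only: keys are checksums,
    values are file paths, and entries are buffered in memory until
    update_hash_db() (or add_hash(..., True)) persists them as JSON.
    """

    # Hypothetical location; the real project derives its own path.
    hash_db_file = os.path.expanduser('~/.elodie/hash.json')

    def __init__(self):
        self.hash_db = {}
        if os.path.isfile(self.hash_db_file):
            with open(self.hash_db_file, 'r') as f:
                self.hash_db = json.load(f)

    def add_hash(self, key, value, write=False):
        # The third parameter controls immediate persistence (compare
        # test_add_hash_explicit_write and test_add_hash_default_do_not_write).
        self.hash_db[key] = value
        if write:
            self.update_hash_db()

    def check_hash(self, key):
        return key in self.hash_db

    def get_hash(self, key):
        return self.hash_db.get(key)

    def all(self):
        # Yield (checksum, path) pairs, as iterated in test_get_all.
        for key, value in self.hash_db.items():
            yield key, value

    def checksum(self, file_path, blocksize=65536):
        # The _generate_db docstring says sha1; read in blocks to bound memory.
        hasher = hashlib.sha1()
        try:
            with open(file_path, 'rb') as f:
                for chunk in iter(lambda: f.read(blocksize), b''):
                    hasher.update(chunk)
        except IOError:
            return None
        return hasher.hexdigest()

    def reset_hash_db(self):
        self.hash_db = {}

    def backup_hash_db(self):
        if os.path.isfile(self.hash_db_file):
            shutil.copyfile(self.hash_db_file, self.hash_db_file + '.bak')

    def update_hash_db(self):
        os.makedirs(os.path.dirname(self.hash_db_file), exist_ok=True)
        with open(self.hash_db_file, 'w') as f:
            json.dump(self.hash_db, f)

With a stand-in of this shape in place, the test examples below should run as written.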
Example 2
def test_get_hash_exists():
    db = Db()

    random_key = helper.random_string(10)
    random_value = helper.random_string(12)

    # Test with explicit False value as 3rd param
    db.add_hash(random_key, random_value, False)

    assert db.get_hash(random_key) == random_value, 'Lookup for hash that exists did not return value'
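
Only the positive lookup is covered here. Assuming get_hash follows the dict.get convention and returns None on a miss (an assumption these examples do not confirm), a companion test might look like:

def test_get_hash_does_not_exist():
    db = Db()

    # Key that was never added; the lookup is assumed to return None.
    random_key = helper.random_string(10)

    assert db.get_hash(random_key) is None, 'Lookup for hash that does not exist returned a value'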
Example 3
def test_reset_hash_db():
    db = Db()

    random_key = helper.random_string(10)
    random_value = helper.random_string(12)

    # Test with explicit False value as 3rd param
    db.add_hash(random_key, random_value, False)
    
    assert random_key in db.hash_db, random_key
    db.reset_hash_db()
    assert random_key not in db.hash_db, random_key
Example 4
    def process_file(self, _file, destination, media, **kwargs):
        move = False
        if('move' in kwargs):
            move = kwargs['move']

        allow_duplicate = False
        if('allowDuplicate' in kwargs):
            allow_duplicate = kwargs['allowDuplicate']

        if(not media.is_valid()):
            print('%s is not a valid media file. Skipping...' % _file)
            return

        metadata = media.get_metadata()

        directory_name = self.get_folder_path(metadata)

        dest_directory = os.path.join(destination, directory_name)
        file_name = self.get_file_name(media)
        dest_path = os.path.join(dest_directory, file_name)

        db = Db()
        checksum = db.checksum(_file)
        if(checksum is None):
            if(constants.debug is True):
                print('Could not get checksum for %s. Skipping...' % _file)
            return

        # If duplicates are not allowed and this hash exists in the db then we
        #   return
        if(allow_duplicate is False and db.check_hash(checksum) is True):
            if(constants.debug is True):
                print('%s already exists at %s. Skipping...' % (
                    _file,
                    db.get_hash(checksum)
                ))
            return

        self.create_directory(dest_directory)

        if(move is True):
            stat = os.stat(_file)
            shutil.move(_file, dest_path)
            os.utime(dest_path, (stat.st_atime, stat.st_mtime))
        else:
            shutil.copy2(_file, dest_path)

        db.add_hash(checksum, dest_path)
        db.update_hash_db()

        return dest_path
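
To show how this method is meant to be driven, here is a hypothetical caller. The Photo wrapper class and the FILESYSTEM instance are guesses based on identifiers visible elsewhere in these examples, not confirmed API.

# Hypothetical driver code; Photo and FILESYSTEM are assumptions.
photo = Photo('/photos/incoming/IMG_0001.jpg')
dest_path = FILESYSTEM.process_file(
    '/photos/incoming/IMG_0001.jpg',   # source file
    '/photos/library',                 # destination root
    photo,                             # media wrapper for the file
    move=False,                        # copy; keep the original in place
    allowDuplicate=False               # skip checksums already in the db
)
if dest_path is None:
    print('File was skipped')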
Example 5
def test_add_hash_explicit_write():
    db = Db()

    random_key = helper.random_string(10)
    random_value = helper.random_string(12)

    # Test with explicit True value as 3rd param
    db.add_hash(random_key, random_value, True)

    assert db.check_hash(random_key) == True, 'Lookup for hash did not return True'

    # Instantiate new db class to confirm random_key exists
    db2 = Db()
    assert db2.check_hash(random_key) == True
Example 6
def test_add_hash_default_do_not_write():
    db = Db()

    random_key = helper.random_string(10)
    random_value = helper.random_string(12)

    # Test with default False value as 3rd param
    db.add_hash(random_key, random_value)

    assert db.check_hash(random_key) == True, 'Lookup for hash did not return True'

    # Instantiate new db class to confirm random_key does not exist
    db2 = Db()
    assert db2.check_hash(random_key) == False
Example 7
def test_get_all():
    db = Db()
    db.reset_hash_db()

    random_keys = []
    random_values = []
    for _ in range(10):
        random_keys.append(helper.random_string(10))
        random_values.append(helper.random_string(12))
        db.add_hash(random_keys[-1], random_values[-1], False)

    counter = 0
    for key, value in db.all():
        assert key in random_keys, key
        assert value in random_values, value
        counter += 1

    assert counter == 10, counter
Example 8
def _generate_db(source):
    """Regenerate the hash.json database which contains all of the sha1 signatures of media files.
    """
    result = Result()
    source = os.path.abspath(os.path.expanduser(source))

    if not os.path.isdir(source):
        log.error('Source is not a valid directory %s' % source)
        sys.exit(1)

    db = Db()
    db.backup_hash_db()
    db.reset_hash_db()

    for current_file in FILESYSTEM.get_all_files(source):
        result.append((current_file, True))
        db.add_hash(db.checksum(current_file), current_file)
        log.progress()

    db.update_hash_db()
    log.progress('', True)
    result.write()
Example 9
    def process_file(self, _file, destination, media, **kwargs):
        move = False
        if('move' in kwargs):
            move = kwargs['move']

        allow_duplicate = False
        if('allowDuplicate' in kwargs):
            allow_duplicate = kwargs['allowDuplicate']

        stat_info_original = os.stat(_file)
        metadata = media.get_metadata()

        if(not media.is_valid()):
            print('%s is not a valid media file. Skipping...' % _file)
            return

        checksum = self.process_checksum(_file, allow_duplicate)
        if(checksum is None):
            log.info('Original checksum returned None for %s. Skipping...' %
                     _file)
            return

        # Run `before()` for every loaded plugin and if any of them raise an exception
        #  then we skip importing the file and log a message.
        plugins_run_before_status = self.plugins.run_all_before(_file, destination)
        if(plugins_run_before_status is False):
            log.warn('At least one plugin pre-run failed for %s' % _file)
            return

        directory_name = self.get_folder_path(metadata)
        dest_directory = os.path.join(destination, directory_name)
        file_name = self.get_file_name(metadata)
        dest_path = os.path.join(dest_directory, file_name)

        media.set_original_name()

        # If source and destination are identical then
        #  we should not write the file. gh-210
        if(_file == dest_path):
            print('Final source and destination path should not be identical')
            return

        self.create_directory(dest_directory)

        # exiftool renames the original file by appending '_original' to the
        # file name. A new file is written with new tags with the initial file
        # name. See exiftool man page for more details.
        exif_original_file = _file + '_original'

        # Check if the source file was processed by exiftool and an _original
        # file was created.
        exif_original_file_exists = False
        if(os.path.exists(exif_original_file)):
            exif_original_file_exists = True

        if(move is True):
            stat = os.stat(_file)
            # Move the processed file into the destination directory
            shutil.move(_file, dest_path)

            if(exif_original_file_exists is True):
                # We can remove it as we don't need the initial file.
                os.remove(exif_original_file)
            os.utime(dest_path, (stat.st_atime, stat.st_mtime))
        else:
            if(exif_original_file_exists is True):
                # Move the newly processed file with any updated tags to the
                # destination directory
                shutil.move(_file, dest_path)
                # Move the exif _original back to the initial source file
                shutil.move(exif_original_file, _file)
            else:
                compatability._copyfile(_file, dest_path)

            # Set the utime based on what the original file contained
            #  before we made any changes.
            # Then set the utime on the destination file based on metadata.
            os.utime(_file, (stat_info_original.st_atime, stat_info_original.st_mtime))
            self.set_utime_from_metadata(metadata, dest_path)

        db = Db()
        db.add_hash(checksum, dest_path)
        db.update_hash_db()

        # Run `after()` for every loaded plugin and if any of them raise an exception
        #  then we skip importing the file and log a message.
        plugins_run_after_status = self.plugins.run_all_after(_file, destination, dest_path, metadata)
        if(plugins_run_after_status is False):
            log.warn('At least one plugin post-run failed for %s' % _file)
            return

        return dest_path
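
The before()/after() plugin hooks are only described in comments here. A minimal sketch of the veto behaviour those comments imply, assuming each plugin is an object with a before(file, destination) method (the signature and attribute names are assumptions):

def run_all_before(plugins, _file, destination):
    """Sketch of the veto behaviour the comments above describe.

    Hypothetical: any exception raised by a plugin's before() hook
    vetoes the import by returning False to the caller.
    """
    for plugin in plugins:
        try:
            plugin.before(_file, destination)
        except Exception as e:
            log.error('Plugin %s raised in before(): %s' %
                      (plugin.__class__.__name__, e))
            return False
    return True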
Example 10
    def process_file(self, _file, destination, media, **kwargs):
        move = False
        if ('move' in kwargs):
            move = kwargs['move']

        allow_duplicate = False
        if ('allowDuplicate' in kwargs):
            allow_duplicate = kwargs['allowDuplicate']

        if (not media.is_valid()):
            print('%s is not a valid media file. Skipping...' % _file)
            return

        metadata = media.get_metadata()

        directory_name = self.get_folder_path(metadata)

        dest_directory = os.path.join(destination, directory_name)
        file_name = self.get_file_name(media)
        dest_path = os.path.join(dest_directory, file_name)

        db = Db()
        checksum = db.checksum(_file)
        if (checksum is None):
            log.info('Could not get checksum for %s. Skipping...' % _file)
            return

        # If duplicates are not allowed then we check if we've seen this file
        #  before via checksum. We also check that the file exists at the
        #   location we believe it to be.
        # If we find a checksum match but the file doesn't exist where we
        #  believe it to be then we write a debug log and proceed to import.
        checksum_file = db.get_hash(checksum)
        if (allow_duplicate is False and checksum_file is not None):
            if (os.path.isfile(checksum_file)):
                log.info('%s already exists at %s. Skipping...' %
                         (_file, checksum_file))
                return
            else:
                log.info(
                    '%s matched checksum but file not found at %s. Importing again...'
                    % (  # noqa
                        _file, checksum_file))

        self.create_directory(dest_directory)

        if (move is True):
            stat = os.stat(_file)
            shutil.move(_file, dest_path)
            os.utime(dest_path, (stat.st_atime, stat.st_mtime))
        else:
            # Do not use copy2(), will have an issue when copying to a
            # network/mounted drive using copy and manual
            # set_date_from_filename gets the job done
            shutil.copy(_file, dest_path)
            self.set_utime(media)

        db.add_hash(checksum, dest_path)
        db.update_hash_db()

        return dest_path
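
The duplicate check in this example has a second condition that is easy to miss: a checksum match only counts if the recorded file still exists on disk. Distilled into a hypothetical standalone helper (the name and factoring are illustrative, not the project's):

import os

def duplicate_location(db, checksum, allow_duplicate):
    """Return the existing path if the file should be skipped as a
    duplicate, or None if it should be imported. A stale hash record
    (checksum known but file missing) does not count as a duplicate.
    """
    if allow_duplicate:
        return None
    checksum_file = db.get_hash(checksum)
    if checksum_file is not None and os.path.isfile(checksum_file):
        return checksum_file
    return None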
Example 11
    def process_file(self, _file, destination, media, **kwargs):
        move = False
        if ('move' in kwargs):
            move = kwargs['move']

        allow_duplicate = False
        if ('allowDuplicate' in kwargs):
            allow_duplicate = kwargs['allowDuplicate']

        if (not media.is_valid()):
            print('%s is not a valid media file. Skipping...' % _file)
            return

        media.set_original_name()
        metadata = media.get_metadata()

        directory_name = self.get_folder_path(metadata)

        dest_directory = os.path.join(destination, directory_name)
        file_name = self.get_file_name(media)
        dest_path = os.path.join(dest_directory, file_name)

        db = Db()
        checksum = db.checksum(_file)
        if (checksum is None):
            log.info('Could not get checksum for %s. Skipping...' % _file)
            return

        # If duplicates are not allowed then we check if we've seen this file
        #  before via checksum. We also check that the file exists at the
        #   location we believe it to be.
        # If we find a checksum match but the file doesn't exist where we
        #  believe it to be then we write a debug log and proceed to import.
        checksum_file = db.get_hash(checksum)
        if (allow_duplicate is False and checksum_file is not None):
            if (os.path.isfile(checksum_file)):
                log.info('%s already exists at %s. Skipping...' %
                         (_file, checksum_file))
                return
            else:
                log.info(
                    '%s matched checksum but file not found at %s. Importing again...'
                    % (  # noqa
                        _file, checksum_file))

        # If source and destination are identical then
        #  we should not write the file. gh-210
        if (_file == dest_path):
            print('Final source and destination path should not be identical')
            return

        self.create_directory(dest_directory)

        if (move is True):
            stat = os.stat(_file)
            shutil.move(_file, dest_path)
            os.utime(dest_path, (stat.st_atime, stat.st_mtime))
        else:
            compatability._copyfile(_file, dest_path)
            self.set_utime(media)

        db.add_hash(checksum, dest_path)
        db.update_hash_db()

        return dest_path