Example #1
    def test_listfiles(self):
        result_nonrecursive = pydoni.listfiles(path=join(root_dir, 'tests', 'test_data', 'txt', 'nonempty_dir_flat'))
        self.assertGreater(len(result_nonrecursive), 0)

        result_nonrecursive_txt = pydoni.listfiles(path=join(root_dir, 'tests', 'test_data', 'txt', 'nonempty_dir_flat'), ext='txt')
        self.assertGreater(len(result_nonrecursive_txt), 0)
        unique_extensions = list({splitext(x)[1] for x in result_nonrecursive_txt})
        self.assertEqual(len(unique_extensions), 1)
        self.assertEqual(unique_extensions[0], '.txt')

        result_recursive = pydoni.listfiles(path=join(root_dir, 'tests', 'test_data', 'txt', 'nonempty_dir_subdirs'), recursive=True)
        self.assertGreater(len(result_recursive), 0)
        self.assertTrue(any('subdir_1_1_3' in x for x in result_recursive))
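
For reference, this is roughly the behavior the assertions above pin down. A minimal stand-in for pydoni.listfiles written against the standard library (an approximation based on the calls in this test, not the library's actual implementation):

import os

def listfiles_sketch(path='.', ext=None, recursive=False):
    # Collect file paths under `path`, walking subdirectories only when recursive=True
    if recursive:
        found = [os.path.join(root, f)
                 for root, _, files in os.walk(path) for f in files]
    else:
        found = [os.path.join(path, f) for f in os.listdir(path)
                 if os.path.isfile(os.path.join(path, f))]
    # Keep only the requested extension, e.g. ext='txt' keeps '.txt' files
    if ext is not None:
        found = [f for f in found if f.lower().endswith('.' + ext.lower())]
    return sorted(found)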
Example #2
    def test_data_pg_dump(self):
        pg_dump_dir = join(tests_dir, 'test_data/pg_dump')

        args = [
            '--backup-dir',
            pg_dump_dir,
            # '--db-name',
            # '--pg-user',
            '--sep',
            ',',
            '--pgdump',
            '--csvdump',
            '--max-dir-size',
            '5',
            '--dry-run',
            '--verbose',
        ]

        result = runner.invoke(pg_dump, args)
        if result.exception is not None:
            raise result.exception

        self.assertEqual(result.exit_code, 0)

        self.assertEqual(
            len(pydoni.listfiles(path=pg_dump_dir, recursive=True)), 0)
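
Note that click's CliRunner passes arguments to the command as strings, which is why every value above is quoted. A minimal harness, assuming pg_dump is the click command imported from the package under test:

from click.testing import CliRunner

runner = CliRunner()
result = runner.invoke(pg_dump, ['--backup-dir', pg_dump_dir, '--dry-run'])
assert result.exit_code == 0, result.output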
Example #3

    def test_test_data_folder(self):
        test_data_fpaths = pydoni.listfiles(path=join(tests_dir, 'test_data'),
                                            recursive=True,
                                            include_hidden=False)
        for fpath in test_data_fpaths:
            fpath_relative = re.sub(r'(.*?)(test_data/)(.*)', r'\2\3', fpath)
            self.assertTrue(
                basename(fpath_relative).startswith('test_'),
                msg=f'File "{fpath_relative}" does not start with "test_"')
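
The re.sub call keeps everything from the first occurrence of test_data/ onward, for example (hypothetical path):

import re

fpath = '/home/user/repo/tests/test_data/txt/test_file.txt'
print(re.sub(r'(.*?)(test_data/)(.*)', r'\2\3', fpath))
# -> test_data/txt/test_file.txt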
Example #4
    def test_split_video_scenes(self):
        if pydoni.find_binary('scenedetect'):
            test_video = join(tests_dir, 'test_data/video/test_austin_to_sd_subset.m4v')

            tmp_dpath = join(tests_dir, 'test_data/video', 'tmp_scenedetect')
            if isdir(tmp_dpath):
                shutil.rmtree(tmp_dpath)

            result = pydoni.split_video_scenes(fpath=test_video, output_dpath=tmp_dpath)
            self.assertNotIn('Traceback', result.decode('utf-8'))

            split_files = pydoni.listfiles(path=tmp_dpath)
            if isdir(tmp_dpath):
                shutil.rmtree(tmp_dpath)

            self.assertGreater(len(split_files), 0)
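
pydoni.find_binary serves as a guard so the test is skipped when scenedetect is not installed. A plausible stand-in (an assumption, not the actual helper) is a thin wrapper over shutil.which:

import shutil

def find_binary_sketch(name):
    # Full path to the executable if it is on PATH, else None (falsy)
    return shutil.which(name)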
Example #5
def split_batch_exported_timelapse(dpath):
    """
    Split a directory of exported timelapse stills into their respective folders.

    Accepts a directory of timelapse stills exported from Lightroom following
    Andoni's photo file naming convention. The stills in each timelapse are
    numbered in sequence from 1 up to the final frame, with the counter
    resetting to 1 at the start of the next timelapse. The directory may
    contain an arbitrary number of such sequences.

    This program will take all the files in that directory and split them into folders
    for easy categorization.
    """
    args, result = pydoni.__pydonicli_declare_args__(locals()), dict()

    os.chdir(dpath)

    files = pydoni.listfiles(ext='jpg')
    assert len(files), "No timelapse stills found!"

    # The 5th underscore-delimited token of each filename holds the sequence number
    seqnums = [int(os.path.splitext(f.split('_')[4])[0]) for f in files]

    # A difference other than 0 or 1 between consecutive sequence numbers
    # marks the start of a new timelapse
    differences = []
    for i, num in enumerate(seqnums):
        last_num = seqnums[max(i - 1, 0)]
        differences.append(num - last_num)

    delimiters = [i for i, x in enumerate(differences) if x not in (0, 1)]
    files_list_of_lists = pydoni.split_at(files, delimiters)

    for i, list_of_files in enumerate(files_list_of_lists):
        dname = 'timelapse_%s_of_%s' % (i + 1, len(files_list_of_lists))
        if not os.path.isdir(dname):
            os.mkdir(dname)
        for fname in list_of_files:
            newfname = os.path.join(dname, fname)
            os.rename(fname, newfname)

    result['directories_created'] = len(files_list_of_lists)
    pydoni.__pydonicli_register__(
        dict(args=args,
             result=result,
             command_name='photo.split_batch_exported_timelapse'))
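
To make the delimiter logic concrete, here is a worked example with hypothetical filenames following the assumed <date>_<location>_<camera>_<label>_<seq>.jpg convention, plus a sketch of the split-before-each-index behavior pydoni.split_at is assumed to have:

files = ['20200101_sd_a7iii_tl_0001.jpg', '20200101_sd_a7iii_tl_0002.jpg',
         '20200101_sd_a7iii_tl_0003.jpg', '20200102_sd_a7iii_tl_0001.jpg',
         '20200102_sd_a7iii_tl_0002.jpg']
# seqnums     -> [1, 2, 3, 1, 2]
# differences -> [0, 1, 1, -2, 1]   (the -2 marks a counter reset)
# delimiters  -> [3]

def split_at_sketch(lst, idxs):
    # Start a new sublist at each index in idxs (assumed split_at semantics)
    out, prev = [], 0
    for i in sorted(idxs):
        out.append(lst[prev:i])
        prev = i
    out.append(lst[prev:])
    return out

# split_at_sketch(files, [3]) -> [files[0:3], files[3:5]], i.e. two folders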
Example #6
def website_extract_image_titles(website_export_dpath, outfile, verbose):
    """
    Scan photo files exported for andonisooklaris.com and construct list of image filenames
    and titles, separated by collection.
    """
    args, result = pydoni.__pydonicli_declare_args__(locals()), dict()

    def echo(*args, **kwargs):
        kwargs['timestamp'] = True
        pydoni.echo(*args, **kwargs)

    website_export_dpath = expanduser(website_export_dpath)
    if outfile == 'auto':
        outfile = os.path.join(
            website_export_dpath,
            'Image Titles %s.txt' % pydoni.sysdate(stripchars=True))
    elif outfile is not None:
        assert not os.path.isfile(outfile)

    files = pydoni.listfiles(path=website_export_dpath,
                             recursive=True,
                             full_names=True)
    files = [f for f in files if os.path.splitext(f)[1].lower() != '.txt']

    if verbose:
        echo('Files found: ' + str(len(files)))
        echo('Extracting EXIF metadata...')

    exifd = pydoni.EXIF(files).extract()

    if verbose:
        echo('EXIF metadata successfully extracted')
        if outfile is not None:
            echo('Writing output datafile: ' + outfile)

    tracker = pd.DataFrame(columns=['collection', 'file', 'title'])
    for i, file in enumerate(files):
        elements = file.replace(website_export_dpath,
                                '').lstrip('/').split('/')
        subcollection = None
        collection = elements[0]
        fname = elements[-1]

        if len(elements) == 3:
            subcollection = elements[1]
            collection += ' - ' + subcollection

        exif = exifd[os.path.join(website_export_dpath, file)]
        title = exif.get('Title', '')
        year = fname[0:4]
        title = f'{year} {title}'

        tracker.loc[i] = [collection, fname, title]

    print_lst = []
    for collection in tracker['collection'].unique():
        print_lst.append('\nCollection: %s\n' % collection)
        df_print = tracker.loc[tracker['collection'] == collection].drop(
            'collection', axis=1)
        print_lst.append(
            tabulate(df_print, showindex=False, headers=df_print.columns))

    print_str = '\n'.join(print_lst).strip()
    if outfile is None:
        print(print_str)
    else:
        with open(outfile, 'w') as f:
            f.write(print_str)

    if verbose:
        pydoni.program_complete()

    result['n_collections'] = len(tracker['collection'].unique())
    pydoni.__pydonicli_register__(
        dict(args=args,
             result=result,
             command_name='photo.website_extract_image_titles'))
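
The path parsing above assumes exports are laid out as collection/file or collection/subcollection/file, with each filename starting with a four-digit year. With a hypothetical relative path:

file = 'Landscapes/Iceland/2019 Vik Beach.jpg'
elements = file.split('/')  # ['Landscapes', 'Iceland', '2019 Vik Beach.jpg']
# collection -> 'Landscapes - Iceland' (subcollection folded in)
# fname      -> '2019 Vik Beach.jpg', year -> '2019'
# title      -> '2019 ' + the EXIF Title tag, if present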
Example #7
def source_to_postgres_pipeline(pg, vb, source_dpath, pg_schema, sample,
                                full_rebuild):
    """
    Query source mediafiles for metadata.
    """
    vb.info(
        f'{title_border_left} {format_text("Source to Postgres Pipeline", "title")} {title_border_right}'
    )

    assert isdir(source_dpath), f'Source directory "{source_dpath}" not found!'

    if full_rebuild:
        vb.info(format_text('Prepare database', 'title'))
        pg.drop_and_recreate_schema(pg_schema)
        vb.info(
            f'Dropped and re-created schema {format_text(pg_schema, "code")}',
            arrow='white')

    vb.info(format_text('Load source files into stack', 'title'))
    vb.info(
        f'Listing source media files at {format_text(source_dpath, "path")}...',
        arrow='white')
    fpaths = pydoni.listfiles(source_dpath, recursive=True, full_names=True)

    if isinstance(sample, int):
        # Limit to specified number of files in commandline argument `sample`
        if sample > 0:
            import random
            fpaths = random.sample(fpaths, sample)
            vb.warn(f'Artificially limited source to {sample} files', indent=2)

    vb.info(f'{len(fpaths)} source media files found', indent=2)

    vb.info('Gathering source file type, size and modification time...',
            arrow='white')
    filebase = build_filebase_dataframe(fpaths)
    vb.info(f'Successfully built filebase, shape: {filebase.shape}', indent=2)

    gerund = 'Rebuilding' if full_rebuild else 'Refreshing'

    filebase_table_name = 'filebase_history'
    vb.info(
        f'{gerund} {format_text(pg_schema + "." + filebase_table_name, "code")}',
        arrow='white')

    refresh_filebase_history_args = dict(pg=pg,
                                         vb=vb,
                                         filebase=filebase,
                                         pg_schema=pg_schema,
                                         table_name=filebase_table_name,
                                         full_rebuild=full_rebuild)
    filebase_changes = refresh_filebase_history(**refresh_filebase_history_args)

    vb.info(format_text('Extract and load EXIF metadata', 'title'))

    raw_exif_table_name = 'raw_exif_history'
    vb.info(
        f'{gerund} {format_text(pg_schema + "." + raw_exif_table_name, "code")}',
        arrow='white')

    load_exif_history_table_args = dict(pg=pg,
                                        vb=vb,
                                        filebase=filebase,
                                        filebase_changes=filebase_changes,
                                        pg_schema=pg_schema,
                                        table_name=raw_exif_table_name,
                                        full_rebuild=full_rebuild)
    load_exif_history_table(**load_exif_history_table_args)

    vb.info(format_text('Define database objects', 'title'))
    vb.info('Defining views...', arrow='white')
    define_view(pg, 'filebase_vw')
    vb.info(f'{format_text("filebase_vw", "code")}', indent=2)

    vb.info(
        format_text(
            'Postgres database successfully refreshed with source file metadata ✔️',
            'title'))
Example #8
def backup(source, target, update_log_table, use_rsync, verbose, debug, dry_run):
    """
    Back up a source directory to a target directory.

    This function accepts source and target directories, most often on
    separate external hard drives, and copies every file from the source to
    the target that is either:

        (1) not present in the target directory, or
        (2) present in the target directory but updated in the source.

    Files in the target that have since been deleted from the source are
    deleted from the target as well.
    """
    args, result = pydoni.__pydonicli_declare_args__(locals()), dict()

    start_ts = time.time()
    vb = Verbose(verbose=verbose, debug=debug)
    ws = '  '

    ignore_files = [
        'The Office S09E16 Moving On.mkv',
        'The Office S09E20 Paper Airplanes.mkv',
    ]

    if update_log_table:
        start_ts_utc = datetime.datetime.utcnow()
        pg = pydoni.Postgres()
        directory_backup_table_schema = 'pydonicli'
        directory_backup_table_name = 'directory_backup'

        insert_dict = dict(source=source,
                           source_size_bytes=stat(source).st_size,
                           target=target,
                           target_size_before_bytes=stat(target).st_size,
                           target_size_after_bytes=None,
                           start_ts=start_ts_utc,
                           is_completed=False)

        insert_sql = pg.build_insert(schema_name=directory_backup_table_schema,
                                     table_name=directory_backup_table_name,
                                     columns=list(insert_dict.keys()),
                                     values=list(insert_dict.values()),
                                     validate=True)
        if not dry_run:
            pg.execute(insert_sql)

        directory_backup_id = pg.read_sql(f"""
        select directory_backup_id
        from {directory_backup_table_schema}.{directory_backup_table_name}
        order by gen_ts desc
        limit 1""").squeeze()


    assert source != target, 'Source and target directories must be different'

    if use_rsync:
        cmd_lst = ['rsync', '--delete-before', '-a', '-h', '-u']
        if verbose:
            cmd_lst = cmd_lst + ['-v', '--progress']

        cmd_lst = cmd_lst + [f'"{source}"'] + [f'"{target}"']
        cmd = ' '.join(cmd_lst)

        subprocess.call(cmd, shell=True)

        # progress_flag = ' --progress' if verbose else ''
        # backup_cmd = f'rsync -avhu{progress_flag} --delete-before "{source}" "{target}"'
        # subprocess.call(backup_cmd, shell=True)

    else:
        vb.info(f'Listing files at source: {source}')
        files_source = pydoni.listfiles(path=source, recursive=True, full_names=True)
        vb.debug('Found files at source: ' + str(len(files_source)))
        files_source = [x for x in files_source if x not in ignore_files]
        vb.debug(f'Found files at source after filtering out manually ignored files: {len(files_source)}')

        vb.info(f'Listing files at target: {target}')
        files_target = pydoni.listfiles(path=target, recursive=True, full_names=True)
        vb.debug('Found files at target: ' + str(len(files_target)))
        files_target = [x for x in files_target if x not in ignore_files]
        vb.debug(f'Found files at target after filtering out manually ignored files: {len(files_target)}')

        # Scan source files and for each determine whether to do nothing, copy to target,
        # or replace at target
        copied_files = []
        replaced_files = []
        vb.info('Scanning for new, updated or deleted files at source')
        vb.pbar_init(total=len(files_source), unit='file')

        for sourcefile in files_source:
            vb.pbar_write(f'Sourcefile: {sourcefile}', refer_debug=True)
            vb.pbar.set_postfix({'file': basename(sourcefile)})

            targetfile = sourcefile.replace(source, target)
            vb.pbar_write(f'{ws}Expected mirrored targetfile: {targetfile}', refer_debug=True)

            if not isfile(targetfile):
                # Copy file to target. Create parent directory at target if not exists
                vb.pbar_write(f'{ws}(Copy) attempting to copy file "{sourcefile}" to "{targetfile}"', refer_debug=True)

                targetdpath = dirname(targetfile)
                if not isdir(targetdpath):
                    vb.pbar_write(f'{ws}{ws}Parent directory of targetfile does not exist, creating it at: ' + targetdpath, refer_debug=True)
                    if not dry_run:
                        makedirs(targetdpath)

                    vb.pbar_write(f'{ws}{ws}Successful', refer_debug=True)

                if not dry_run:
                    shutil.copy2(sourcefile, targetfile)

                vb.pbar_write(f'{ws}Successful', refer_debug=True)
                copied_files.append(sourcefile)

            elif isfile(targetfile) and is_file_changed(sourcefile, targetfile):
                # Replace file at target (same action as copy, but parent directory must exist)
                vb.pbar_write(f'{ws}(Replace) attempting to copy file "{sourcefile}" to "{targetfile}"', refer_debug=True)
                if not dry_run:
                    shutil.copy2(sourcefile, targetfile)

                vb.pbar_write(f'{ws}Successful', refer_debug=True)
                replaced_files.append(sourcefile)

            else:
                vb.pbar_write(f'{ws}Targetfile already exists and is unchanged', refer_debug=True)

            vb.pbar_update(1)

        vb.pbar_close()

        # Scan target files and determine, for each, whether it has since been
        # deleted from the source
        deleted_files = []
        vb.info('Scanning for files at target since deleted from source')
        vb.pbar_init(total=len(files_target))
        for targetfile in files_target:
            sourcefile = targetfile.replace(target, source)
            vb.pbar.set_postfix({'file': basename(targetfile)})

            if not isfile(sourcefile) and not isdir(sourcefile):
                vb.pbar_write(f'(Delete) attempting to delete "{targetfile}"', refer_debug=True)
                if not dry_run:
                    send2trash(targetfile)

                vb.pbar_write(f'{ws}Successful', refer_debug=True)
                deleted_files.append(targetfile)

            vb.pbar_update(1)

        vb.pbar_close()

        # Record number of files copied, replaced and deleted
        n_unchanged = (len(files_source) - len(copied_files)
                       - len(replaced_files) - len(deleted_files))
        vb.info(f'Copied {len(copied_files)} files')
        vb.info(f'Replaced {len(replaced_files)} files')
        vb.info(f'Deleted {len(deleted_files)} files')
        vb.info(f'Unchanged {n_unchanged} files')
        result = dict(copied=len(copied_files),
                      replaced=len(replaced_files),
                      deleted=len(deleted_files),
                      unchanged=n_unchanged)

    if update_log_table:
        vb.debug('Attempting to update log table with results...')

        update_dict = dict(target_size_after_bytes=pydoni.dirsize(target),
                           end_ts=datetime.datetime.utcnow(),
                           is_completed=True)

        update_sql = pg.build_update(schema_name=directory_backup_table_schema,
                                     table_name=directory_backup_table_name,
                                     pkey_name='directory_backup_id',
                                     pkey_value=directory_backup_id,
                                     columns=list(update_dict.keys()),
                                     values=list(update_dict.values()),
                                     validate=True)

        if not dry_run:
            pg.execute(update_sql)

        vb.debug(f'{ws}Successful')

    vb.program_complete('Backup complete', start_ts=start_ts)
    pydoni.__pydonicli_register__(dict(args=args, result=result, command_name='data.backup'))
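
is_file_changed is not defined in this excerpt. A plausible stand-in that flags a source file as changed when its size or modification time differs from the target copy (an assumption about the helper's semantics, not its actual implementation):

import os

def is_file_changed(sourcefile, targetfile):
    # Changed if the source differs in size or is newer than the target
    src, tgt = os.stat(sourcefile), os.stat(targetfile)
    return src.st_size != tgt.st_size or src.st_mtime > tgt.st_mtime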
Example #9
def list_views():
    """
    List all views by name (filename without extension) in the expected directory.
    """
    view_fpaths = pydoni.listfiles(view_schemas_dpath, ext='sql')
    return [splitext(basename(x))[0] for x in view_fpaths]