def update_database_backend(schema_name, table_name, start_ts):
    """
    Update the backend command history table. This table can be updated each time a
    `pydoni` command is run.
    """
    logger.info(
        f'Appending a record to command history table {schema_name}.{table_name}')

    pg = pydoni.Postgres()
    end_ts = datetime.datetime.utcnow()

    def get_pydoni_attr(attr_name):
        if hasattr(pydoni, attr_name):
            attr_value = getattr(pydoni, attr_name)
            return str(attr_value) if attr_value is not None else 'NULL'
        else:
            return 'NULL'

    e = get_pydoni_attr('pydonicli_e')
    args = get_pydoni_attr('pydonicli_args')
    result = get_pydoni_attr('pydonicli_result')
    command_name = get_pydoni_attr('pydonicli_command_name')

    if command_name == 'NULL':
        logger.warning(
            pydoni.advanced_strip(
                """Expected variable `pydoni.pydonicli_command_name` not found,
                command history log not updated"""))
    else:
        elapsed_sec = datetime.datetime.timestamp(end_ts) - datetime.datetime.timestamp(start_ts)

        columns_values = [
            ('command_name', command_name),
            ('start_ts', start_ts),
            ('end_ts', end_ts),
            ('elapsed_sec', elapsed_sec),
            ('error_msg', e),
            ('args', args),
            ('result', result),
        ]

        if 'pg' in locals() or 'pg' in globals():
            insert_sql = pg.build_insert(
                schema_name=schema_name,
                table_name=table_name,
                columns=[col for col, val in columns_values],
                values=[val for col, val in columns_values],
                validate=True)
            pg.execute(insert_sql)
        else:
            logger.warning(
                f'No connection to database backend established. No record inserted to {schema_name}.{table_name}')
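

# Usage sketch for update_database_backend() (hypothetical helper, not part of the
# original module). Assumes a reachable Postgres backend; the schema/table names
# ('pydonicli', 'command_history') are illustrative placeholders. The command run in
# between is expected to set `pydoni.pydonicli_command_name` (and optionally
# `pydonicli_args`, `pydonicli_result`, `pydonicli_e`), as read by the function above.
def _example_update_database_backend():
    import datetime

    start_ts = datetime.datetime.utcnow()
    # ... run an arbitrary pydoni command here ...
    update_database_backend(schema_name='pydonicli',
                            table_name='command_history',
                            start_ts=start_ts)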
def append_backup_log_table(table_schema, table_name, source, source_size_bytes,
                            target, target_size_before_bytes, target_size_after_bytes,
                            start_ts, end_ts, is_completed, verbose):
    """
    Append a record to the directory backup Postgres table.

    To be used when a backup is carried out without the `pydoni data backup` command
    (which handles the table insert automatically) but the backup should still be
    recorded in the log table.
    """
    args, result = pydoni.__pydonicli_declare_args__(locals()), dict()
    vb = Verbose(verbose)
    pg = pydoni.Postgres()

    sql_value_dct = dict(source=source,
                         source_size_bytes=source_size_bytes,
                         target=target,
                         target_size_before_bytes=target_size_before_bytes,
                         target_size_after_bytes=target_size_after_bytes,
                         start_ts=datetime.datetime.fromtimestamp(start_ts),
                         end_ts=datetime.datetime.fromtimestamp(end_ts),
                         is_completed=is_completed)

    vb.info(f'table_schema: {table_schema}')
    vb.info(f'table_name: {table_name}')
    for k, v in sql_value_dct.items():
        vb.info(f'{k}: {v}')

    insert_sql = pg.build_insert(schema_name=table_schema,
                                 table_name=table_name,
                                 columns=list(sql_value_dct.keys()),
                                 values=list(sql_value_dct.values()))
    pg.execute(insert_sql)
    vb.info(f'Appended record to {table_schema}.{table_name}')

    result['sql_value_dct'] = sql_value_dct
    vb.program_complete('Append to backup log table complete')
    pydoni.__pydonicli_register__(dict(args=args,
                                       result=result,
                                       command_name='data.append_backup_log_table'))
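

# Usage sketch for append_backup_log_table() (hypothetical helper, not part of the
# original module). Assumes the pydonicli.directory_backup table used by
# `pydoni data backup` exists. start_ts/end_ts are Unix timestamps (converted with
# datetime.datetime.fromtimestamp above) and sizes are in bytes; the paths and sizes
# below are placeholders.
def _example_append_backup_log_table():
    import time

    t0 = time.time()
    # ... perform a backup outside of `pydoni data backup` here ...
    append_backup_log_table(table_schema='pydonicli',
                            table_name='directory_backup',
                            source='/Volumes/source_drive',
                            source_size_bytes=1_000_000,
                            target='/Volumes/target_drive',
                            target_size_before_bytes=900_000,
                            target_size_after_bytes=1_000_000,
                            start_ts=t0,
                            end_ts=time.time(),
                            is_completed=True,
                            verbose=True)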
def workflow(source_dpath, pg_schema, sample, full_rebuild, verbose, dry_run,
             no_startup_message, no_pipeline):
    """
    Refresh the Postgres photo schema from source photo metadata.
    """
    args = pydoni.__pydonicli_declare_args__(locals())
    pydoni.__pydonicli_register__({
        'command_name': pydoni.what_is_my_name(with_modname=True),
        'args': args
    })

    # Begin pipeline stopwatch
    start_ts = time.time()

    # Set up variables used throughout the entire pipeline
    vb = Verbose(verbose)
    pg = pydoni.Postgres()

    if vb.verbose and not no_startup_message:
        print_startup_message()

    # Extract source media file metadata and load it into Postgres
    pipeline_args = dict(pg=pg,
                         vb=vb,
                         source_dpath=source_dpath,
                         pg_schema=pg_schema,
                         sample=sample,
                         full_rebuild=full_rebuild)
    if not no_pipeline:
        source_to_postgres_pipeline(**pipeline_args)

    # Apply transformations on the data once loaded into Postgres
    db_transform(pg, vb, pg_schema, sample)

    # End workflow
    pydoni.__pydonicli_register__(
        {k: v for k, v in locals().items() if k in ['result']})

    msg = 'Photo database refresh complete'
    vb.program_complete(msg, start_ts=start_ts)
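

# Usage sketch for workflow() (hypothetical helper, not part of the original module):
# a full refresh of a photo metadata schema from a local directory of source media.
# The directory path and schema name are placeholders, and the call assumes the plain
# function as listed above (if workflow() is registered as a click command in the real
# CLI, it would instead be invoked through that CLI or a CliRunner).
def _example_photo_workflow():
    workflow(source_dpath='/Volumes/photos_primary',
             pg_schema='photo',
             sample=False,
             full_rebuild=False,
             verbose=True,
             dry_run=False,
             no_startup_message=False,
             no_pipeline=False)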
        '--notify',
    ]

    result = runner.invoke(to_gif, args)
    if result.exception is not None:
        raise result.exception

    self.assertEqual(result.exit_code, 0)
    self.assertTrue(isfile(expected_gif_fpath1))
    self.assertTrue(isfile(expected_gif_fpath2))

    if isfile(expected_gif_fpath1):
        remove(expected_gif_fpath1)
    if isfile(expected_gif_fpath2):
        remove(expected_gif_fpath2)


tests_dir = dirname(abspath(__file__))
root_dir = dirname(tests_dir)
chdir(root_dir)

pg = pydoni.Postgres(credentials_fpath='~/.pgpass')
runner = CliRunner()

case = TestPydoniCLI()
test_methods = [x for x in dir(case) if x.startswith('test_')]
for method in test_methods:
    getattr(case, method)()
def test_Postgres(self):
    try:
        pg = pydoni.Postgres()
        connected = True
    except Exception:
        connected = False

    if connected:
        # Not all users of this library will have Postgres set up, so only execute
        # tests if Postgres connected successfully.
        #
        # Now perform a series of commands to ensure that Postgres class methods are
        # working as intended.

        # Method: read_sql(). Choose a default table that always exists
        result = pg.read_sql('select * from information_schema.tables limit 1')

        # Method: get_table_name()
        self.assertEqual(pg.get_table_name(schema_name=None, table_name='pg_stat'),
                         'pg_stat')
        self.assertEqual(pg.get_table_name(schema_name='information_schema', table_name='tables'),
                         'information_schema.tables')

        # Method: validate_dtype()
        result = pg.validate_dtype(schema_name='information_schema',
                                   table_name='tables',
                                   col='table_catalog',
                                   val='string value')
        self.assertEqual(result, True)

        result = pg.validate_dtype(schema_name='information_schema',
                                   table_name='tables',
                                   col='table_catalog',
                                   val=5)
        self.assertEqual(result, False)

        # Method: infoschema()
        result = sorted(pg.infoschema(infoschema_table='tables').columns)
        result_sql_direct = sorted(pg.read_sql('select * from information_schema.tables limit 1').columns)
        self.assertEqual(result, result_sql_direct)

        # Method: build_update()
        result = pg.build_update(schema_name='pg_catalog',
                                 table_name='pg_stat_database',
                                 pkey_name='datid',
                                 pkey_value=12345,
                                 columns=['tup_returned', 'tup_fetched'],
                                 values=[11111, 22222],
                                 validate=True,
                                 newlines=False)
        expectation = 'UPDATE pg_catalog.pg_stat_database SET "tup_returned"=11111, "tup_fetched"=22222 WHERE "datid" = 12345'
        self.assertEqual(result, expectation)

        result = pg.build_update(schema_name='pg_catalog',
                                 table_name='pg_stat_database',
                                 pkey_name='datid',
                                 pkey_value=12345,
                                 columns=['tup_returned', 'tup_fetched'],
                                 values=[11111, 22222],
                                 validate=False,
                                 newlines=True)
        expectation = 'UPDATE pg_catalog.pg_stat_database\nSET "tup_returned"=11111, \n "tup_fetched"=22222\nWHERE "datid" = 12345'
        self.assertEqual(result, expectation)

        # Method: build_insert()
        result = pg.build_insert(schema_name='pg_catalog',
                                 table_name='pg_stat_database',
                                 columns=['tup_returned', 'tup_fetched'],
                                 values=[11111, 22222],
                                 validate=True,
                                 newlines=False)
        expectation = 'insert into pg_catalog.pg_stat_database ("tup_returned", "tup_fetched") values (11111, 22222)'
        self.assertEqual(result, expectation)

        result = pg.build_insert(schema_name='pg_catalog',
                                 table_name='pg_stat_database',
                                 columns=['tup_returned', 'tup_fetched'],
                                 values=[11111, 22222],
                                 validate=False,
                                 newlines=True)
        expectation = 'insert into pg_catalog.pg_stat_database ("tup_returned", "tup_fetched")\nvalues (11111, 22222)'
        self.assertEqual(result, expectation)

        # Method: build_delete()
        result = pg.build_delete(schema_name='pg_catalog',
                                 table_name='pg_stat_database',
                                 pkey_name='datid',
                                 pkey_value=12345,
                                 newlines=False)
        expectation = 'delete from pg_catalog.pg_stat_database where datid = 12345'
        self.assertEqual(result, expectation)

        result = pg.build_delete(schema_name='pg_catalog',
                                 table_name='pg_stat_database',
                                 pkey_name='datid',
                                 pkey_value=12345,
                                 newlines=True)
        expectation = 'delete from pg_catalog.pg_stat_database\nwhere datid = 12345'
        self.assertEqual(result, expectation)

        # Method: col_names()
        result = pg.col_names(schema_name='pg_catalog', table_name='pg_stat_database')
        self.assertIn('datid', result)

        # Method: col_dtypes()
        result = pg.col_dtypes(schema_name='pg_catalog', table_name='pg_stat_database')
        self.assertEqual(result['datid'], 'oid')

        # Method: read_table()
        result = pg.read_table(schema_name='pg_catalog', table_name='pg_stat_database')
        self.assertGreater(result.shape[0], 0)
        self.assertGreater(result.shape[1], 0)

        # Method: dump()
        # Just make sure pg_dump is installed
        pydoni.find_binary('pg_dump', abort=True)

        #
        # All create*, drop* and list* methods
        #

        pid = getpid()
        test_schema_name = f'test_pydoni_schema_{pid}'
        test_table_name = f'test_pydoni_table_{pid}'
        test_view_name = f'test_pydoni_view_{pid}'

        pg.create_schema(schema_name=test_schema_name)

        try:
            # If any of the following fail, delete the test schema and re-raise the error
            columnspec = {'col1': 'int', 'col2': 'text'}

            pg.create_table(schema_name=test_schema_name,
                            table_name=test_table_name,
                            columnspec=columnspec,
                            if_not_exists=False)
            result = pg.table_exists(schema_name=test_schema_name, table_name=test_table_name)
            self.assertTrue(result)

            pg.create_table(schema_name=test_schema_name,
                            table_name=test_table_name,
                            columnspec=columnspec,
                            if_not_exists=True)

            insert_fake_data_sql = pg.build_insert(schema_name=test_schema_name,
                                                   table_name=test_table_name,
                                                   columns=list(columnspec.keys()),
                                                   values=[5, 'test'],
                                                   validate=False,
                                                   newlines=False)
            pg.execute(insert_fake_data_sql)

            pg.create_view(schema_name=test_schema_name,
                           view_name=test_view_name,
                           view_sql=f'select * from {test_schema_name}.{test_table_name}',
                           or_replace=False)
            result = pg.view_exists(schema_name=test_schema_name, view_name=test_view_name)
            self.assertTrue(result)

            pg.create_view(schema_name=test_schema_name,
                           view_name=test_view_name,
                           view_sql=f'select * from {test_schema_name}.{test_table_name}',
                           or_replace=True)

            result = pg.read_table(schema_name=test_schema_name, table_name=test_view_name)
            self.assertEqual(result.to_dict(), {'col1': {0: 5}, 'col2': {0: 'test'}})

            pg.drop_view(schema_name=test_schema_name, view_name=test_view_name)
            pg.drop_table(schema_name=test_schema_name, table_name=test_table_name)
            pg.drop_schema(schema_name=test_schema_name)

        except Exception:
            # Clean up the test schema, then re-raise the original error
            pg.drop_schema(schema_name=test_schema_name)
            raise

        # Method: _single_quote()
        self.assertEqual(pg._single_quote(5), 5)
        self.assertEqual(pg._single_quote('test'), "'test'")
        self.assertEqual(pg._single_quote("test's"), "'test''s'")
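

# Sketch of running only the Postgres checks above in isolation (hypothetical helper,
# not part of the original test suite). Assumes test_Postgres is a method of the
# TestPydoniCLI class used by the module-level runner earlier in this file; the other
# CLI tests (which need external binaries) are skipped.
def _example_run_postgres_test_only():
    import unittest

    suite = unittest.TestSuite()
    suite.addTest(TestPydoniCLI('test_Postgres'))
    unittest.TextTestRunner(verbosity=2).run(suite)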
def pg_dump(backup_dir, db_name, pg_user, sep, pgdump, csvdump, max_dir_size,
            dry_run, verbose):
    """
    Dump a local Postgres database. Looks for ~/.pgpass by default.
    """
    args, result = pydoni.__pydonicli_declare_args__(locals()), dict()
    vb = Verbose(verbose)

    if dry_run:
        vb.info('Not executing any code (dry run)')

    if pg_user is not None and db_name is not None:
        pg = pydoni.Postgres(pg_user=pg_user, db_name=db_name)
    else:
        # Attempt to parse the ~/.pgpass file. Fail if this file does not exist or
        # cannot be parsed.
        pg = pydoni.Postgres()

    # Define the subfolder within the dump directory to dump files to
    subdir = pydoni.systime(compact=True) + '_' + pg.db_name
    backup_subdir = join(expanduser(backup_dir), subdir)
    mkdir(backup_subdir)

    vb.info('Database: ' + pg.db_name)
    vb.info('Destination folder: ' + backup_subdir)

    # Dump the database based on the user's preference: may dump using pg_dump,
    # export tables to CSV, or both
    dumped_files = []

    if pgdump:
        vb.info('Executing `pg_dump`')
        if not dry_run:
            dumped_dbfile = pg.dump(backup_dir=backup_subdir)
            dumped_files += [dumped_dbfile]

    if csvdump:
        # Dump each table to a delimited textfile
        vb.info('Executing CSV dump to tables')
        if not dry_run:
            dumped_csvfiles = pg.dump_tables(backup_dir=backup_subdir, sep=sep, coerce_csv=False)
            dumped_files += dumped_csvfiles

    result['backup_directory'] = backup_subdir
    result['dumped_files'] = {}

    for f in dumped_files:
        result['dumped_files'][basename(f)] = dict(
            filesize=stat(f).st_size,
            filesize_readable=pydoni.human_filesize(stat(f).st_size),
            created=datetime.datetime.fromtimestamp(getctime(f)).strftime('%Y-%m-%d %H:%M:%S.%f'),
            rows=pydoni.textfile_len(f))

    if verbose:
        vb.line_break()

    tt_list = [[basename(file),
                infodict['created'],
                pydoni.human_filesize(infodict['filesize']),
                str(infodict['rows'])]
               for file, infodict in result['dumped_files'].items()]

    if len(tt_list):
        if verbose:
            print(tt.to_string(
                tt_list,
                header=[click.style(x, bold=True) for x in ['File', 'Created', 'Size', 'Rows']],
                style=tt.styles.ascii_thin_double,
                padding=(0, 1),
                alignment='ccrr'))
    else:
        vb.warn('No database files were dumped!')

    if dry_run:
        rmdir(backup_subdir)

    max_dir_size_enforced = False
    removed_old_backup_dirs = []

    if max_dir_size:
        # Check the size of `backup_dir` and remove the oldest backup directories
        # until the total size is less than `max_dir_size` (upper GB limit)
        subdirs = sorted([x for x in pathlib.Path(backup_dir).iterdir() if isdir(x)], key=getmtime)
        subdirs_size = zip(subdirs, [pydoni.dirsize(x) / 1e9 for x in subdirs])
        total_size = sum([y for x, y in subdirs_size])

        if total_size > max_dir_size:
            vb.warn(f'Enforcing maximum directory size: {str(max_dir_size)} GB')
            max_dir_size_enforced = True

            while total_size > max_dir_size:
                dir_to_remove = str(subdirs[0])
                shutil.rmtree(dir_to_remove)
                removed_old_backup_dirs.append(dir_to_remove)

                subdirs = sorted([x for x in pathlib.Path(backup_dir).iterdir() if isdir(x)], key=getmtime)
                subdirs_size = zip(subdirs, [pydoni.dirsize(x) / 1e9 for x in subdirs])
                total_size = sum([y for x, y in subdirs_size])

                vb.warn(f'Removed "{basename(dir_to_remove)}"')

    vb.program_complete('Postgres dump complete')

    result['max_dir_size_enforced'] = max_dir_size_enforced
    result['removed_old_backup_dirs'] = [basename(x) for x in removed_old_backup_dirs]
    pydoni.__pydonicli_register__(dict(args=args, result=result, command_name='data.pg_dump'))
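

# Usage sketch for pg_dump() (hypothetical helper, not part of the original module).
# Assumes a ~/.pgpass entry for the local database and an existing, writable backup
# directory; the path below is a placeholder. Passing both pgdump=True and
# csvdump=True produces a pg_dump archive plus one delimited textfile per table, and
# max_dir_size (in GB) prunes the oldest backup subdirectories. The direct call
# assumes the plain function shown above rather than its click-wrapped CLI form.
def _example_pg_dump():
    pg_dump(backup_dir='/Volumes/backup_drive/postgres_backups',
            db_name=None,        # fall back to ~/.pgpass
            pg_user=None,
            sep='|',
            pgdump=True,
            csvdump=True,
            max_dir_size=10,     # keep roughly 10 GB of old backups at most
            dry_run=True,        # log what would happen without dumping anything
            verbose=True)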
def backup(source, target, update_log_table, use_rsync, verbose, debug, dry_run):
    """
    Back up a source directory to a target directory.

    This function accepts source and target directories, most often on separate
    external hard drives, and copies to the target all files from the source that are
    either:

        (1) Not in the target directory
        (2) In the target directory, but have been updated at the source

    Files in the target that have been deleted from the source will also be deleted.
    """
    args, result = pydoni.__pydonicli_declare_args__(locals()), dict()
    start_ts = time.time()
    vb = Verbose(verbose=verbose, debug=debug)
    ws = ' '

    ignore_files = [
        'The Office S09E16 Moving On.mkv',
        'The Office S09E20 Paper Airplanes.mkv',
    ]

    if update_log_table:
        start_ts_utc = datetime.datetime.utcnow()
        pg = pydoni.Postgres()
        directory_backup_table_schema = 'pydonicli'
        directory_backup_table_name = 'directory_backup'

        insert_dict = dict(source=source,
                           source_size_bytes=stat(source).st_size,
                           target=target,
                           target_size_before_bytes=stat(target).st_size,
                           target_size_after_bytes=None,
                           start_ts=start_ts_utc,
                           is_completed=False)

        insert_sql = pg.build_insert(schema_name=directory_backup_table_schema,
                                     table_name=directory_backup_table_name,
                                     columns=list(insert_dict.keys()),
                                     values=list(insert_dict.values()),
                                     validate=True)
        if not dry_run:
            pg.execute(insert_sql)

        directory_backup_id = pg.read_sql(f"""
            select directory_backup_id
            from {directory_backup_table_schema}.{directory_backup_table_name}
            order by gen_ts desc
            limit 1""").squeeze()

    assert source != target, 'Source and target directories must be different'

    if use_rsync:
        cmd_lst = ['rsync', '--delete-before', '-a', '-h', '-u']
        if verbose:
            cmd_lst = cmd_lst + ['-v', '--progress']

        cmd_lst = cmd_lst + [f'"{source}"'] + [f'"{target}"']
        cmd = ' '.join(cmd_lst)
        subprocess.call(cmd, shell=True)

        # progress_flag = ' --progress' if verbose else ''
        # backup_cmd = f'rsync -avhu{progress_flag} --delete-before "{source}" "{target}"'
        # subprocess.call(backup_cmd, shell=True)

    else:
        vb.info(f'Listing files at source: {source}')
        files_source = pydoni.listfiles(path=source, recursive=True, full_names=True)
        vb.debug('Found files at source: ' + str(len(files_source)))

        files_source = [x for x in files_source if x not in ignore_files]
        vb.debug(f'Found files at source after filtering out manually ignored files: {len(files_source)}')

        vb.info(f'Listing files at target: {target}')
        files_target = pydoni.listfiles(path=target, recursive=True, full_names=True)
        vb.debug('Found files at target: ' + str(len(files_target)))

        files_target = [x for x in files_target if x not in ignore_files]
        vb.debug(f'Found files at target after filtering out manually ignored files: {len(files_target)}')

        # Scan source files and for each determine whether to do nothing, copy to
        # target, or replace at target
        copied_files = []
        replaced_files = []

        vb.info('Scanning for new, updated or deleted files at source')
        vb.pbar_init(total=len(files_source), unit='file')

        for sourcefile in files_source:
            vb.pbar_write(f'Sourcefile: {sourcefile}', refer_debug=True)
            vb.pbar.set_postfix({'file': basename(sourcefile)})

            targetfile = sourcefile.replace(source, target)
            vb.pbar_write(f'{ws}Expected mirrored targetfile: {targetfile}', refer_debug=True)

            if not isfile(targetfile):
                # Copy file to target. Create parent directory at target if it does not exist
                vb.pbar_write(f'{ws}(Copy) attempting to copy file "{sourcefile}" to "{targetfile}"', refer_debug=True)

                targetdpath = dirname(targetfile)
                if not isdir(targetdpath):
                    vb.pbar_write(f'{ws}{ws}Parent directory of targetfile does not exist, creating it at: ' + targetdpath,
                                  refer_debug=True)
                    if not dry_run:
                        makedirs(targetdpath)
                    vb.pbar_write(f'{ws}{ws}Successful', refer_debug=True)

                if not dry_run:
                    shutil.copy2(sourcefile, targetfile)

                vb.pbar_write(f'{ws}Successful', refer_debug=True)
                copied_files.append(sourcefile)

            elif isfile(targetfile) and is_file_changed(sourcefile, targetfile):
                # Replace file at target (same action as copy, but the parent directory must exist)
                vb.pbar_write(f'(Replace) attempting to copy file "{sourcefile}" to "{targetfile}"', refer_debug=True)
                if not dry_run:
                    shutil.copy2(sourcefile, targetfile)
                vb.pbar_write('Successful', refer_debug=True)
                replaced_files.append(sourcefile)

            else:
                vb.pbar_write(f'{ws}Targetfile already exists and is unchanged', refer_debug=True)

            vb.pbar_update(1)

        vb.pbar_close()

        # Scan target files and for each determine whether that file has since been
        # deleted from source
        deleted_files = []

        vb.info('Scanning for files at target since deleted from source')
        vb.pbar_init(total=len(files_target))

        for targetfile in files_target:
            sourcefile = targetfile.replace(target, source)
            vb.pbar.set_postfix({'file': basename(targetfile)})

            if not isfile(sourcefile) and not isdir(sourcefile):
                vb.pbar_write(f'(Delete) attempting to delete "{targetfile}"', refer_debug=True)
                if not dry_run:
                    send2trash(targetfile)
                vb.pbar_write(f'{ws}Successful', refer_debug=True)
                deleted_files.append(targetfile)

            vb.pbar_update(1)

        vb.pbar_close()

        # Record the number of files copied, replaced and deleted
        vb.info(f'Copied {len(copied_files)} files')
        vb.info(f'Replaced {len(replaced_files)} files')
        vb.info(f'Deleted {len(deleted_files)} files')
        vb.info(f'Unchanged {len(files_source) - len(copied_files) - len(replaced_files) - len(deleted_files)} files')

        result = dict(copied=len(copied_files),
                      replaced=len(replaced_files),
                      deleted=len(deleted_files),
                      unchanged=len(files_source) - len(copied_files) - len(replaced_files) - len(deleted_files))

    if update_log_table:
        vb.debug('Attempting to update log table with results...')

        update_dict = dict(target_size_after_bytes=pydoni.dirsize(target),
                           end_ts=datetime.datetime.utcnow(),
                           is_completed=True)

        update_sql = pg.build_update(schema_name=directory_backup_table_schema,
                                     table_name=directory_backup_table_name,
                                     pkey_name='directory_backup_id',
                                     pkey_value=directory_backup_id,
                                     columns=list(update_dict.keys()),
                                     values=list(update_dict.values()),
                                     validate=True)
        if not dry_run:
            pg.execute(update_sql)

        vb.debug(f'{ws}Successful')

    vb.program_complete('Backup complete', start_ts=start_ts)
    pydoni.__pydonicli_register__(dict(args=args, result=result, command_name='data.backup'))
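

# Usage sketch for backup() (hypothetical helper, not part of the original module):
# mirror one external drive onto another without touching the pydonicli log table.
# The paths are placeholders; dry_run=True walks both trees and reports what would be
# copied, replaced or deleted without modifying anything. The direct call assumes the
# plain function shown above rather than its click-wrapped CLI form.
def _example_backup_dry_run():
    backup(source='/Volumes/photos_primary',
           target='/Volumes/photos_mirror',
           update_log_table=False,
           use_rsync=False,
           verbose=True,
           debug=False,
           dry_run=True)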
def refresh_imdb_table(schema_name, table_name, omdbapikey, verbose=False):
    """
    Query the Postgres table containing IMDB metadata and refresh any values that
    need updating.
    """
    args, result = pydoni.__pydonicli_declare_args__(locals()), dict()

    # `result` will be a dictionary where the movie names are the keys, and the values
    # are dictionaries with items: 'status', 'message', 'updated_values' (dictionary
    # of updated values, if any).
    result_items = ['status', 'message', 'updated_values']

    pg = pydoni.Postgres()
    pkey_name = 'movie_id'
    df = pg.read_table(schema_name, table_name).sort_values(pkey_name)
    cols = pg.col_names(schema_name, table_name)

    if verbose:
        pbar = tqdm(total=len(df), unit='movie')

    for i, row in df.iterrows():
        movie_name = f"{row['title']} ({str(row['release_year'])})"

        try:
            omdbresp = query_omdb(title=row['title'],
                                  release_year=row['release_year'],
                                  omdbapikey=omdbapikey)
        except Exception as e:
            err_str = f"{click.style('ERROR', fg='red')} in {movie_name}: {str(e)}"
            tqdm.write(err_str)
            result[movie_name] = {k: v for k, v in zip(result_items, ['Error', str(e), None])}
            if verbose:
                pbar.update(1)
            continue

        omdbresp = {k: v for k, v in omdbresp.items() if k in cols}
        omdbresp = {k: replace_null(v) for k, v in omdbresp.items()}

        color_map = {
            'No change': 'yellow',
            'Updated': 'green',
            'Not found': 'red'
        }
        change = 'Not found' if not len(omdbresp) else 'No change'

        # Filter out columns and values that do not require an update
        if change != 'Not found':
            upd = filter_updated_values(omdbresp, row)
            change = 'Updated' if len(upd) else change

            upd['imdb_update_ts'] = datetime.datetime.now()
            stmt = pg.build_update(schema_name,
                                   table_name,
                                   pkey_name=pkey_name,
                                   pkey_value=row[pkey_name],
                                   columns=list(upd.keys()),
                                   values=list(upd.values()),
                                   validate=True)
            pg.execute(stmt)

            upd_backend = {k: v for k, v in upd.items() if k != 'imdb_update_ts'}
            upd_backend = upd_backend if len(upd_backend) else None
            result[movie_name] = {k: v for k, v in zip(result_items, [change, None, upd_backend])}
        else:
            result[movie_name] = {k: v for k, v in zip(result_items, [change, None, None])}

        if verbose:
            pbar.update(1)
            space = ' ' if change == 'Updated' else ''
            tqdm.write(click.style(change, fg=color_map[change]) + space + ': ' + movie_name)

    if verbose:
        pbar.close()

    pydoni.program_complete('Movie refresh complete!')
    pydoni.__pydonicli_register__(
        dict(args=args, result=result, command_name='movie.refresh_imdb_table'))
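

# Usage sketch for refresh_imdb_table() (hypothetical helper, not part of the original
# module). Assumes an OMDb API key and a movie table keyed by movie_id with title and
# release_year columns, as read above; the schema/table names and the key string are
# placeholders.
def _example_refresh_imdb_table():
    refresh_imdb_table(schema_name='movies',
                       table_name='imdb_metadata',
                       omdbapikey='<your-omdb-api-key>',
                       verbose=True)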