def main():
    """Generate the public data dump files from the latest prod data.

    Opens the database described by ``db_config.yaml``, puts the schemas
    returned by ``get_latest_schema`` for the ``prod_`` and
    ``source_internal_profil_`` prefixes on the search path, and runs every
    query listed under ``dumps`` in ``public_dumps.yaml``.

    For each dump two files are produced inside the configured
    ``save_directory``:

    * ``<dump_name>_<timestamp>.csv`` -- written by ``db.dump_to_CSV``;
    * ``<dump_name>.csv`` -- a copy of the file above.

    ``<timestamp>`` is the part of the ``prod_`` schema name that follows its
    last underscore.

    The database connection is closed on exit, including when a dump fails.
    """
    # Connect to the latest schemas.
    db = DatabaseConnection(path_config='db_config.yaml')
    try:
        schema = db.get_latest_schema('prod_')
        schema_profil = db.get_latest_schema('source_internal_profil_')
        db.execute('SET search_path="' + schema + '", "' + schema_profil + '";')

        # Everything after the last underscore of the schema name.
        timestamp = schema[schema.rfind('_') + 1:]
        print('[OK] Dumping from schemas "%s" and "%s"...' % (schema, schema_profil))

        # Read YAML configuration file.
        config = yaml_load('public_dumps.yaml')
        dir_save = config['save_directory']
        dumps = config['dumps']

        # Process all dumps.
        for dump_name, dump_spec in dumps.items():
            # Timestamped dump, written straight from the query.
            save_path = os.path.join(dir_save, '%s_%s.csv' % (dump_name, timestamp))
            db.dump_to_CSV(dump_spec['query'], save_path)
            print('[OK] Saved dump "%s" to %s' % (dump_name, save_path))

            # Copy of the same file under a name without the timestamp.
            stage_path = os.path.join(dir_save, dump_name + '.csv')
            shutil.copyfile(save_path, stage_path)
            print('[OK] Copied dump "%s" to %s' % (dump_name, stage_path))
    finally:
        # Close database connection, even if one of the steps above raised.
        db.close()
def generate_public_data_dumps(limit=None, verbose=False):
    """Generate the public data dump files from the latest production data.

    Opens the database described by ``db_config_update_source.yaml``, puts
    the schema returned by ``get_latest_schema('prod_')`` on the search path,
    and runs every query listed under ``dumps`` in ``public_dumps.yaml``.

    For each dump two files are written inside the configured
    ``save_directory``: ``<dump_name>.csv`` and
    ``<dump_name>_<timestamp>.csv``, where ``<timestamp>`` is the part of the
    schema name that follows its last underscore.

    Args:
        limit: Optional integer row limit. When given, ``LIMIT <limit>`` is
            appended to every dump query and a warning is printed.
        verbose: When true, print a progress line for the schema in use and
            for every file created.

    The database connection is closed on exit, including when a dump fails.
    """
    # Local import keeps this block self-contained; paths are joined with
    # os.path.join so that ``save_directory`` works with or without a
    # trailing path separator.
    import os

    # Connect to the latest production data schema
    db = DatabaseConnection(path_config='db_config_update_source.yaml')
    try:
        schema = db.get_latest_schema('prod_')
        db.execute('SET search_path="' + schema + '";')

        # Everything after the last underscore of the schema name.
        timestamp = schema[schema.rfind('_') + 1:]
        if verbose:
            print('[OK] Dumping from schema "%s"...' % (schema))
        # The row-limit warning is printed whether or not verbose is set.
        if limit is not None:
            print('[WARNING] Dumping with row limit %d!' % (limit))

        # Read YAML configuration file
        config = yaml_load('public_dumps.yaml')
        dir_save = config['save_directory']
        dumps = config['dumps']

        # Process all dumps
        for dump_name, dump_spec in dumps.items():
            # Construct dump query: drop trailing whitespace and a single
            # run of ';' so that a LIMIT clause can be appended safely.
            q = dump_spec['query'].rstrip().rstrip(';')
            if limit is not None:
                q += ' LIMIT %d' % (limit)

            # NOTE(review): the query is executed once per output file, so
            # the two CSVs come from two separate runs -- confirm whether a
            # file copy would be acceptable here instead.
            output_names = (
                '%s.csv' % (dump_name),                  # without timestamp
                '%s_%s.csv' % (dump_name, timestamp),    # with timestamp
            )
            for file_name in output_names:
                path_output = os.path.join(dir_save, file_name)
                db.dump_to_CSV(q, path_output)
                if verbose:
                    print('[OK] Created dump "%s" in %s' % (dump_name, path_output))
    finally:
        # Close database connection, even if one of the steps above raised.
        db.close()