def main():
    """Generates public data dump files from the latest prod data."""

    # Connect to the latest schemas.
    db = DatabaseConnection(path_config='db_config.yaml')
    schema = db.get_latest_schema('prod_')
    schema_profil = db.get_latest_schema('source_internal_profil_')
    db.execute('SET search_path="' + schema + '", "' + schema_profil + '";')
    timestamp = schema[schema.rfind('_') + 1:]
    print('[OK] Dumping from schemas "%s" and "%s"...' %
          (schema, schema_profil))

    # Read YAML configuration file.
    config = yaml_load('public_dumps.yaml')
    dir_save = config['save_directory']
    dumps = config['dumps']

    # Process all dumps.
    for dump_name in dumps:
        save_path = os.path.join(dir_save,
                                 '%s_%s.csv' % (dump_name, timestamp))
        db.dump_to_CSV(dumps[dump_name]['query'], save_path)
        print('[OK] Saved dump "%s" to %s' % (dump_name, save_path))

        stage_path = os.path.join(dir_save, dump_name + '.csv')
        shutil.copyfile(save_path, stage_path)
        print('[OK] Copied dump "%s" to %s' % (dump_name, stage_path))

    # Close database connection.
    db.close()
Ejemplo n.º 2
0
def generate_public_data_dumps(limit=None, verbose=False):
    """ Generates the public data dump files from the latest production data """

    # Connect to the latest production data schema
    db = DatabaseConnection(path_config='db_config_update_source.yaml')
    schema = db.get_latest_schema('prod_')
    db.execute('SET search_path="' + schema + '";')
    timestamp = schema[schema.rfind('_') + 1:]
    if verbose:
        print('[OK] Dumping from schema "%s"...' % (schema))
    if limit is not None:
        print('[WARNING] Dumping with row limit %d!' % (limit))

    # Read YAML configuration file
    config = yaml_load('public_dumps.yaml')
    dir_save = config['save_directory']
    dumps = config['dumps']

    # Process all dumps
    for dump_name in dumps:
        # Construct dump query
        q = dumps[dump_name]['query']
        q = q.rstrip().rstrip(';')  # possibly remove ; ending
        if limit is not None:
            q += ' LIMIT %d' % (limit)

        # Dump to CSV without timestamp
        path_output = '%s%s.csv' % (dir_save, dump_name)
        db.dump_to_CSV(q, path_output)
        if verbose:
            print('[OK] Created dump "%s" in %s' % (dump_name, path_output))

        # Dump to CSV with timestamp
        path_output = '%s%s_%s.csv' % (dir_save, dump_name, timestamp)
        db.dump_to_CSV(q, path_output)
        if verbose:
            print('[OK] Created dump "%s" in %s' % (dump_name, path_output))

    # Close database connection
    db.close()