def distinct(unit):
    """De-duplicate each of the unit's import tables that declares a
    non-empty 'distinct_on' column list."""
    from utils.table_tools import distinct_import_sql
    with conn_curs(DATABASE_CONF) as (connection, cursor):
        for actual_table in unit.ACTUAL_TABLES:
            # dict.has_key() is deprecated (removed in Python 3); a truthy
            # .get() covers both "key missing" and "empty list" in one test.
            if actual_table.get('distinct_on'):
                cursor.execute(distinct_import_sql(actual_table))
def partition():
    """Create flat partitions for every schema table, partitioned on
    (election_key, state_key) per the process-unit hierarchy."""
    # Removed unused `from collections import OrderedDict` import.
    from utils.create_partitions import create_flat_partitions
    from process_units import HIERARCHY, UNIT_DICT
    with conn_curs(DATABASE_CONF) as (connection, cursor):
        for schema_table in SCHEMA_TABLES:
            create_flat_partitions(
                schema_table.name,
                ('election_key', 'state_key'),
                HIERARCHY,
                UNIT_DICT,
                cursor)
def union(unit):
    """Rebuild each of the unit's UNION import tables: drop any existing
    table, then recreate it from its component tables."""
    from utils.table_tools import create_union_table_sql
    with conn_curs(DATABASE_CONF) as (connection, cursor):
        # Renamed loop variable: the original `union` shadowed this
        # function's own name inside its body.
        for union_spec in unit.UNIONS:
            # Drop first so the CREATE below starts from a clean slate.
            cursor.execute('DROP TABLE IF EXISTS {name} CASCADE;'.format(
                name=union_spec['actual_table']['import_table']))
            union_sql = create_union_table_sql(
                SCHEMA_TABLE_DICT[union_spec['actual_table']['schema_table']],
                union_spec)
            cursor.execute(union_sql)
def rekey(unit):
    """Move rows from the unit's import tables into the timestamped
    partition tables, rekeying tables that carry long fields."""
    print('ENTERING REKEY')
    from utils.table_tools import rekey_import_sql, create_timestamp_table_sql
    from config import timestamp_suffix
    with conn_curs(DATABASE_CONF) as (connection, cursor):
        # Create every destination (timestamped) table before any INSERT.
        for schema_table in SCHEMA_TABLES:
            cursor.execute(create_timestamp_table_sql(
                schema_table, unit.partition_suffixes, timestamp_suffix))
        for table in unit.ACTUAL_TABLES:
            print('TABLE: {}'.format(table))
            schema_table = SCHEMA_TABLE_DICT[table['schema_table']]
            if len(table['long_fields']) > 0:
                # Long fields are rekeyed through helper-generated SQL.
                sql = rekey_import_sql(
                    SCHEMA_TABLE_DICT[table['schema_table']], table,
                    unit.partition_suffixes, timestamp_suffix)
                print(sql)
                cursor.execute(sql)
            else:
                # BUG FIX: '_{suffix}'.format(suffix) passed a positional
                # argument to a *named* replacement field and raised
                # KeyError at runtime; the keyword form is required.
                sql = ('INSERT INTO {name}'.format(name=table['schema_table'])
                       + ''.join('_{suffix}'.format(suffix=suffix)
                                 for suffix in unit.partition_suffixes)
                       + timestamp_suffix
                       + ' SELECT * FROM {import_table};'.format(
                           import_table=table['import_table']))
                cursor.execute(sql)
        # Stamp the unit as processed while the connection is still open.
        timestamp(unit, connection, cursor)
def clean_import(unit):
    """Drop and recreate each of the unit's import tables from its schema."""
    # Removed unused import of create_union_table_sql.
    from utils.table_tools import import_table_sql
    with conn_curs(DATABASE_CONF) as (connection, cursor):
        for actual_table in unit.ACTUAL_TABLES:
            # CASCADE removes dependents so the CREATE cannot collide.
            cursor.execute('DROP TABLE IF EXISTS {name} CASCADE;'.format(
                name=actual_table['import_table']))
            import_sql = import_table_sql(
                SCHEMA_TABLE_DICT[actual_table['schema_table']], actual_table)
            print(import_sql)
            cursor.execute(import_sql)
def dump_json(nulls=False):
    """Query every schema table and write its rows to
    <json_location>/<table name>.json; `nulls` is forwarded to write_json."""
    from config import json_location
    import os
    with conn_curs(DATABASE_CONF) as (connection, cursor):
        for table in SCHEMA_TABLES:
            cursor.execute(build_sql(table))
            out_path = os.path.join(json_location, table.name + '.json')
            write_json(cursor.fetchall(), out_path, nulls)
def partition():
    """Create flat partitions for each schema table on the
    (election_key, state_key) pair using the process-unit hierarchy."""
    # Removed unused `from collections import OrderedDict` import.
    from utils.create_partitions import create_flat_partitions
    from process_units import HIERARCHY, UNIT_DICT
    with conn_curs(DATABASE_CONF) as (connection, cursor):
        for schema_table in SCHEMA_TABLES:
            create_flat_partitions(schema_table.name,
                                   ('election_key', 'state_key'),
                                   HIERARCHY, UNIT_DICT, cursor)
def build(unit):
    """Run the build pipeline for one unit: bulk copy, de-dupe, unions,
    then rekey into the partitioned tables."""
    from ersatz import new_process_copies
    with conn_curs(DATABASE_CONF) as (conn, cur):
        # Lazily attach an ersatz copy configuration on first use.
        if not hasattr(unit, 'ERSATZPG_CONFIG'):
            unit.ERSATZPG_CONFIG = make_ersatz_conf(unit)
        new_process_copies(unit, conn)
    # Each stage below opens its own connection.
    distinct(unit)
    union(unit)
    rekey(unit)
def clean_schema():
    """Drop and recreate every schema object: enums, then sequences, then
    tables (tables depend on the first two, so order matters)."""
    with conn_curs(DATABASE_CONF) as (conn, cur):
        for obj in list(SCHEMA_ENUMS) + list(SCHEMA_SEQS) + list(SCHEMA_TABLES):
            # Drop-then-create keeps each definition current.
            cur.execute(obj.drop())
            cur.execute(obj.sql())
def clean_schema():
    """Rebuild the database schema from scratch: enums, sequences, tables."""
    def _rebuild(objects, cur):
        # Drop each object, then recreate it from its current definition.
        for obj in objects:
            cur.execute(obj.drop())
            cur.execute(obj.sql())
    with conn_curs(DATABASE_CONF) as (conn, cur):
        _rebuild(SCHEMA_ENUMS, cur)
        _rebuild(SCHEMA_SEQS, cur)
        _rebuild(SCHEMA_TABLES, cur)
def dump_json(nulls=False):
    """Dump each schema table's rows as JSON under json_location, one file
    per table named '<table>.json'."""
    from config import json_location
    import os
    with conn_curs(DATABASE_CONF) as (conn, cur):
        for schema_table in SCHEMA_TABLES:
            cur.execute(build_sql(schema_table))
            rows = cur.fetchall()
            target = os.path.join(json_location, schema_table.name + '.json')
            write_json(rows, target, nulls)
def build(unit):
    """Full build pipeline for a unit: ersatz bulk copy, then distinct,
    union, and rekey stages."""
    print('RUNNING BUILD')
    from ersatz import new_process_copies
    with conn_curs(DATABASE_CONF) as (conn, cur):
        # First run for this unit: derive and attach the ersatz config.
        if not hasattr(unit, 'ERSATZPG_CONFIG'):
            unit.ERSATZPG_CONFIG = make_ersatz_conf(unit)
        new_process_copies(unit, conn)
    # Downstream stages manage their own connections.
    distinct(unit)
    union(unit)
    rekey(unit)
def build(unit):
    """Build one unit end to end: copy raw data in, then run the distinct,
    union, and rekey passes over it."""
    print('RUNNING BUILD')
    from ersatz import new_process_copies
    with conn_curs(DATABASE_CONF) as (connection, cursor):
        if not hasattr(unit, 'ERSATZPG_CONFIG'):
            # Cache the generated ersatz configuration on the unit itself.
            setattr(unit, 'ERSATZPG_CONFIG', make_ersatz_conf(unit))
        new_process_copies(unit, connection)
    for stage in (distinct, union, rekey):
        stage(unit)
def clean_import(unit):
    """Drop and recreate every import table declared by the unit, logging
    each table and the generated CREATE statement."""
    print('running clean import\n\n')
    # Removed unused import of create_union_table_sql and dead debug code.
    from utils.table_tools import import_table_sql
    with conn_curs(DATABASE_CONF) as (connection, cursor):
        for actual_table in unit.ACTUAL_TABLES:
            print('ACTUAL: {}'.format(actual_table))
            # CASCADE drops dependents so recreation cannot collide.
            cursor.execute('DROP TABLE IF EXISTS {name} CASCADE;'.format(
                name=actual_table['import_table']))
            import_sql = import_table_sql(
                SCHEMA_TABLE_DICT[actual_table['schema_table']], actual_table)
            print(import_sql + '\n')
            cursor.execute(import_sql)
    print('ending clean import\n\n\n')
def clean_import(unit):
    """Recreate the unit's import tables from their schema definitions,
    dropping any existing table first."""
    print('running clean import\n\n')
    # Removed unused import of create_union_table_sql and dead debug code.
    from utils.table_tools import import_table_sql
    with conn_curs(DATABASE_CONF) as (connection, cursor):
        for actual_table in unit.ACTUAL_TABLES:
            print('ACTUAL: {}'.format(actual_table))
            cursor.execute('DROP TABLE IF EXISTS {name} CASCADE;'.format(
                name=actual_table['import_table']))
            import_sql = import_table_sql(
                SCHEMA_TABLE_DICT[actual_table['schema_table']],
                actual_table)
            print(import_sql + '\n')
            cursor.execute(import_sql)
    print('ending clean import\n\n\n')
def rekey(unit):
    """Copy rows from the unit's import tables into the timestamped
    partition tables, rekeying any table with long fields."""
    from utils.table_tools import rekey_import_sql, create_timestamp_table_sql
    from config import timestamp_suffix
    with conn_curs(DATABASE_CONF) as (connection, cursor):
        # Ensure every destination table exists before inserting.
        for schema_table in SCHEMA_TABLES:
            cursor.execute(create_timestamp_table_sql(
                schema_table, unit.partition_suffixes, timestamp_suffix))
        for table in unit.ACTUAL_TABLES:
            schema_table = SCHEMA_TABLE_DICT[table['schema_table']]
            if len(table['long_fields']) > 0:
                sql = rekey_import_sql(
                    SCHEMA_TABLE_DICT[table['schema_table']], table,
                    unit.partition_suffixes, timestamp_suffix)
                print(sql)
                cursor.execute(sql)
            else:
                # BUG FIX: '_{suffix}'.format(suffix) fed a positional
                # argument to a named field, raising KeyError; use the
                # keyword form. (Also dropped unused `cleared_tables`.)
                sql = ('INSERT INTO {name}'.format(name=table['schema_table'])
                       + ''.join('_{suffix}'.format(suffix=suffix)
                                 for suffix in unit.partition_suffixes)
                       + timestamp_suffix
                       + ' SELECT * FROM {import_table};'.format(
                           import_table=table['import_table']))
                cursor.execute(sql)
        timestamp(unit, connection, cursor)
def distinct(unit):
    """Run the DISTINCT de-duplication SQL over every import table in the
    unit that has a non-empty 'distinct_on' list."""
    from utils.table_tools import distinct_import_sql
    with conn_curs(DATABASE_CONF) as (connection, cursor):
        for actual_table in unit.ACTUAL_TABLES:
            # has_key() is deprecated and gone in Python 3; a truthy .get()
            # checks key presence and non-emptiness in one step.
            if actual_table.get('distinct_on'):
                cursor.execute(distinct_import_sql(actual_table))