Example 1
def json_to_postgres(data, table):

    # Write parsed JSON data to file
    with open('{}/data/{}.json'.format(root_dir, table), 'w') as f:
        json.dump(data, f)

    logging.info(
        'Creating unlogged table to store JSON {} data...'.format(table))
    staging_json_table = AsIs('staging.' + table + '_json')

    # Create an unlogged Postgres table to copy the JSON data to
    # Entire JSON blob will be copied to one field: doc
    execute_from_text('''

		DROP TABLE IF EXISTS {unlogged};

		CREATE UNLOGGED TABLE {unlogged} (doc JSON);

		'''.format(unlogged=staging_json_table))

    # Copy from JSON file to unlogged table
    with open('{}/data/{}.json'.format(root_dir, table), 'r') as f:
        _, cur = cursor()
        logging.info('Copying JSON {} data into table...'.format(table))
        cur.copy_expert('COPY {} FROM STDIN'.format(staging_json_table), f)

    # Make sure JSON table has data
    data_check_json(staging_json_table)
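The snippets above rely on helpers defined elsewhere in app.py (root_dir, cursor, execute_from_text, execute_from_file). A minimal sketch of what cursor and execute_from_text might look like on top of psycopg2, assuming autocommit connections and the keyword flags used in these examples; the connection string and return shapes are assumptions, not the project's actual code:

import psycopg2

def cursor():
    # Open a new connection and return (connection, cursor); autocommit is assumed
    # so the DDL and COPY statements take effect without explicit commits
    conn = psycopg2.connect('dbname=iex')  # placeholder connection string
    conn.autocommit = True
    return conn, conn.cursor()

def execute_from_text(sql, single_output=False, aggregate_output=False, full_output=False):
    # Run a SQL string; optionally return a single scalar or every row
    # (error handling and connection reuse are omitted for brevity)
    _, cur = cursor()
    cur.execute(sql)
    if single_output or aggregate_output:
        return cur.fetchone()[0]
    if full_output:
        return cur.fetchall()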
Example 2
def load_table(table, backfill, new_data_field, json_data=None):
    # If backfilling, run the DDL script (to drop and recreate prod table)
    if backfill:
        execute_from_file(root_dir + '/sql/' + table + '_ddl.sql')

    if json_data is not None:
        # Copy JSON data to Postgres
        json_to_postgres(json_data, table)

    # Create staging table with data from latest request and insert into prod table
    execute_from_file(root_dir + '/sql/' + table + '_insert.sql')

    # Make sure staging and prod tables actually have data
    # Staging table may not have data if there is nothing new to insert
    data_check_row_count(table)

    if not backfill:
        # Output the new values to be inserted from the current execution
        new_data = execute_from_text(
            'SELECT DISTINCT {} FROM staging.{}'.format(new_data_field, table),
            full_output=True)

        # Format output
        new_data_clean = [value[0] for value in new_data]
        if 'date' in new_data_field:
            new_data_clean = [
                value.strftime('%Y-%m-%d') for value in new_data_clean
            ]

        if len(new_data) == 0:
            logging.info('No new data to insert into public.{}'.format(table))
        else:
            logging.info('New {} values inserted into public.{}: {}'.format(
                new_data_field, table, sorted(new_data_clean)))
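A hedged usage sketch for load_table, assuming an upstream helper (fetch_prices is a hypothetical name) returns the parsed API response; the 'orders' call reflects the note in Example 5 that the orders table is derived from existing tables and needs no JSON input:

# Hypothetical call sites; fetch_prices and the field names are illustrative only
prices_json = fetch_prices()
load_table('prices', backfill=False, new_data_field='date', json_data=prices_json)

# 'orders' is derived from existing tables, so no JSON payload is passed
load_table('orders', backfill=False, new_data_field='date')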
Example 3
File: app.py Project: aalevine/iex
def json_to_postgres(data, table):
	"""
	Write parsed JSON data to unlogged Postgres table 
	Entire JSON blob will be copied to one record in the table
	This format allows for subsequent parsing into final SQL table format (using json_populate_recordset)

	:param data: parsed JSON data from API call (e.g. for company, prices tables)
	:param table: table name (string)
	"""

	# Write parsed JSON data to a local file
	with open('{}/data/{}.json'.format(root_dir, table), 'w') as f:
		json.dump(data, f)

	# Create an unlogged Postgres table to copy the local JSON file to. 
	# Table has one column: doc
	logging.info('Creating unlogged table to store JSON {} data...'.format(table))
	staging_json_table = AsIs('staging.'+table+'_json')

	execute_from_text('''

		DROP TABLE IF EXISTS {unlogged};

		CREATE UNLOGGED TABLE {unlogged} (doc JSON);

		'''.format(unlogged=staging_json_table))

	# Copy from local JSON file to unlogged table
	with open('{}/data/{}.json'.format(root_dir, table), 'r') as f:  
		_, cur = cursor()

		logging.info('Copying JSON {} data into table...'.format(table))
		cur.copy_expert('COPY {} FROM STDIN'.format(staging_json_table), f)

	# Make sure new unlogged table has data
	query_check_length = 'SELECT LENGTH(doc::VARCHAR) FROM {}'.format(staging_json_table)
	json_char_length = execute_from_text(query_check_length, aggregate_output=True)

	# Log success if the doc field is populated
	if json_char_length != 0:
		logging.info('JSON data successfully copied to {}. Char length: {}'.format(staging_json_table, json_char_length))
	else:
		logging.error('No JSON data copied to {}. Char length: {}'.format(staging_json_table, json_char_length))		
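The docstring above points to json_populate_recordset for the follow-up parsing step. The real SQL lives in the project's sql/<table>_insert.sql files and is not shown here; a guessed shape for the prices case, kept as a plain string for illustration (table names, column handling, and the lack of deduplication are assumptions):

# Illustrative only: expand the single-row staging JSON doc into typed rows,
# then append them to the production table
prices_insert_sql = '''
    DROP TABLE IF EXISTS staging.prices;

    CREATE TABLE staging.prices AS
    SELECT p.*
    FROM staging.prices_json,
         json_populate_recordset(NULL::public.prices, doc) AS p;

    INSERT INTO public.prices
    SELECT * FROM staging.prices;
    '''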
Example 4
def data_check_row_count(table):
    # Make sure each staging and production table has data
    for schema in ['staging', 'public']:
        query_row_count = 'SELECT COUNT(*) FROM {}.{}'.format(schema, table)
        row_count = execute_from_text(query_row_count, single_output=True)

        if row_count != 0:
            logging.info('Row count for {}.{}: {}'.format(
                schema, table, row_count))
        else:
            logging.info('Table {}.{} is empty'.format(schema, table))
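A small usage sketch for the row-count check; the table name is illustrative and the log lines only indicate the shape of the output:

# Hypothetical call after loading the prices table
data_check_row_count('prices')
# Expected logs (shape only):
#   Row count for staging.prices: <n>
#   Row count for public.prices: <n>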
Example 5
File: app.py Project: aalevine/iex
def load_table(table, backfill, new_data_field, from_json_data=None):
	"""
	Loads each production table

	If backfilling, drops and recreates the table
	If not backfilling, logs the new values to be inserted
	
	:param table: table name (string)
	:param backfill: perform backfill? (boolean)
	:param new_data_field: field to check for newly inserted records (e.g. 'date' for prices table, 
		'stock_code' for company table)
	:param from_json_data: parsed JSON data to load the table with (e.g. company, prices tables). 
		The orders table is derived FROM the prices table, so it requires no JSON data processing
	"""

	# If backfilling, run the DDL script (to drop and recreate prod table)
	if backfill:
		execute_from_file(root_dir+'/sql/'+table+'_ddl.sql')
	
	# If the table requires loading from JSON data ('company', 'prices'), load it
	# 'orders' table is derived from existing tables, so it does not require loading from JSON
	if from_json_data is not None:
		# Write parsed JSON data to unlogged Postgres table 
		json_to_postgres(from_json_data, table)

	# Create staging table with data from latest request and insert into prod table
	execute_from_file(root_dir+'/sql/'+table+'_insert.sql')

	# Make sure staging and prod tables actually have data
	# Note: staging table may not have data if there is nothing new to insert
	data_check_row_count(table)

	# If not backfilling, output the new values to be inserted from the current execution
	if not backfill:
		new_data = execute_from_text('SELECT DISTINCT {} FROM staging.{}'.format(new_data_field, table), full_output=True)

		# Format output
		new_data_clean = [value[0] for value in new_data]
		if 'date' in new_data_field:
			new_data_clean = [value.strftime('%Y-%m-%d') for value in new_data_clean]

		if len(new_data) == 0:
			logging.info('No new data to insert into public.{}'.format(table))
		else:
			# Log the new stock codes or dates to be appended 
			logging.info('New {} values inserted into public.{}: {}'.format(new_data_field, table, sorted(new_data_clean)))
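On backfill, load_table runs sql/<table>_ddl.sql to drop and recreate the production table. That file is not shown; a guessed shape for the prices table with an invented column list (the real schema lives in the project's sql/ directory):

# Illustrative only: the actual columns depend on the API response
prices_ddl_sql = '''
    DROP TABLE IF EXISTS public.prices;

    CREATE TABLE public.prices (
        stock_code TEXT,
        date       DATE,
        close      NUMERIC
    );
    '''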
Example 6
def data_check_json(table):
    '''
	The json_to_postgres function saves the parsed JSON response to an unlogged Postgres table
	This function makes sure that "staging" JSON table actually has data, i.e. that the copy_expert call worked correctly
	Note: the entire JSON blob should be stored in the field 'doc'
	'''

    query_check_length = 'SELECT LENGTH(doc::VARCHAR) FROM {}'.format(table)
    json_char_length = execute_from_text(query_check_length,
                                         single_output=True)

    if json_char_length != 0:
        logging.info(
            'JSON data successfully copied to {}. Char length: {}'.format(
                table, json_char_length))
    else:
        logging.error('No JSON data copied to {}. Char length: {}'.format(
            table, json_char_length))
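A hedged call matching how Example 1 invokes this check right after the COPY; the staging name follows the 'staging.<table>_json' convention used above:

from psycopg2.extensions import AsIs

# Hypothetical call for the prices staging table
data_check_json(AsIs('staging.prices_json'))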