def to_redshift(self, table_name, username=None, password=None, host=None,
                db=None, port=None, **copy_args):
    """
    Write a table to a Redshift database. Note that this requires you to pass
    AWS S3 credentials or store them as environmental variables.

    Args:
        table_name: str
            The table name and schema (``my_schema.my_table``) to write to.
        username: str
            Required if env variable ``REDSHIFT_USERNAME`` not populated
        password: str
            Required if env variable ``REDSHIFT_PASSWORD`` not populated
        host: str
            Required if env variable ``REDSHIFT_HOST`` not populated
        db: str
            Required if env variable ``REDSHIFT_DB`` not populated
        port: int
            Required if env variable ``REDSHIFT_PORT`` not populated. Port 5439
            is typical.
        \**copy_args: kwargs
            See :func:`~parsons.databases.Redshift.copy` for options.

    Returns:
        ``None``
    """  # noqa: W605

    from parsons import Redshift
    rs = Redshift(username=username, password=password, host=host, db=db, port=port)
    rs.copy(self, table_name, **copy_args)
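
# A minimal usage sketch for to_redshift(), assuming the REDSHIFT_* and AWS
# credential environment variables are already populated. The schema and table
# names below are hypothetical.
from parsons import Table

tbl = Table([['id', 'name'], [1, 'Jim'], [2, 'Sarah']])

# if_exists='drop' is one of the copy() options forwarded via **copy_args.
tbl.to_redshift('my_schema.my_table', if_exists='drop')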
@classmethod
def from_redshift(cls, query, username=None, password=None, host=None,
                  db=None, port=None):
    """
    Create a ``parsons table`` from a Redshift query.

    To pull an entire Redshift table, use a query like ``SELECT * FROM tablename``.

    `Args:`
        query: str
            A valid SQL statement
        username: str
            Required if env variable ``REDSHIFT_USERNAME`` not populated
        password: str
            Required if env variable ``REDSHIFT_PASSWORD`` not populated
        host: str
            Required if env variable ``REDSHIFT_HOST`` not populated
        db: str
            Required if env variable ``REDSHIFT_DB`` not populated
        port: int
            Required if env variable ``REDSHIFT_PORT`` not populated. Port 5439
            is typical.

    `Returns:`
        Parsons Table
            See :ref:`parsons-table` for output options.
    """

    from parsons import Redshift
    rs = Redshift(username=username, password=password, host=host, db=db, port=port)
    return rs.query(query)
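
# A minimal usage sketch for from_redshift(), assuming the REDSHIFT_* environment
# variables are set. The table name below is hypothetical.
from parsons import Table

people = Table.from_redshift('SELECT * FROM my_schema.people LIMIT 100')
print(people.num_rows)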
def setUp(self):

    self.temp_schema = TEMP_SCHEMA
    self.rs = Redshift()

    self.tbl = Table([['ID', 'Name'],
                      [1, 'Jim'],
                      [2, 'John'],
                      [3, 'Sarah']])

    # Create a schema, create a table, create a view
    setup_sql = f"""
        drop schema if exists {self.temp_schema} cascade;
        create schema {self.temp_schema};
    """

    other_sql = f"""
        create table {self.temp_schema}.test (id smallint, name varchar(5));
        create view {self.temp_schema}.test_view as (select * from {self.temp_schema}.test);
    """  # noqa: E501

    self.rs.query(setup_sql)
    self.rs.query(other_sql)

    self.s3 = S3()
    self.temp_s3_bucket = os.environ['S3_TEMP_BUCKET']
    self.temp_s3_prefix = 'test/'
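
# A matching tearDown for this fixture (a sketch; the real test class may differ)
# would drop the temporary schema and clear the temporary S3 prefix so repeated
# runs stay isolated:
def tearDown(self):
    # Remove all database objects created for the test.
    self.rs.query(f'drop schema if exists {self.temp_schema} cascade;')

    # Remove anything written under the temporary S3 prefix.
    for key in self.s3.list_keys(self.temp_s3_bucket, prefix=self.temp_s3_prefix):
        self.s3.remove_file(self.temp_s3_bucket, key)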
def setUp(self):

    self.rs = Redshift(username='******', password='******',
                       host='test', db='test', port=123)

    self.tbl = Table([['ID', 'Name'],
                      [1, 'Jim'],
                      [2, 'John'],
                      [3, 'Sarah']])

    self.tbl2 = Table([
        ["c1", "c2", "c3", "c4", "c5", "c6", "c7"],
        ["a", "", 1, "NA", 1.4, 1, 2],
        ["b", "", 2, "NA", 1.4, 1, 2],
        ["c", "", 3.4, "NA", "", "", "a"],
        ["d", "", 5, "NA", 1.4, 1, 2],
        ["e", "", 6, "NA", 1.4, 1, 2],
        ["f", "", 7.8, "NA", 1.4, 1, 2],
        ["g", "", 9, "NA", 1.4, 1, 2],
    ])

    self.mapping = self.rs.generate_data_types(self.tbl)

    # Generate mappings for the wider table with and without bool parsing, so
    # tests can compare the two behaviors.
    self.rs.DO_PARSE_BOOLS = True
    self.mapping2 = self.rs.generate_data_types(self.tbl2)
    self.rs.DO_PARSE_BOOLS = False
    self.mapping3 = self.rs.generate_data_types(self.tbl2)
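
# A sketch of the kind of assertion this setup supports. The mapping keys used
# here ('headers' and 'type_list') are an assumption about the structure
# generate_data_types() returns; check the Redshift connector source before
# relying on them.
def test_generate_data_types(self):
    # Simple two-column table: an int column and a short varchar column.
    self.assertEqual(self.mapping['headers'], ['ID', 'Name'])
    self.assertEqual(self.mapping['type_list'], ['int', 'varchar'])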
from glob import glob

import numpy as np
import pandas as pd
from parsons import Redshift, S3, Table


def main():
    setup_environment()

    # Create an instance of S3 and Redshift
    s3 = S3()
    rs = Redshift()

    STAGE2_GLOB = 'stage2.*.csv'

    SCHEMA = {
        'congressional_district': np.float64,
        'state_house_district': np.float64,
        'state_senate_district': np.float64,
        'n_guns_involved': np.float64,
    }

    def load_csv(csv_fname):
        return pd.read_csv(csv_fname, dtype=SCHEMA, parse_dates=['date'],
                           encoding='utf-8')

    def inner_sort(dfs):
        for df in dfs:
            assert all(~df['date'].isna())
            df.sort_values('date', inplace=True)

    def outer_sort(dfs):
        # If the first incident in one file took place earlier than the first
        # incident in another, we assume all incidents in the former took place
        # earlier than all incidents in the latter.
        dfs.sort(key=lambda df: df.loc[0].date)

    # Sort the dataframes by ascending date, then sort by ascending date *within*
    # each dataframe, then merge into one giant CSV.
    dfs = [load_csv(fname) for fname in glob(STAGE2_GLOB)]
    inner_sort(dfs)
    outer_sort(dfs)
    giant_df = pd.concat(dfs, ignore_index=True)
    giant_df.to_csv('stage3.csv', index=False, float_format='%g', encoding='utf-8')

    # Convert the dataframe to a parsons Table
    final_table = Table.from_dataframe(giant_df)

    # Push the table to Redshift
    final_table.to_redshift('cjaf_gvp.gva_2019_data', if_exists='drop')
    logger.info("Successfully created GVA 2019 Data Table")
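
# setup_environment() is called above but not shown. A minimal sketch of what
# such a helper might do (hypothetical; the real script may differ) is to verify
# the environment variables the Redshift and S3 connectors read at construction:
import os

def setup_environment():
    # Redshift() and S3() pick up credentials from these env variables, so the
    # script only needs to ensure they are populated before connecting.
    for var in ('REDSHIFT_USERNAME', 'REDSHIFT_PASSWORD', 'REDSHIFT_HOST',
                'REDSHIFT_DB', 'REDSHIFT_PORT',
                'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'S3_TEMP_BUCKET'):
        if var not in os.environ:
            raise EnvironmentError(f'{var} must be set before running this script.')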
def setUp(self):

    self.rs = Redshift(username='******', password='******',
                       host='test', db='test', port=123)

    self.tbl = Table([['ID', 'Name'],
                      [1, 'Jim'],
                      [2, 'John'],
                      [3, 'Sarah']])

    self.mapping = self.rs.generate_data_types(self.tbl)
def setUp(self):

    self.temp_schema = TEMP_SCHEMA
    self.db = Redshift()

    # Create a schema.
    setup_sql = f"""
        DROP SCHEMA IF EXISTS {self.temp_schema} CASCADE;
        CREATE SCHEMA {self.temp_schema};
    """
    self.db.query(setup_sql)

    # Load dummy data to parsons tables
    self.table1 = Table.from_csv(f'{_dir}/test_data/sample_table_1.csv')
    self.table2 = Table.from_csv(f'{_dir}/test_data/sample_table_2.csv')

    # Create source table
    self.db.copy(self.table1, f'{self.temp_schema}.source')

    # Create DB Sync object
    self.db_sync = DBSync(self.db, self.db)
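
# A sketch of a test this fixture supports: a full sync of the source table into
# a destination table via DBSync.table_sync_full(). The destination table name
# here is hypothetical.
def test_table_sync_full(self):
    self.db_sync.table_sync_full(f'{self.temp_schema}.source',
                                 f'{self.temp_schema}.destination')

    # Row counts should match after a full sync.
    source_count = self.db.query(f'select count(*) from {self.temp_schema}.source').first
    dest_count = self.db.query(f'select count(*) from {self.temp_schema}.destination').first
    self.assertEqual(source_count, dest_count)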
import os
import logging

from parsons import Redshift, S3, utilities

# Redshift setup - this assumes a Civis Platform parameter called "REDSHIFT"
set_env_var('REDSHIFT_PORT', os.environ['REDSHIFT_PORT'])
set_env_var('REDSHIFT_DB', os.environ['REDSHIFT_DB'])
set_env_var('REDSHIFT_HOST', os.environ['REDSHIFT_HOST'])
# Map the Civis credential variables to the names the Parsons connector expects.
set_env_var('REDSHIFT_USERNAME', os.environ['REDSHIFT_CREDENTIAL_USERNAME'])
set_env_var('REDSHIFT_PASSWORD', os.environ['REDSHIFT_CREDENTIAL_PASSWORD'])
rs = Redshift()

# AWS setup - this assumes a Civis Platform parameter called "AWS"
set_env_var('S3_TEMP_BUCKET', 'parsons-tmc')
set_env_var('AWS_ACCESS_KEY_ID', os.environ['AWS_ACCESS_KEY_ID'])
set_env_var('AWS_SECRET_ACCESS_KEY', os.environ['AWS_SECRET_ACCESS_KEY'])
s3 = S3()

# Logging
logger = logging.getLogger(__name__)
_handler = logging.StreamHandler()
_formatter = logging.Formatter('%(levelname)s %(message)s')
_handler.setFormatter(_formatter)
logger.addHandler(_handler)
logger.setLevel('INFO')

bucket = os.environ['BUCKET']
schema = os.environ['SCHEMA']
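
# With the connectors configured above, a typical job body might look like this
# (a sketch; the table name is hypothetical, while `bucket` and `schema` come
# from the environment variables read above):
tbl = rs.query(f'select * from {schema}.my_table')
logger.info(f'Pulled {tbl.num_rows} rows from {schema}.my_table')

# Stash a CSV copy of the results in S3.
csv_path = tbl.to_csv()
s3.put_file(bucket, 'exports/my_table.csv', csv_path)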
}

### CODE

import os

from parsons import Table, Redshift, VAN
from parsons import logger

# Setup

# Set variables as environment variables if provided in this script
for name, value in config_vars.items():
    if value.strip() != "":
        os.environ[name] = value

# Just create Redshift() here - the VAN connectors are created dynamically below
rs = Redshift()

# Create a dictionary of VAN states and API keys from the multiline Civis credential
myv_states = {x.split(",")[0]: x.split(",")[1]
              for x in os.environ['VAN_PASSWORD'].split("\r\n")}
myv_keys = {k: VAN(api_key=v, db=os.environ['VAN_DB_NAME'])
            for k, v in myv_states.items()}

# Create a simple set of states for insertion into SQL
states = "','".join([s for s in myv_keys])
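
# A sketch of how the pieces above are typically used together: pull one batch
# of rows per state from Redshift, then act on each batch with that state's VAN
# connector. The table and column names below are hypothetical.
query = f"""
    select state_code, vanid
    from my_schema.van_updates
    where state_code in ('{states}')
"""
rows = rs.query(query)

for state, van in myv_keys.items():
    # Work only the rows that belong to this state's VAN committee.
    state_rows = rows.select_rows(lambda row: row['state_code'] == state)
    logger.info(f'{state}: {state_rows.num_rows} rows to process')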