def load(self, *args, **kwargs):
    """Bulk-load the local CSV file into ``self.table_name`` via COPY.

    Streams ``constants.LOCAL_STORAGE + self.file`` through
    ``cursor.copy_expert`` as tab-delimited CSV with a header row, then
    commits.  After the commit, scans ``conn.notices`` for Redshift
    load-error notices mentioning the target table; if one is found,
    delegates to ``self.show_redshift_error`` without rolling back.
    """
    with get_postgres() as conn:
        with conn.cursor() as cursor:
            with open(constants.LOCAL_STORAGE + self.file, "r") as upload_file:
                cursor.copy_expert(
                    "COPY {} from stdin with csv header delimiter '\t'"
                    .format(self.table_name), upload_file)
                conn.commit()

                upload_error = False
                rows_cnt = None  # record count parsed from notices (informational)
                for msg in conn.notices:
                    # Only inspect notices that mention our table.
                    if '{}'.format(self.table_name) not in msg:
                        continue
                    if 'stl_load_errors' in msg:
                        upload_error = True
                    if 'record' in msg:
                        try:
                            rows_cnt = int(re.findall(r'\d+', msg)[0])
                        except (IndexError, ValueError):
                            # Notice carried no parsable number; keep going.
                            pass
                if upload_error:
                    self.show_redshift_error(conn, rollback=False)
def _get_parameters(self):
    """Yield each active ETL job row from public.indeed_etl_jobs as a dict."""
    sql = """
                SELECT *
                FROM public.indeed_etl_jobs
                where is_active = TRUE
            """
    with get_postgres() as conn:
        with conn.cursor(
                cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
            cursor.execute(sql)
            yield from cursor.fetchall()
def _get_open_jobs(self):
    """Yield distinct job keys still open (neither expired nor API-dropped)."""
    sql = """
                select distinct job_key
                from public.indeed_jobs_duration
                where job_expired = '1900-01-01'
                and job_no_api = '1900-01-01'
            """
    with get_postgres() as conn:
        with conn.cursor(
                cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
            cursor.execute(sql)
            yield from cursor.fetchall()
def get_tables_list(self, scheme='public'):
    """Return the names of all tables defined in *scheme* (pg_table_def)."""
    sql = """
                select distinct("tablename") as tablename
                from pg_table_def
                where "schemaname"=%s;
            """
    with get_postgres() as conn:
        with conn.cursor(
                cursor_factory=psycopg2.extras.DictCursor) as cursor:
            cursor.execute(sql, (scheme, ))
            rows = cursor.fetchall()
    return [record['tablename'] for record in rows]
def exec_analyze_table(self, table_name, scheme='public'):
    """ Perform the ANALYZE operation on a given table """
    # Qualify the name with the scheme unless the caller already did.
    qualified = (table_name if '.' in table_name
                 else '"{}"."{}"'.format(scheme, table_name))
    with get_postgres() as conn:
        with conn.cursor() as cursor:
            return self.exec_query(
                cursor,
                'analyze {}'.format(qualified),
                query_description='ANALYZE {}'.format(qualified))
def table_exists(self, table_name, scheme='public'):
    """Return True when *table_name* exists in *scheme* per pg_table_def."""
    sql = """
                select distinct("tablename") as tablename
                from pg_table_def
                where "tablename"=%s
                and "schemaname"=%s;
            """
    with get_postgres() as conn:
        with conn.cursor(
                cursor_factory=psycopg2.extras.DictCursor) as cursor:
            cursor.execute(sql, (table_name, scheme))
            matches = cursor.fetchall()
    return len(matches) > 0
def rebuild_stage_table(self, recreate_table=True):
    """Rebuild the appropriate ETL stage table.

    With ``recreate_table`` (the default) the table is dropped (cascade)
    and recreated from ``self.table_ddl``.  Otherwise the DDL only runs
    when the table does not already exist.
    """
    with get_postgres() as conn:
        with conn.cursor() as cursor:
            if recreate_table:
                cursor.execute("drop table if exists {} cascade".format(
                    self.table_name))
                cursor.execute(self.table_ddl)
                conn.commit()
                return
            # BUGFIX: was ``runner.table_exists`` — a stray module-global
            # reference; the existence check is this class's own method.
            if not self.table_exists(self.table_name):
                cursor.execute(self.table_ddl)
                conn.commit()
                return
def exec_sql_scripts(self, paths, show_error=True, explicit_commit=False):
    """ Execute multiple SQL scripts one by one """
    total = 0
    with get_postgres() as conn:
        for script_path in paths:
            statement = self.read_query(script_path)
            with conn.cursor(
                    cursor_factory=psycopg2.extras.DictCursor) as cursor:
                total += self.exec_query(cursor, statement,
                                         show_error=show_error,
                                         query_description=script_path)
            if explicit_commit:
                conn.commit()
    return total
def exec_analyze_tables(self, table_names, scheme='public'):
    """ Perform the ANALYZE operation on multiple tables """
    total = 0
    with get_postgres() as conn:
        with conn.cursor() as cursor:
            for name in table_names:
                # Qualify with the scheme unless the caller already did.
                if '.' not in name:
                    name = '"{}"."{}"'.format(scheme, name)
                total += self.exec_query(
                    cursor,
                    'analyze {}'.format(name),
                    query_description='ANALYZE {}'.format(name))
    return total
def exec_sql_script(self, path, params=None, show_error=True,
                    explicit_commit=False):
    """ Execute a single SQL script """
    statement = self.read_query(path)
    with get_postgres() as conn:
        with conn.cursor(
                cursor_factory=psycopg2.extras.DictCursor) as cursor:
            outcome = self.exec_query(cursor, statement, params,
                                      show_error=show_error,
                                      query_description=path)
            if explicit_commit:
                conn.commit()
    return outcome
def main(*args, **kwargs):
    """Update job lifecycle dates in public.indeed_jobs_duration.

    For each (job_key, expired, api_active) triple yielded by
    ``IndeedDuration().extract()``: stamp ``job_expired`` with the current
    date when the job expired, or stamp ``job_no_api`` when the API no
    longer serves it.  Commits once after processing all rows.
    """
    extractor = IndeedDuration()
    with get_postgres() as conn:
        with conn.cursor(
                cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
            for job_key, expired, api_active in extractor.extract():
                print(job_key, expired, api_active)
                if expired:
                    # SECURITY FIX: job_key was interpolated via f-string
                    # (SQL-injection / quoting hazard); use a parameter.
                    cursor.execute(
                        """
                        update public.indeed_jobs_duration
                        set job_expired = current_date
                        where job_key = %s
                        """, (job_key,))
                elif not api_active:
                    cursor.execute(
                        """
                        update public.indeed_jobs_duration
                        set job_no_api = current_date
                        where job_key = %s
                        """, (job_key,))
            conn.commit()