def copy_from_local_file(schema, table, local_path, delimiter=',', encoding='utf-8', truncate=True, connection='covid_db_postgres'): conn = PostgresHook(connection) conn_engine = conn.get_sqlalchemy_engine() if encoding != 'utf-8': print('Starting encoding conversion...') targetFileName = 'teste.csv' BLOCKSIZE = 1048576 # or some other, desired size in bytes with codecs.open(local_path, 'r', encoding) as sourceFile: with codecs.open(targetFileName, 'w', 'utf-8') as targetFile: while True: contents = sourceFile.read(BLOCKSIZE) if not contents: break targetFile.write(contents) local_path = targetFileName if truncate: print('Truncating table...') conn_engine.execute(f'truncate table {schema}.{table};') print('Loading table...') sql = f"COPY {schema}.{table} FROM STDIN DELIMITERS '{delimiter}' csv header encoding 'utf-8'" conn.copy_expert(sql, filename=local_path) return f'Table: {table} loaded!'
def store_data(): hook = PostgresHook(postgres_conn_id='postgres') hook.copy_expert(sql=''' COPY stocks.tesla(date, close) FROM STDIN DELIMITER ',' CSV HEADER ''', filename='/tmp/tesla_processed.csv')
class DatabaseToCSVOperator(BaseOperator): @apply_defaults def __init__(self, filePath: str, fileName: str, sqlQuery: str, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self.postgres_conn_id = 'ods' self.autocommit = False self.parameters = None self.database = None self.file_path = filePath self.file_name = fileName self.sql = sqlQuery self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id, schema=self.database) Path(self.file_path).mkdir(parents=True, exist_ok=True) """ Extract data from the database to CSV file """ def execute(self, context): logging.info("execute initializing") dag_id = context['dag'].dag_id environment = Variable.get(constants.variable_environment) task_instance = context['ti'] task_id = task_instance.task_id task_try_number = task_instance.try_number task_start_date = task_instance.start_date task_end_date = task_instance.end_date task_operator = task_instance.operator log_url = task_instance.log_url logging.info("Running query against database") self.log.info('Executing: %s', self.sql) conn = self.hook.get_conn() cursor = conn.cursor() cursor.execute(self.sql) result = cursor.fetchall() if cursor.rowcount > 0: # Write to CSV file temp_path = self.file_path + self.file_name tmp_path = self.file_path + '-' + self.file_name with open(temp_path, 'w') as fp: a = csv.writer(fp, quoting=csv.QUOTE_MINIMAL, delimiter=',') a.writerow([i[0] for i in cursor.description]) a.writerows(result) with open(temp_path, 'rb') as f: data = f.read() f.close() logging.info("file created") self.hook.copy_expert(self.sql, filename=tmp_path) else: logging.info( 'File was not created because there was not result from the query' )
def get_table_data(conn_id, table_name, dir_path): file_path = dir_path / f'{table_name}.csv' pg_hook = PostgresHook(postgres_conn_id=conn_id) table_id = sql.Identifier(table_name) sql_query = sql.SQL( "COPY (SELECT * FROM {}) TO STDOUT WITH CSV HEADER").format(table_id) pg_hook.copy_expert(sql_query, file_path) return file_path
def execute(self, context): hook = PostgresHook(postgres_conn_id=self.postgres_conn_id, schema=self.database) sql = f""" COPY {self.table_name}({','.join(self.fields)}) FROM STDIN WITH CSV HEADER """ self.log.info(f"Executing: {sql}") for filename in glob.glob(self.folder): self.log.info(f"Copying file: {filename}") hook.copy_expert(sql=sql, filename=filename) for output in hook.conn.notices: self.log.info(output)
class PostgresDB: def __init__(self, conn_id: str) -> None: self.hook = PostgresHook(postgres_conn_id=conn_id) def save_table_to_file( self, table_name: str, dir_path: Path ) -> Path: file_path = dir_path/f'{table_name}.csv' table_id = sql.Identifier(table_name) sql_text = 'COPY (SELECT * FROM {}) TO STDOUT WITH CSV HEADER' sql_query = sql.SQL(sql_text).format(table_id) self.hook.copy_expert(sql_query, file_path) return file_path def load_table_from_file( self, table_name: str, file_path: Path ) -> Path: table_id = sql.Identifier(table_name) sql_text = "COPY {} FROM STDIN DELIMITER ',' CSV HEADER" sql_query = sql.SQL(sql_text).format(table_id) self.hook.copy_expert(sql_query, file_path) return file_path def create_schema(self, schema_name: str) -> None: schema_id = sql.Identifier(schema_name) sql_query = sql.SQL('CREATE SCHEMA IF NOT EXISTS {}').format(schema_id) with self.hook.get_conn() as conn: with conn.cursor() as cur: cur.execute(sql_query) def create_table(self, table_info: Dict) -> None: table_name = table_info['table_name'] table_cols = table_info['table_cols'] constraint_name = table_info.get('constraint_name') constraint_cols = table_info.get('constraint_cols') table_id = sql.Identifier(table_name) with self.hook.get_conn() as conn: with conn.cursor() as cur: cols = [f'{sql.Identifier(col_name).as_string(cur)} {col_type}' for (col_name, col_type) in table_cols.items()] cols_list = sql.SQL(','.join(cols)) if constraint_name is None or constraint_cols is None: sql_text = 'CREATE TABLE IF NOT EXISTS {} ({})' sql_query = sql.SQL(sql_text).format(table_id, cols_list) else: constraint_id = sql.Identifier(constraint_name) constraints = [sql.Identifier(field) for field in constraint_cols] constraints_list = sql.SQL(',').join(constraints) sql_text = 'CREATE TABLE IF NOT EXISTS {} ({}, CONSTRAINT {} UNIQUE ({}))' sql_query = sql.SQL(sql_text).format( table_id, cols_list, constraint_id, constraints_list ) cur.execute(sql_query) def drop_table(self, table_name: str) -> None: table_id = sql.Identifier(table_name) sql_query = sql.SQL('DROP TABLE IF EXISTS {}').format(table_id) with self.hook.get_conn() as conn: with conn.cursor() as cur: cur.execute(sql_query) def execute(self, sql_query: str) -> None: with self.hook.get_conn() as conn: with conn.cursor() as cur: cur.execute(sql_query) conn.commit()