Esempio n. 1
0
def copy_from_local_file(schema,
                         table,
                         local_path,
                         delimiter=',',
                         encoding='utf-8',
                         truncate=True,
                         connection='covid_db_postgres'):
    conn = PostgresHook(connection)
    conn_engine = conn.get_sqlalchemy_engine()
    if encoding != 'utf-8':
        print('Starting encoding conversion...')
        targetFileName = 'teste.csv'
        BLOCKSIZE = 1048576  # or some other, desired size in bytes
        with codecs.open(local_path, 'r', encoding) as sourceFile:
            with codecs.open(targetFileName, 'w', 'utf-8') as targetFile:
                while True:
                    contents = sourceFile.read(BLOCKSIZE)
                    if not contents:
                        break
                    targetFile.write(contents)
        local_path = targetFileName
    if truncate:
        print('Truncating table...')
        conn_engine.execute(f'truncate table {schema}.{table};')
    print('Loading table...')
    sql = f"COPY {schema}.{table} FROM STDIN DELIMITERS '{delimiter}' csv header encoding 'utf-8'"
    conn.copy_expert(sql, filename=local_path)
    return f'Table: {table} loaded!'
Esempio n. 2
0
def store_data():
    hook = PostgresHook(postgres_conn_id='postgres')
    hook.copy_expert(sql='''
        COPY stocks.tesla(date, close) 
        FROM STDIN DELIMITER ','
        CSV HEADER 
        ''',
                     filename='/tmp/tesla_processed.csv')
class DatabaseToCSVOperator(BaseOperator):
    @apply_defaults
    def __init__(self, filePath: str, fileName: str, sqlQuery: str, *args,
                 **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.postgres_conn_id = 'ods'
        self.autocommit = False
        self.parameters = None
        self.database = None
        self.file_path = filePath
        self.file_name = fileName
        self.sql = sqlQuery
        self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id,
                                 schema=self.database)
        Path(self.file_path).mkdir(parents=True, exist_ok=True)

    """
    Extract data from the database to CSV file
    """

    def execute(self, context):
        logging.info("execute initializing")
        dag_id = context['dag'].dag_id
        environment = Variable.get(constants.variable_environment)
        task_instance = context['ti']
        task_id = task_instance.task_id
        task_try_number = task_instance.try_number
        task_start_date = task_instance.start_date
        task_end_date = task_instance.end_date
        task_operator = task_instance.operator
        log_url = task_instance.log_url
        logging.info("Running query against database")
        self.log.info('Executing: %s', self.sql)
        conn = self.hook.get_conn()
        cursor = conn.cursor()
        cursor.execute(self.sql)
        result = cursor.fetchall()

        if cursor.rowcount > 0:
            # Write to CSV file
            temp_path = self.file_path + self.file_name
            tmp_path = self.file_path + '-' + self.file_name
            with open(temp_path, 'w') as fp:
                a = csv.writer(fp, quoting=csv.QUOTE_MINIMAL, delimiter=',')
                a.writerow([i[0] for i in cursor.description])
                a.writerows(result)

            with open(temp_path, 'rb') as f:
                data = f.read()
            f.close()
            logging.info("file created")
            self.hook.copy_expert(self.sql, filename=tmp_path)
        else:
            logging.info(
                'File was not created because there was not result from the query'
            )
Esempio n. 4
0
def get_table_data(conn_id, table_name, dir_path):
    file_path = dir_path / f'{table_name}.csv'

    pg_hook = PostgresHook(postgres_conn_id=conn_id)

    table_id = sql.Identifier(table_name)
    sql_query = sql.SQL(
        "COPY (SELECT * FROM {}) TO STDOUT WITH CSV HEADER").format(table_id)

    pg_hook.copy_expert(sql_query, file_path)

    return file_path
 def execute(self, context):
     hook = PostgresHook(postgres_conn_id=self.postgres_conn_id,
                         schema=self.database)
     sql = f"""
     COPY {self.table_name}({','.join(self.fields)})
     FROM STDIN WITH CSV HEADER
     """
     self.log.info(f"Executing: {sql}")
     for filename in glob.glob(self.folder):
         self.log.info(f"Copying file: {filename}")
         hook.copy_expert(sql=sql, filename=filename)
         for output in hook.conn.notices:
             self.log.info(output)
Esempio n. 6
0
class PostgresDB:
    def __init__(self, conn_id: str) -> None:
        self.hook = PostgresHook(postgres_conn_id=conn_id)

    def save_table_to_file(
        self,
        table_name: str,
        dir_path: Path
    ) -> Path:
        file_path = dir_path/f'{table_name}.csv'

        table_id = sql.Identifier(table_name)
        sql_text = 'COPY (SELECT * FROM {}) TO STDOUT WITH CSV HEADER'
        sql_query = sql.SQL(sql_text).format(table_id)

        self.hook.copy_expert(sql_query, file_path)

        return file_path

    def load_table_from_file(
        self, 
        table_name: str, 
        file_path: Path
    ) -> Path:
        table_id = sql.Identifier(table_name)
        sql_text = "COPY {} FROM STDIN DELIMITER ',' CSV HEADER"
        sql_query = sql.SQL(sql_text).format(table_id)

        self.hook.copy_expert(sql_query, file_path)

        return file_path

    def create_schema(self, schema_name: str) -> None:
        schema_id = sql.Identifier(schema_name)
        sql_query = sql.SQL('CREATE SCHEMA IF NOT EXISTS {}').format(schema_id)

        with self.hook.get_conn() as conn:
            with conn.cursor() as cur:
                cur.execute(sql_query)

    def create_table(self, table_info: Dict) -> None:
        table_name = table_info['table_name']
        table_cols = table_info['table_cols']
        constraint_name = table_info.get('constraint_name')
        constraint_cols = table_info.get('constraint_cols')

        table_id = sql.Identifier(table_name)

        with self.hook.get_conn() as conn:
            with conn.cursor() as cur:
                cols = [f'{sql.Identifier(col_name).as_string(cur)} {col_type}'
                        for (col_name, col_type) in table_cols.items()]
                cols_list = sql.SQL(','.join(cols))

                if constraint_name is None or constraint_cols is None:
                    sql_text = 'CREATE TABLE IF NOT EXISTS {} ({})'
                    sql_query = sql.SQL(sql_text).format(table_id, cols_list)
                else:
                    constraint_id = sql.Identifier(constraint_name)
                    constraints = [sql.Identifier(field) 
                                   for field in constraint_cols]
                    constraints_list = sql.SQL(',').join(constraints)

                    sql_text = 'CREATE TABLE IF NOT EXISTS {} ({}, CONSTRAINT {} UNIQUE ({}))'
                    sql_query = sql.SQL(sql_text).format(
                        table_id,
                        cols_list,
                        constraint_id,
                        constraints_list
                    )

                cur.execute(sql_query)

    def drop_table(self, table_name: str) -> None:
        table_id = sql.Identifier(table_name)
        sql_query = sql.SQL('DROP TABLE IF EXISTS {}').format(table_id)

        with self.hook.get_conn() as conn:
            with conn.cursor() as cur:
                cur.execute(sql_query)

    def execute(self, sql_query: str) -> None:
        with self.hook.get_conn() as conn:
            with conn.cursor() as cur:
                cur.execute(sql_query)

            conn.commit()