def execute(self, context: Any) -> None:
    """Assemble and run a Snowflake ``COPY INTO`` statement loading staged S3 files."""
    hook = SnowflakeHook(
        snowflake_conn_id=self.snowflake_conn_id,
        warehouse=self.warehouse,
        database=self.database,
        role=self.role,
        schema=self.schema,
        authenticator=self.authenticator,
        session_parameters=self.session_parameters,
    )

    # Qualify the target table with the schema only when one is configured.
    destination = f"{self.schema}.{self.table}" if self.schema else self.table
    if self.columns_array:
        destination = f"{destination}({','.join(self.columns_array)})"

    statement_lines = [f"COPY INTO {destination}"]
    statement_lines.append(f"FROM @{self.stage}/{self.prefix or ''}")
    # Restrict the load to explicit keys only when they were provided.
    if self.s3_keys:
        quoted_keys = ", ".join(f"'{key}'" for key in self.s3_keys)
        statement_lines.append(f"files=({quoted_keys})")
    statement_lines.append(f"file_format={self.file_format}")

    self.log.info('Executing COPY command...')
    hook.run("\n".join(statement_lines), self.autocommit)
    self.log.info("COPY command completed")
def execute(self, context):
    """Run a Snowflake ``COPY INTO <table> FROM @<stage>`` for the configured S3 keys.

    :param context: Airflow task execution context (unused here).
    """
    snowflake_hook = SnowflakeHook(snowflake_conn_id=self.snowflake_conn_id)

    # Build the files=(...) clause by quoting each key explicitly instead of
    # the previous str(list) -> bracket-replacement hack, which broke on keys
    # containing quotes and produced invalid SQL for the empty/None case.
    # Skip the clause entirely when no explicit keys were given, so Snowflake
    # loads everything in the stage.
    files = ""
    if self.s3_keys:
        files = "files=({})".format(", ".join(f"'{key}'" for key in self.s3_keys))

    # we can extend this based on stage
    base_sql = """
    FROM @{stage}/
    {files}
    file_format={file_format}
    """.format(stage=self.stage, files=files, file_format=self.file_format)

    if self.columns_array:
        copy_query = """
        COPY INTO {schema}.{table}({columns})
        {base_sql}
        """.format(schema=self.schema, table=self.table,
                   columns=",".join(self.columns_array), base_sql=base_sql)
    else:
        copy_query = """
        COPY INTO {schema}.{table}
        {base_sql}
        """.format(schema=self.schema, table=self.table, base_sql=base_sql)

    self.log.info('Executing COPY command...')
    snowflake_hook.run(copy_query, self.autocommit)
    self.log.info("COPY command completed")
def _get_snowflake_hook(self) -> SnowflakeHook:
    """Build a ``SnowflakeHook`` from this operator's connection settings."""
    hook_kwargs = {
        "snowflake_conn_id": self.snowflake_conn_id,
        "warehouse": self.warehouse,
        "database": self.database,
        "role": self.role,
        "schema": self.schema,
    }
    return SnowflakeHook(**hook_kwargs)
def execute(self, context: Any) -> None:
    """Run a ``COPY INTO`` command loading files from an S3 stage into Snowflake.

    :param context: Airflow task execution context (unused here).
    """
    snowflake_hook = SnowflakeHook(
        snowflake_conn_id=self.snowflake_conn_id,
        warehouse=self.warehouse,
        database=self.database,
        role=self.role,
        schema=self.schema,
        authenticator=self.authenticator,
        session_parameters=self.session_parameters,
    )

    # Only emit a files=(...) clause when explicit keys were provided;
    # otherwise Snowflake loads every file under the stage/prefix.
    files = ""
    if self.s3_keys:
        files = "files=({})".format(", ".join(f"'{key}'" for key in self.s3_keys))

    # we can extend this based on stage
    base_sql = f"""
    FROM @{self.stage}/{self.prefix or ''}
    {files}
    file_format={self.file_format}
    """

    # Qualify the table with the schema only when one is set; formatting a
    # None schema would otherwise produce the invalid identifier "None.<table>".
    into = f"{self.schema}.{self.table}" if self.schema else self.table
    if self.columns_array:
        into = f"{into}({','.join(self.columns_array)})"

    copy_query = f"""
    COPY INTO {into}
    {base_sql}
    """
    # Normalize indentation introduced by the triple-quoted templates.
    copy_query = "\n".join(line.strip() for line in copy_query.splitlines())

    self.log.info('Executing COPY command...')
    snowflake_hook.run(copy_query, self.autocommit)
    self.log.info("COPY command completed")
def get_hook(self) -> SnowflakeHook:
    """
    Create and return SnowflakeHook.

    :return: a SnowflakeHook instance.
    :rtype: SnowflakeHook
    """
    # NOTE(review): this variant passes only conn_id/warehouse/database/role/
    # schema — confirm whether authenticator/session_parameters should also be
    # forwarded, as done by the newer hook factories in this codebase.
    return SnowflakeHook(snowflake_conn_id=self.snowflake_conn_id,
                         warehouse=self.warehouse,
                         database=self.database,
                         role=self.role,
                         schema=self.schema)
def get_hook(self) -> SnowflakeHook:
    """
    Create and return SnowflakeHook.

    :return: a SnowflakeHook instance.
    :rtype: SnowflakeHook
    """
    connection_settings = dict(
        snowflake_conn_id=self.snowflake_conn_id,
        warehouse=self.warehouse,
        database=self.database,
        role=self.role,
        schema=self.schema,
        authenticator=self.authenticator,
        session_parameters=self.session_parameters,
    )
    return SnowflakeHook(**connection_settings)
import email
import email.utils
import time
from pathlib import Path

# variables
dag_owner = 'franko'
dag_name = 'DEMO_REC_ingest'
test = False

# DAG configuration read from a JSON-encoded Airflow Variable.
vars = Variable.get('DEMO_REC_INGEST_CONFIG', deserialize_json=True)
input_files_path = vars['input_files_path']  # Excel files path
local_dir = vars['local_dir']  # csv files path

# AWS credentials stored as individual Airflow Variables.
aws_access_key_id = Variable.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = Variable.get('AWS_SECRET_ACCESS_KEY')

# Module-level hook shared by the tasks below.
snowflake_hook = SnowflakeHook(snowflake_conn_id="snowflake_conn", database='DEMO')

# Ensure the working directories exist before any task touches them.
if not os.path.exists(input_files_path):
    os.makedirs(input_files_path)
if not os.path.exists(local_dir):
    os.makedirs(local_dir)


@send_alert(test=test)
def look_for_new_feeds(*args, **kwargs):
    """
    Function
    ----------------------
    Monitor Office 365 Outlook Inbox,
    """
import boto3 from datetime import datetime, date, time import datetime as dt import pandas as pd import shutil import os import csv from airflow.contrib.operators.snowflake_operator import SnowflakeOperator dag_owner = 'franko' dag_name = 'test_api_hubspot' test = True aws_access_key_id = Variable.get('AWS_ACCESS_KEY_ID') aws_secret_access_key = Variable.get('AWS_SECRET_ACCESS_KEY') snowflake_hook = SnowflakeHook(snowflake_conn_id="snowflake_conn", database='RAWDATA') vars = Variable.get('HUBSPOT_LOAD_OBJ_CONFIG', deserialize_json=True) input_files_path = vars['contacts_created_dir'] # dir for vid contacts output_file_name_created = "contacts_created.csv" output_file_name_updated = "contacts_updated.csv" hubspot_api_key = Variable.get('HUBSPOT_API_KEY') #currently_date = datetime.combine(date.today()- dt.timedelta(days=1), time()) ##yesterday day currently_date = Variable.get('HUBSPOT_CONTACT_LASTEXEC') currently_date = datetime.strptime(currently_date, '%Y-%m-%d %H:%M:%S') accum_req = 0 merge_query = [ """ use raw_data.hubspot;""", """ MERGE INTO raw_data.hubspot.test_hubspot_api_contacts USING ( SELECT $1 canonical_vid,