Example #1
0
    def execute(self, context: Any) -> None:
        """Run a COPY INTO statement that loads staged S3 files into Snowflake.

        Builds the destination (optionally schema-qualified, optionally with an
        explicit column list), assembles the COPY statement line by line, and
        executes it through a SnowflakeHook.
        """
        hook = SnowflakeHook(
            snowflake_conn_id=self.snowflake_conn_id,
            warehouse=self.warehouse,
            database=self.database,
            role=self.role,
            schema=self.schema,
            authenticator=self.authenticator,
            session_parameters=self.session_parameters,
        )

        # Qualify the target table with the schema only when one is configured.
        destination = f"{self.schema}.{self.table}" if self.schema else self.table
        if self.columns_array:
            destination = f"{destination}({','.join(self.columns_array)})"

        statement_lines = [
            f"COPY INTO {destination}",
            f"FROM @{self.stage}/{self.prefix or ''}",
        ]
        # An explicit file list is optional; without it Snowflake loads the
        # whole stage/prefix.
        if self.s3_keys:
            quoted_keys = ", ".join("'{}'".format(key) for key in self.s3_keys)
            statement_lines.append(f"files=({quoted_keys})")
        statement_lines.append(f"file_format={self.file_format}")

        self.log.info('Executing COPY command...')
        hook.run("\n".join(statement_lines), self.autocommit)
        self.log.info("COPY command completed")
    def execute(self, context):
        """Load the configured S3 keys into a Snowflake table via COPY INTO."""
        hook = SnowflakeHook(
            snowflake_conn_id=self.snowflake_conn_id)

        # Snowflake won't accept a Python list of files; it has to look like a
        # tuple — but tuple([1]) renders as (1,), which Snowflake rejects.  So
        # rewrite the list's brackets into parentheses instead.
        files = str(self.s3_keys).translate(str.maketrans("[]", "()"))

        # we can extend this based on stage
        base_sql = f"""
                    FROM @{self.stage}/
                    files={files}
                    file_format={self.file_format}
                """

        if self.columns_array:
            column_list = ",".join(self.columns_array)
            copy_query = f"""
                COPY INTO {self.schema}.{self.table}({column_list}) {base_sql}
            """
        else:
            copy_query = f"""
                COPY INTO {self.schema}.{self.table} {base_sql}
            """

        self.log.info('Executing COPY command...')
        hook.run(copy_query, self.autocommit)
        self.log.info("COPY command completed")
 def _get_snowflake_hook(self) -> SnowflakeHook:
     """Build a SnowflakeHook from this operator's connection settings."""
     hook_kwargs = {
         "snowflake_conn_id": self.snowflake_conn_id,
         "warehouse": self.warehouse,
         "database": self.database,
         "role": self.role,
         "schema": self.schema,
     }
     return SnowflakeHook(**hook_kwargs)
 def get_hook(self):
     """
     Build and return the SnowflakeHook for this operator's connection.

     :return: a SnowflakeHook instance.
     :rtype: SnowflakeHook
     """
     return SnowflakeHook(
         snowflake_conn_id=self.snowflake_conn_id,
         warehouse=self.warehouse,
         database=self.database,
         role=self.role,
         schema=self.schema,
     )
Example #5
0
 def get_hook(self) -> SnowflakeHook:
     """
     Instantiate a SnowflakeHook from this operator's connection attributes.

     :return: a SnowflakeHook instance.
     :rtype: SnowflakeHook
     """
     return SnowflakeHook(snowflake_conn_id=self.snowflake_conn_id,
                          warehouse=self.warehouse,
                          database=self.database,
                          role=self.role,
                          schema=self.schema,
                          authenticator=self.authenticator,
                          session_parameters=self.session_parameters)
Example #6
0
    def execute(self, context: Any) -> None:
        """Execute a COPY INTO statement moving staged S3 objects into Snowflake.

        Assembles the statement from the stage/prefix, an optional explicit
        file list, and the file format, then strips the indentation before
        running it through a SnowflakeHook.
        """
        hook = SnowflakeHook(
            snowflake_conn_id=self.snowflake_conn_id,
            warehouse=self.warehouse,
            database=self.database,
            role=self.role,
            schema=self.schema,
            authenticator=self.authenticator,
            session_parameters=self.session_parameters,
        )

        # An explicit file list is optional; without one Snowflake loads the
        # whole stage/prefix.
        files = ""
        if self.s3_keys:
            quoted_keys = ", ".join(f"'{key}'" for key in self.s3_keys)
            files = f"files=({quoted_keys})"

        # we can extend this based on stage
        base_sql = f"""
                    FROM @{self.stage}/{self.prefix or ''}
                    {files}
                    file_format={self.file_format}
                """

        if self.columns_array:
            column_list = ",".join(self.columns_array)
            copy_query = f"""
                COPY INTO {self.schema}.{self.table}({column_list}) {base_sql}
            """
        else:
            copy_query = f"""
                COPY INTO {self.schema}.{self.table} {base_sql}
            """
        # Remove the template indentation from every line of the statement.
        copy_query = "\n".join(line.strip() for line in copy_query.splitlines())

        self.log.info('Executing COPY command...')
        hook.run(copy_query, self.autocommit)
        self.log.info("COPY command completed")
Example #7
0
import email
import email.utils
import time
from pathlib import Path

# Module-level configuration for the DEMO_REC ingest DAG.
# NOTE(review): `Variable`, `os`, `SnowflakeHook` and `send_alert` are not
# imported in this chunk — presumably defined/imported earlier in the file;
# verify.
dag_owner = 'franko'
dag_name = 'DEMO_REC_ingest'
test = False  # passed to the send_alert decorator below

# DAG settings stored as a JSON-serialized Airflow Variable.
vars = Variable.get('DEMO_REC_INGEST_CONFIG', deserialize_json = True)
input_files_path = vars['input_files_path']  # Excel files path
local_dir = vars['local_dir']  # csv files path
aws_access_key_id = Variable.get('AWS_ACCESS_KEY_ID') 
aws_secret_access_key = Variable.get('AWS_SECRET_ACCESS_KEY')
snowflake_hook = SnowflakeHook(snowflake_conn_id = "snowflake_conn", database = 'DEMO')

# Ensure the working directories exist before any task touches them.
if not os.path.exists(input_files_path):
    os.makedirs(input_files_path)

if not os.path.exists(local_dir):
    os.makedirs(local_dir)

@send_alert(test = test)
def look_for_new_feeds(*args, **kwargs):
    """
    Monitor the Office 365 Outlook inbox for new feed files.

    NOTE(review): only the docstring is visible in this chunk — the function
    body appears truncated; confirm against the full file.
    """
import boto3
from datetime import datetime, date, time
import datetime as dt
import pandas as pd
import shutil
import os
import csv
from airflow.contrib.operators.snowflake_operator import SnowflakeOperator

# Module-level configuration for the HubSpot contacts-ingest DAG.
# NOTE(review): `Variable` and `SnowflakeHook` are not imported in this chunk —
# presumably imported earlier in the file; verify.
dag_owner = 'franko'
dag_name = 'test_api_hubspot'
test = True

aws_access_key_id = Variable.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = Variable.get('AWS_SECRET_ACCESS_KEY')
snowflake_hook = SnowflakeHook(snowflake_conn_id="snowflake_conn",
                               database='RAWDATA')
# DAG settings stored as a JSON-serialized Airflow Variable.
vars = Variable.get('HUBSPOT_LOAD_OBJ_CONFIG', deserialize_json=True)
input_files_path = vars['contacts_created_dir']  # dir for vid contacts
output_file_name_created = "contacts_created.csv"
output_file_name_updated = "contacts_updated.csv"
hubspot_api_key = Variable.get('HUBSPOT_API_KEY')
#currently_date =  datetime.combine(date.today()- dt.timedelta(days=1), time()) ##yesterday day
# Resume from the last successful execution timestamp persisted as an Airflow
# Variable ('%Y-%m-%d %H:%M:%S' format).
currently_date = Variable.get('HUBSPOT_CONTACT_LASTEXEC')
currently_date = datetime.strptime(currently_date, '%Y-%m-%d %H:%M:%S')
accum_req = 0  # running API-request counter — presumably for rate limiting; verify
merge_query = [
    """ use raw_data.hubspot;""",
    """
MERGE INTO raw_data.hubspot.test_hubspot_api_contacts USING
(
  SELECT $1 canonical_vid,