Python PostgresHook.close Examples, airflow.hooks.postgres_hook.PostgresHook.close Python Examples

Example #1

0

Show file

File: init.py Project: mvsousa/mac499

def move_clean_to_conform(**kwargs):
    ti = kwargs['ti']
    file = ti.xcom_pull(task_ids='move_csv_to_stagging_area',
                        key='currently_processing')
    lote_id = ti.xcom_pull(task_ids='generate_unique_lote_id', key='lote_id')
    source = ti.xcom_pull(task_ids='move_csv_to_stagging_area', key='source')
    file_size = ti.xcom_pull(task_ids='move_csv_to_stagging_area',
                             key='file_size')
    table = ti.xcom_pull(task_ids='set_base_table', key='table')
    query = """SELECT * FROM {} WHERE loteId = '{}';""".format(table, lote_id)

    src_conn = PostgresHook(postgres_conn_id='etl_stage_clean').get_conn()
    dest_conn = PostgresHook(postgres_conn_id='etl_stage_conform').get_conn()

    src_cursor = src_conn.cursor()
    dest_cursor = dest_conn.cursor()

    src_cursor.execute(query)
    count = 0
    set_lote_id_begin(dest_cursor, lote_id, source, file_size)
    while True:
        records = src_cursor.fetchmany(size=1)
        if not records:
            break
        query_insert = """INSERT INTO {} VALUES %s""".format(table)
        execute_values(dest_cursor, query_insert, records)
        dest_conn.commit()
        count += 1
    set_lote_id_end(dest_cursor, lote_id, count, source, file_size)
    dest_conn.commit()
    src_cursor.close()
    dest_cursor.close()
    src_conn.close()
    dest_conn.close()
    return True

Example #2

0

Show file

File: get_data.py Project: CurtLH/airflow_dags

def get_team_data():

    conn = PostgresHook(postgres_conn_id="postgres_workboard").get_conn()
    conn.autocommit = True
    cur = conn.cursor()

    params = {"token": Variable.get("workboard_token")}
    base_url = "https://www.myworkboard.com/wb/apis"

    r = requests.get(base_url + "/team?include=org_teams", params=params)
    if r.status_code != 200:
        raise ValueError("Request not successful. Status code {}".format(
            r.status_code))
    else:
        teams = r.json()["data"]["team"]
        logging.info("There are {} teams".format(len(teams)))
        for t in teams:
            try:
                cur.execute(
                    """INSERT INTO teams_raw (team_id, team_data)
                        VALUES (%s, %s) ON CONFLICT (team_id)
                        DO UPDATE SET team_data = EXCLUDED.team_data
                    """,
                    [t["team_id"], json.dumps(t)],
                )
                logging.info("Team id #{} inserted into the table".format(
                    t["team_id"]))
            except BaseException:
                logging.warning("Record skipped")
                pass

    cur.close()
    conn.close()

Example #3

0

Show file

File: init.py Project: mvsousa/mac499

def load_csv_extract_db(**kwargs):
    ti = kwargs['ti']
    file = ti.xcom_pull(task_ids='move_csv_to_stagging_area',
                        key='currently_processing')
    lote_id = ti.xcom_pull(task_ids='generate_unique_lote_id', key='lote_id')
    source = ti.xcom_pull(task_ids='move_csv_to_stagging_area', key='source')
    file_size = ti.xcom_pull(task_ids='move_csv_to_stagging_area',
                             key='file_size')
    table = ti.xcom_pull(task_ids='set_base_table', key='table')

    dest_conn = PostgresHook(postgres_conn_id='etl_stage_extract').get_conn()
    dest_cursor = dest_conn.cursor()

    header = tuple(pd.read_csv(file, sep=';', nrows=1).columns)
    header = header + ('loteid', )
    strfy_header = '(' + ','.join(str(s) for s in header) + ')'
    count = 0
    set_lote_id_begin(dest_cursor, lote_id, source, file_size)
    for df_row in pd.read_csv(file, sep=';', chunksize=1):
        row = [tuple(x) for x in df_row.values]
        row = row[0]
        row = row + (lote_id, )
        strfy_row = '(' + ','.join('\'' + str(s) + '\'' for s in row) + ')'

        query = """INSERT INTO {} {} VALUES {};""".format(
            table, strfy_header, strfy_row)
        dest_cursor.execute(query)
        count += 1
        if count >= 100:
            break
    set_lote_id_end(dest_cursor, lote_id, count, source, file_size)
    dest_conn.commit()
    dest_cursor.close()
    dest_conn.close()
    return True

Example #4

0

Show file

File: db_tables.py Project: akumuthan-dev/data-flow

def query_database(query: str,
                   target_db: str,
                   table_name,
                   batch_size: int = 100000,
                   **kwargs):
    s3 = S3Data(table_name, kwargs["ts_nodash"])
    total_records = 0
    next_batch = 1
    connection = PostgresHook(postgres_conn_id=target_db).get_conn()

    try:
        # create connection with named cursor to fetch data in batches
        cursor = connection.cursor(name='query_database')
        cursor.execute(query)

        rows = cursor.fetchmany(batch_size)
        fields = [d[0] for d in cursor.description]
        while rows:
            records = []
            for row in rows:
                record = {fields[col]: row[col] for col in range(len(row))}
                records.append(record)
            s3.write_key(f'{next_batch:010}.json', records)
            next_batch += 1
            total_records += len(records)
            rows = cursor.fetchmany(batch_size)
    finally:
        if connection:
            cursor.close()
            connection.close()

Example #5

0

Show file

def etl(ds, **kwargs):
    execution_date = kwargs['execution_date'].strftime('%Y-%m-%d')
    query = """
SELECT *
FROM users
WHERE created_at::date = date '{}'
    """.format(execution_date)

    src_conn = PostgresHook(postgres_conn_id='source',
                            schema='source_schema').get_conn()
    dest_conn = PostgresHook(postgres_conn_id='dest',
                             schema='dest_schema').get_conn()

    # notice this time we are naming the cursor for the origin table
    # that's going to force the library to create a server cursor
    src_cursor = src_conn.cursor("serverCursor")
    src_cursor.execute(query)
    dest_cursor = dest_conn.cursor()

    # now we need to iterate over the cursor to get the records in batches
    while True:
        records = src_cursor.fetchmany(size=2000)
        if not records:
            break
        execute_values(dest_cursor, "INSERT INTO users VALUES %s", records)
        dest_conn.commit()

    src_cursor.close()
    dest_cursor.close()
    src_conn.close()
    dest_conn.close()

Example #6

0

Show file

    def execute(self, context):
        # self.log.info('LoadDimensionOperator not implemented yet')
        try:
            conn = PostgresHook(self.redshift_conn_id).get_conn()

            logging.info(f"Ensuring Valid Records::{self.table_name}")

            cursor = conn.cursor()

            cursor.execute(f'select count(*) from {self.table_name}')

            total_count = cursor.fetchone()

            logging.info(f"Total Count::{total_count}")

            if total_count == 0:
                raise Exception(
                    f"There are no records for table::{self.table_name}")

            conn.commit()

            cursor.close()

            conn.close()

        except Exception as e:
            raise e

Example #7

0

Show file

def load_transfer(**kwargs):
    # Collect days using function that accesses postgres
    days_open = average_days_open()

    conn = PostgresHook(postgres_conn_id='my_local_db').get_conn()
    cursor = conn.cursor()

    # Find most recent pull
    cursor.execute('SELECT MAX(last_update_date) FROM airflow.transfer_table')
    start_date = cursor.fetchone()[0]
    if not start_date:
        cursor.execute(
            'SELECT MIN(last_update_date) FROM airflow.austin_service_reports')
        start_date = cursor.fetchone()[0] + timedelta(seconds=1)

    end_date = datetime.strptime(kwargs['ds'],
                                 '%Y-%m-%d').date() - timedelta(days=days_open)

    # Remove previous data in transfer table
    cursor.execute('DELETE FROM airflow.transfer_table')

    with open(SQL_PATH + 'insert_into_transfer_table.sql') as f:
        insert = f.read().format(start_date, end_date)

    cursor.execute(insert)
    conn.commit()

    cursor.close()
    conn.close()

Example #8

0

Show file

def average_days_open():
    conn = PostgresHook(postgres_conn_id='my_local_db').get_conn()
    cursor = conn.cursor()
    cursor.execute(
        'SELECT AVG(average_num_days_open) FROM airflow.metrics_by_day')
    days_open = cursor.fetchone()

    cursor.close()
    conn.close()
    return int(days_open[0])

Example #9

0

Show file

def insert_from_copy_table_if_needed(target_db,
                                     table_name=None,
                                     task_instance=None,
                                     run_fetch_task_id=None,
                                     **kwargs):
    """
    Inserts from copy table to target table when all tasks succeed, if target table
    already exists. The rational behind is not doing any modification on target table
    before we make sure fetching from source and insertation is successful.
    """
    insert_from_copy_sql = """
        DELETE FROM {table_name};
        INSERT INTO {table_name}
        SELECT * FROM {copy_table_name};
        DROP TABLE {copy_table_name};
    """
    table_exists = task_instance.xcom_pull(
        task_ids='check-if-table-exists')[0][0]
    fetcher_state = task_instance.xcom_pull(key='state',
                                            task_ids=run_fetch_task_id)
    inserter_state = True
    for index in range(constants.INGEST_TASK_CONCURRENCY):
        inserter_state = (inserter_state and task_instance.xcom_pull(
            key='state',
            task_ids=f'execute-insert-into-{index}',
        ))
    if ((table_exists and table_exists != 'None') and fetcher_state is True
            and inserter_state is True):
        logging.info(f'Inserting from {table_name}_copy table to {table_name}')
        try:
            target_db_conn = PostgresHook(
                postgres_conn_id=target_db).get_conn()
            target_db_cursor = target_db_conn.cursor()
            target_db_cursor.execute(
                insert_from_copy_sql.format(
                    table_name=sql.Identifier(table_name).as_string(
                        target_db_conn),
                    copy_table_name=sql.Identifier(
                        f'{table_name}_copy').as_string(target_db_conn)))
            target_db_conn.commit()

        # TODO: Gotta Catch'm all
        except Exception as e:
            logging.error(f'Exception: {e}')
            target_db_conn.rollback()
            raise

        finally:
            if target_db_conn:
                target_db_cursor.close()
                target_db_conn.close()

    else:
        logging.info('Target table newly created. No need for copy table')

Example #10

0

Show file

File: extract_and_load.py Project: alexzlue/RB-Airflow

def bq_to_gcs(**kwargs):
    date_stamp = kwargs['ts']

    # get the last current date from Postgres
    conn = PostgresHook(postgres_conn_id='my_local_db').get_conn()
    cursor = conn.cursor()

    cursor.execute('SELECT MAX(last_update_date) FROM airflow.austin_service_reports;')
    
    recent_ds = cursor.fetchone()[0]
    if recent_ds is not None:
        recent_ds+=timedelta(seconds=1)
        last = recent_ds
    else:
        last = kwargs['start_date']-timedelta(days=1)
    
    cursor.close()
    conn.close()

    # open connection to BigQuery
    hook = BigQueryHook(
        bigquery_conn_id='my_gcp_connection',
        use_legacy_sql=False
    )
    conn = hook.get_conn()
    cursor = conn.cursor()
    with open(SQL_PATH + 'query_bq_dataset.sql', 'r') as f:
        query = f.read()
    query = query.format(last, date_stamp)

    cursor.execute(query)

    # write to gcs bucket
    # Each returned row of the result gives:
    # result = [unique_key, complaint_type, complaint_description, owning_department, source,
    #           status, created_date, last_update_date, close_date, city]
    with BUCKET.open('bq_bucket/bq_dataset.txt', 'w') as f:
        while True:
            result = cursor.fetchone()
            if result is None:
                break
            
            if result[8] is None:
                result[8] = ''
            else:
                result[8] = datetime.utcfromtimestamp(result[8])
            if result[9] is None:
                result[9] = ''
            result[7] = datetime.utcfromtimestamp(result[7])
            result[6] = datetime.utcfromtimestamp(result[6])
            f.write('|'.join([str(val) for val in result]) + '\n')

    cursor.close()
    conn.close()

Example #11

0

Show file

File: extract_and_load.py Project: clairetaylor352/RB-Airflow

def bq_to_gcs(**kwargs):
    ds = kwargs['ds']
    previous = datetime.strptime(kwargs['prev_ds'], '%Y-%m-%d').date()

    # get the last current date from Postgres
    conn = PostgresHook(postgres_conn_id='my_local_db').get_conn()
    cursor = conn.cursor()

    cursor.execute('SELECT MAX(CAST(created_date AS DATE)) FROM airflow.austin_service_reports;')
    
    recent_ds = cursor.fetchone()[0]
    if recent_ds is not None:
        recent_ds+=timedelta(days=1)
        if recent_ds < previous:
            prev_ds = datetime.strftime(recent_ds, '%Y-%m-%d')
        else:
            prev_ds = kwargs['prev_ds']
    else:
        prev_ds = datetime.strftime(kwargs['start_date']-timedelta(days=1), '%Y-%m-%d')
    
    cursor.close()
    conn.close()

    # open connection to BigQuery
    hook = BigQueryHook(
        bigquery_conn_id='my_gcp_connection',
        use_legacy_sql=False
    )
    conn = hook.get_conn()
    cursor = conn.cursor()
    with open(SQL_PATH + 'query_bq_dataset.sql', 'r') as f:
        query = f.read()
        query = query.format(prev_ds,ds)

    cursor.execute(query)

    # write to gcs bucket
    with BUCKET.open('bq_bucket/bq_dataset.csv', 'w') as f:
        while True:
            result = cursor.fetchone()
            if result is None:
                break
            
            if result[6] is None:
                result[6]= ''
            else:
                result[6] = datetime.utcfromtimestamp(result[6])
            result[5] = datetime.utcfromtimestamp(result[5])
            f.write(','.join([str(val) for val in result]) + '\n')

    cursor.close()
    conn.close()

Example #12

0

Show file

 def execute(self, context):
     try:
         logging.info("Dimension Operator::Creating Dimension Tables")
         conn = PostgresHook(self.redshift_conn_id).get_conn()
         logging.info(
             f"Dimension Operator::Executing Dimension Query::{self.sql_statement}"
         )
         cursor = conn.cursor()
         cursor.execute(self.sql_statement)
         conn.commit()
         cursor.close()
         conn.close()
     except Exception as e:
         raise e

Example #13

0

Show file

def create_target_table(
    target_db,
    table_name=None,
    field_mapping=None,
    task_instance=None,
    **kwargs,
):
    """Create target database table or delete all from it.

    If target table exists, create a copy table as a back up to be used in case of failure.
    If target table doesn't exits, create one.
    """
    create_table_sql = """
        CREATE TABLE {{ table_name }} (
        {% for _, tt_field_name, tt_field_constraints in field_mapping %}
            {{ tt_field_name }} {{ tt_field_constraints }}{{ "," if not loop.last }}
        {% endfor %}
        );
    """

    table_exists = task_instance.xcom_pull(
        task_ids='check-if-table-exists')[0][0]
    try:
        target_db_conn = PostgresHook(postgres_conn_id=target_db).get_conn()
        target_db_cursor = target_db_conn.cursor()

        # If table already exists in the target database, create a copy table to be used
        # for rollback in case of failiure or create one.
        # Until there will be possibility for incremental load
        if table_exists and table_exists != 'None':
            table_name = f'{table_name}_copy'

        logging.info(f'Creating a target table {table_name}')
        rendered_create_table_sql = Template(create_table_sql).render(
            table_name=sql.Identifier(table_name).as_string(target_db_conn),
            field_mapping=field_mapping,
        )
        target_db_cursor.execute(rendered_create_table_sql)
        target_db_conn.commit()

    # TODO: Gotta Catch'm all
    except Exception as e:
        logging.error(f'Exception: {e}')
        target_db_conn.rollback()
        raise

    finally:
        if target_db_conn:
            target_db_cursor.close()
            target_db_conn.close()

Example #14

0

Show file

File: af_fm_express_check.py Project: 1minimalist/dags

def read_data(**kwargs):
    cluster = cl(['10.103.5.51', '10.103.5.52', '10.103.5.53'])
    session = cluster.connect('darbiz')

    conn = PostgresHook(postgres_conn_id='pgConn_pg').get_conn()
    cur = conn.cursor()

    rows = session.execute("SELECT * from darbiz.forte_express_loan_requests where created_on>='2020-09-20' allow filtering")
    cur.execute ("select distinct owner profile_id, uid order_id, pay_title from dar_group.bazar_orders1 where created_on>=now()-interval '24' hour")
    res = cur.fetchall()
    for user_row in rows:
        d = json.loads(user_row.loan_request)
        id0 = user_row.profile_id
        id1 = user_row.order_id
        id2 = user_row.created_on
        pp = d['person_info']['financing_info']['period'] if 'period' in d['person_info']['financing_info'] else None
        lh = datetime.now() - timedelta(hours = 12)
        if id2>=lh:
            for a,b,c in res:
                ll=c.split()
                if id1==b:
                    if pp!=int(ll[2]):
                        email = EmailOperator(\
                            task_id='send_email',\
                            to=['*****@*****.**','*****@*****.**'],\
                            subject='Ошибка в Fortemarket',\
                            html_content='Error in order_id: {} created at: {}, profile_id: {}, months in request: {}, months in orders: {}\n' \
                                .format(a, id2, b, pp, ll[2])\
                            )
                        email.execute(context=kwargs)

                        t3 = SlackWebhookOperator(
                            task_id='send_slack_notification',
                            http_conn_id='slack_connection',
                            message='Error in order_id: {} created at: {}, profile_id: {}, months in request: {}, months in orders: {}\n' \
                                .format(a, id2, b, pp, ll[2]),\
                            # files = '/tmp/BPM_report.xlsx',
                            channel='#reports',\
                            dag=dag
                        )
                        t3.execute(context=kwargs)
                else:
                    continue
        else:
            continue
        # lt = d['person_info']['financing_info']['loan_type'] if 'loan_type' in d['person_info']['financing_info'] else None

    cur.close()
    conn.close()

Example #15

0

Show file

File: init.py Project: mvsousa/mac499

def clear_clean_db(**kwargs):
    ti = kwargs['ti']
    file = ti.xcom_pull(task_ids='move_csv_to_stagging_area',
                        key='currently_processing')
    lote_id = ti.xcom_pull(task_ids='generate_unique_lote_id', key='lote_id')
    source = ti.xcom_pull(task_ids='move_csv_to_stagging_area', key='source')
    file_size = ti.xcom_pull(task_ids='move_csv_to_stagging_area',
                             key='file_size')
    table = ti.xcom_pull(task_ids='set_base_table', key='table')
    query = """DELETE FROM {};""".format(table)
    dest_conn = PostgresHook(postgres_conn_id='etl_stage_clean').get_conn()
    dest_cursor = dest_conn.cursor()
    dest_cursor.execute(query)
    dest_conn.commit()
    dest_conn.close()
    return True

Example #16

0

Show file

File: elasticsearch_operator.py Project: GabrielSGoncalves/AirFlow_HCV

    def execute(self, context):
        postgres = PostgresHook(
            postgres_conn_id=self.postgres_conn_id).get_conn()
        es = ElasticsearchHook(
            elasticsearch_conn_id=self.elasticsearch_conn_id)

        self.log.info("Extracting data from PostgreSQL: %s", self.sql)

        with postgres.cursor(name="serverCursor",
                             cursor_factory=RealDictCursor) as postgres_cursor:
            postgres_cursor.itersize = 2000
            postgres_cursor.execute(self.sql)
            for row in postgres_cursor:
                doc = json.dumps(row, indent=2)
                es.add_doc(index=self.index, doc_type='external', doc=doc)
        postgres.close()

Example #17

0

Show file

def etl():

    conn = PostgresHook(postgres_conn_id="postgres_apple").get_conn()
    src_cur = conn.cursor()
    dest_cur = conn.cursor()

    query = """
    SELECT *
    FROM raw
    """

    src_cur.execute(query)

    while True:
        records = src_cur.fetchone()
        if not records:
            break

        row = []
        url = records[2]
        soup = bs(records[3], "html.parser")
        row.append(
            [
                url,
                apple.get_id_num(url),
                apple.get_price(soup),
                apple.get_date(soup),
                apple.get_screen(soup),
                apple.get_color(url),
            ]
        )

        try:
            dest_cur.execute(
                """INSERT INTO ads (url, id_num, price, date, screen, color)
                                VALUES (%s, %s, %s, %s, %s, %s)""",
                [i for i in row[0]],
            )
            conn.commit()
            logging.info("New record inserted")
        except BaseException:
            pass
            logging.info("Duplicate records")

    src_cur.close()
    dest_cur.close()
    conn.close()

Example #18

0

Show file

File: get_ads.py Project: CurtLH/airflow_dags

def get_ads_html():

    conn = PostgresHook(postgres_conn_id="postgres_apple").get_conn()
    conn.autocommit = True
    cur = conn.cursor()

    urls = apple.get_urls()
    for url in urls:
        html = apple.get_html(url)
        cur.execute(
            """INSERT INTO raw (url, html)
                      VALUES (%s, %s)""",
            [url, html],
        )

    cur.close()
    conn.close()

Example #19

0

Show file

File: extract_and_load.py Project: clairetaylor352/RB-Airflow

def load_table(**kwargs):
    conn = PostgresHook(postgres_conn_id='my_local_db').get_conn()
    cursor = conn.cursor()

    client = storage.Client()
    bucket = client.get_bucket('airy-media-254122.appspot.com')
    blob = bucket.get_blob('bq_bucket/bq_dataset.csv')

    # create a temporary file and store csv into that to read
    tempf = NamedTemporaryFile()
    blob.download_to_filename(tempf.name)

    query = "COPY airflow.austin_service_reports FROM '"+tempf.name+"' WITH (FORMAT csv)"
    cursor.execute(query)

    tempf.close()
    conn.commit()
    cursor.close()
    conn.close()

Example #20

0

Show file

File: extract_and_load.py Project: alexzlue/RB-Airflow

def load_table(**kwargs):
    conn = PostgresHook(postgres_conn_id='my_local_db').get_conn()
    cursor = conn.cursor()

    client = storage.Client()
    bucket = client.get_bucket('airy-media-254122.appspot.com')
    blob = bucket.get_blob('bq_bucket/bq_dataset.txt')

    # create a temporary file and store csv into that to read
    tempf = NamedTemporaryFile()
    blob.download_to_filename(tempf.name)

    with open(SQL_PATH + 'load_postgres_table.sql') as f:
        query = f.read().format(tempf.name, kwargs['ts'])

    cursor.execute(query)

    tempf.close()
    conn.commit()
    cursor.close()
    conn.close()

Example #21

0

Show file

File: get_data.py Project: CurtLH/airflow_dags

def get_goal_data(goal_status=1):

    conn = PostgresHook(postgres_conn_id="postgres_workboard").get_conn()
    conn.autocommit = True
    cur = conn.cursor()

    params = {
        "token": Variable.get("workboard_token"),
        "goal_status": goal_status
    }

    base_url = "https://www.myworkboard.com/wb/apis"

    r = requests.get(base_url + "/goal", params=params)
    if r.status_code != 200:
        raise ValueError("Request not successful. Status code {}".format(
            r.status_code))
    else:
        goals = r.json()["data"]["goal"]
        logging.info("There are {} person(s) with goals".format(len(goals)))
        for person in goals:
            logging.info("{} has {} goals".format(person["user_email"],
                                                  len(person["people_goals"])))
            for g in person["people_goals"]:
                try:
                    cur.execute(
                        """INSERT INTO goals_raw (goal_id, goal_data)
                           VALUES (%s, %s) ON CONFLICT (goal_id)
                           DO UPDATE SET goal_data = EXCLUDED.goal_data
                        """,
                        [int(g["goal_id"]), json.dumps(g)],
                    )
                    logging.info("Goal id #{} inserted into the table".format(
                        g["goal_id"]))
                except BaseException:
                    logging.warning("Record skipped")
                    pass

    cur.close()
    conn.close()

Example #22

0

Show file

File: init.py Project: mvsousa/mac499

def move_conform_to_final(**kwargs):
    ti = kwargs['ti']
    file = ti.xcom_pull(task_ids='move_csv_to_stagging_area',
                        key='currently_processing')
    lote_id = ti.xcom_pull(task_ids='generate_unique_lote_id', key='lote_id')
    source = ti.xcom_pull(task_ids='move_csv_to_stagging_area', key='source')
    file_size = ti.xcom_pull(task_ids='move_csv_to_stagging_area',
                             key='file_size')
    table = ti.xcom_pull(task_ids='set_base_table', key='table')
    query = """SELECT * FROM {} WHERE loteId = '{}';""".format(table, lote_id)

    src_conn = PostgresHook(postgres_conn_id='etl_stage_conform').get_conn()
    dest_conn = PostgresHook(postgres_conn_id='etl_stage_final').get_conn()

    src_cursor = src_conn.cursor()
    dest_cursor = dest_conn.cursor()

    query_header = """ select column_name from information_schema.columns where table_name = '{}';""".format(
        table)
    src_cursor.execute(query_header)
    headers_result = src_cursor.fetchmany(size=1000)
    headers = [str(s[0]).lower() for s in headers_result]

    src_cursor.execute(query)
    count = 0
    set_lote_id_begin(dest_cursor, lote_id, source, file_size)
    while True:
        records = src_cursor.fetchmany(size=1)
        if not records:
            break
        insert_final_table(dest_cursor, table, records, headers)
        count += 1
    set_lote_id_end(dest_cursor, lote_id, count, source, file_size)
    dest_conn.commit()
    src_cursor.close()
    dest_cursor.close()
    src_conn.close()
    dest_conn.close()
    return True

Example #23

0

Show file

File: af_fm_count_act_product.py Project: 1minimalist/dags

def agg_count():
    conn = PostgresHook(postgres_conn_id='pgConn_pg').get_conn()
    cur = conn.cursor()

    cur.execute(
        'delete from dar_group.agg_fm_count_act_product where date_ = current_date'
    )
    conn.commit()
    word1 = '{darbazar}'
    word2 = 'Архив'
    s = 'insert into dar_group.agg_fm_count_act_product\
        select current_date as date_, c.title as category,\
        count(distinct f.id) as koltovara\
        from dar_group.fm_showcase f\
        join dar_group.darbazar_categories c on substring(f.default_category, 7, 36) = c.id\
        where f.is_visible = \'true\' and f.scope_ = \'{}\' and c.title != \'{}\' \
        group by c.title,f.merchant_id'.format(word1, word2)
    cur.execute(s)
    conn.commit()

    cur.close()
    conn.close()

Example #24

0

Show file

File: af_digital_orders.py Project: 1minimalist/dags

def read_cassandra():
    conn = PostgresHook(postgres_conn_id='pgConn_pg').get_conn()
    cur = conn.cursor()

    cluster = Cluster(["10.103.5.51", "10.103.5.52", "10.103.5.53"], port=9042)
    session = cluster.connect('darbiz', wait_for_all_pools=True)
    session.default_timeout = 10000
    rows = session.execute('select * from forte_market_digital_orders')

    cur.execute('truncate table dar_group.digital_orders')

    for row in rows:
        d = json.loads(row.order_data)
        order_id = d['uid']
        cert_amount = d['price']
        order_type = d['order_type']
        is_gift_cert = d['is_gift_cert']
        cert_title = d['cert_title'].strip()
        gift_receiver = d['gift_receiver']['name'].strip()
        gift_receiver_mail = d['gift_receiver']['email'].strip()
        gift_receiver_mobile = d['gift_receiver']['mobile'].strip()
        act_code = d['activation_code'].strip()
        is_paid = d['paid']
        pay_type = d['pay_type'].strip()
        cert_name = d['pay_title'].strip()
        created_on = d['created_on']
        delivery_date = d['delivery_date']
        expiry_date = d['expiring_date'] if 'expiring_date' in d else None
        is_act = d['is_activated'] if 'is_activated' in d else None

        cur.execute ("INSERT INTO dar_group.digital_orders(order_id,order_type,created_dtime,is_gift_cert,cert_title,cert_name,cert_amount,\
                activation_code,is_paid,is_activated,pay_type,gift_receiver,gift_receiver_mail,gift_receiver_mobile,delivery_dtime,expiry_dtime) \
                VALUES (%s, %s, %s, %s, %s, %s,%s, %s, %s, %s, %s, %s, %s, %s,%s,%s)"                                                                                     , (order_id,order_type,created_on,is_gift_cert,cert_title,cert_name,cert_amount,\
                        act_code,is_paid,is_act,pay_type,gift_receiver,gift_receiver_mail,gift_receiver_mobile,delivery_date,expiry_date))
        conn.commit()

    cur.close()
    conn.close()

Example #25

0

Show file

    def execute(self, context):
        # self.log.info('LoadDimensionOperator not implemented yet')
        try:
            conn = PostgresHook(self.redshift_conn_id).get_conn()

            logging.info(
                f"Ensuring Distinct Records::{self.table_name}, on::{self.distinct_column}"
            )

            cursor = conn.cursor()

            cursor.execute(f'select count(*) from {self.table_name}')

            total_count = cursor.fetchone()

            logging.info(f"Total Count::{total_count}")

            cursor.execute(
                f'select count (distinct {self.distinct_column}) from {self.table_name}'
            )

            unique_count = cursor.fetchone()

            logging.info(f"Unique Count::{unique_count}")

            if total_count != unique_count:
                raise Exception(
                    "Total count does not match unique count. Invalid duplicate records."
                )

            conn.commit()

            cursor.close()

            conn.close()

        except Exception as e:
            raise e

Example #26

0

Show file

File: prediction.py Project: akumuthan-dev/data-flow

def fetch_interaction_data(target_db: str, query: str):
    logger.info("starting fetching data")

    try:
        # create connection with named cursor to fetch data in batches
        connection = PostgresHook(postgres_conn_id=target_db).get_conn()
        cursor = connection.cursor(name='fetch_interaction')
        cursor.execute(query)

        rows = cursor.fetchall()
        df = pd.DataFrame(rows)
        df.columns = [column[0] for column in cursor.description]
        df, _ = preprocess(df, action='predict')

        logger.info(f"check df shape: {df.shape}")
        logger.info(f"df head: {df.head()}")

    finally:
        if connection:
            cursor.close()
            connection.close()

    return df

Example #27

0

Show file

File: get_data.py Project: CurtLH/airflow_dags

def get_team_member_data():

    conn = PostgresHook(postgres_conn_id="postgres_workboard").get_conn()
    conn.autocommit = True
    cur = conn.cursor()

    cur.execute("SELECT team_id FROM teams_raw")
    team_ids = [line[0] for line in cur]

    params = {"token": Variable.get("workboard_token")}
    base_url = "https://www.myworkboard.com/wb/apis"

    for t in team_ids:
        r = requests.get(base_url + "/team/{}".format(t), params=params)
        if r.status_code != 200:
            logging.warning("Request not successful. Status code {}".format(
                r.status_code))
            pass
        else:
            team = r.json()["data"]["team"]
            try:
                cur.execute(
                    """INSERT INTO team_members_raw (team_id, team_member_data)
                        VALUES (%s, %s) ON CONFLICT (team_id)
                        DO UPDATE SET team_member_data = EXCLUDED.team_member_data
                    """,
                    [team["team_id"], json.dumps(team)],
                )
                logging.info("Team id #{} inserted into the table".format(
                    team["team_id"]))
            except BaseException:
                logging.warning("Record skipped")
                pass

    cur.close()
    conn.close()

Example #28

0

Show file

File: af_dim_merch_tariff.py Project: 1minimalist/dags

def get_aerospike_data():
    try:
        client = aerospike.client(config).connect()
        print(datetime.now(), "Connected to aerospike host:", config['hosts'])
    except:
        print("Failed to connect to the cluster with", config['hosts'])
        sys.exit(1)

    conn = PostgresHook(postgres_conn_id='pgConn_pg').get_conn()
    cur = conn.cursor()
    cur.execute('delete from dar_group.merch_com')
    conn.commit()
    query = client.query('merchants', 'contracts')
    records = query.results()
    client.close()

    print('Starting insert...')

    for x in records:
        id1 = x[2]['merchant_id'] if 'merchant_id' in x[2] else None

        if 'tariffs' in x[2]:
            try:
                s = json.loads(str(x[2]['tariffs']).replace("'", '"'))
                if s == {}:
                    id4 = ''
                    id5 = ''
                    id6 = ''
                    id7 = ''
                    id8 = ''
                    id9 = ''
                    id10 = ''
                    id11 = ''
                    id12 = ''
                    id13 = ''
                    id14 = ''
                for i in s:
                    id4 = i
                    id5 = s[i]['FORTE_EXPRESS_0_4'] if 'FORTE_EXPRESS_0_4' in s[
                        i] else None
                    id6 = s[i][
                        'FORTE_EXPRESS_0_12'] if 'FORTE_EXPRESS_0_12' in s[
                            i] else None
                    id7 = s[i][
                        'FORTE_EXPRESS_0_24'] if 'FORTE_EXPRESS_0_24' in s[
                            i] else None
                    id8 = s[i][
                        'FORTE_EXPRESS_18_6'] if 'FORTE_EXPRESS_18_6' in s[
                            i] else None
                    id9 = s[i][
                        'FORTE_EXPRESS_18_12'] if 'FORTE_EXPRESS_18_12' in s[
                            i] else None
                    id10 = s[i][
                        'FORTE_EXPRESS_18_24'] if 'FORTE_EXPRESS_18_24' in s[
                            i] else None
                    id11 = s[i]['CARD_0_0'] if 'CARD_0_0' in s[i] else None
                    id12 = s[i]['ACQUIRING_0_4'] if 'ACQUIRING_0_4' in s[
                        i] else None
                    id13 = s[i]['ACQUIRING_0_6'] if 'ACQUIRING_0_6' in s[
                        i] else None
                    id14 = s[i]['ACQUIRING_0_12'] if 'ACQUIRING_0_12' in s[
                        i] else None

                    cur.execute(
                        "INSERT INTO dar_group.merch_com(merchant_id,cat_id,FORTE_EXPRESS_0_4,FORTE_EXPRESS_0_12,FORTE_EXPRESS_0_24,FORTE_EXPRESS_18_6, \
                                FORTE_EXPRESS_18_12,FORTE_EXPRESS_18_24,CARD_0_0,ACQUIRING_0_4,ACQUIRING_0_6,ACQUIRING_0_12) \
                                VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                        (id1, id4, id5, id6, id7, id8, id9, id10, id11, id12,
                         id13, id14))
                    conn.commit()
                    print('Written portion...')
            except:
                pass

    cur.close()
    conn.close()

Example #29

0

Show file

def execute_insert_into(target_db,
                        table_name=None,
                        run_fetch_task_id=None,
                        field_mapping=None,
                        task_instance=None,
                        **kwargs):
    """Inserts each paginated response data into target database table.
    Polls to find variable hasn't been processed, generates regarding sql statement to
    insert data in, incrementally waits for new variables.
    Success depends on fetcher task completion.
    """

    insert_into_sql = """
        INSERT INTO {{ table_name }} (
        {% for _, tt_field_name, _ in field_mapping %}
            {{ tt_field_name }}{{ "," if not loop.last }}
        {% endfor %}
        )
        VALUES
        {% for record in record_subset %}
        (
            {% for st_field_name, _, _ in field_mapping %}
                {% if not record[st_field_name] or record[st_field_name] == 'None' %}
                    NULL
                {% else %}
                    {{ record[st_field_name] }}
                {% endif %}
                {{ "," if not loop.last }}
            {% endfor %}
        {{ ")," if not loop.last }}
        {% endfor %}
        );
    """
    # Give some initial time to fetch task to get a page and save it into variable
    time.sleep(3)
    # Used for providing incremental wait
    sleep_time = 5
    number_of_run = 1
    redis_client = get_redis_client()
    table_exists = task_instance.xcom_pull(
        task_ids='check-if-table-exists')[0][0]
    if table_exists and table_exists != 'None':
        table_name = f'{table_name}_copy'
    try:
        target_db_conn = PostgresHook(postgres_conn_id=target_db).get_conn()
        target_db_cursor = target_db_conn.cursor()

        while True:
            var_name = get_available_page_var(redis_client, run_fetch_task_id)
            if var_name:
                logging.info(f'Got the unprocessed var_name {var_name}')
                sleep_time = 5
                var_name = var_name.decode('utf-8')
                try:
                    record_subset = json.loads(Variable.get(var_name))
                except KeyError:
                    logging.info(
                        f'Var {var_name} no more exist! It is processed by another worker. Moving on.'
                    )
                    continue

                escaped_record_subset = []
                for record in record_subset:
                    escaped_record = {}
                    for key, value in record.items():
                        if value and value != 'None':
                            escaped_record[key] = sql.Literal(value).as_string(
                                target_db_conn)
                        else:
                            escaped_record[key] = sql.Literal(None).as_string(
                                target_db_conn)
                    escaped_record_subset.append(escaped_record)

                exec_sql = Template(insert_into_sql).render(
                    table_name=sql.Identifier(table_name).as_string(
                        target_db_conn),
                    field_mapping=field_mapping,
                    record_subset=escaped_record_subset,
                )
                target_db_cursor.execute(exec_sql)
                logging.info(f'Deleting the var_name {var_name}')
                Variable.delete(var_name)
            else:
                # Check if fetch task completed successfully, if it's, break out of loop and commit
                # the transaction because there is no more page to process. If it's failed raise Exception so that
                # transaction will be rollbacked
                state = task_instance.xcom_pull(key='state',
                                                task_ids=run_fetch_task_id)
                logging.info(f'Checking the state of fetcher task {state}')
                if state is False:
                    raise Exception('Fetcher task failed!')
                elif state is True:
                    logging.info(
                        'Fetcher task successfully completed and there is no more variable to process.'
                    )
                    break
                else:
                    logging.info(
                        f'Sleeping for {sleep_time} fetcher task to catchup')
                    sleep_time = sleep_time * number_of_run
                    time.sleep(sleep_time)
                    number_of_run += 1

        target_db_conn.commit()
        task_instance.xcom_push(key='state', value=True)

    # TODO: Gotta Catch'm all
    except Exception as e:
        logging.error(f'Exception: {e}')
        target_db_conn.rollback()
        task_instance.xcom_push(key='state', value=False)
        raise

    finally:
        if target_db_conn:
            target_db_cursor.close()
            target_db_conn.close()

Example #30

0

Show file

                    oracle_conn_id='oracle_src',
                    postgres_conn_id='postgres_tgt',
                    # provide_context=True,
                    params=params,
                    batch_size=int(Variable.get("oebs.select.batch.size", default_var=5000))
                )

                check_target_schema >> po >> update_tables

    update_activity_table = PostgresOperator(
        task_id='update_activity',
        sql= '''
            select 'bla bla bla';
        ''',
        postgres_conn_id='postgres_tgt',
        autocommit=True
    )

    update_cases_table = PostgresOperator(
        task_id='update_cases',
        sql= '''
            select 'bla bla bla';
        ''',
        postgres_conn_id='postgres_tgt',
        autocommit=True
    )

    update_tables >> [update_activity_table, update_cases_table]

    tgt_conn.close()