Example 1
def process_weather(station, field_to_process):
    db_hook = SqliteHook(sqlite_conn_id='sqlite_default')

    weather_select = (f'select record_date, {field_to_process} '
                      f'from weather where station_id={station} '
                      f'order by record_date;')

    data = db_hook.get_pandas_df(weather_select)
    average = data.rolling(
        3, center=True).mean().rename(columns={field_to_process: 'average'})
    data = data.merge(average, left_index=True, right_index=True)

    del average

    weather_update = """
    update weather
    set average = ?
    where station_id=? and record_date=?;
    """

    # DataFrame.apply() is used here as a workaround to avoid DataFrame.itertuples(),
    # iteritems() or iterrows(), which can become a bottleneck once the number
    # of rows grows beyond a few thousand

    data.apply(lambda row: db_hook.run(weather_update,
                                       parameters=(row['average'], station,
                                                   row['record_date'])),
               axis=1)
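Even with apply(), the snippet above still issues one UPDATE per row through hook.run(). If write volume is the actual bottleneck, a single executemany() call over the hook's raw connection is usually cheaper. The following is a minimal sketch under that assumption, reusing the weather schema and the sqlite_default connection from the example; update_averages_batched is a hypothetical helper, not part of the original code.

from airflow.hooks.sqlite_hook import SqliteHook

def update_averages_batched(station, data):
    # Sketch of a batched alternative: bind all rows once and let sqlite3
    # send them in a single executemany() round trip.
    db_hook = SqliteHook(sqlite_conn_id='sqlite_default')
    weather_update = """
    update weather
    set average = ?
    where station_id=? and record_date=?;
    """
    rows = list(zip(data['average'], [station] * len(data), data['record_date']))
    conn = db_hook.get_conn()
    try:
        conn.executemany(weather_update, rows)
        conn.commit()
    finally:
        conn.close()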
Example 2
def save_data_into_db():
    sqlite_hook = SqliteHook(sqlite_conn_id='playgroundDB')
    with open('data.json') as f:
        data = json.load(f)
    insert = """
            INSERT INTO Covid19Report (
                confirmed,
                recovered,
                hospitalized,
                deaths,
                new_confirmed,
                new_recovered,
                new_hospitalized,
                new_deaths,
                update_date,
                source,
                dev_by,
                sever_by)
            VALUES (?,?,?,?,?,?,?,?,?,?,?,?);
             """
    sqlite_hook.run(
        insert,
        parameters=(data['Confirmed'], data['Recovered'], data['Hospitalized'],
                    data['Deaths'], data['NewConfirmed'], data['NewRecovered'],
                    data['NewHospitalized'], data['NewDeaths'],
                    datetime.strptime(data['UpdateDate'], '%d/%m/%Y %H:%M'),
                    data['Source'], data['DevBy'], data['SeverBy']))
Example 3
def load_csv_to_sqlite(csv_path, sqlite_table, **context):
    """
    Loads CSV data (i.e. research_papers and mct_talks) into the default Sqlite DB.
    """
    df = pd.read_csv(csv_path)
    with SqliteHook().get_conn() as conn:
        df.to_sql(sqlite_table, con=conn, if_exists='replace')
Example 4
def transform_data():
    import pyspark
    import pandas as pd
    from pyspark.sql import SQLContext
    from pyspark import SparkContext

    dest = SqliteHook(sqlite_conn_id='sqlite2')
    dest_conn = dest.get_conn()
    BASE_DIR = os.path.dirname(os.path.abspath("__file__"))

    sc = pyspark.SparkContext()
    sqlContext = SQLContext(sc)

    df = sc.textFile(os.path.join(BASE_DIR, "InputFile.txt"))
    df.collect()
    sparkDF = df.map(lambda x: str(x).translate({ord(c): None
                                                 for c in '][""'})
                     ).map(lambda w: w.split(',')).toDF()
    pdDF = sparkDF.toPandas()

    sqlUpdate = 'INSERT OR REPLACE INTO Currency(USD, JPY, CAD, GBP, NZD, INR, Date_of_rate) VALUES (?, ?, ?, ?, ?, ?, ?)'
    data = pdDF.values
    dest_conn.executemany(sqlUpdate, data)
    dest_conn.commit()
    sc.stop()
Example 5
def get_weather(url_params):
    db_hook = SqliteHook(sqlite_conn_id='sqlite_default')
    api_hook = HttpHook(http_conn_id='http_default', method='GET')

    url = add_or_replace_parameters('v1/history/daily', url_params)

    resp = api_hook.run(url)
    data = resp.json()['data']

    # usually unnecessary, but explicitly freeing the response object can help when the payload is large
    del resp

    weather_insert = """
    insert or ignore into weather 
    (station_id, record_date, temperature, 
    temperature_min, temperature_max, winddirection, windspeed, sunshine, pressure) 
    values (?, ?, ?, ?, ?, ?, ?, ?, ?);
    """

    for day in data:
        db_hook.run(weather_insert,
                    parameters=(url_params['station'], day['date'],
                                day['temperature'], day['temperature_min'],
                                day['temperature_max'], day['winddirection'],
                                day['windspeed'], day['sunshine'],
                                day['pressure']))
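The loop above calls hook.run() once per day returned by the API. For long histories, DbApiHook.insert_rows() batches the writes into fewer commits; the sketch below could replace the for-loop at the end of get_weather(), under that assumption. Note that insert_rows() issues plain INSERTs, so the "insert or ignore" duplicate handling of the original statement is lost.

    # Sketch: batch the daily records instead of one hook.run() per day.
    rows = [(url_params['station'], day['date'],
             day['temperature'], day['temperature_min'],
             day['temperature_max'], day['winddirection'],
             day['windspeed'], day['sunshine'], day['pressure'])
            for day in data]
    db_hook.insert_rows(
        table='weather',
        rows=rows,
        target_fields=['station_id', 'record_date', 'temperature',
                       'temperature_min', 'temperature_max', 'winddirection',
                       'windspeed', 'sunshine', 'pressure'],
        commit_every=1000)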
Example 6
def getdf():
    import os.path
    h = SqliteHook(conn_id)
    df = h.get_pandas_df(
        "SELECT A.DocumentNo, A.FullName, A.Device, A.Country, B.OrderId, B.DocumentNo, B.OrderDate, B.CatalogId,C.CatalogId, C.ProductId, C.CUID, D.ProductId, D.ProductName, D.CUID FROM CUSTOMERS as A inner join ORDERS as B on A.DocumentNo = B.DocumentNo inner join CATALOG as C on C.CatalogId = B.CatalogId inner join PRODUCTS AS D ON C.CUID = D.CUID"
    )
    print(df)
    df.to_csv(os.path.join(BASE_DIR, "SourceData.txt"), index=False)
Example 7
def createdb():

    try:
        dest = SqliteHook(sqlite_conn_id='sqlite2')
        dest_conn = dest.get_conn()
        dest_conn.execute(
            '''CREATE TABLE if not exists Currency(USD text, JPY text, CAD text, GBP text, NZD text, INR text, Date_of_rate date)'''
        )
    except Exception:
        print("SQLite Connection Failed")
Example 8
def etl_data(**kwargs):
    # Extract, transform and load the CSV into the DB
    task_instance = kwargs['ti']
    filename = task_instance.xcom_pull(task_ids='get_file')

    conn_host = SqliteHook(sqlite_conn_id='sqlite_db', supports_autocommit=True).get_conn()

    loader = ETLoader(filename, downloads_path_dir, conn_host, 'revenues', revenue_table)
    loader.orchestrate_etl()

    return filename
Example 9
 def get_hook(self):
     try:
         if self.conn_type == 'mysql':
             from airflow.hooks.mysql_hook import MySqlHook
             return MySqlHook(mysql_conn_id=self.conn_id)
         elif self.conn_type == 'google_cloud_platform':
             from airflow.contrib.hooks.bigquery_hook import BigQueryHook
             return BigQueryHook(bigquery_conn_id=self.conn_id)
         elif self.conn_type == 'postgres':
             from airflow.hooks.postgres_hook import PostgresHook
             return PostgresHook(postgres_conn_id=self.conn_id)
         elif self.conn_type == 'hive_cli':
             from airflow.hooks.hive_hooks import HiveCliHook
             return HiveCliHook(hive_cli_conn_id=self.conn_id)
         elif self.conn_type == 'presto':
             from airflow.hooks.presto_hook import PrestoHook
             return PrestoHook(presto_conn_id=self.conn_id)
         elif self.conn_type == 'hiveserver2':
             from airflow.hooks.hive_hooks import HiveServer2Hook
             return HiveServer2Hook(hiveserver2_conn_id=self.conn_id)
         elif self.conn_type == 'sqlite':
             from airflow.hooks.sqlite_hook import SqliteHook
             return SqliteHook(sqlite_conn_id=self.conn_id)
         elif self.conn_type == 'jdbc':
             from airflow.hooks.jdbc_hook import JdbcHook
             return JdbcHook(jdbc_conn_id=self.conn_id)
         elif self.conn_type == 'mssql':
             from airflow.hooks.mssql_hook import MsSqlHook
             return MsSqlHook(mssql_conn_id=self.conn_id)
         elif self.conn_type == 'oracle':
             from airflow.hooks.oracle_hook import OracleHook
             return OracleHook(oracle_conn_id=self.conn_id)
         elif self.conn_type == 'vertica':
             from airflow.contrib.hooks.vertica_hook import VerticaHook
             return VerticaHook(vertica_conn_id=self.conn_id)
         elif self.conn_type == 'cloudant':
             from airflow.contrib.hooks.cloudant_hook import CloudantHook
             return CloudantHook(cloudant_conn_id=self.conn_id)
         elif self.conn_type == 'jira':
             from airflow.contrib.hooks.jira_hook import JiraHook
             return JiraHook(jira_conn_id=self.conn_id)
         elif self.conn_type == 'redis':
             from airflow.contrib.hooks.redis_hook import RedisHook
             return RedisHook(redis_conn_id=self.conn_id)
         elif self.conn_type == 'wasb':
             from airflow.contrib.hooks.wasb_hook import WasbHook
             return WasbHook(wasb_conn_id=self.conn_id)
         elif self.conn_type == 'docker':
             from airflow.hooks.docker_hook import DockerHook
             return DockerHook(docker_conn_id=self.conn_id)
     except:
         pass
Example 10
def get_num_active_dagruns(dag_id, conn_id='sqlite_default'):
    # if you've opted for a different backend for airflow, you will need to
    # refactor the two lines below. For a Postgres example, please refer to
    # https://github.com/Nextdoor/airflow_examples/blob/master/dags/util.py#L8
    airflow_db = SqliteHook(sqlite_conn_id=conn_id)
    conn = airflow_db.get_conn()
    cursor = conn.cursor()
    sql = """
         select count(*)
         from dag_run
         where dag_id = '{dag_id}'
         and state in ('running', 'queued', 'up_for_retry')
          """.format(dag_id=dag_id)
    cursor.execute(sql)
    num_active_dagruns = cursor.fetchone()[0]
    return num_active_dagruns
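The query above interpolates dag_id into the SQL string with format(). A variant of the same lookup with a bound parameter (sqlite3 uses the qmark "?" style) avoids quoting and injection issues; the function name below is hypothetical.

def get_num_active_dagruns_parameterized(dag_id, conn_id='sqlite_default'):
    # Same count as above, but dag_id is bound instead of formatted in.
    airflow_db = SqliteHook(sqlite_conn_id=conn_id)
    cursor = airflow_db.get_conn().cursor()
    sql = """
         select count(*)
         from dag_run
         where dag_id = ?
         and state in ('running', 'queued', 'up_for_retry')
          """
    cursor.execute(sql, (dag_id,))
    return cursor.fetchone()[0]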
Example 11
def train_model(classifier, **context):
    """
    Trains a model using the given classifier and stores it at MODEL_PATH.
    """
    # Load data
    df = SqliteHook().get_pandas_df('select * from training_data')

    # Create pipeline
    pipeline = Pipeline([('vectorizer', CountVectorizer(stop_words='english')),
                         ('classifier', classifier())])

    # Train model
    pipeline.fit(df['Title'].tolist(), df['Conference'].tolist())

    # Save model
    _save_model(pipeline, classifier.__name__)
Example 12
def fill_devices_table(**kwargs):
    ti = kwargs['ti']
    devices = ti.xcom_pull(task_ids='get_devices_dates')
    conn_host = SqliteHook(sqlite_conn_id='sqlite_devices').get_conn()

    for device in devices:
        sql_insert = f"""INSERT OR REPLACE INTO {DEVICES_TABLE} 
                     (device, last_reading, latest_postprocessing)
                     VALUES ({device},
                             '{devices[device]["last_reading"]}',
                             '{devices[device]["latest_postprocessing"]}'
                            )
                     ;"""

        conn_host.execute(sql_insert)
        conn_host.commit()
    conn_host.close()
Example 13
def create_covid19_report_table():
    sqlite_hook = SqliteHook(sqlite_conn_id='playgroundDB')
    create = """
             CREATE TABLE "Covid19Report" (
                 "confirmed"        INTEGER,
                 "recovered"        INTEGER,
                 "hospitalized"     INTEGER,
                 "deaths"           INTEGER,
                 "new_confirmed"    INTEGER,
                 "new_recovered"    INTEGER,
                 "new_hospitalized" INTEGER,
                 "new_deaths"       INTEGER,
                 "update_date"      TEXT,
                 "source"           TEXT,
                 "dev_by"           TEXT,
                 "sever_by"         TEXT);
             """
    sqlite_hook.run(create)
Example 14
def predict(classifier, **context):
    """
    Makes predictions with the model created by the given classifier and
    stores the results in the mct_talks table.
    """
    # Load model
    model = _load_model(classifier.__name__)

    # Load data
    db = SqliteHook()
    df = db.get_pandas_df('select * from mct_talks')

    # Make predictions
    df['Conference'] = model.predict(df['Title'].tolist())

    # Save predictions
    with db.get_conn() as conn:
        df.to_sql('mct_talks', con=conn, if_exists='replace')
Example 15
def split_data(**context):
    """
    Splits the sample data (i.e. research_papers) into training and test sets
    and stores them in the Sqlite DB.
    """
    # Load full dataset
    db = SqliteHook()
    df = db.get_pandas_df('select * from research_papers')

    # Create train/test split
    train, _ = train_test_split(df.index,
                                test_size=0.33,
                                stratify=df['Conference'],
                                random_state=42)

    # Save training and test data in separate tables
    with db.get_conn() as conn:
        df.iloc[train].to_sql('training_data', con=conn, if_exists='replace')
        df.drop(train).to_sql('test_data', con=conn, if_exists='replace')
Example 16
def save_model_accuracy(**kwargs):
    # Tasks can pass parameters to downstream tasks through the XCom space.
    # XCom (cross-communication) allows communication between task instances.
    # In this example the current task `save_model_accuracy` takes the output
    # of the previous task `measure_accuracy` by "pulling" it from the XCom space
    ti = kwargs['ti']
    accuracy = ti.xcom_pull(task_ids='measure_accuracy')

    sql_insert = f"""INSERT INTO {TRAINING_TABLE} 
                            (mape_test, rmse_test, days_in_test)
                     VALUES({accuracy['mape_test']}, 
                            {accuracy['rmse_test']},
                            {accuracy['days_in_test']})
                    ;
                  """
    # Hooks are interfaces to external platforms (e.g. Amazon S3)
    # and DBs (e.g. SQLite, PostgreSQL)
    conn_host = SqliteHook(sqlite_conn_id='sqlite_ml').get_conn()
    conn_host.execute(sql_insert)
    conn_host.commit()
Example 17
def createdb():
    dest = SqliteHook(sqlite_conn_id='sqlite2')
    dest_conn = dest.get_conn()
    dest_conn.execute(
        '''CREATE TABLE if not exists ORDERS(OrderId text, DocumentNo text, OrderDate text, CatalogId text)'''
    )
    dest_conn.execute(
        '''CREATE TABLE if not exists CUSTOMERS(DocumentNo text, FullName text, Device text, Country real)'''
    )
    dest_conn.execute(
        '''CREATE TABLE if not exists CATALOG(CatalogId text, ProductId text, CUID text)'''
    )
    dest_conn.execute(
        '''CREATE TABLE if not exists PRODUCTS(ProductId text, ProductName text, CUID text)'''
    )
    dest_conn.execute(
        '''CREATE TABLE if not exists LEADS(Id text, CustomerId text)''')
    dest_conn.execute(
        '''CREATE TABLE if not exists LOGS(ClientIp text, UserName text, Time text)'''
    )
Example 18
def test_model(classifier, **context):
    """
    Tests a model created by the given classifier and returns its accuracy score
    as an XCom.
    """

    # Simulate a task failure
    # raise RuntimeError('BOOM!')

    # Load model
    model = _load_model(classifier.__name__)

    # Load data
    df = SqliteHook().get_pandas_df('select * from test_data')

    # Make predictions
    X, y = df['Title'].tolist(), df['Conference'].tolist()
    y_pred = model.predict(X)

    # Log classification results
    print(classification_report(y, y_pred))

    # Push Accuracy as XCom
    return classifier.__name__, f1_score(y, y_pred, average='weighted')
Example 19
def save_prediction(**kwargs):
    # Tasks can pass parameters to downstream tasks through the XCom space.
    # In this example the current task `save_prediction` takes the output of
    # the previous task `run_prediction` by "pulling" it from the XCom space
    ti = kwargs['ti']
    prediction_dict = ti.xcom_pull(task_ids='run_prediction')
    # INSERT OR REPLACE guarantees a single row for each date_to_predict:
    # it inserts a new prediction if one doesn't exist yet, or replaces the
    # existing one thanks to the unique index idx_date_to_predict on date_to_predict
    sql_insert = f"""INSERT OR REPLACE INTO {PREDICTION_TABLE} 
                     (date_to_predict, run_date, yhat, yhat_upper, yhat_lower)
                     VALUES ('{prediction_dict["date_to_predict"]}', 
                             '{prediction_dict["run_date"]}',
                             {prediction_dict["yhat"]}, 
                             {prediction_dict["yhat_upper"]},
                             {prediction_dict["yhat_lower"]}
                            )
                     ;"""
    # Hooks are interfaces to external platforms (e.g. Amazon S3)
    # and DBs (e.g. SQLite, PostgreSQL)
    conn_host = SqliteHook(sqlite_conn_id='sqlite_ml').get_conn()
    conn_host.execute(sql_insert)
    conn_host.commit()
    conn_host.close()
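As a hedged variant, the same write could go through hook.run() with bound parameters instead of interpolating the values into the SQL string; only the table name still has to be formatted in, since identifiers cannot be bound. The sketch below reuses PREDICTION_TABLE and prediction_dict from the example and could replace the direct connection calls at the end of save_prediction().

    sql_insert = f"""INSERT OR REPLACE INTO {PREDICTION_TABLE}
                     (date_to_predict, run_date, yhat, yhat_upper, yhat_lower)
                     VALUES (?, ?, ?, ?, ?);"""
    SqliteHook(sqlite_conn_id='sqlite_ml').run(
        sql_insert,
        parameters=(prediction_dict['date_to_predict'],
                    prediction_dict['run_date'],
                    prediction_dict['yhat'],
                    prediction_dict['yhat_upper'],
                    prediction_dict['yhat_lower']))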
Example 20
 def execute(self, context):
     _log.info('Executing: ' + self.sql)
     hook = SqliteHook(sqlite_conn_id=self.sqlite_conn_id)
     hook.run(self.sql, parameters=self.parameters)
Example 21
 def get_hook(self):
     if self.conn_type == 'mysql':
         from airflow.hooks.mysql_hook import MySqlHook
         return MySqlHook(mysql_conn_id=self.conn_id)
     elif self.conn_type == 'google_cloud_platform':
         from airflow.gcp.hooks.bigquery import BigQueryHook
         return BigQueryHook(bigquery_conn_id=self.conn_id)
     elif self.conn_type == 'postgres':
         from airflow.hooks.postgres_hook import PostgresHook
         return PostgresHook(postgres_conn_id=self.conn_id)
     elif self.conn_type == 'pig_cli':
         from airflow.hooks.pig_hook import PigCliHook
         return PigCliHook(pig_cli_conn_id=self.conn_id)
     elif self.conn_type == 'hive_cli':
         from airflow.hooks.hive_hooks import HiveCliHook
         return HiveCliHook(hive_cli_conn_id=self.conn_id)
     elif self.conn_type == 'presto':
         from airflow.hooks.presto_hook import PrestoHook
         return PrestoHook(presto_conn_id=self.conn_id)
     elif self.conn_type == 'hiveserver2':
         from airflow.hooks.hive_hooks import HiveServer2Hook
         return HiveServer2Hook(hiveserver2_conn_id=self.conn_id)
     elif self.conn_type == 'sqlite':
         from airflow.hooks.sqlite_hook import SqliteHook
         return SqliteHook(sqlite_conn_id=self.conn_id)
     elif self.conn_type == 'jdbc':
         from airflow.hooks.jdbc_hook import JdbcHook
         return JdbcHook(jdbc_conn_id=self.conn_id)
     elif self.conn_type == 'mssql':
         from airflow.hooks.mssql_hook import MsSqlHook
         return MsSqlHook(mssql_conn_id=self.conn_id)
     elif self.conn_type == 'oracle':
         from airflow.hooks.oracle_hook import OracleHook
         return OracleHook(oracle_conn_id=self.conn_id)
     elif self.conn_type == 'vertica':
         from airflow.contrib.hooks.vertica_hook import VerticaHook
         return VerticaHook(vertica_conn_id=self.conn_id)
     elif self.conn_type == 'cloudant':
         from airflow.contrib.hooks.cloudant_hook import CloudantHook
         return CloudantHook(cloudant_conn_id=self.conn_id)
     elif self.conn_type == 'jira':
         from airflow.contrib.hooks.jira_hook import JiraHook
         return JiraHook(jira_conn_id=self.conn_id)
     elif self.conn_type == 'redis':
         from airflow.contrib.hooks.redis_hook import RedisHook
         return RedisHook(redis_conn_id=self.conn_id)
     elif self.conn_type == 'wasb':
         from airflow.contrib.hooks.wasb_hook import WasbHook
         return WasbHook(wasb_conn_id=self.conn_id)
     elif self.conn_type == 'docker':
         from airflow.hooks.docker_hook import DockerHook
         return DockerHook(docker_conn_id=self.conn_id)
     elif self.conn_type == 'azure_data_lake':
         from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook
         return AzureDataLakeHook(azure_data_lake_conn_id=self.conn_id)
     elif self.conn_type == 'azure_cosmos':
         from airflow.contrib.hooks.azure_cosmos_hook import AzureCosmosDBHook
         return AzureCosmosDBHook(azure_cosmos_conn_id=self.conn_id)
     elif self.conn_type == 'cassandra':
         from airflow.contrib.hooks.cassandra_hook import CassandraHook
         return CassandraHook(cassandra_conn_id=self.conn_id)
     elif self.conn_type == 'mongo':
         from airflow.contrib.hooks.mongo_hook import MongoHook
         return MongoHook(conn_id=self.conn_id)
     elif self.conn_type == 'gcpcloudsql':
         from airflow.gcp.hooks.cloud_sql import CloudSqlDatabaseHook
         return CloudSqlDatabaseHook(gcp_cloudsql_conn_id=self.conn_id)
     elif self.conn_type == 'grpc':
         from airflow.contrib.hooks.grpc_hook import GrpcHook
         return GrpcHook(grpc_conn_id=self.conn_id)
     raise AirflowException("Unknown hook type {}".format(self.conn_type))
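A short usage sketch, assuming the Airflow 1.10-era layout used above and an existing connection (here sqlite_default) defined in the metadata DB or via an AIRFLOW_CONN_SQLITE_DEFAULT environment variable: fetch the Connection record and let get_hook() dispatch on its conn_type.

from airflow.hooks.base_hook import BaseHook

conn = BaseHook.get_connection('sqlite_default')  # Connection record
hook = conn.get_hook()  # a SqliteHook, since conn_type == 'sqlite'
print(hook.get_records('select 1'))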
Example 22
 def make_hook() -> Optional[BaseHook]:
     """ Makes a SqliteHook to test with. """
     return SqliteHook()
Example 23
 def execute(self, context):
     self.log.info('Executing: %s', self.sql)
     hook = SqliteHook(sqlite_conn_id=self.sqlite_conn_id)
     return hook.get_records(self.sql, parameters=self.parameters)
Example 24
 def execute(self, context):
     self.logger.info('Executing: %s', self.sql)
     hook = SqliteHook(sqlite_conn_id=self.sqlite_conn_id)
     hook.run(self.sql, parameters=self.parameters)
Example 25
def updatedb():
    # read the extract file
    import re
    import datetime
    BASE_DIR = os.path.dirname(os.path.abspath("__file__"))
    extract_file = pd.read_csv(os.path.join(BASE_DIR, "SourceData.txt"))

    lead_file_name = os.path.join(BASE_DIR, "Marketing_Lead.xlsx")
    LeadFile = pd.read_excel(lead_file_name)

    log_file_name = os.path.join(BASE_DIR, "logFile.log")
    LogFile = pd.DataFrame(columns=["ClientIP", "UserName", "Time"])
    with open(log_file_name, "r") as logfile:
        Lines = logfile.readlines()
        Linelist = []
        for line in Lines:
            line = line.strip()
            userregex = "|".join([
                r'(\"[^\d\W]+[^\S][^\d\W]+\")',
                r'(\"[^\d\W]+[^\S][^\d\W]+[^\S][^\d\W]+\")',
                r'(\"[^\d\W]+[^\S][^\d\W]+[^\S][^\d\W]+[^\S][^\d\W]+\")',
                r'(\"[^\d\W]+\.[^\S][^\d\W]+[^\S][^\d\W]+\")',
                r'(\"[^\d\W]+[^\S][^\d\W]+[^\S][^\d\W]+\.\")',
                r'(\"[^\d\W]+\.[^\S][^\d\W]+[^\S][^\d\W]+[^\S][^\d\W]+\.\")',
                r'(\"[^\d\W]+\.[^\S][^\d\W]+[^\S][^\d\W]+[^\S][^\d\W]+\")'
            ])
            matchHost = re.search(r'([(\d\.)]+)', line).group(0)
            matchUser = re.search(userregex, line).group(0)
            matchTime = re.search(r'(?P<time>\[.*?\])', line).group(0)
            print(matchHost)
            print(matchUser)
            print(matchTime)
            listitem = (matchHost, matchUser, matchTime)
            Linelist.append(listitem)
        LogfileDF = pd.DataFrame(Linelist,
                                 columns=["ClientIP", "UserName", "Time"])
        print(LogfileDF)

    LeadFileDF = LeadFile[["Id", "Company_Id"]]
    OrdersDF = extract_file[[
        "OrderId", "DocumentNo", "OrderDate", "CatalogId"
    ]]
    CustomersDF = extract_file[["DocumentNo", "FullName", "Device", "Country"]]
    ProductsDF = extract_file[["ProductId", "ProductName", "CUID.1"]]
    CatalogDF = extract_file[["CatalogId", "ProductId", "CUID"]]

    #write update logic for customers - only those that are not already existing, catalog - only those that are new
    dest = SqliteHook(sqlite_conn_id='sqlite2')
    dest_conn = dest.get_conn()
    sqlOrders = 'INSERT OR REPLACE INTO ORDERS(OrderId, DocumentNo, OrderDate, CatalogId) VALUES (?, ?, ?, ?)'
    dataOrders = OrdersDF.values
    dest_conn.executemany(sqlOrders, dataOrders)
    dest_conn.commit()

    sqlCustomers = 'INSERT OR REPLACE INTO CUSTOMERS(DocumentNo, FullName, Device, Country) VALUES (?, ?, ?, ?)'
    dataCust = CustomersDF.values
    dest_conn.executemany(sqlCustomers, dataCust)
    dest_conn.commit()

    sqlProducts = 'INSERT OR REPLACE INTO PRODUCTS(ProductId, ProductName, CUID) VALUES (?, ?, ?)'
    dataProducts = ProductsDF.values
    dest_conn.executemany(sqlProducts, dataProducts)
    dest_conn.commit()

    sqlCatalog = 'INSERT OR REPLACE INTO CATALOG(CatalogId, ProductId, CUID) VALUES (?, ?, ?)'
    dataCatalog = CatalogDF.values
    dest_conn.executemany(sqlCatalog, dataCatalog)
    dest_conn.commit()

    sqlLogs = 'INSERT OR REPLACE INTO LOGS(ClientIP, UserName, Time) VALUES (?, ?, ?)'
    dataLog = LogfileDF.values
    dest_conn.executemany(sqlLogs, dataLog)
    dest_conn.commit()

    sqlLead = 'INSERT OR REPLACE INTO LEADS(Id, CustomerId) VALUES (?, ?)'
    dataLeadFile = LeadFileDF.values
    dest_conn.executemany(sqlLead, dataLeadFile)
    dest_conn.commit()
Example 26
def test_db_hook(test_db_path, provide_airflow_cfg):
    from airflow.hooks.sqlite_hook import SqliteHook

    os.environ[
        "AIRFLOW_CONN_SQLITE"] = f"sqlite:///{urllib.parse.quote_plus(test_db_path)}"
    return SqliteHook("sqlite")