def generate_search_terms(**kwargs):
    """Pick the id of a twitter-search task at random.

    Reads every row of the ``twitter_terms`` table and builds task ids of
    the form ``search_<term>_twitter`` (non-word characters stripped from
    the term), then returns one of them chosen at random.
    """
    conn = SqliteHook('twitter_sqlite').get_conn()
    terms = pd.read_sql_query("select * from twitter_terms", conn)
    task_ids = [
        'search_{}_twitter'.format(re.sub(r'\W+', '', term))
        for term in terms.search_term.values
    ]
    return random.choice(task_ids)
def fill_terms(my_terms=SEARCH_TERMS, **kwargs):
    """Seed the sqlite database with a few search terms.

    Best-effort: if the ``twitter_terms`` table already exists,
    ``to_sql`` raises ValueError and the seed is deliberately skipped.
    """
    hook = SqliteHook('twitter_sqlite')
    frame = pd.DataFrame(my_terms, columns=['search_term'])
    try:
        frame.to_sql('twitter_terms', hook.get_conn())
    except ValueError:
        # table already exists
        pass
def generate_search_terms(**kwargs):
    """Return a randomly selected twitter-search task id.

    Loads the stored search terms from the ``twitter_terms`` table,
    converts each to a ``search_<term>_twitter`` task id (stripping any
    non-word characters), and picks one at random.
    """
    sqlite_hook = SqliteHook('twitter_sqlite')
    df = pd.read_sql_query("select * from twitter_terms",
                           sqlite_hook.get_conn())
    choices = []
    for term in df.search_term.values:
        cleaned = re.sub(r'\W+', '', term)
        choices.append('search_{}_twitter'.format(cleaned))
    return random.choice(choices)
def csv_to_sqlite(directory=RAW_TWEET_DIR, **kwargs):
    """Very basic csv to sqlite pipeline using pandas.

    Appends every not-yet-ingested ``*.csv`` file in *directory* to the
    ``tweets`` table, then renames the file with a ``_read`` suffix so it
    is not ingested twice on a later run.

    params:
        directory: str (file path to csv files)
    """
    sqlite = SqliteHook('twitter_sqlite')
    conn = sqlite.get_conn()
    for fname in glob.glob('{}/*.csv'.format(directory)):
        if '_read' in fname:
            # already ingested on a previous run
            continue
        try:
            df = pd.read_csv(fname)
            df.to_sql('tweets', conn, if_exists='append', index=False)
            shutil.move(fname, fname.replace('.csv', '_read.csv'))
        # FIX: pd.io.common.EmptyDataError no longer exists in modern
        # pandas; pandas.errors is the public, stable location.
        except pd.errors.EmptyDataError:
            # probably an io error with another task / open file
            continue
def get_sql_hook(self, sql_conn_id):
    """Return the database hook matching *sql_conn_id*.

    Connection ids containing ``sqlite`` get a SqliteHook; every other
    id falls back to a MySqlHook.
    """
    if 'sqlite' not in sql_conn_id:
        from airflow.hooks import MySqlHook
        return MySqlHook(sql_conn_id)
    from airflow.hooks import SqliteHook
    return SqliteHook(sql_conn_id)
def identify_popular_links(directory=RAW_TWEET_DIR, write_mode='w', **kwargs):
    """Identify the most popular links from the last day of tweets in the db.

    Writes the top five (url, count) pairs to latest_links.txt in the
    RAW_TWEET_DIR (or the ``directory`` kwarg).
    """
    conn = SqliteHook('twitter_sqlite').get_conn()
    query = """select * from tweets where created > date('now', '-1 days') and urls is not null order by favorite_count"""
    tweets = pd.read_sql_query(query, conn)
    # urls column holds stringified Python lists; parse them back.
    tweets.urls = tweets.urls.map(ast.literal_eval)
    link_counts = Counter(itertools.chain.from_iterable(tweets.urls.values))
    out_path = '{}/latest_links.txt'.format(directory)
    with open(out_path, write_mode) as latest:
        csv_out = writer(latest)
        csv_out.writerow(['url', 'count'])
        csv_out.writerows(link_counts.most_common(5))
def execute(self, context):
    """Run ``self.sql`` against the configured sqlite connection.

    Uses lazy %-style logging instead of string concatenation: output is
    identical for a string, and it no longer raises TypeError when
    ``self.sql`` is a list of statements (which ``hook.run`` accepts).
    """
    logging.info('Executing: %s', self.sql)
    hook = SqliteHook(sqlite_conn_id=self.sqlite_conn_id)
    hook.run(self.sql, parameters=self.parameters)