def get_symbol_db(min_market_cap=100):
    """Return the distinct symbols whose market capitalization exceeds a threshold.

    :param min_market_cap: minimum market capitalization (same units as
        biz_fin."marketCapitalization"); coerced to float before being
        interpolated into the query.
    :return: DataFrame with a single ``symbol`` column (one row per symbol).
    """
    # The query is built by string interpolation, so coerce to a real number
    # first: a non-numeric value raises ValueError/TypeError here instead of
    # being spliced into the SQL text (injection guard).
    min_market_cap = float(min_market_cap)
    sql = f"""
        SELECT symbol
        FROM eproject_fx.public.biz_fin
        WHERE CAST(biz_fin."marketCapitalization" as float)>{min_market_cap}
        GROUP BY symbol;
    """
    df = pg_db.query_df(sql)
    return df
def get_stock_data_db(symbol, num_months_ago=1):
    """Fetch stored candles for one symbol over the trailing N months, oldest first.

    :param symbol: ticker symbol; single quotes are doubled so the value cannot
        terminate the SQL string literal (injection guard).
    :param num_months_ago: lookback window in months; coerced to int before
        being used in the interval literal.
    :return: DataFrame of ``stocks_finn_hub`` rows ordered by date ascending.
    """
    # Both values are interpolated into the SQL text, so sanitize them first.
    safe_symbol = str(symbol).replace("'", "''")
    num_months_ago = int(num_months_ago)
    sql = f"""
        SELECT *
        FROM stocks_finn_hub
        WHERE symbol='{safe_symbol}'
          and date>'now'::timestamp - '{num_months_ago} month'::interval
        order by date;
    """
    df = pg_db.query_df(sql)
    return df
def get_breakout_symbols_db(window_percentage=2, window_length=15, min_market_cap=100):
    """
    Find symbols whose latest close breaks above a recent tight ("consolidating") range.

    :param window_percentage: max spread (percent) allowed between min and max
        close over the lookback window for it to count as consolidation
    :param window_length: number of prior candles (excluding the latest one)
        that make up the consolidation window
    :param min_market_cap: currently UNUSED — neither query filters on market
        cap; TODO(review): confirm whether sql2's biz_fin join should apply it
    :return: DataFrame of rows 'breaking' out of 'consolidation' on the latest
        candle (latest close above the window max), with biz_fin columns merged in.
        NOTE(review): despite the original doc saying "list of symbols", a
        DataFrame is returned.
    """
    # Per-symbol min/max close over candles ranked 2..window_length+1 by date
    # descending, i.e. the window_length candles *before* the most recent one.
    # nullif() avoids division by zero when max(c) is 0.
    sql1 = f"""
        SELECT r.symbol, max(r.c), min(r.c), min(r.c) / nullif( max(r.c), 0) as min_max_ratio
        FROM (SELECT stocks_finn_hub.*, rank() OVER ( PARTITION BY symbol ORDER BY date DESC) FROM stocks_finn_hub) r
        WHERE RANK <= {window_length + 1} AND RANK>1
        GROUP BY r.symbol;
    """
    df1 = pg_db.query_df(sql1)
    # Keep only symbols whose window is tight: min/max close ratio within
    # window_percentage percent (e.g. 2 -> ratio >= 0.98).
    df1 = df1[df1['min_max_ratio'] >= (1 - window_percentage / 100)]
    # Latest candle (RANK = 1) per symbol, joined to its biz_fin fundamentals.
    sql2 = f"""
        SELECT r.c, biz_fin.*
        FROM (SELECT stocks_finn_hub.*, rank() OVER ( PARTITION BY symbol ORDER BY date DESC) FROM stocks_finn_hub) r
        INNER JOIN biz_fin USING (symbol)
        WHERE RANK = 1
    """
    df2 = pg_db.query_df(sql2)
    df3 = pd.merge(df1, df2, on='symbol')
    # Breakout condition: latest close strictly above the window's max close
    # ('max' is the unnamed max(r.c) column from sql1).
    result_df = df3[(df3['c'] > df3['max'])]
    return result_df
def update_data_db_symbol(symbol):
    """Append any missing recent candles for `symbol` to stocks_finn_hub.

    Fetches from the day after the last stored timestamp (or from
    one_year_ago_u when the symbol has no rows yet) up to today_u, but only
    when the gap is large enough to contain new trading data.
    """
    one_day = 24 * 60 * 60  # seconds
    # Mondays need a 3-day gap (the weekend produces no candles), else 1 day.
    # NOTE(review): assumes pendulum's Monday == 1 — confirm library version.
    required_gap = (3 if today.day_of_week == 1 else 1) * one_day
    # Latest stored unix timestamp for this symbol, if any.
    last_df = pg_db.query_df(f"SELECT Max(t) as max_date FROM stocks_finn_hub WHERE symbol='{symbol}'")
    fetch_from = one_year_ago_u
    if len(last_df) and last_df.loc[0, 'max_date'] is not None:
        fetch_from = last_df.loc[0, 'max_date'] + one_day
    if today_u - fetch_from <= required_gap:
        return
    candles = get_stock_data(symbol, fetch_from, today_u)
    if candles is not None and len(candles):
        pg_db.df_to_db(candles, name='stocks_finn_hub', if_exists='append', index=False)
def apply_the_filters(start_row=0, use_forecast=False, pc_higher_sp=10, min_to_52w_low=1.3, min_52w_high=0.75):
    """Screen symbols against three trend/strength conditions and persist results.

    For every symbol (from get_symbols() or, when use_forecast, a forecast CSV):
      condition_1: price above SMA50 > SMA150 > SMA200 (uptrend stack)
      condition_2: price sufficiently above its 52-week low AND near its 52-week high
      condition_3: all price-relative-to-S&P500 metrics >= pc_higher_sp
    Writes per-symbol SMA columns and condition flags into the frame, snapshots
    progress CSVs, mails progress/errors, and finally replaces the biz_fin table.

    :param start_row: positional row offset into the symbol frame to resume from
    :param use_forecast: use the fbprophet growth_stocks.csv instead of get_symbols()
    :param pc_higher_sp: min value for each priceRelativeToS&P500* metric
    :param min_to_52w_low: min ratio of current price to the 52-week low
    :param min_52w_high: min ratio of current price to the 52-week high
    """
    # df_symbol holds the per-symbol candle frame; kept global (side effect —
    # presumably for interactive inspection after a run; confirm with author).
    global df_symbol
    # %% 0 get all symbols
    df_symbols = get_symbols()
    df_forecast = pd.read_csv(
        '/home2/eproject/veehuen/python/algo102/fbprophet/growth_stocks.csv')
    # %%
    df = df_forecast if use_forecast else df_symbols
    df_metric_list = []  # one single-row metric frame per processed symbol
    df['condition_1'] = False
    df['condition_2'] = False
    df['condition_3'] = False
    df['sma_50'] = None
    df['sma_150'] = None
    df['sma_200'] = None
    # df['52WeekHigh'] = None
    # df['52WeekLow'] = None
    # df['priceRelativeToS&P50013Week'] = None
    # df['priceRelativeToS&P50026Week'] = None
    # df['priceRelativeToS&P5004Week'] = None
    # %% 1 get current price per symbol
    filtered_symbol = []
    for i, row in df[start_row:].iterrows():
        symbol = row['symbol']
        try:
            conditions = [False] * 3
            # if i % 1500 == 0 and i > 0:
            #     break
            print(i, symbol, datetime.now())
            # NOTE(review): symbol is interpolated unescaped into the SQL text.
            sql = f"SELECT * FROM stocks_finn_hub WHERE symbol='{symbol}' order by DATE DESC LIMIT 200"
            df_symbol = pg_db.query_df(sql)
            # skip if data less than 200 samples (SMA200 needs a full window)
            if len(df_symbol) < 200:
                continue
            # Rows arrive newest-first; reindex by calendar date and sort
            # ascending so rolling windows and iloc[-1] refer to the latest day.
            df_symbol.index = df_symbol['date'].dt.date
            df_symbol.sort_index(inplace=True)
            df_symbol[f'sma_50'] = df_symbol['c'].rolling(50).mean()
            df_symbol['sma_150'] = df_symbol['c'].rolling(150).mean()
            df_symbol['sma_200'] = df_symbol['c'].rolling(200).mean()
            current = df_symbol.iloc[-1]['c']
            sma_50 = df_symbol.iloc[-1]['sma_50']
            sma_150 = df_symbol.iloc[-1]['sma_150']
            sma_200 = df_symbol.iloc[-1]['sma_200']
            df.loc[i, 'sma_50'] = sma_50
            df.loc[i, 'sma_150'] = sma_150
            df.loc[i, 'sma_200'] = sma_200
        except Exception as e:
            # Best-effort screening: log and move to the next symbol.
            print(symbol, e)
            continue
        # Condition 1: ascending SMA stack below the current price.
        try:
            if current > sma_50 > sma_150 > sma_200:
                conditions[0] = True
                df.loc[i, 'condition_1'] = True
        except TypeError:
            # NaN/None SMA comparison — not enough data; skip symbol.
            continue
        # Fetch fundamentals and archive the raw payload as gzipped JSON.
        try:
            bs = get_basic_financials(symbol, 'price')
            data_folder = '/home2/eproject/vee-h-phan.com/algo102/data_providers/finnhub/data'
            dict_to_json_zipped(bs, f'{data_folder}/bs_{symbol}.json.gzip')
        except Exception as e:
            send_eri_mail('*****@*****.**', e.__repr__(), 'finhubb error: bs')
            continue
        # Throttle the external API: pause briefly every third row index.
        if i % 3 == 0:
            sleep_time = randint(3, 8)
            print(i, f'sleeping {sleep_time} seconds')
            time.sleep(sleep_time)
        high_52_week = bs.get('metric').get('52WeekHigh')
        low_52_week = bs.get('metric').get('52WeekLow')
        # Condition 2: well off the 52-week low AND close to the 52-week high.
        try:
            if current / low_52_week > min_to_52w_low and current / high_52_week > min_52w_high:
                conditions[1] = True
                df.loc[i, 'condition_2'] = True
        except TypeError:
            # Missing (None) 52-week metric; skip symbol.
            continue
        price_relative_to_SP500 = [
            bs.get('metric').get('priceRelativeToS&P50013Week'),
            bs.get('metric').get('priceRelativeToS&P50026Week'),
            bs.get('metric').get('priceRelativeToS&P5004Week'),
            # bs.get('metric').get('priceRelativeToS&P50052Week'),
        ]
        # save metric to df
        row_metric = pd.DataFrame.from_dict(bs.get('metric'), orient='index').T
        row_metric.index = [i]
        df_metric_list.append(row_metric)
        # Condition 3: outperforming the S&P 500 on every horizon checked.
        # (The genexp's `i` is scoped to the genexp and does not clobber the row index.)
        try:
            if all(i >= pc_higher_sp for i in price_relative_to_SP500):
                conditions[2] = True
                df.loc[i, 'condition_3'] = True
        except TypeError:
            continue
        if all(conditions):
            filtered_symbol.append(symbol)
            print(filtered_symbol)
            print(df.loc[i])
        # Periodic checkpoint CSV and progress mail.
        if i % 1000 == 0 and i > 0:
            df.to_csv(
                f'/home2/eproject/vee-h-phan.com/algo102/data_providers/finnhub/data/growth_stocks_filtered_{i}.csv',
                index=False)
        if i % 100 == 0 and i > 0:
            send_eri_mail('*****@*****.**', f'processed {i} symbols', 'finhubb progress: bs')
    # Combine all per-symbol metric rows with the flags frame, persist to CSV,
    # then replace the biz_fin table wholesale.
    # NOTE(review): pd.concat raises if df_metric_list is empty (no symbol processed).
    df_metric = pd.concat(df_metric_list)
    final_df_filtered = pd.concat([df, df_metric], axis=1)
    final_df_filtered.to_csv(
        '/home2/eproject/vee-h-phan.com/algo102/data_providers/finnhub/data/growth_stocks_filtered.csv',
        index=False)
    pg_db.df_to_db(final_df_filtered, name='biz_fin', if_exists='replace', index=False)
def update_data_db():
    """Incrementally refresh stocks_finn_hub candles for every known symbol.

    For each symbol, reads the latest stored timestamp and fetches candles
    from the day after it (or from one_year_ago_u when none exist) up to
    tomorrow_u, appending rows to the DB. Sleeps periodically to respect the
    API rate limit, mails progress, and finally stamps the mr_meta job row.
    """
    # get symbols
    stocks_list = get_symbols()
    stocks_df = pd.DataFrame(stocks_list)
    j = 0               # count of symbols actually fetched (drives rate-limit sleeps)
    last_slept_at = -1  # guards against sleeping twice for the same j
    for i, symbol in enumerate(stocks_df['symbol']):
        # Latest stored unix timestamp for this symbol, if any.
        sql_ = f"SELECT Max(t) as max_date FROM stocks_finn_hub WHERE symbol='{symbol}'"
        df_last = pg_db.query_df(sql_)
        # Only fetch when the gap since the last stored candle is big enough:
        # 3 days on Monday (weekend has no candles), otherwise 1 day.
        # NOTE(review): assumes pendulum's Monday == 1 — confirm library version.
        start = one_year_ago_u
        if today.day_of_week == 1:
            min_delta_days = 3 * 24 * 60 * 60
        else:
            min_delta_days = 1 * 24 * 60 * 60
        if len(df_last):
            if df_last.loc[0, 'max_date'] is not None:
                last_day_in_db = df_last.loc[0, 'max_date']
                start = last_day_in_db + 1 * 24 * 60 * 60
        if today_u - start > min_delta_days:
            candles_df = get_stock_data(symbol, start, tomorrow_u)
            j += 1
            print(f"j={j}")
            # Pause every 5 fetches to stay under the API rate limit.
            if j % 5 == 0 and j > 0 and j != last_slept_at:
                # BUGFIX: message said "5 seconds" while sleeping 10.
                print('sleeping for 10 seconds...')
                time.sleep(10)
                last_slept_at = j
            if candles_df is not None and len(candles_df):
                try:
                    pg_db.df_to_db(candles_df, name='stocks_finn_hub', if_exists='append', index=False)
                except Exception as e:
                    print(e)
                    # BUGFIX: was f'csv/{symbol.csv}', which raises
                    # AttributeError on a str; dump failed batch to csv/<symbol>.csv.
                    candles_df.to_csv(f'csv/{symbol}.csv')
            print(f'finished {i} {symbol}')
            # NOTE(review): this continue skips the progress mail below for
            # fetched symbols — preserved as-is; confirm if intentional.
            continue
        else:
            print(f'skipping {symbol}')
        if i % 1000 == 0 and i > 0:
            msg = f"<p>completed {i} stocks....</p>"
            send_eri_mail('*****@*****.**', message_=msg,
                          subject='finhubb data progress', message_type='html')
    # update meta: record the completion time for this job.
    sql_ = f"""UPDATE eproject_fx.public.mr_meta SET last_updated = '{pendulum.now(tz='UTC').strftime('%Y-%m-%d %H:%M UTC')}' WHERE job_name='update stock data'"""
    pg_db.query(sql_)