Beispiel #1
0
def top_coin_cmc():
    """Fetch the latest CoinMarketCap listings (ranks 1-5000, USD quotes)
    and replace the ``cmc_listings`` table with the result.

    Returns:
        dict: the parsed API response on success; ``None`` when the request
        fails with a connection-level error (the error is printed).
    """
    url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
    parameters = {'start': '1', 'limit': '5000', 'convert': 'USD'}
    headers = {
        'Accepts': 'application/json',
        'X-CMC_PRO_API_KEY': coin_market_cap_keys['api_key'],
    }
    session = Session()
    session.headers.update(headers)
    try:
        response = session.get(url, params=parameters)
        # response.json() parses directly and honors the response encoding;
        # no need for json.loads(response.text).
        data = response.json()
        data_df = pd.DataFrame(data['data'])
        # 'quote' and 'platform' are nested objects in the API payload, so
        # store them as JSON columns rather than stringified dicts.
        pg_db.df_to_db(
            data_df,
            name='cmc_listings',
            if_exists='replace',
            index=False,
            dtype={
                'quote': sqlalchemy.types.JSON,
                'platform': sqlalchemy.types.JSON,
            },
        )

        return data
    except (ConnectionError, Timeout, TooManyRedirects) as e:
        print(e)
Beispiel #2
0
def save_files_to_db():
    """Load every ``*-1d-data.zip`` CSV in the sibling ``data`` directory
    into the ``binance`` table.

    The first file replaces the table; subsequent files append. A progress
    mail is sent every 1000 files.
    """
    # Use pathlib's '/' operator instead of an f-string path.
    data_dir = Path(__file__).parent.absolute() / 'data'
    for idx, file in enumerate(data_dir.glob('*-1d-data.zip'), start=1):
        # Files are named '<SYMBOL>-1d-data.zip'; the first dash-separated
        # token of the stem is the trading symbol. glob() already yields
        # Path objects, so no re-wrapping is needed.
        symbol = file.stem.split('-')[0]
        df = pd.read_csv(file, parse_dates=['timestamp'], index_col='timestamp')
        df['interval'] = '1d'
        df['symbol'] = symbol
        # Rebuild the table with the first file, then append the rest.
        if_exists = 'replace' if idx == 1 else 'append'
        pg_db.df_to_db(df, name='binance', if_exists=if_exists, index=True)
        msg = f'finished {idx} symbols'
        print(msg)
        if idx % 1000 == 0:
            send_eri_mail('*****@*****.**', msg, 'files 2 db progress')
Beispiel #3
0
def get_all(symbols):
    """Download two years of daily history for *symbols* and store it in the
    ``temp_yahoo_stock_data`` table.

    ``Ticker(...).history`` either returns one combined DataFrame (written in
    a single replace), or — presumably when some symbols fail — a mapping of
    symbol -> DataFrame, which is accumulated and flushed in batches of 100.
    """
    print(len(symbols))
    all_tickers = Ticker(symbols)
    df2 = all_tickers.history(period='2y', interval='1d')

    if isinstance(df2, pd.core.frame.DataFrame) and not df2.empty:
        # Single combined frame: one atomic replace of the temp table.
        pg_db.df_to_db(df2.reset_index()[DB_COLUMNS],
                       name='temp_yahoo_stock_data',
                       if_exists='replace',
                       index=False)
    else:
        batch_df = pd.DataFrame()
        first_flush = True
        for idx, (symbol, dataframe) in enumerate(df2.items(), start=1):
            print(symbol)
            if isinstance(dataframe,
                          pd.core.frame.DataFrame) and not dataframe.empty:
                print(dataframe.head())
                dataframe['symbol'] = symbol
                dataframe.reset_index(inplace=True)
                dataframe.rename(columns={'index': 'date'}, inplace=True)
                batch_df = pd.concat([batch_df, dataframe])

            # Flush every 100 symbols and once more at the end of the run.
            if (idx % 100 == 0 or idx == len(df2)) and len(batch_df):
                # BUG FIX: replace on the *first* flush (the original tested
                # idx == 100, which never replaces when fewer than 100
                # symbols arrive and would append onto a stale table).
                if_exists = 'replace' if first_flush else 'append'
                try:
                    if 'splits' not in batch_df.columns:
                        # Keep the column set aligned with DB_COLUMNS even
                        # when no split events occurred in this batch.
                        batch_df['splits'] = None

                    pg_db.df_to_db(batch_df[DB_COLUMNS],
                                   name='temp_yahoo_stock_data',
                                   if_exists=if_exists,
                                   index=False)
                    # BUG FIX: the original never cleared the accumulator,
                    # so each batch re-appended all previously flushed rows
                    # (duplicates). On failure the batch is retained and
                    # retried with the next flush.
                    batch_df = pd.DataFrame()
                    first_flush = False
                except AttributeError:
                    print(batch_df)

            print(f'completed {symbol}, {idx} of {len(df2)}')
Beispiel #4
0
def get_all_binance(symbol, kline_size, save=False, save_to_db=False):
    """Bring the local cache of Binance klines for *symbol*/*kline_size* up
    to date and return the combined data.

    Reads any cached CSV from ``data/<symbol>-<kline_size>-data.zip``,
    downloads every candle between the last cached timestamp (or 1 Jan 2017
    when no cache exists) and now, and returns the merged DataFrame indexed
    by ``timestamp``.

    Args:
        symbol: trading pair, e.g. ``'BTCUSDT'``.
        kline_size: Binance interval string; must be a key of ``binsizes``.
        save: when True, write the merged data back to the zipped CSV.
        save_to_db: when True and a cache already existed, append only the
            newly downloaded rows to the ``binance`` table.

    Returns:
        pd.DataFrame: all cached plus newly downloaded candles.
    """
    filename = f'data/{symbol}-{kline_size}-data.zip'
    if os.path.isfile(filename):
        data_df = pd.read_csv(filename)
    else:
        data_df = pd.DataFrame()
    oldest_point, newest_point = minutes_of_new_data(symbol, kline_size, data_df, source="binance")
    delta_min = (newest_point - oldest_point).total_seconds() / 60
    available_data = math.ceil(delta_min / binsizes[kline_size])
    if oldest_point == datetime.strptime('1 Jan 2017', '%d %b %Y'):
        print(f'Downloading all available {kline_size} data for {symbol}. Be patient..!')
    else:
        print(
            f'Downloading {delta_min: f} minutes of new data available for {symbol}, i.e. {available_data:d} instances of {kline_size} data.')
    klines = binance_client.get_historical_klines(symbol,
                                                  kline_size,
                                                  oldest_point.strftime("%d %b %Y %H:%M:%S"),
                                                  newest_point.strftime("%d %b %Y %H:%M:%S"))
    data = pd.DataFrame(klines,
                        columns=['timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_av',
                                 'trades', 'tb_base_av', 'tb_quote_av', 'ignore'])
    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms')

    # temp_df holds only the newly downloaded rows (None on a fresh cache).
    temp_df = None
    if len(data_df) > 0:
        temp_df = pd.DataFrame(data)
        # BUG FIX: DataFrame.append was deprecated in pandas 1.4 and removed
        # in 2.0; pd.concat is the supported equivalent.
        data_df = pd.concat([data_df, temp_df])
    else:
        data_df = data
    data_df.set_index('timestamp', inplace=True)
    if save:
        archive_name = f'{symbol}-{kline_size}-data.csv'
        compression_options = dict(method='zip', archive_name=archive_name)
        data_df.to_csv(filename, compression=compression_options)
    if save_to_db and temp_df is not None:
        # Only the fresh rows go to the DB; cached rows are assumed present.
        temp_df.set_index('timestamp', inplace=True)
        temp_df['interval'] = kline_size
        temp_df['symbol'] = symbol
        # NOTE(review): index=False drops the timestamp index from the DB
        # rows, while save_files_to_db writes index=True — confirm intended.
        pg_db.df_to_db(temp_df, name='binance', if_exists='append', index=False)

    print('All caught up..!')
    return data_df
Beispiel #5
0
def update_data_db_symbol(symbol):
    """Fetch any missing daily candles for *symbol* and append them to the
    ``stocks_finn_hub`` table.

    Resumes one day after the newest stored candle (or one year back when
    the symbol has no rows yet); skips the download entirely when the data
    is already fresh enough.
    """
    one_day = 1 * 24 * 60 * 60

    # Newest timestamp already stored for this symbol.
    sql = f"SELECT Max(t) as max_date FROM stocks_finn_hub WHERE symbol='{symbol}'"
    df_last = pg_db.query_df(sql)

    # Mondays tolerate a 3-day gap (the weekend), other days 1 day.
    min_delta_days = (3 * 24 * 60 * 60) if today.day_of_week == 1 else one_day

    start = one_year_ago_u
    if len(df_last) and df_last.loc[0, 'max_date'] is not None:
        # Resume the day after the last stored candle.
        start = df_last.loc[0, 'max_date'] + one_day

    if today_u - start <= min_delta_days:
        return  # already up to date

    candles_df = get_stock_data(symbol, start, today_u)
    if candles_df is not None and len(candles_df):
        pg_db.df_to_db(candles_df, name='stocks_finn_hub', if_exists='append', index=False)
Beispiel #6
0
def apply_the_filters(start_row=0,
                      use_forecast=False,
                      pc_higher_sp=10,
                      min_to_52w_low=1.3,
                      min_52w_high=0.75):
    """Screen symbols with three technical filters and persist the results.

    Per symbol (computed from the last 200 daily closes in stocks_finn_hub):
      1. close > 50-day SMA > 150-day SMA > 200-day SMA
      2. close / 52-week-low > min_to_52w_low AND
         close / 52-week-high > min_52w_high
      3. every price-relative-to-S&P500 metric (13/26/4 week) >= pc_higher_sp

    Args:
        start_row: skip this many rows of the symbol frame before screening.
        use_forecast: screen the growth_stocks forecast CSV instead of the
            full symbol list from get_symbols().
        pc_higher_sp: minimum relative-to-S&P500 performance for filter 3.
        min_to_52w_low: minimum ratio of current price to 52-week low.
        min_52w_high: minimum ratio of current price to 52-week high.

    Side effects: writes condition/SMA columns into the symbol frame, dumps
    per-symbol financials to gzipped JSON, writes intermediate and final CSVs,
    and replaces the ``biz_fin`` table with the merged result.
    """
    global df_symbol
    # %% 0 get all symbols
    df_symbols = get_symbols()
    df_forecast = pd.read_csv(
        '/home2/eproject/veehuen/python/algo102/fbprophet/growth_stocks.csv')
    # %%
    df = df_forecast if use_forecast else df_symbols
    df_metric_list = []
    # Pre-create the result columns so every row has them even when skipped.
    df['condition_1'] = False
    df['condition_2'] = False
    df['condition_3'] = False
    df['sma_50'] = None
    df['sma_150'] = None
    df['sma_200'] = None
    # df['52WeekHigh'] = None
    # df['52WeekLow'] = None
    # df['priceRelativeToS&P50013Week'] = None
    # df['priceRelativeToS&P50026Week'] = None
    # df['priceRelativeToS&P5004Week'] = None
    # %% 1 get current price per symbol
    filtered_symbol = []
    for i, row in df[start_row:].iterrows():
        symbol = row['symbol']
        try:
            conditions = [False] * 3
            # if i % 1500 == 0 and i > 0:
            #     break
            print(i, symbol, datetime.now())
            # Last 200 daily candles, newest first (re-sorted ascending below).
            sql = f"SELECT * FROM stocks_finn_hub WHERE symbol='{symbol}' order by DATE DESC LIMIT 200"

            df_symbol = pg_db.query_df(sql)

            # skip if data less than 200 samples
            if len(df_symbol) < 200:
                continue

            df_symbol.index = df_symbol['date'].dt.date
            df_symbol.sort_index(inplace=True)

            # Rolling means over the close column 'c'; only the last row
            # (most recent day) of each is used.
            df_symbol[f'sma_50'] = df_symbol['c'].rolling(50).mean()
            df_symbol['sma_150'] = df_symbol['c'].rolling(150).mean()
            df_symbol['sma_200'] = df_symbol['c'].rolling(200).mean()
            current = df_symbol.iloc[-1]['c']
            sma_50 = df_symbol.iloc[-1]['sma_50']
            sma_150 = df_symbol.iloc[-1]['sma_150']
            sma_200 = df_symbol.iloc[-1]['sma_200']

            df.loc[i, 'sma_50'] = sma_50
            df.loc[i, 'sma_150'] = sma_150
            df.loc[i, 'sma_200'] = sma_200

        except Exception as e:
            print(symbol, e)
            continue

        # Filter 1: price above all three SMAs, SMAs in ascending-trend order.
        try:
            if current > sma_50 > sma_150 > sma_200:
                conditions[0] = True
                df.loc[i, 'condition_1'] = True
        except TypeError:
            continue

        try:
            # Fetch price metrics from the data provider and archive them.
            bs = get_basic_financials(symbol, 'price')
            data_folder = '/home2/eproject/vee-h-phan.com/algo102/data_providers/finnhub/data'
            dict_to_json_zipped(bs, f'{data_folder}/bs_{symbol}.json.gzip')

        except Exception as e:
            send_eri_mail('*****@*****.**', e.__repr__(),
                          'finhubb error: bs')
            continue

        # Throttle roughly every third symbol to respect the API rate limit.
        if i % 3 == 0:
            sleep_time = randint(3, 8)
            print(i, f'sleeping {sleep_time} seconds')
            time.sleep(sleep_time)

        high_52_week = bs.get('metric').get('52WeekHigh')
        low_52_week = bs.get('metric').get('52WeekLow')

        # Filter 2: well above the 52-week low and near the 52-week high.
        # TypeError covers missing (None) metrics.
        try:
            if current / low_52_week > min_to_52w_low and current / high_52_week > min_52w_high:
                conditions[1] = True
                df.loc[i, 'condition_2'] = True
        except TypeError:
            continue

        price_relative_to_SP500 = [
            bs.get('metric').get('priceRelativeToS&P50013Week'),
            bs.get('metric').get('priceRelativeToS&P50026Week'),
            bs.get('metric').get('priceRelativeToS&P5004Week'),
            # bs.get('metric').get('priceRelativeToS&P50052Week'),
        ]

        # save metric to df
        # One-row frame indexed by i so the final axis=1 concat aligns rows.
        row_metric = pd.DataFrame.from_dict(bs.get('metric'), orient='index').T
        row_metric.index = [i]
        df_metric_list.append(row_metric)

        # Filter 3: outperformed the S&P 500 on all listed horizons.
        try:
            if all(i >= pc_higher_sp for i in price_relative_to_SP500):
                conditions[2] = True
                df.loc[i, 'condition_3'] = True
        except TypeError:
            continue

        if all(conditions):
            filtered_symbol.append(symbol)
            print(filtered_symbol)
            print(df.loc[i])

        # Periodic checkpoint CSV and progress mail.
        if i % 1000 == 0 and i > 0:
            df.to_csv(
                f'/home2/eproject/vee-h-phan.com/algo102/data_providers/finnhub/data/growth_stocks_filtered_{i}.csv',
                index=False)
        if i % 100 == 0 and i > 0:
            send_eri_mail('*****@*****.**', f'processed {i} symbols',
                          'finhubb progress: bs')

    # Merge the collected per-symbol metrics alongside the condition columns
    # (aligned on the shared integer index) and persist everything.
    df_metric = pd.concat(df_metric_list)
    final_df_filtered = pd.concat([df, df_metric], axis=1)
    final_df_filtered.to_csv(
        '/home2/eproject/vee-h-phan.com/algo102/data_providers/finnhub/data/growth_stocks_filtered.csv',
        index=False)
    pg_db.df_to_db(final_df_filtered,
                   name='biz_fin',
                   if_exists='replace',
                   index=False)
Beispiel #7
0
def update_data_db():
    """Refresh the ``stocks_finn_hub`` candle table for every known symbol.

    For each symbol, looks up the newest stored timestamp; when it is older
    than the allowed gap (3 days after a weekend Monday, 1 day otherwise)
    the missing candles are fetched and appended. Sleeps after every 5th
    download to respect the API rate limit, emails progress every 1000
    symbols, and finally stamps the job's last-updated time in ``mr_meta``.
    """
    # get symbols
    stocks_list = get_symbols()
    stocks_df = pd.DataFrame(stocks_list)
    j = 0  # count of symbols that actually triggered a download
    last_slept_at = -1  # guards against sleeping twice at the same j

    for i, symbol in enumerate(stocks_df['symbol']):

        # get last date of symbol in database
        sql_ = f"SELECT Max(t) as max_date FROM stocks_finn_hub WHERE symbol='{symbol}'"
        df_last = pg_db.query_df(sql_)

        # only get data if last day is more than 1 day before today;
        # Mondays allow a 3-day gap to cover the weekend
        start = one_year_ago_u
        if today.day_of_week == 1:
            min_delta_days = 3 * 24 * 60 * 60
        else:
            min_delta_days = 1 * 24 * 60 * 60

        if len(df_last):
            if df_last.loc[0, 'max_date'] is not None:
                last_day_in_db = df_last.loc[0, 'max_date']
                # resume one day after the newest stored candle
                start = last_day_in_db + 1 * 24 * 60 * 60

        # delay => to not break API Limit
        if today_u - start > min_delta_days:
            candles_df = get_stock_data(symbol, start, tomorrow_u)
            j += 1
            print(f"j={j}")

            if j % 5 == 0 and j > 0 and j != last_slept_at:
                print('sleeping for 10 seconds...')
                time.sleep(10)
                last_slept_at = j

            if candles_df is not None and len(candles_df):
                try:
                    pg_db.df_to_db(candles_df, name='stocks_finn_hub', if_exists='append', index=False)
                except Exception as e:
                    print(e)
                    # BUG FIX: was f'csv/{symbol.csv}', which raises
                    # AttributeError on the str `symbol` and loses the
                    # fallback CSV dump entirely.
                    candles_df.to_csv(f'csv/{symbol}.csv')

                print(f'finished {i} {symbol}')
                continue
        else:
            print(f'skipping {symbol}')

        if i % 1000 == 0 and i > 0:
            msg = f"<p>completed {i} stocks....</p>"
            send_eri_mail('*****@*****.**', message_=msg, subject='finhubb data progress', message_type='html')
    # update meta
    sql_ = f"""UPDATE eproject_fx.public.mr_meta SET last_updated = '{pendulum.now(tz='UTC').strftime('%Y-%m-%d %H:%M UTC')}' WHERE job_name='update stock data'"""
    pg_db.query(sql_)