def analyze_hist_candles(df, bull=True, bear=False):
    """Filter historical prices and run the candlestick pattern printers.

    Rows are kept when ``fClose`` is above 1.5, ``fVolume`` is above
    500,000 and the symbol has type ``'cs'`` in ``serverAPI('all_symbols')``.
    The filtered frame gains an ``fRange`` column (``fHigh - fLow`` rounded
    to 3 decimals) and a ``sym`` column duplicating ``symbol``; it is then
    sorted by symbol and date, ascending.

    ``bu_prints`` is called when ``bull`` is truthy and ``be_prints`` when
    ``bear`` is truthy, each with a fresh symbol-indexed copy of the frame.

    Returns the filtered, sorted frame.
    """
    # Symbol reference from the server, reduced to common stocks
    symbol_ref = serverAPI('all_symbols').df
    is_common = symbol_ref['type'] == 'cs'
    all_cs = symbol_ref.loc[is_common, 'symbol'].tolist()

    # Price and liquidity floor
    price_ok = df['fClose'] > 1.5
    volume_ok = df['fVolume'] > 500000
    df_sorted = df[price_ok & volume_ok].copy(deep=True)

    # Common stocks only
    in_common = df_sorted['symbol'].isin(all_cs)
    df_sorted = df_sorted[in_common]

    # Derived columns: daily range and a duplicate of the symbol
    day_range = df_sorted['fHigh'] - df_sorted['fLow']
    df_sorted['fRange'] = day_range.round(3)
    df_sorted['sym'] = df_sorted['symbol']

    # Ascending by symbol, then date
    df_sorted.sort_values(by=['symbol', 'date'], ascending=True, inplace=True)

    if bull:
        bu_prints(df_sorted.set_index('symbol'))
    if bear:
        be_prints(df_sorted.set_index('symbol'))

    return df_sorted
Esempio n. 2
0
def analyze_iex_ytd():
    """Store the combined IEX close prices and return the 2021+ rows.

    A copy of ``serverAPI('stock_close_prices').df`` has its ``date``
    column converted from epoch milliseconds, and is written to
    ``StockEOD/combined/_<latest date>.parquet`` under ``baseDir().path``.

    Returns a copy of the rows whose year is 2021 or later (the cutoff
    year is hard-coded).
    """
    prices = serverAPI('stock_close_prices').df.copy()
    prices['date'] = pd.to_datetime(prices['date'], unit='ms')

    # File name carries the most recent date present in the data
    latest_day = prices['date'].max().date()
    out_path = Path(baseDir().path, 'StockEOD/combined', f"_{latest_day}.parquet")
    prices.to_parquet(out_path)

    from_2021 = prices['date'].dt.year >= 2021
    return prices[from_2021].copy()
Esempio n. 3
0
def get_clean_yoptions():
    """Get and clean yoptions data.

    Fetches ``serverAPI('yoptions_all').df``, passes it through
    ``clean_yfinance_options(..., refresh=True)``, then adds a normalized
    ``date`` column parsed from ``lastTradeDate`` and a ``symbol`` column
    copied from ``Underlying``.

    Returns:
        The cleaned frame with the two extra columns.

    Raises:
        Whatever ``clean_yfinance_options`` raises. The message is printed
        and the exception is re-raised, so the real cause is visible
        instead of a ``TypeError`` from assigning into ``None``.
    """
    df_temp = serverAPI('yoptions_all').df

    try:
        df_cleaned = clean_yfinance_options(df_temp=df_temp, refresh=True).copy()
    except Exception as e:
        print(str(e))
        # There is nothing to post-process without a cleaned frame.
        raise

    df_cleaned['date'] = (pd.to_datetime(df_cleaned['lastTradeDate'], format='%Y-%m-%d')
                            .dt.normalize())
    df_cleaned['symbol'] = df_cleaned['Underlying']

    return df_cleaned
Esempio n. 4
0
def get_company_meta_data():
    """Fetch IEX company data per symbol and write one parquet file each.

    Symbols of type ``'cs'`` or ``'ad'`` are taken from
    ``serverAPI('all_symbols')``. Each symbol's ``/stock/<sym>/company``
    result is written to ``company_stats/meta/<first letter>/_<sym>.parquet``
    under ``baseDir().path``. A failure for one symbol is printed and the
    loop moves on. Returns nothing.
    """
    symbol_ref = serverAPI('all_symbols').df
    wanted_types = symbol_ref['type'].isin(['cs', 'ad'])
    sym_list = symbol_ref[wanted_types]['symbol'].unique().tolist()

    bpath = Path(baseDir().path, 'company_stats/meta')

    for sym in tqdm(sym_list):
        try:
            ud = urlData(f"/stock/{sym}/company")
            # Files are bucketed by the lower-cased first letter of the symbol
            fpath = bpath / f"{sym.lower()[0]}/_{sym}.parquet"
            write_to_parquet(ud.df, fpath)
        except Exception as e:
            print(f"Company meta stats error: {type(e)} {str(e)}")
Esempio n. 5
0
def get_all_max_hist(sym_list=False):
    """Get all max historical symbol data from IEX.

    For every symbol without a local parquet file under
    ``<baseDir>/historical/2021/<first letter>/_<sym>.parquet``, request
    ``<base_url>/stock/<sym>/chart/max`` and write the result to that path.
    Only the existence of the file is checked, not whether its data is
    complete.

    Args:
        sym_list: Symbols to fetch. When falsy, all symbols of type
            ``'cs'`` from ``serverAPI('all_symbols')`` are used.

    Returns:
        dict with keys:
            ``hist_dict``: always empty; kept so existing callers still
                find the key.
            ``hist_list``: symbols fetched and written successfully.
            ``hists_checked``: symbols skipped because the file exists.
            ``hist_errors_dict``: symbol -> response object, or the
                exception when the request itself failed.
            ``hist_errors``: symbols that failed.
    """
    load_dotenv()
    base_url = os.environ.get("base_url")
    base_path = f"{baseDir().path}/historical/2021"
    payload = {'token': os.environ.get("iex_publish_api"), 'chartByDay': True}

    if not sym_list:
        all_symbols = serverAPI('all_symbols').df
        all_syms = all_symbols[all_symbols['type'].isin(['cs'])]
        sym_list = all_syms['symbol'].tolist()

    hist_dict, hist_errors_dict = {}, {}
    hist_list, hists_checked, hist_errors = [], [], []

    for sym in tqdm(sym_list):
        fpath = f"{base_path}/{sym[0].lower()}/_{sym}.parquet"
        # Skip symbols whose local file already exists.
        if os.path.exists(fpath):
            hists_checked.append(sym)
            continue

        url = f"{base_url}/stock/{sym}/chart/max"
        response = None
        try:
            # The request is inside the try so one network failure does not
            # abort the remaining symbols.
            response = requests.get(url, params=payload)
            df = pd.DataFrame(response.json())
            df.to_parquet(fpath)
        except Exception as e:
            print(e)
            # ``is not None``: a Response is falsy for non-2xx statuses.
            hist_errors_dict[sym] = response if response is not None else e
            hist_errors.append(sym)
        else:
            # Recorded only after the file was actually written.
            hist_list.append(sym)

    result = ({
        'hist_dict': hist_dict,
        'hist_list': hist_list,
        'hists_checked': hists_checked,
        'hist_errors_dict': hist_errors_dict,
        'hist_errors': hist_errors
    })

    return result
def treasuries_clean_write():
    """Normalize treasury timestamps, average per day and store the result.

    ``time`` values that parse as epoch milliseconds are converted that
    way; the remaining rows keep their original ``time`` value and are
    parsed by a plain ``pd.to_datetime`` afterwards. The index columns are
    renamed, a ``date`` column is derived, and the per-date mean is written
    to ``economic_data/tz_daily.parquet`` under ``baseDir().path``.

    Returns the per-date mean frame.
    """
    tz = serverAPI('treasuries').df

    # Values that are not valid epoch milliseconds become NaT here
    tz['time_test'] = pd.to_datetime(tz['time'], unit='ms', errors='coerce')
    parsed = tz.dropna(subset=['time_test'])
    parsed = parsed.drop(columns=['time']).rename(columns={'time_test': 'time'})

    # Rows that did not parse keep their raw 'time' value
    not_parsed = ~tz.index.isin(parsed.index)
    unparsed = tz[not_parsed].drop(columns=['time_test']).copy()
    tz = pd.concat([unparsed, parsed])

    # NOTE(review): confirm these labels match the tenors of the ^FVX,
    # ^TNX and ^TYX indexes before relying on the column names.
    col_dict = {
        '^IRX': 'ThreeM',
        '^FVX': 'ThreeY',
        '^TNX': 'FiveY',
        '^TYX': 'TenY',
    }
    tz = tz.rename(columns=col_dict)

    tz['time'] = pd.to_datetime(tz['time'])
    tz['date'] = pd.to_datetime(tz['time'].dt.date)
    tz = tz.sort_values(by=['date'])

    tz_daily = tz.groupby(by=['date']).mean()
    write_to_parquet(
        tz_daily, Path(baseDir().path, 'economic_data/tz_daily.parquet'))

    return tz_daily
Esempio n. 7
0
def get_clean_all_st_data():
    """Get all stocktwits data, clean it all too.

    Loads ``serverAPI('st_trend_all').df``, applies ``dataTypes``, converts
    ``timestamp`` from epoch milliseconds and derives a normalized ``date``.
    Per ``(symbol, date)`` row counts are merged back onto the data as a
    ``count`` column (the renamed ``id`` count), and ``timestamp`` is
    renamed to ``st_time``.

    Returns:
        The merged frame after a final ``dataTypes`` pass.

    Raises:
        Whatever the grouping step raises. The message is printed and the
        exception is re-raised, so the real cause is visible instead of an
        ``AttributeError`` from the ``False`` placeholder.
    """
    st_data = serverAPI('st_trend_all').df
    st_data = dataTypes(st_data).df
    st_data['timestamp'] = pd.to_datetime(st_data['timestamp'], unit='ms')
    st_data['date'] = st_data['timestamp'].dt.normalize()

    try:
        st_group = st_data.groupby(by=['symbol', 'date']).count()
        st_na = st_group.dropna()
    except Exception as e:
        print(str(e))
        # Without the grouped counts nothing below can run.
        raise

    st_counts = (st_na.reset_index()
                      .drop(columns=['watchlist_count', 'timestamp'])
                      .rename(columns={'id': 'count'}))
    st_all = pd.merge(st_data, st_counts, on=['symbol', 'date'])
    st_all.rename(columns={'timestamp': 'st_time'}, inplace=True)

    st_all = dataTypes(st_all).df

    return st_all
Esempio n. 8
0
# Reload project modules already present in ``sys.modules`` so that source
# edits are picked up by the running session. Each lookup raises KeyError
# if the module has not been imported yet.
importlib.reload(sys.modules['data_collect.iex_class'])

importlib.reload(sys.modules['data_collect.yfinance_funcs'])


importlib.reload(sys.modules['api'])


importlib.reload(sys.modules['master_funcs.master_iex_stats'])

importlib.reload(sys.modules['data_collect.sec_rss'])


# %% codecell
##########################################
# The cells below each send one 'redo' request with a task name.
# NOTE(review): presumably each triggers the named server-side task, and the
# cell order reflects the intended run order; confirm against the API.
serverAPI('redo', val='clear_yoptions_temp_unfin')
# serverAPI('redo', val='make_yoptions_file_struct')
# %% codecell
serverAPI('redo', val='master_yfinance_options_collect')
# %% codecell
serverAPI('redo', val='master_yfinance_options_followup')
# %% codecell
serverAPI('redo', val='yoptions_combine_last')
# %% codecell
serverAPI('redo', val='combine_yoptions_combine_all')
# %% codecell
serverAPI('redo', val='combine_yoptions_all')
# %% codecell
serverAPI('redo', val='yoptions_combine_temp_all')
# %% codecell
serverAPI('redo', val='yoptions_drop_hist_dupes')
Esempio n. 9
0
import pandas as pd
import numpy as np

from data_collect.sec_rss import SecRssFeed, AnalyzeSecRss

# Reload the module, then import again so the names bind to the reloaded
# classes. NOTE(review): `importlib`, `sys`, `datetime`, `timedelta` and
# `date` are used below but not imported in this snippet; confirm they are
# imported elsewhere.
importlib.reload(sys.modules['data_collect.sec_rss'])
from data_collect.sec_rss import SecRssFeed, AnalyzeSecRss

from api import serverAPI

# %% codecell

# Current SEC RSS feed as a frame
srf = SecRssFeed()
srf_df = srf.df

# Symbol reference row for OCGN, used for the CIK lookup further down
all_syms = serverAPI('all_symbols').df
ocgn_df = all_syms[all_syms['symbol'] == 'OCGN']

srf_df.info()

srf_df['dt'] = pd.to_datetime(srf_df['pubDate'])

# Despite the name, the cutoff is the wall-clock time 60 minutes ago
prev_15 = (datetime.now() - timedelta(minutes=60)).time()
# Filings published today with a time of day later than the cutoff
sec_df = (srf_df[(srf_df['dt'].dt.time > prev_15)
                 & (srf_df['dt'].dt.date == date.today())].copy())

sec_df

# Feed rows whose CIK matches OCGN's
srf_df[srf_df['CIK'] == ocgn_df['cik'].iloc[0]]
# NOTE(review): `srf_df` is already `srf.df`; the extra `.df` looks
# unintended. Confirm.
srf_df.df
# %% codecell
# %% codecell
Esempio n. 10
0
from zipfile import ZipFile

# Extract the whole archive `f` into `dir_to_extract`; both names are
# defined outside this snippet. The alias `zip` shadows the builtin of the
# same name for the rest of the session.
with ZipFile(f, 'r') as zip:
    zip.extractall(dir_to_extract)

# %% codecell
#########################################################

# Form 13G 13G/A 13D/A

# sec_idx = serverAPI(which='redo', val='sec_idx_master')
# sec_inst = serverAPI(which='sec_inst_holdings')

# iex_close = serverAPI(which='redo', val='iex_close')

# Two 'redo' requests; the second assignment overwrites the first result
sec_master = serverAPI(which='redo', val='sec_idx_master')
sec_master = serverAPI(which='redo', val='combine_all_sec_masters')

sec_masters = serverAPI(which='sec_master_all').df

# NOTE(review): `sec_inst` is only assigned in a commented-out line above;
# this raises NameError unless it is defined elsewhere.
sec_inst.df.shape
"""
OCGNs merger agreement
https://fintel.io/doc/sec-hsgx-histogenics-8k-2019-april-08-17994
"""

# Rebinds `sec_master` to a secMasterIdx instance and inspects its frame
sec_master = secMasterIdx()
sec_df = sec_master.df.copy(deep=True)
sec_df.shape
sec_df.dtypes
sec_df['Form Type'].value_counts()
Esempio n. 11
0
    col_dict = ({'^IRX': 'ThreeM', '^FVX': 'ThreeY',
                 '^TNX': 'FiveY', '^TYX': 'TenY'})
    tz.rename(columns=col_dict, inplace=True)
    tz['time'] = pd.to_datetime(tz['time'])
    tz['date'] = pd.to_datetime(tz['time'].dt.date)
    tz = tz.sort_values(by=['date'])

    tz_daily = tz.groupby(by=['date']).mean()
    path_to_write = Path(baseDir().path, 'economic_data/tz_daily.parquet')
    write_to_parquet(tz_daily, path_to_write)

    return tz_daily

# %% codecell

yoptions_all = serverAPI('yoptions_all').df

# Print every column that is more than 20% NaN, and every column that
# contains at least one non-finite value.
shape = len(yoptions_all.index)
for col in yoptions_all.columns:
    na = yoptions_all[col].isna().sum()
    if (na / shape) > .2:
        print(f"Column {col}: has {na}s  {str(round(na / shape, 2))}")

    try:
        inf = yoptions_all.loc[~np.isfinite(yoptions_all[col])].shape[0]
    except TypeError:
        # np.isfinite does not support this column's dtype
        continue
    if inf > 0:
        print(f"{col} {inf}")

# %% codecell
Esempio n. 12
0
df_test

# %% codecell

# There's the question of whether to correlate on percentage returns,
# or to apply a logarithmic function to flatten the noise.
# My guess is that the logarithmic approach is probably the better idea.

# Standardize: subtract the mean and divide by the standard deviation.
# `logprice` is defined outside this snippet.
scaled_price = (logprice - np.mean(logprice)) / np.sqrt(np.var(logprice))

# %% codecell
fpath = Path(baseDir().path, 'ref_data', 'peer_list', '_peers.parquet')
df_peers = pd.read_parquet(fpath)

# Attach each symbol's type from the symbol reference (left join keeps
# every peer row)
all_syms = serverAPI('all_symbols').df
df_peers = pd.merge(df_peers,
                    all_syms[['symbol', 'type']],
                    on='symbol',
                    how='left')
# NOTE(review): `mask('corr', .95, lesser=True)` does not match pandas'
# `DataFrame.mask(cond, other)` signature; presumably a project-registered
# helper. Confirm before reuse.
df_peers = (df_peers.mask('corr', .95, lesser=True).mask('corr',
                                                         -.95,
                                                         greater=True))

# %% codecell
df_peers_idx = df_peers.set_index(['key', 'type'])

df_peers

df_peers[df_peers['key'] == 'CYBN']
Esempio n. 13
0
"""A file dedicated to finding SEC special dividend announcements."""
# %% codecell
from tqdm import tqdm
from pathlib import Path
import pandas as pd
import requests
import os

from api import serverAPI

from multiuse.help_class import getDate
from multiuse.create_file_struct import makedirs_with_permissions
# %% codecell

# This is purely for name lookup
sec_ref_data = serverAPI('sec_ref').df
sec_ref_data.head(5)


# Work on a copy of the combined SEC master index; 'Date Filed' is parsed
# from YYYYMMDD
sec_ref = serverAPI('sec_master_all').df
sec_df = sec_ref.copy()
sec_df['Date Filed'] = pd.to_datetime(sec_df['Date Filed'], format='%Y%m%d')

# sec_df.groupby('Date Filed').filter(lambda file: sec_df[file] == '8-K')

sec_df['Form Type'].value_counts()

# Business days from '2021' up to the date returned by
# getDate.query('iex_eod'), both bounds given as strings
bus_days = getDate.get_bus_days()
busdays_2021 = (bus_days[(bus_days['date'] >= '2021') &
                         (bus_days['date'] <= str(getDate.query('iex_eod')))])
Esempio n. 14
0
# (5,100) or 5/45 or 10% of signals are profitable using the pc_10

df_short.head()
# Top 50 rows after de-duplicating on (symbol, vol/avg), ordered by
# 'side_pc10+' then 'vol/avg', both descending. The index keys are passed
# as a list: `set_index('date', 'symbol')` hands 'symbol' to `drop`, which
# is a TypeError on pandas >= 2.0 and never indexed by symbol.
(df_short.reset_index()
         .drop_duplicates(subset=['symbol', 'vol/avg'])
         .sort_values(by=['side_pc10+', 'vol/avg'], ascending=False)
         .set_index(['date', 'symbol'])
         .head(50))
# df.sort_values(by=['side_pc10+', 'vol/avg'], ascending=False).head(50)

# %% codecell
##############################################################

# Parse 'dataDate', then shift it by `bs` and keep only the date part.
# NOTE(review): `bs` is defined outside this snippet; presumably a pandas
# date offset. Confirm.
cboe_df['date_dt'] = pd.to_datetime(cboe_df['dataDate'])
cboe_df['date_df'] = (cboe_df['date_dt'] + bs).dt.date

# Watchlist frame is transposed before reading its 'symbols' column
my_watch = serverAPI('st_watch').df.T
my_syms = my_watch['symbols'].values.tolist()

# %% codecell
##############################################################
# Threading

import logging
import threading
import time
import concurrent.futures


def thread_function(name):
    """Log that thread ``name`` is starting, then sleep for 2 seconds."""
    # NOTE(review): the snippet ends here; the function may be truncated.
    logging.info("Thread %s: starting", name)
    time.sleep(2)
Esempio n. 15
0
# Display all columns
pd.set_option('display.max_columns', None)
# Display maximum rows
pd.set_option('display.max_rows', 500)

# %% codecell
########################################################
import glob
import threading
# Recursively list everything under derivatives/iex_symref
base_dir = baseDir().path
fpath = f"{base_dir}/derivatives/iex_symref/**"
paths = glob.glob(fpath, recursive=True)


view_symref = IexOptionSymref('VIEW')

# Common-stock symbols from the symbol reference
all_syms = serverAPI('all_symbols').df
all_cs = all_syms[all_syms['type'] == 'cs']
all_cs.shape
all_cs_sym = all_cs['symbol'].tolist()

# One thread per symbol, each constructing IexOptionSymref(sym). The
# threads are started but never joined here, and their number is not
# capped.
for sym in all_cs_sym:
    th = threading.Thread(target=IexOptionSymref, args=(sym,))
    th.start()

all_syms.head(10)

def iex_options_symbol_ref():
    """Add tasks to queue to execute."""
    # NOTE(review): the snippet ends after the symbol file is loaded; the
    # queueing described by the docstring is not visible here.
    # Reads the gzip-compressed JSON symbol list under `base_dir`
    # (a module-level name).
    syms_fpath = f"{base_dir}/tickers/all_symbols.gz"
    all_syms = pd.read_json(syms_fpath, compression='gzip')
Esempio n. 16
0
    break

# Run the frame through regStudies and then makeDrawings, keeping each
# result's `.df`
df = regStudies(df).df
df = makeDrawings(df).df

df['localMin_5'].value_counts()

df['localMin_10'].value_counts()

df.head(10)

df.shape
# %% codecell
#############################################

# `df` is rebound to the raw IEX quotes from here on
df = serverAPI('iex_quotes_raw').df
df.shape

# Same data after the project's dataTypes conversion, for a memory
# comparison
iex_df = dataTypes(df).df

# 27 mbs with data type adjustments
iex_df.info(memory_usage='deep')
# 154 mbs without data type adjustments
df.info(memory_usage='deep')
import numpy as np
# Inspect the limits of the float32 and float16 types
np.finfo('float32')
np.finfo('float16')

np.finfo('float16').max
np.finfo('float32').max
# %% codecell
# %% codecell
Esempio n. 17
0

report_date = getDate.which_fname_date()

# Overrides the date obtained above with a fixed one
report_date = datetime.date(2021, 3, 26)
td_vol = tradeVolume(report_date, 'con_volume', fresh=True).vol_df

td_vol_last = td_vol.copy(deep=True)


td_vol_last.sort_values(by=['mQuant'], ascending=False).head(100)

td_vol_last.head(10)


# Take the value stored under the first key of cboe_last and stop
cboe_last = serverAPI('cboe_mmo_exp_last').df
for key in cboe_last.keys():
    cboe_df = cboe_last[key]
    break

# Rename the join columns to the names used in cboe_df, then drop the two
# columns not needed for the merge
td_vol_last.rename(columns={'contdate': 'expDate', 'underlying': 'Underlying'}, inplace=True)
td_vol_last.drop(columns=['pkind', 'exId'], inplace=True)

# Watchlist frame is transposed before reading its 'symbols' column
my_watch = serverAPI('st_watch').df.T
my_watch_syms = my_watch['symbols'].unique()

# Inner join: only (Underlying, expDate) pairs present in both frames
both_df = pd.merge(td_vol_last, cboe_df, how='inner', on=['Underlying', 'expDate'])
both_df.head(10)

# Restrict the merged rows to watchlist symbols
my_syms_df = both_df[both_df['Underlying'].isin(my_watch_syms.tolist())].copy(deep=True)
Esempio n. 18
0
    from multiuse.help_class import df_create_bins

# %% codecell

# Import, reload the module, then import again so the names bind to the
# reloaded definitions. `importlib` and `sys` must already be imported.
from master_funcs.yoptions_master import SetUpYahooOptions, yoptions_combine_temp_all
importlib.reload(sys.modules['master_funcs.yoptions_master'])
from master_funcs.yoptions_master import SetUpYahooOptions, yoptions_combine_temp_all

from data_collect.yfinance_get_options import yahoo_options
importlib.reload(sys.modules['data_collect.yfinance_get_options'])
from data_collect.yfinance_get_options import yahoo_options
# %% codecell

# %% codecell
from api import serverAPI
# Sends a 'redo' request naming the 'yoptions_combine_temp_all' task
serverAPI('redo', val='yoptions_combine_temp_all')

# 1. starts with SetUpYahooOptions
# I also need to set up a prefork class for celery


def execute_yahoo_options(df):
    """Call ``yahoo_options`` once per row of a JSON-encoded frame.

    ``df`` is whatever ``pd.read_json`` accepts; each row must provide a
    ``symbol`` and a ``proxy`` value. Run from tasks execute_function.
    """
    frame = pd.read_json(df)
    for _, row in frame.iterrows():
        yahoo_options(row['symbol'], proxy=row['proxy'])


def yahoo_options(sym, proxy=False, n=False):
    """Get options chain data from yahoo finance."""
    # NOTE(review): the snippet ends after this line; the rest of the body
    # is not visible here.
    dt = getDate.query('iex_eod')
Esempio n. 19
0
# %% codecell
##################################

# Daily treasury report
# Same report file name in two formats, xlsx and txt
url = "https://fsapps.fiscal.treasury.gov/dts/files/21091000.xlsx"
url1 = "https://fsapps.fiscal.treasury.gov/dts/files/21091000.txt"

# %% codecell
##################################


# %% codecell
##################################

# Sends a 'redo' request naming the 'create_sec_rss_hist' task
serverAPI('redo', val='create_sec_rss_hist')

# %% codecell
##################################

# Load one gzip-compressed JSON file from a hard-coded, machine-specific
# absolute path
fpath = "/Users/unknown1/Algo/data/iex_eod_quotes/combined/_2021_all_2021-07-16.gz"
df = pd.read_json(fpath, compression='gzip')


#df = pd.read_csv(fpath, compression='gzip')

#df = pd.read_csv(fpath, compression='gzip', usecols=cols_to_keep)

cols_to_keep = (['symbol', 'open', 'close', 'high', 'highTime', 'low',
                 'lowTime', 'latestUpdate', 'previousClose', 'previousVolume',
                 'change', 'changePercent', 'volume', 'avgTotalVolume',