def capture_reddit(subsources, start_date):
    """Get news from Reddit.

    API: reddit.com/dev/api
    Other API: pushshift.io
    Pushshift wrapper: psaw.readthedocs.io

    :param str subsources: i.e. wallstreetbets
    :param str start_date: start date, i.e. '2/19/2021'
    """
    filters = ['url', 'title', 'subreddit']
    limit = 1000
    date_dt = dt.strptime(start_date, "%m/%d/%Y")
    date_int = int(date_dt.timestamp())

    # pull submissions from reddit
    api = PushshiftAPI()
    submitted = list(
        api.search_submissions(after=date_int,
                               subreddit=subsources,
                               filter=filters,
                               limit=limit))

    # pull list of legit symbols from database to compare with submissions
    with Database() as db:
        query = "SELECT id, symbol FROM symbols"
        rows = db.query(query)
    symbols = {f"${row['symbol']}": row['id'] for row in rows}

    args = []
    for submit in submitted:
        words = submit.title.split()

        # keep unique words that look like cashtags, i.e. start with '$'
        cashtags = list({word for word in words if word.startswith('$')})
        if not cashtags:
            continue

        for cashtag in cashtags:
            if cashtag not in symbols:
                continue
            submit_time = dt.fromtimestamp(submit.created_utc).isoformat()
            arg = (submit_time, symbols[cashtag], submit.title,
                   subsources, submit.url)
            args.append(arg)

    if len(args) > 0:
        with Database() as db:
            sql = """
                INSERT INTO mention (date, symbol_id, mentions, source, url)
                VALUES (%s, %s, %s, %s, %s)
                ON CONFLICT DO NOTHING
            """
            db.executemany(sql, args)
            db.commit()
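# Minimal usage sketch (hypothetical call; assumes Pushshift access and a
# configured Database wrapper). Titles must contain cashtags such as '$GME'
# for a mention to be recorded, and matching against the symbols table is
# case-sensitive:
if __name__ == '__main__':
    capture_reddit('wallstreetbets', '2/19/2021')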
async def save_to_database(data):
    """Save fundamental data to the database"""
    df = data.unstack().reset_index()
    df.columns = ['source', 'variable', 'symbol', 'value']
    df = df[['symbol', 'source', 'variable', 'value']]
    df['value'] = df['value'].astype(str)
    values = df.to_records(index=False)
    sql = """
        INSERT INTO fundamentals (symbol_id, source, var, val)
        VALUES ((SELECT id FROM symbols WHERE symbol=$1), $2, $3, $4)
        ON CONFLICT (symbol_id, source, var)
        DO UPDATE SET symbol_id=excluded.symbol_id,
                      source=excluded.source,
                      var=excluded.var,
                      val=excluded.val;
    """
    # using asyncpg as the database connection manager for async inserts
    creds = {'host': S.DB_HOST,
             'database': S.DB_NAME,
             'user': S.DB_USER,
             'password': S.DB_PASSWORD}
    async with asyncpg.create_pool(**creds) as pool:
        async with pool.acquire() as db:
            async with db.transaction():
                await db.executemany(sql, values)
    Database().timestamp('fundamentals', close=True)
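# Shape sketch for the `data` argument, inferred from the unstack/rename
# above: row index = symbol, column MultiIndex = (source, variable). The
# sources, variables, and values here are illustrative only:
import asyncio
import pandas as pd

cols = pd.MultiIndex.from_tuples([('yahoo', 'marketCap'),
                                  ('yahoo', 'trailingPE')])
sample = pd.DataFrame([[2.4e12, 31.5], [1.8e12, 27.2]],
                      index=['AAPL', 'GOOG'], columns=cols)
# asyncio.run(save_to_database(sample))  # assumes DB credentials in S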
async def save_to_database(data):
    """Save price data to the database"""
    rows = data.itertuples(index=False)
    values = [list(row) for row in rows]
    sql = """
        INSERT INTO price_history (date, symbol_id, open, high, low,
                                   close, adj_close, volume)
        VALUES ($1, (SELECT id FROM symbols WHERE symbol=$2),
                $3, $4, $5, $6, $7, $8)
        ON CONFLICT (symbol_id, date)
        DO UPDATE SET symbol_id=excluded.symbol_id,
                      date=excluded.date,
                      open=excluded.open,
                      high=excluded.high,
                      low=excluded.low,
                      close=excluded.close,
                      adj_close=excluded.adj_close,
                      volume=excluded.volume;
    """
    # using asyncpg as the database connection manager for async inserts
    creds = {'host': S.DB_HOST,
             'database': S.DB_NAME,
             'user': S.DB_USER,
             'password': S.DB_PASSWORD}
    async with asyncpg.create_pool(**creds) as pool:
        async with pool.acquire() as db:
            async with db.transaction():
                await db.executemany(sql, values)
    Database().timestamp('price_history', close=True)
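# Column-order sketch for `data`, inferred from the INSERT above: the
# DataFrame columns must line up positionally with $1..$8. Values are
# illustrative only:
import asyncio
import datetime
import pandas as pd

sample = pd.DataFrame(
    [[datetime.date(2021, 2, 19), 'AAPL',
      130.2, 132.9, 129.5, 129.9, 129.9, 87668800]],
    columns=['date', 'symbol', 'open', 'high', 'low', 'close',
             'adj_close', 'volume'])
# asyncio.run(save_to_database(sample))  # assumes DB credentials in S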
async def save_to_database(self):
    """Save signal table to database"""
    # long format table
    signal_df = self.signal.reset_index()
    signal_df = signal_df.melt(id_vars=['index'])
    signal_df = signal_df.fillna('')
    with Database() as db:
        # erase previous table contents
        sql = "TRUNCATE TABLE signals"
        db.execute(sql)
        db.commit()
        # save data into the signals table; building tuples instead of
        # passing df.values directly deals with a boolean conflict
        # between pandas and SQL
        values = [tuple(x) for x in signal_df.values]
        sql = """
            INSERT INTO signals (symbol_id, study, value)
            VALUES ((SELECT id FROM symbols WHERE symbol=%s), %s, %s)
            ON CONFLICT (symbol_id, study)
            DO UPDATE SET symbol_id = excluded.symbol_id,
                          study = excluded.study,
                          value = excluded.value;
        """
        db.executemany(sql, values)
        db.timestamp('signals')
        db.commit()
    log.info("Saved signal table to database")
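# Melt sketch: self.signal is assumed wide (index = symbol, columns =
# studies); reset_index() + melt(id_vars=['index']) yields one
# (symbol, study, value) triple per cell, in the column order the
# INSERT above expects:
import pandas as pd

wide = pd.DataFrame({'sma_crossover': [True, False]},
                    index=['AAPL', 'MSFT'])
long_df = wide.reset_index().melt(id_vars=['index'])
print(long_df)  # columns: index (symbol), variable (study), value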
def __init__(self, symbols: list = None, forced: bool = False,
             source: str = 'tda'):
    """Load price history from the database when it was updated within
    the last 24 hours, otherwise download it from the source.

    :param symbols: list of symbols
    :param forced: True bypasses the 24 hour download requirement
    :param source: source from where the data is downloaded
    """
    log.debug("Loading price history")
    s = t()
    data = pd.DataFrame()

    # checks on last database update for price history
    within24 = Database().last_update('price_history', close=True)

    # load from database if within 24 hours
    if within24 and not forced:
        data = self.load_database()

    # download data if database is empty or expired
    if data is None or data.empty or not within24 or forced:
        data = self.download(symbols, source)
        # update symbol rejects database
        Database().update_rejected_symbols(
            symbols=symbols,
            current=data['symbol'].unique(),
            close=True)
        asyncio.run(self.save_to_database(data))

    if data is None or data.empty:
        raise Exception("Empty price history database")

    self.data = data
    log.debug(f"Price History Loaded in {t() - s} secs")
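# Usage sketch (hypothetical class name `PriceHistory`; substitute
# whichever class in the project carries this constructor):
# ph = PriceHistory(symbols=['AAPL', 'MSFT'], forced=True, source='tda')
# print(ph.data.head())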
def load_database():
    """Load symbols & security names from database, excludes rejects"""
    query = """
        SELECT symbol, security FROM symbols
        WHERE symbols.id NOT IN (SELECT symbol_id FROM rejected_symbols)
        ORDER BY symbol;
    """
    with Database() as db:
        df = pd.read_sql_query(query, con=db.connection)
    log.debug("Symbols loaded from database")
    return df
def last_update(table):
    """Return the last update date for a table and whether that date is
    within the last 24 hours."""
    now_utc = pytz.utc.localize(datetime.datetime.utcnow())
    # fallback timestamp of 25 hours ago forces a refresh when the
    # table has never been stamped
    timestamp = now_utc - datetime.timedelta(hours=25)
    query = "SELECT date FROM last_update WHERE tbl = %s;"
    with Database() as db:
        timestamp_db = db.query(query, (table,))[0]
    timestamp = timestamp if timestamp_db is None else timestamp_db
    # total_seconds() is required here: timedelta.seconds ignores the
    # days component and would mislabel stale tables as fresh
    within_24 = (now_utc - timestamp).total_seconds() <= 24 * 60 * 60
    timestamp_est = timestamp.astimezone(pytz.timezone('US/Eastern'))
    return timestamp_est.strftime('%m-%d-%Y %I:%M %p %Z'), within_24
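# Quick demonstration of the .seconds vs .total_seconds() pitfall fixed
# above: .seconds only counts the time-of-day component of a timedelta.
import datetime

delta = datetime.timedelta(days=3, hours=1)
print(delta.seconds)          # 3600 -> looks "fresh"; the days are ignored
print(delta.total_seconds())  # 262800.0 -> correctly reads as stale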
def __init__(self, symbols: list, forced: bool = False):
    """Load fundamentals from the database when it was updated within
    the last 24 hours, otherwise download it.

    :param symbols: list of symbols
    :param forced: True bypasses the 24 hour requirement
    """
    log.debug("Loading fundamentals")
    s = t()
    data = pd.DataFrame()

    # checks on last database update for fundamentals
    within24 = Database().last_update('fundamentals', close=True)

    # load from database if within 24 hours
    if within24 and not forced:
        data = self.load_database()
        data = self.process_loaded_fundamentals(data)

    # download data if database is empty or expired
    if data is None or data.empty or not within24 or forced:
        data = self.download(symbols)
        # update symbol rejects database
        Database().update_rejected_symbols(
            symbols=symbols,
            current=list(data.index),
            close=True)
        asyncio.run(self.save_to_database(data))

    if data is None or data.empty:
        raise Exception("Empty fundamentals database")

    self.data = data
    log.debug(f"Fundamentals Loaded in {t() - s} secs")
def save_to_database(df):
    """Save symbols to database"""
    values = df.to_records(index=False)
    sql = """
        INSERT INTO symbols (symbol, security, id)
        VALUES (%s, %s, %s)
        ON CONFLICT (id)
        DO UPDATE SET security=excluded.security;
    """
    with Database() as db:
        db.executemany(sql, values)
        db.timestamp('symbols')
        db.commit()
    log.debug("Symbols saved to database")
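# Input sketch for save_to_database, inferred from the INSERT above:
# three columns in the order symbol, security, id. The id values here
# are placeholders; the project generates them via unique_id():
import pandas as pd

sample = pd.DataFrame(
    [['AAPL', 'Apple Inc.', 'a1b2c3'],
     ['MSFT', 'Microsoft Corp.', 'd4e5f6']],
    columns=['symbol', 'security', 'id'])
# save_to_database(sample)  # assumes a configured Database wrapper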
def create_table():
    """Create tables in database.

    Usage: python fybot create_table
    """
    if sys.argv[1] in ["create_tables", "create_table", "create", "table",
                       "reset", "nukeit", "createtable", "start", "go"]:
        try:
            from fybot.core.database import Database
            Database().create_table()
            sys.exit()
        except Exception as e:
            log.error(f"Error creating tables in database. {e}")
            sys.exit(e)
def load_database():
    """Retrieves fundamental data from database, filtered of rejects"""
    query = """
        SELECT symbols.symbol,
               fundamentals.source,
               fundamentals.var,
               fundamentals.val
        FROM fundamentals
        INNER JOIN symbols ON fundamentals.symbol_id = symbols.id
        WHERE symbols.id NOT IN (SELECT symbol_id FROM rejected_symbols)
        ORDER BY symbols.symbol;
    """
    with Database() as db:
        df = pd.read_sql_query(query, con=db.connection)
    log.debug("Loaded fundamentals from database")
    return df
def load_database():
    """Load signals table from database.

    :return: Dataframe with index: symbol, columns: study, values: value
    """
    query = """
        SELECT symbols.symbol, signals.study, signals.value
        FROM signals
        INNER JOIN symbols ON signals.symbol_id = symbols.id;
    """
    with Database() as db:
        df = pd.read_sql_query(query, db.connection)
    # wide format table
    df = df.pivot(index='symbol', columns='study', values='value')
    df = df.replace({'true': True, 'false': False}).fillna('')
    return df
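# Pivot sketch: the long (symbol, study, value) rows coming back from SQL
# become one row per symbol with one column per study; 'true'/'false'
# strings become booleans and missing studies become '':
import pandas as pd

long_df = pd.DataFrame({'symbol': ['AAPL', 'AAPL', 'MSFT'],
                        'study': ['sma_crossover', 'macd', 'sma_crossover'],
                        'value': ['true', 'false', 'true']})
wide = long_df.pivot(index='symbol', columns='study', values='value')
wide = wide.replace({'true': True, 'false': False}).fillna('')
print(wide)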
def __init__(self, source: str, forced: bool = False):
    """Load symbols from the database when it was updated within the
    last 24 hours, otherwise download them. If DEBUG, the symbols list
    is limited.

    Returns: list of symbols, dataframe with symbol/security names.

    :param source: 'nasdaq' (default), 'alpaca', 'sp500'
    :param forced: True forces the symbols download, bypassing the
        24 hour requirement
    """
    log.debug("Loading assets")
    s = t()
    data = pd.DataFrame()

    # checks on last database update for symbols
    within24 = Database().last_update('symbols', close=True)

    # load from database if within 24 hours
    if within24 and not forced:
        data = self.load_database()

    # download data if database is empty or expired
    if data is None or data.empty or not within24 or forced:
        data = self.download(source)
        data = self.apply_filters(data)
        data = data.assign(symbol_id=lambda x: self.unique_id(x))
        self.save_to_database(data)
        # reloading to exclude rejected symbols
        data = self.load_database()

    if data is None or data.empty:
        raise Exception("Empty symbols database")

    symbols = data['symbol'].to_list()
    symbols = symbols[:S.MAX_SYMBOLS]

    self.data = data
    self.symbols = symbols
    log.debug(f"Assets Loaded in {t() - s} secs")
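# Usage sketch (hypothetical class name `Assets`; substitute whichever
# class in the project carries this constructor):
# assets = Assets(source='nasdaq', forced=False)
# print(len(assets.symbols))
# print(assets.data.head())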
def load_database():
    """Retrieves the price data from database, filtered of rejects"""
    query = """
        SELECT price_history.date,
               symbols.symbol,
               price_history.open,
               price_history.high,
               price_history.low,
               price_history.close,
               price_history.adj_close,
               price_history.volume
        FROM price_history
        INNER JOIN symbols ON price_history.symbol_id = symbols.id
        WHERE symbols.id NOT IN (SELECT symbol_id FROM rejected_symbols)
        ORDER BY symbols.symbol, price_history.date;
    """
    with Database() as db:
        df = pd.read_sql_query(query, con=db.connection)
    log.debug("Loaded price history from database")
    return df
def show_reddit(subsources, num_days):
    """Return mention counts and the latest mentions for a subreddit."""
    with Database() as db:
        # symbols with more than 10 mentions over the window
        query = """
            SELECT COUNT(*) AS num_mentions, symbol
            FROM mention
            JOIN symbols ON symbols.id = mention.symbol_id
            WHERE date(date) > current_date - interval '%s day'
            GROUP BY symbol_id, symbol
            HAVING COUNT(symbol) > 10
            ORDER BY num_mentions DESC
        """
        counts = db.query(query, (num_days,))

        # latest 50 mentions for the subreddit
        query = """
            SELECT symbol, mentions, url, date, source
            FROM mention
            JOIN symbols ON symbols.id = mention.symbol_id
            WHERE source = %s
            ORDER BY date DESC
            LIMIT 50;
        """
        mentions = db.query(query, (subsources,))
        rows = db.fetchall()
    return counts, mentions, rows
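# Usage sketch: mention counts over the last 7 days plus the newest
# r/wallstreetbets rows (assumes the mention table has been populated
# by capture_reddit above):
# counts, mentions, rows = show_reddit('wallstreetbets', 7)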
def save_ark(data):
    """Save ARK fund holdings to the portfolio table."""
    with Database() as db:
        # map each ARK fund symbol to its id in the symbols table
        etfs = data['fund'].unique()
        etfs = ", ".join("'{0}'".format(x) for x in etfs)
        query = f"SELECT id, symbol FROM symbols WHERE symbol IN ({etfs})"
        etfs = db.query(query)
        etfs = {i['symbol']: i['id'] for i in etfs}

        for row in data.itertuples():
            if not row.ticker:
                continue
            query = "SELECT id FROM symbols WHERE symbol = %s"
            holding = db.query(query, (row.ticker,))
            if not holding:
                continue
            query = """
                INSERT INTO portfolio (id, holding_id, date, shares, weight)
                VALUES (%s, %s, %s, %s, %s)
                ON CONFLICT DO NOTHING;
            """
            # db.query returns a list of rows, so take the first match
            db.execute(query, (etfs[row.fund], holding[0]['id'], row.date,
                               row.shares, row.weight))
        db.commit()
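# Input sketch for save_ark, inferred from the column access above: one
# row per holding with fund, ticker, date, shares, weight. Values are
# illustrative only:
import pandas as pd

sample = pd.DataFrame(
    [['ARKK', 'TSLA', '2021-02-19', 100, 10.5]],
    columns=['fund', 'ticker', 'date', 'shares', 'weight'])
# save_ark(sample)  # assumes ARKK and TSLA exist in the symbols table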