Example #1
0
    def capture_reddit(subsources, start_date):
        """Get news from Reddit

        # API: reddit.com/dev/api
        # Other API: pushshift.io
        # Pushshift wrapper: psaw.readthedocs.io

        :param str subsources: i.e. wallstreetbets
        :param str start_date: start date i.e. '2/19/2021'

        """

        filters = ['url', 'title', 'subreddit']
        limit = 1000
        date_dt = dt.strptime(start_date, "%m/%d/%Y")
        date_int = int(date_dt.timestamp())

        # pull submissions from reddit
        api = PushshiftAPI()
        submitted = list(
            api.search_submissions(after=date_int,
                                   subreddit=subsources,
                                   filter=filters,
                                   limit=limit))

        # pull list of legit symbols from database to compare with submissions
        with Database() as db:
            query = "SELECT id, symbol FROM symbols"
            rows = db.query(query)
        symbols = {f"${row['symbol']}": row['id'] for row in rows}

        args = []
        for submit in submitted:
            # Unique $-prefixed words in the title. The previous
            # `word.lower().startswith('$')` predicate was re-defined on
            # every loop iteration and the .lower() was a no-op for a '$'
            # prefix test.
            # NOTE(review): the lookup against `symbols` is case-sensitive,
            # so `$gme` will not match a `$GME` key — confirm whether
            # cashtags should be upper-cased before the membership test.
            cashtags = {word for word in submit.title.split()
                        if word.startswith('$')}
            for cashtag in cashtags:
                if cashtag not in symbols:
                    continue
                submit_time = dt.fromtimestamp(submit.created_utc).isoformat()
                args.append((submit_time, symbols[cashtag], submit.title,
                             subsources, submit.url))

        if args:
            with Database() as db:
                sql = """
                    INSERT INTO mention (date, symbol_id, mentions,
                                         source, url)
                    VALUES (%s, %s, %s, %s, %s)
                    ON CONFLICT DO NOTHING
                    """
                db.executemany(sql, args)
                db.commit()
Example #2
0
    async def save_to_database(data):
        """Save fundamental data in a database"""

        # reshape to long format: one (symbol, source, variable, value) row
        long_df = data.unstack().reset_index()
        long_df.columns = ['source', 'variable', 'symbol', 'value']
        long_df = long_df[['symbol', 'source', 'variable', 'value']]
        long_df['value'] = long_df['value'].astype(str)
        records = long_df.to_records(index=False)

        sql = """
            INSERT INTO fundamentals (symbol_id, source, var, val)
            VALUES ((SELECT id FROM symbols WHERE symbol=$1), $2, $3, $4)
            ON CONFLICT (symbol_id, source, var)
            DO UPDATE
            SET symbol_id=excluded.symbol_id,
                source=excluded.source,
                var=excluded.var,
                val=excluded.val;
            """

        # asyncpg pool manages the async connection lifecycle
        credentials = dict(host=S.DB_HOST,
                           database=S.DB_NAME,
                           user=S.DB_USER,
                           password=S.DB_PASSWORD)
        async with asyncpg.create_pool(**credentials) as pool:
            async with pool.acquire() as db:
                async with db.transaction():
                    await db.executemany(sql, records)

        Database().timestamp('fundamentals', close=True)
Example #3
0
    async def save_to_database(data):
        """Save price data in a database"""

        # one parameter list per price row, in table column order
        records = [list(row) for row in data.itertuples(index=False)]

        sql = """
            INSERT INTO price_history (date, symbol_id, open, high, low,
                                       close, adj_close, volume)
            VALUES ($1, (SELECT id FROM symbols WHERE symbol=$2),
                    $3, $4, $5, $6, $7, $8)
            ON CONFLICT (symbol_id, date)
            DO UPDATE
            SET symbol_id=excluded.symbol_id,
                date=excluded.date, open=excluded.open,
                high=excluded.high, low=excluded.low,
                close=excluded.close,
                adj_close=excluded.adj_close,
                volume=excluded.volume;
            """

        # asyncpg pool manages the async connection lifecycle
        credentials = dict(host=S.DB_HOST,
                           database=S.DB_NAME,
                           user=S.DB_USER,
                           password=S.DB_PASSWORD)
        async with asyncpg.create_pool(**credentials) as pool:
            async with pool.acquire() as db:
                async with db.transaction():
                    await db.executemany(sql, records)

        Database().timestamp('price_history', close=True)
Example #4
0
    async def save_to_database(self):
        """Save signal table to database"""

        # melt the wide signal table into long (index, study, value) rows
        melted = self.signal.reset_index().melt(id_vars=['index']).fillna('')

        with Database() as db:
            # erase previous table contents
            db.execute("TRUNCATE TABLE signals")
            db.commit()

            # save data into signals table database
            # pd.values deals with an boolean conflict between pandas and SQL
            records = [tuple(row) for row in melted.values]
            sql = """
                INSERT INTO signals (symbol_id, study, value)
                VALUES ((SELECT id FROM symbols WHERE symbol=%s), %s, %s)
                ON CONFLICT (symbol_id, study) DO UPDATE
                SET symbol_id = excluded.symbol_id,
                    study = excluded.study,
                    value = excluded.value;
                """
            db.executemany(sql, records)

            db.timestamp('signals')
            db.commit()

        log.info("Saved signal table to database")
Example #5
0
    def __init__(self,
                 symbols: list = None,
                 forced: bool = False,
                 source: str = 'tda'):
        """Gets price history data from download if not within 24 hours,
        then from data, finally from file.

        :param symbols: list of symbols
        :param forced: True bypassess 24 hour download requirement
        :param source: Source from where the data is downloaded
        """

        # fixed copy-pasted message: this loader handles price history,
        # not fundamentals
        log.debug("Loading price history")
        s = t()

        data = pd.DataFrame()

        # checks on last database update for price history
        within24 = Database().last_update('price_history', close=True)

        # load from database if within 24 hours
        if within24 and not forced:
            data = self.load_database()

        # download data if database is empty or expired data
        # (None check must come first: `data.empty` on None would raise
        # AttributeError, so the original order could never detect None)
        if data is None or data.empty or not within24 or forced:
            data = self.download(symbols, source)

            # update symbol rejects database
            Database().update_rejected_symbols(symbols=symbols,
                                               current=data['symbol'].unique(),
                                               close=True)

            asyncio.run(self.save_to_database(data))

        if data is None or data.empty:
            raise Exception("Empty price history database")

        self.data = data

        log.debug(f"Price History Loaded in {t() - s} secs")
Example #6
0
    def load_database():
        """Load symbols & security names from database, excludes rejects"""

        sql = """
            SELECT symbol, security FROM symbols
            WHERE symbols.id
            NOT IN (SELECT symbol_id FROM rejected_symbols)
            ORDER BY symbol;
            """
        with Database() as db:
            frame = pd.read_sql_query(sql, con=db.connection)
        log.debug("Symbols loaded from database")
        return frame
Example #7
0
    def last_update(table):
        """Return last update for *table* and a 24-hour freshness flag.

        :param str table: name of the table tracked in last_update
        :return: tuple of (US/Eastern formatted timestamp string, bool
                 within_24) where within_24 is True when the stored
                 timestamp is at most 24 hours old
        """

        now_utc = pytz.utc.localize(datetime.datetime.utcnow())
        # fallback 25 hours in the past guarantees within_24 is False
        # when the table has no recorded update
        timestamp = now_utc - datetime.timedelta(hours=25)

        # NOTE(review): table name is interpolated directly into the SQL;
        # safe only while callers pass internal constants, never user input.
        query = f"SELECT date FROM last_update WHERE tbl = '{table}';"
        with Database() as db:
            timestamp_db = db.query(query)[0]

        timestamp = timestamp if timestamp_db is None else timestamp_db
        # BUG FIX: timedelta.seconds ignores the .days component, so a
        # several-day-old timestamp could still appear "within 24 hours";
        # total_seconds() accounts for the full elapsed time.
        within_24 = (now_utc - timestamp).total_seconds() <= 24 * 60 * 60
        timestamp_est = timestamp.astimezone(pytz.timezone('US/Eastern'))
        return timestamp_est.strftime('%m-%d-%Y %I:%M %p %Z'), within_24
Example #8
0
    def __init__(self, symbols: list, forced: bool = False):
        """Load fundamentals from database/file. Over 24 hrs it downloads.

        :param symbols: list of symbols
        :param forced: True bypasses within 24 hour requirement
        """

        log.debug("Loading fundamentals")
        s = t()

        data = pd.DataFrame()

        # checks on last database update for fundamentals
        within24 = Database().last_update('fundamentals', close=True)

        # load from database if within 24 hours
        if within24 and not forced:
            data = self.load_database()
            data = self.process_loaded_fundamentals(data)

        # download data if database is empty or expired data
        # (None check must come first: `data.empty` on None would raise
        # AttributeError, so the original order could never detect None)
        if data is None or data.empty or not within24 or forced:
            data = self.download(symbols)

            # update symbol rejects database
            Database().update_rejected_symbols(symbols=symbols,
                                               current=list(data.index),
                                               close=True)

            asyncio.run(self.save_to_database(data))

        if data is None or data.empty:
            raise Exception("Empty fundamentals database")

        self.data = data

        log.debug(f"Fundamentals Loaded in {t() - s} secs")
Example #9
0
    def save_to_database(df):
        """Save data to database"""

        sql = """
            INSERT INTO symbols (symbol, security, id)
            VALUES (%s, %s, %s)
            ON CONFLICT (id) DO UPDATE
            SET security=excluded.security;
            """
        # one record per symbol row, upserted on the id key
        with Database() as db:
            db.executemany(sql, df.to_records(index=False))
            db.timestamp('symbols')
            db.commit()
        log.debug("Symbols saved to database")
Example #10
0
def create_table():
    """Create tables in database.

    Usage:
        * python fybot create_table
    """
    aliases = ["create_tables", "create_table", "create",
               "table", "reset", "nukeit", "createtable",
               "start", "go"]
    # guard: running without a subcommand used to raise IndexError on
    # sys.argv[1]; a missing or unrecognized subcommand is now a no-op
    if len(sys.argv) < 2 or sys.argv[1] not in aliases:
        return
    try:
        from fybot.core.database import Database
        Database().create_table()
        sys.exit()
    except Exception as e:
        log.error(f"Error creating tables in database. {e}")
        sys.exit(e)
Example #11
0
    def load_database():
        """Retrieves fundamental data from database, filtered of rejects"""

        sql = """
            SELECT symbols.symbol,
                fundamentals.source,
                fundamentals.var,
                fundamentals.val 
            FROM fundamentals
            INNER JOIN symbols
            ON fundamentals.symbol_id = symbols.id
            WHERE symbols.id
            NOT IN (SELECT symbol_id FROM rejected_symbols)
            ORDER BY symbols.symbol;
            """
        with Database() as db:
            frame = pd.read_sql_query(sql, con=db.connection)
        log.debug("Loaded fundamentals from database")
        return frame
Example #12
0
    def load_database():
        """Load signals table from database.

        :return: Dataframe with index: symbol, columns: study, values: value
        """

        sql = """
            SELECT symbols.symbol, signals.study, signals.value
            FROM signals
            INNER JOIN symbols
            ON signals.symbol_id = symbols.id;
            """
        with Database() as db:
            long_df = pd.read_sql_query(sql, db.connection)

        # pivot to wide format: one row per symbol, one column per study
        wide = long_df.pivot(index='symbol', columns='study', values='value')
        return wide.replace({'true': True, 'false': False}).fillna('')
Example #13
0
    def __init__(self, source: str, forced: bool = False):
        """Load symbols from database or file, when older than 24 hours it
        downloads. If DEBUG then symbols list is limited.

        Returns: list of symbols, dataframe with symbol/security names.

        :param source: 'nasdaq' (default), 'alpaca', 'sp500'
        :param forced: True to force symbols download, bypasses within 24 req
        """

        log.debug("Loading assets")
        s = t()

        data = pd.DataFrame()

        # checks on last database update for symbols
        within24 = Database().last_update('symbols', close=True)

        # load from database if within 24 hours
        if within24 and not forced:
            data = self.load_database()

        # download data if database is empty or expired data
        # (None check must come first: `data.empty` on None would raise
        # AttributeError, so the original order could never detect None)
        if data is None or data.empty or not within24 or forced:
            data = self.download(source)
            data = self.apply_filters(data)
            data = data.assign(symbol_id=lambda x: self.unique_id(x))
            self.save_to_database(data)
            # reloading to exclude rejected symbols
            data = self.load_database()

        if data is None or data.empty:
            raise Exception("Empty symbols database")

        symbols = data['symbol'].to_list()
        # cap the working symbol universe (limited when DEBUG)
        symbols = symbols[:S.MAX_SYMBOLS]

        self.data = data
        self.symbols = symbols

        log.debug(f"Assets Loaded in {t()-s} secs")
Example #14
0
    def load_database():
        """Retrieves the price data from database, filtered of rejects"""
        sql = """
            SELECT price_history.date,
                symbols.symbol,
                price_history.open,
                price_history.high,
                price_history.low,
                price_history.close,
                price_history.adj_close,
                price_history.volume
            FROM price_history
            INNER JOIN symbols
            ON price_history.symbol_id = symbols.id
            WHERE symbols.id
            NOT IN (SELECT symbol_id FROM rejected_symbols)
            ORDER BY symbols.symbol, price_history.date;
            """

        with Database() as db:
            frame = pd.read_sql_query(sql, con=db.connection)
        log.debug("Loaded price history from database")
        return frame
Example #15
0
    def show_reddit(subsources, num_days):
        """Return mention counts, recent mentions, and remaining rows.

        :param subsources: source name to filter mentions by (e.g. subreddit)
        :param num_days: look-back window, in days, for the counts query
        :return: tuple of (counts, mentions, rows)
        """
        with Database() as db:
            # symbols mentioned more than 10 times within the window
            query = """
                SELECT COUNT(*) AS num_mentions, symbol
                FROM mention JOIN symbols ON symbols.id = mention.symbol_id
                WHERE date(date) > current_date - interval '%s day'
                GROUP BY symbol_id, symbol   
                HAVING COUNT(symbol) > 10
                ORDER BY num_mentions DESC
            """
            counts = db.query(query, (num_days, ))

            # 50 most recent mentions for the requested source
            query = """
                SELECT symbol, mentions, url, date, source
                FROM mention JOIN symbols ON symbols.id = mention.symbol_id
                WHERE source = %s
                ORDER BY date DESC
                LIMIT 50;
            """
            mentions = db.query(query, (subsources, ))

            # NOTE(review): fetchall() is called after both queries were
            # already issued via db.query — presumably it drains whatever
            # the wrapper's cursor still holds; confirm against the
            # Database wrapper's semantics, this may always be empty.
            rows = db.fetchall()

        return counts, mentions, rows
Example #16
0
 def save_ark(data):
     """Save ARK fund holdings into the portfolio table."""
     with Database() as db:
         # map each fund symbol to its database id in a single query
         etfs = data['fund'].unique()
         etfs = ", ".join("'{0}'".format(x) for x in etfs)
         query = f"""SELECT id, symbol
                     FROM symbols 
                     WHERE symbol IN ({etfs})"""
         etfs = db.query(query)
         etfs = {i['symbol']: i['id'] for i in etfs}
         for row in data.itertuples():
             # skip holdings with no ticker (e.g. cash positions)
             if not row.ticker:
                 continue
             # NOTE(review): one SELECT per holding is an N+1 pattern;
             # consider batching tickers like the fund lookup above.
             query = "SELECT id FROM symbols WHERE symbol = %s"
             holding = db.query(query, (row.ticker, ))
             if not holding:
                 continue
             # NOTE(review): other call sites treat db.query results as a
             # list of rows (indexed with [0]) — confirm holding['id']
             # should not be holding[0]['id'].
             query = """
              INSERT INTO portfolio 
                          (id, holding_id, date, shares, weight)
              VALUES (%s, %s, %s, %s, %s)
              ON CONFLICT DO NOTHING;"""
             db.execute(query, (etfs[row.fund], holding['id'], row.date,
                                row.shares, row.weight))
         db.commit()