def insert_company(self): print( "Insert new company information" ) mdb_query = Query() iex = Iex() #Get all symbols in MongoDB mdb_symbols = mdb_query.get_symbols() #Get companies already in MongoDB mdb_companies = mdb_query.get_company( mdb_symbols['symbol'].tolist() ) #Initial call to print 0% progress printProgressBar(0, len(mdb_symbols.index), prefix = 'Progress:', suffix = '', length = 50) #Loop through symbols for index, mdb_symbol in mdb_symbols.iterrows(): #Insert if no mdb company data exists if mdb_companies.empty: #Get company data from IEX iex_company = iex.get_company( mdb_symbol["symbol"] ) #Update progress bar printProgressBar(index+1, len(mdb_symbols.index), prefix = 'Progress:', suffix = "Inserting company for " + mdb_symbol["symbol"] + " ", length = 50) self.db.iex_company.insert_many( iex_company.to_dict('records') ) continue #Skip company if already in MongoDB if not mdb_companies[ mdb_companies['symbol'] == mdb_symbol['symbol'] ].empty: #Update progress bar printProgressBar(index+1, len(mdb_symbols.index), prefix = 'Progress:', suffix = "No new data for " + mdb_symbol["symbol"] + " ", length = 50) continue #Get company data from IEX iex_company = iex.get_company( mdb_symbol["symbol"] ) #Insert if company data exists if not iex_company.empty: #Update progress bar printProgressBar(index+1, len(mdb_symbols.index), prefix = 'Progress:', suffix = "Inserting company for " + mdb_symbol["symbol"] + " ", length = 50) self.db.iex_company.insert_many( iex_company.to_dict('records') ) else: #Update progress bar printProgressBar(index+1, len(mdb_symbols.index), prefix = 'Progress:', suffix = "No data for " + mdb_symbol["symbol"] + " ", length = 50)
def insert_financials(self): print( "Insert new financials" ) mdb_query = Query() iex = Iex() #Get all symbols in MongoDB mdb_symbols = mdb_query.get_active_companies() #Get current date currDate = datetime.datetime.now().strftime("%Y-%m-%d") #Get latest financials in MongoDB for each symbol mdb_financials = mdb_query.get_financials( mdb_symbols.tolist(), currDate, "latest" ) #Initial call to print 0% progress printProgressBar(0, len(mdb_symbols.index), prefix = 'Progress:', suffix = '', length = 50) #Loop through symbols for index, mdb_symbol in mdb_symbols.iteritems(): #Get financials from IEX iex_financials = iex.get_financials( mdb_symbol ) #Get matching financial in MongoDB mdb_financial = mdb_financials[ mdb_financials['symbol'] == mdb_symbol ] #Select financials more recent than MongoDB if not mdb_financial.empty and not iex_financials.empty: mask = iex_financials['reportDate'] > mdb_financial['reportDate'].iloc[0] iex_financials = iex_financials.loc[mask] #Insert if financials exist if not iex_financials.empty: #Update progress bar printProgressBar(index+1, len(mdb_symbols.index), prefix = 'Progress:', suffix = "Inserting financials for " + mdb_symbol + " ", length = 50) self.db.iex_financials.insert_many( iex_financials.to_dict('records') ) else: #Update progress bar printProgressBar(index+1, len(mdb_symbols.index), prefix = 'Progress:', suffix = "No new data for " + mdb_symbol + " ", length = 50)
def insert_dividends(self): print( "Insert new dividends" ) mdb_query = Query() iex = Iex() #Get all symbols in MongoDB #mdb_symbols = mdb_query.get_active_companies() #Get current date currDate = datetime.datetime.now().strftime("%Y-%m-%d") #Get existing portfolios portfolios = mdb_query.get_portfolios(currDate)[["portfolioID","inceptionDate"]] #Loop through portfolios mdb_symbols = pandas.DataFrame() for portfolio_index, portfolio_row in portfolios.iterrows(): #Get portfolioID and inceptionDate portfolio = portfolio_row['portfolioID'] inceptionDate = portfolio_row['inceptionDate'] #Default to calculating holdings from inception date = inceptionDate #Get current holdings table holdings = mdb_query.get_holdings(portfolio, inceptionDate, "after") #print( holdings ) mdb_symbols = mdb_symbols.append(holdings, ignore_index=True, sort=False) #print( mdb_symbols ) mdb_symbols = mdb_symbols[mdb_symbols['symbol'] != 'USD'] mdb_symbols = mdb_symbols['symbol'].unique().tolist() #print( mdb_symbols ) #quit() #Get latest dividend in MongoDB for each symbol mdb_dividends = mdb_query.get_dividends( mdb_symbols, currDate, "latest" ) #Initial call to print 0% progress printProgressBar(0, len(mdb_symbols), prefix = 'Progress:', suffix = '', length = 50) #flag = False #Loop through symbols for index, mdb_symbol in enumerate(mdb_symbols): #if mdb_symbol["symbol"] == "ZZZZZZZZZ": # flag = True #if not flag: # continue #Get 1m of dividends from IEX iex_dividends = iex.get_dividends( mdb_symbol, ref_range='1m' ) #Get matching dividend in MongoDB mdb_dividend = mdb_dividends[ mdb_dividends['symbol'] == mdb_symbol ] #Select dividends more recent than MongoDB if not mdb_dividend.empty and not iex_dividends.empty: mask = iex_dividends['exDate'] > mdb_dividend['exDate'].iloc[0] iex_dividends = iex_dividends.loc[mask] #Insert if dividends exist if not iex_dividends.empty: #Update progress bar printProgressBar(index+1, len(mdb_symbols), prefix = 'Progress:', suffix = "Inserting dividend for " + mdb_symbol + " ", length = 50) #print( iex_dividends ) self.db.iex_dividends.insert_many( iex_dividends.to_dict('records') ) else: #Update progress bar printProgressBar(index+1, len(mdb_symbols), prefix = 'Progress:', suffix = "No new data for " + mdb_symbol + " ", length = 50)
def pf_sell_all(self, ref_date="1990-01-01"): """ Sell all holdings on a particular date @params: ref_date - Optional : date YYYY-MM-DD (Str) """ mdb_query = Query() #Get link to MongoDB #Get date for stock prices dayBeforeDate = (pandas.Timestamp(ref_date) + pandas.DateOffset(days=-1)).strftime('%Y-%m-%d') #Get existing portfolios portfolios = mdb_query.get_portfolios(ref_date)[[ "portfolioID", "inceptionDate" ]] #Loop through portfolios for portfolio_index, portfolio_row in portfolios.iterrows(): #Get portfolioID and inceptionDate portfolio = portfolio_row['portfolioID'] #Get holdings table holdings = mdb_query.get_holdings(portfolio, ref_date, "on") #Get latest prices from dayBeforeDate prices = mdb_query.get_chart(holdings['symbol'].tolist(), dayBeforeDate, 'latest') #Merge prices with holdings holdings = pandas.merge(holdings, prices, how='left', left_on=['symbol'], right_on=['symbol'], sort=False) #Remove USD holdings = holdings[holdings['symbol'] != 'USD'] #print( holdings ) #Build transaction tables which sell the stocks transaction_tables = [] #Loop through stocks to be sold for index, stock in holdings.iterrows(): #Transaction tables: type = sell, date = ref_date, price = close, volume = endOfDayQuantity transaction_table = { "portfolioID": portfolio_row.portfolioID, "symbol": stock.symbol, "type": "sell", "date": ref_date, "price": stock.close, "volume": stock.endOfDayQuantity, "commission": 0.0 } transaction_tables.append(transaction_table) insert_pf_transactions = True if insert_pf_transactions: #print( transaction_tables ) self.db.pf_transactions.insert_many(transaction_tables)
def insert_stats(self): print( "Insert new stats" ) mdb_query = Query() iex = Iex() #Get all symbols in MongoDB mdb_symbols_full = mdb_query.get_active_companies() #mdb_symbols_full = mdb_symbols_full[mdb_symbols_full == 'A'] #Get current date currDate = datetime.datetime.now().strftime("%Y-%m-%d") #Initial call to print 0% progress printProgressBar(0, len(mdb_symbols_full.index), prefix = 'Progress:', suffix = '', length = 50) idx_min = 0 query_num = 100 #flag = True while idx_min < len(mdb_symbols_full.index): #if idx_min > 1: # break idx_max = idx_min + query_num if idx_max > len(mdb_symbols_full.index): idx_max = len(mdb_symbols_full.index) mdb_symbols = mdb_symbols_full.iloc[ idx_min:idx_max ] mdb_symbols.reset_index(drop=True, inplace=True) #Get latest price in MongoDB for each symbol up to 50 days ago mdb_stats = mdb_query.get_stats( mdb_symbols.tolist(), currDate, "latest" ) #Loop through symbols for index, mdb_symbol in mdb_symbols.iteritems(): #Get stat from IEX #print( mdb_symbol ) #print( mdb_symbol["symbol"] ) #if mdb_symbol == "BAH": # flag = False #if flag: # continue iex_stat = iex.get_stats( mdb_symbol ) #Get matching stat in MongoDB if not mdb_stats.empty: mdb_stat = mdb_stats[ mdb_stats['symbol'] == mdb_symbol ] else: mdb_stat = mdb_stats #Select stats more recent than MongoDB if not iex_stat.empty and not mdb_stat.empty: mask = iex_stat['date'] > mdb_stat['date'].iloc[0] iex_stat = iex_stat.loc[mask] #Insert if stats exist #print( iex_stat ) if not iex_stat.empty: #Update progress bar printProgressBar(idx_min+index+1, len(mdb_symbols_full.index), prefix = 'Progress:', suffix = "Inserting stat for " + mdb_symbol + " ", length = 50) self.db.iex_stats.insert_many( iex_stat.to_dict('records') ) else: #Update progress bar printProgressBar(idx_min+index+1, len(mdb_symbols_full.index), prefix = 'Progress:', suffix = "No new data for " + mdb_symbol + " ", length = 50) idx_min = idx_min + query_num
def insert_quotes(self): print( "Insert new quotes" ) mdb_query = Query() iex = Iex() #Get all symbols in MongoDB mdb_symbols_full = mdb_query.get_active_companies() #Get current date currDate = datetime.datetime.now().strftime("%Y-%m-%d") #Initial call to print 0% progress printProgressBar(0, len(mdb_symbols_full.index), prefix = 'Progress:', suffix = '', length = 50) idx_min = 0 query_num = 100 while idx_min < len(mdb_symbols_full.index): idx_max = idx_min + query_num if idx_max > len(mdb_symbols_full.index): idx_max = len(mdb_symbols_full.index) mdb_symbols = mdb_symbols_full.iloc[ idx_min:idx_max ] mdb_symbols.reset_index(drop=True, inplace=True) #Get latest price in MongoDB for each symbol up to 50 days ago mdb_quotes = mdb_query.get_quotes( mdb_symbols.tolist(), currDate, "latest" ) #Loop through symbols iex_quotes = pandas.DataFrame() for index, mdb_symbol in mdb_symbols.iteritems(): #Get matching quote in MongoDB if not mdb_quotes.empty: mdb_quote = mdb_quotes[ mdb_quotes['symbol'] == mdb_symbol ] else: mdb_quote = mdb_quotes #continue if already up to date if not mdb_quote.empty and (mdb_quote['date'].iloc[0] == currDate): #Update progress bar printProgressBar(idx_min+index+1, len(mdb_symbols_full.index), prefix = 'Progress:', suffix = "No new data for " + mdb_symbol + " ", length = 50) continue #Get quote from IEX iex_quote = iex.get_quote( mdb_symbol ) #Select quotes more recent than MongoDB if not iex_quote.empty and not mdb_quote.empty: mask = iex_quote['date'] > mdb_quote['date'].iloc[0] iex_quote = iex_quote.loc[mask] #Insert if quotes exist if not iex_quote.empty: #Update progress bar printProgressBar(idx_min+index+1, len(mdb_symbols_full.index), prefix = 'Progress:', suffix = "Inserting quote for " + mdb_symbol + " ", length = 50) #Append quote iex_quotes = iex_quotes.append(iex_quote, ignore_index=True, sort=False) else: #Update progress bar printProgressBar(idx_min+index+1, len(mdb_symbols_full.index), prefix = 'Progress:', suffix = "No new data for " + mdb_symbol + " ", length = 50) #Bulk insert quotes if not iex_quotes.empty: self.db.iex_quotes.insert_many( iex_quotes.to_dict('records') ) idx_min = idx_min + query_num
def insert_balancesheets(self): print( "Insert new balance sheets" ) mdb_query = Query() iex = Iex() #Get all symbols in MongoDB mdb_symbols = mdb_query.get_active_companies() #Get current date currDate = datetime.datetime.now().strftime("%Y-%m-%d") threeMonthsAgo = (pandas.Timestamp(currDate) + pandas.DateOffset(days=-120)).strftime('%Y-%m-%d') #Get latest balancesheets in MongoDB for each symbol mdb_balancesheets = mdb_query.get_balancesheets( mdb_symbols.tolist(), currDate, "latest" ) #Initial call to print 0% progress printProgressBar(0, len(mdb_symbols.index), prefix = 'Progress:', suffix = '', length = 50) #flag = False #Loop through symbols for index, mdb_symbol in mdb_symbols.iteritems(): #if mdb_symbol == 'YETI': # flag = True #if not flag: # continue #Get matching balancesheet in MongoDB if not mdb_balancesheets.empty: mdb_balancesheet = mdb_balancesheets[ mdb_balancesheets['symbol'] == mdb_symbol ] else: mdb_balancesheet = mdb_balancesheets #Skip is less than 3 months since most recent if not mdb_balancesheet.empty: mask = mdb_balancesheet['reportDate'] > threeMonthsAgo mdb_recent_balancesheet = mdb_balancesheet.loc[mask] if not mdb_recent_balancesheet.empty: #Update progress bar printProgressBar(index+1, len(mdb_symbols.index), prefix = 'Progress:', suffix = "Data too recent for " + mdb_symbol + " ", length = 50) continue #Get balancesheets from IEX iex_balancesheets = iex.get_balancesheets( mdb_symbol ) #Select balancesheets more recent than MongoDB if not mdb_balancesheet.empty and not iex_balancesheets.empty: mdb_balancesheet = mdb_balancesheet.sort_values(by='reportDate', ascending=False, axis='index') mask = iex_balancesheets['reportDate'] > mdb_balancesheet['reportDate'].iloc[0] iex_balancesheets = iex_balancesheets.loc[mask] #Insert if balancesheets exist if not iex_balancesheets.empty: #Update progress bar printProgressBar(index+1, len(mdb_symbols.index), prefix = 'Progress:', suffix = "Inserting balancesheets for " + mdb_symbol + " ", length = 50) self.db.iex_balancesheets.insert_many( iex_balancesheets.to_dict('records') ) else: #Update progress bar printProgressBar(index+1, len(mdb_symbols.index), prefix = 'Progress:', suffix = "No new data for " + mdb_symbol + " ", length = 50)
def pf_buy_all(self, ref_date="1990-01-01"): """ Buy all stocks in the top stocks list on a particular date @params: ref_date - Optional : date YYYY-MM-DD (Str) """ mdb_query = Query() #Get link to MongoDB #Get date for stock prices dayBeforeDate = (pandas.Timestamp(ref_date) + pandas.DateOffset(days=-1)).strftime('%Y-%m-%d') #Get ranked stock list for current date top_stocks_full = calculate_top_stocks(dayBeforeDate) #Get existing portfolios portfolios = mdb_query.get_portfolios(ref_date) #Loop through portfolios for portfolio_index, portfolio_row in portfolios.iterrows(): #Get holdings table holdings = mdb_query.get_holdings(portfolio_row.portfolioID, ref_date, "on") holdings_usd = holdings[holdings['symbol'] == 'USD'] holdings_usd.reset_index(drop=True, inplace=True) top_stocks = top_stocks_full[ top_stocks_full['marketCap'] > portfolio_row.mcapMinimum].head( portfolio_row.nStocks).reset_index(drop=True) transaction_tables = [] #Loop through stocks to be purchased for index, stock in top_stocks.iterrows(): #Calculate volume of stock to be purchased #(Current USD holding/No. of stocks)/Price rounded to integer volume = holdings_usd.endOfDayQuantity.iloc[0] volume = volume / portfolio_row.nStocks volume = volume / stock.close volume = round(volume) transaction_table = { "portfolioID": portfolio_row.portfolioID, "symbol": stock.symbol, "type": "buy", "date": ref_date, "price": stock.close, "volume": volume, "commission": 0.0 } transaction_tables.append(transaction_table) insert_pf_transactions = True if insert_pf_transactions: self.db.pf_transactions.insert_many(transaction_tables)
def insert_symbols(self): print( "Insert new symbols" ) mdb_query = Query() iex = Iex() #Get all common stocks from IEX symbols = iex.get_symbols(ref_type="cs") #Get SPY (S&P500 exchange traded index) from IEX symbols_spy = iex.get_symbols(ref_symbol="SPY") #Reset indices (probably not necessary) symbols.reset_index(drop=True, inplace=True) symbols_spy.reset_index(drop=True, inplace=True) #Append SPY to stocks symbols = symbols.append(symbols_spy, ignore_index=True, sort=False) symbols.reset_index(drop=True, inplace=True) #Get symbols already in MongoDB mdb_symbols = mdb_query.get_symbols() #Initial call to print 0% progress printProgressBar(0, len(symbols.index), prefix = 'Progress:', suffix = '', length = 50) #Loop through symbols for index, symbol in symbols.iterrows(): #Exclude forbidden characters forbidden = ["#"] if any( x in symbol["symbol"] for x in forbidden): #Update progress bar printProgressBar(index+1, len(symbols.index), prefix = 'Progress:', suffix = "Symbol contains forbidden character: " + symbol["symbol"] + " ", length = 50) continue #If MongoDB empty insert symbol if mdb_symbols.empty: #Update progress bar printProgressBar(index+1, len(symbols.index), prefix = 'Progress:', suffix = "Inserting new symbol: " + symbol["symbol"] + " ", length = 50) self.db.iex_symbols.insert_one( symbol.to_dict() ) else: #Is symbol already in MongoDB mask = (mdb_symbols['iexId'] == symbol['iexId']) & (mdb_symbols['isEnabled'] == symbol['isEnabled']) & (mdb_symbols['name'] == symbol['name']) & (mdb_symbols['type'] == symbol['type']) #Insert if not in MongoDB if mdb_symbols.loc[mask].empty: #Update progress bar printProgressBar(index+1, len(symbols.index), prefix = 'Progress:', suffix = "Inserting new symbol: " + symbol["symbol"] + " ", length = 50) self.db.iex_symbols.insert_one( symbol.to_dict() ) else: #Update progress bar printProgressBar(index+1, len(symbols.index), prefix = 'Progress:', suffix = "Symbol " + symbol["symbol"] + " already exists ", length = 50)
def insert_stock_list(self): print( "Insert ranked stock list" ) mdb_query = Query() #Get current date currDate = datetime.datetime.now().strftime("%Y-%m-%d") #Delete stock lists older than one week #No reason to keep them weekBeforeDate = (pandas.Timestamp(currDate) + pandas.DateOffset(days=-7)).strftime('%Y-%m-%d') query = { "date": { "$lt": weekBeforeDate } } self.db.pf_stock_list.delete_many( query ) #Get ranked stock list for current date mdb_algo = Algo() merged = mdb_algo.calculate_top_stocks(currDate) #Skip if latest date already in database latestDate = merged.sort_values(by="date", ascending=False, axis="index") latestDate = latestDate.iloc[0]["date"] latestStockList = mdb_query.get_stock_list(latestDate, "on") if latestStockList.empty and not merged.empty: print( "Inserting stock list" ) self.db.stock_list.insert_many( merged.to_dict('records') )
def delete_duplicate_balancesheets(self, ref_date = "1990-01-01", when = "on"): """ Delete duplicates prices in MongoDB @params: ref_date - Optional : date YYYY-MM-DD (Str) when - Optional : on, latest (Str) """ mdb_query = Query() mdb_symbols = mdb_query.get_symbols() #mdb_symbols = mdb_symbols.iloc[ 999: , : ] mdb_symbols.reset_index(drop=True, inplace=True) #mdb_symbols = ["A"] #print( mdb_symbols ) printProgressBar(0, len(mdb_symbols.index), prefix = 'Progress:', suffix = '', length = 50) for index, symbol in mdb_symbols.iterrows(): #if index > 10: # break #print( symbol ) query = { "symbol": { "$in": [symbol["symbol"]] }, "reportDate": { "$gte": ref_date } } results = self.db.iex_balancesheets.find( query ).sort("reportDate", DESCENDING) balancesheets = pandas.DataFrame() for doc in results: balancesheets = balancesheets.append( pandas.DataFrame.from_dict(doc, orient='index').T, ignore_index=True, sort=False ) duplicates = balancesheets[balancesheets.duplicated(['reportDate'])] print( duplicates ) # Remove all duplicates in one go #if not duplicates.empty: # self.db.iex_charts.delete_many({"_id":{"$in":duplicates['_id'].tolist()}}) # Remove duplicates if they exist if not duplicates.empty: #Update progress bar printProgressBar(index+1, len(mdb_symbols.index), prefix = 'Progress:', suffix = "Deleting duplicates for " + symbol["symbol"] + " ", length = 50) #self.db.iex_balancesheets.delete_many({"_id":{"$in":duplicates['_id'].tolist()}}) else: #Update progress bar printProgressBar(index+1, len(mdb_symbols.index), prefix = 'Progress:', suffix = "No duplicates for " + symbol["symbol"] + " ", length = 50)
def calculate_top_stocks_old(self, ref_date): """ Calculate ranked list of stocks @params: ref_date - Required : date YYYY-MM-DD (Str) """ mdb_query = Query() #Get ranked stock list for given date symbols = mdb_query.get_active_companies().tolist() print("Query earnings") earnings = mdb_query.get_earnings(symbols, ref_date, "latest", "EPSReportDate") earnings = earnings[[ "EPSReportDate", "actualEPS", "fiscalEndDate", "fiscalPeriod", "symbol" ]] #print( earnings ) #Get financials within 6 months print("Query financials") sixMonthsBeforeDate = ( pandas.Timestamp(ref_date) + pandas.DateOffset(months=-6)).strftime('%Y-%m-%d') financials = mdb_query.get_financials(symbols, sixMonthsBeforeDate, "after") financials = financials[ financials['reportDate'] <= earnings['fiscalEndDate'].max()] financials = financials[[ "symbol", "reportDate", "netIncome", "shareholderEquity" ]] #print( financials ) #Get prices for inception date print("Query prices") idx_min = 0 query_num = 100 prices = pandas.DataFrame() while idx_min < len(symbols): idx_max = idx_min + query_num if idx_max > len(symbols): idx_max = len(symbols) symbols_split = symbols[idx_min:idx_max] prices_split = mdb_query.get_chart(symbols_split, ref_date, "latest") prices = prices.append(prices_split, ignore_index=True, sort=False) idx_min = idx_min + query_num prices.reset_index(drop=True, inplace=True) #print( prices ) #Get company data company = mdb_query.get_company(symbols) company = company[['symbol', 'companyName', 'industry', 'sector']] #Merge dataframes together print("Merge dataframes") merged = pandas.merge(earnings, financials, how='inner', left_on=["symbol", "fiscalEndDate"], right_on=["symbol", "reportDate"], sort=False) merged = pandas.merge(merged, prices, how='inner', on="symbol", sort=False) merged = pandas.merge(merged, company, how='inner', on='symbol', sort=False) #Remove any rows with missing values merged = merged.dropna( axis=0, subset=["netIncome", "actualEPS", "close", "shareholderEquity"]) #Calculate marketCap value # price * netIncome / EPS = price * sharesOutstanding = mcap # Actually not 100% accurate, should be netIncome - preferred dividend # Doesn't perfectly match IEX value or google - probably good enough merged["sharesOutstanding"] = merged.netIncome / merged.actualEPS merged["marketCap"] = merged.sharesOutstanding * merged.close #Calculate PE, ROE, and ratio merged["peRatio"] = merged.close / merged.actualEPS merged["returnOnEquity"] = merged.netIncome / merged.shareholderEquity merged["peROERatio"] = merged.peRatio / merged.returnOnEquity #Count number of stocks above mcap value # A useful indicator of how universe compares to S&P500 print("Universe before cuts...") print("mcap > 50M: " + str(merged[merged["marketCap"] > 50000000].count()["marketCap"])) print( "mcap > 100M: " + str(merged[merged["marketCap"] > 100000000].count()["marketCap"])) print( "mcap > 500M: " + str(merged[merged["marketCap"] > 500000000].count()["marketCap"])) print( "mcap > 1B: " + str(merged[merged["marketCap"] > 1000000000].count()["marketCap"])) print( "mcap > 5B: " + str(merged[merged["marketCap"] > 5000000000].count()["marketCap"])) print("mcap > 10B: " + str(merged[ merged["marketCap"] > 10000000000].count()["marketCap"])) print("mcap > 50B: " + str(merged[ merged["marketCap"] > 50000000000].count()["marketCap"])) print("mcap > 100B: " + str(merged[ merged["marketCap"] > 100000000000].count()["marketCap"])) #Rank stocks #Cut negative PE and ROE merged = merged[(merged.peRatio > 0) & (merged.returnOnEquity > 0)] #Remove invalid stock symbols, and different voting options # Do the different voting options affect marketCap? #forbidden = [ "#", ".", "-" ] #merged = merged[ merged.apply( lambda x: not any( s in x['symbol'] for s in forbidden ), axis=1 ) ] #Remove American Depositary Shares #ads_str = 'American Depositary Shares' #merged = merged[ merged.apply( lambda x: ads_str not in x['companyName'], axis=1 ) ] #Remove industries that do not compare well # e.g. Companies that have investments as assets #forbidden_industry = ['Brokers & Exchanges','REITs','Asset Management','Banks'] #merged = merged[ ~merged.industry.isin( forbidden_industry ) ] #Count number of stocks after cuts print("Universe after cuts...") print("mcap > 50M: " + str(merged[merged["marketCap"] > 50000000].count()["marketCap"])) print( "mcap > 100M: " + str(merged[merged["marketCap"] > 100000000].count()["marketCap"])) print( "mcap > 500M: " + str(merged[merged["marketCap"] > 500000000].count()["marketCap"])) print( "mcap > 1B: " + str(merged[merged["marketCap"] > 1000000000].count()["marketCap"])) print( "mcap > 5B: " + str(merged[merged["marketCap"] > 5000000000].count()["marketCap"])) print("mcap > 10B: " + str(merged[ merged["marketCap"] > 10000000000].count()["marketCap"])) print("mcap > 50B: " + str(merged[ merged["marketCap"] > 50000000000].count()["marketCap"])) print("mcap > 100B: " + str(merged[ merged["marketCap"] > 100000000000].count()["marketCap"])) #Order by peROERatio merged = merged.sort_values(by="peROERatio", ascending=True, axis="index") return merged
def insert_performance(self): print( "Insert portfolio performance tables" ) mdb_query = Query() #Get current date currDate = datetime.datetime.now().strftime("%Y-%m-%d") #currDate = "2019-12-27" #Get existing portfolios portfolios = mdb_query.get_portfolios(currDate)[["portfolioID","inceptionDate"]] #Loop through portfolios for portfolio_index, portfolio_row in portfolios.iterrows(): #Get portfolioID and inceptionDate portfolio = portfolio_row.portfolioID inceptionDate = portfolio_row.inceptionDate print( 'Inserting performance tables for ' + portfolio ) #Get holdings tables from inception holdings = mdb_query.get_holdings(portfolio, inceptionDate, "after").sort_values(by="lastUpdated", ascending=False, axis="index") #print( holdings ) #Default to calculating performance from inception date = inceptionDate #Get list of symbols in holdings table symbols = holdings["symbol"].unique().tolist() #Get existing performance table for portfolio sorted by date performance = mdb_query.get_performance([portfolio], inceptionDate) if not performance.empty: performance.sort_values(by="date", ascending=False, axis="index", inplace=True) #Get close value from last date and increment the date perf_tables = [] prevCloseValue = 0 adjPrevCloseValue = 0 if not performance.empty: date = performance.iloc[0]["date"] date = (pandas.Timestamp(date) + pandas.DateOffset(days=1)).strftime('%Y-%m-%d') adjPrevCloseValue = performance.iloc[0]["closeValue"] prevCloseValue = performance.iloc[0]["closeValue"] #print( date ) #Get prices for symbols in portfolio after date prices = mdb_query.get_quotes(symbols, date, "after") #print( prices ) #If there are no prices then can't calculate performance if prices.empty: print( "No prices!" ) continue #Get any transactions after date transactions = mdb_query.get_transactions(portfolio, date, "after") #print( transactions ) #Loop through dates while date <= currDate: #print( date ) #Initialize portfolio close of day values closeValue = 0 adjCloseValue = 0 #Get latest holding for each symbol on date holdings_date = holdings[holdings.lastUpdated <= date] holdings_date = holdings_date[holdings_date.groupby(['symbol'], sort=False)['lastUpdated'].transform(max) == holdings_date['lastUpdated']] #Merge with stock prices holdings_date = pandas.merge(holdings_date,prices[prices.date == date],how='left',left_on=["symbol"],right_on=["symbol"],sort=False) #Remove stocks no longer held holdings_date = holdings_date[ holdings_date['endOfDayQuantity'] != 0 ] #print( holdings_date ) #Skip if only USD in holdings if holdings_date[holdings_date.symbol != "USD"].empty: date = (pandas.Timestamp(date) + pandas.DateOffset(days=1)).strftime('%Y-%m-%d') continue #Skip any day where there aren't prices for all stocks if holdings_date[holdings_date.symbol != "USD"]['close'].isnull().values.any(): date = (pandas.Timestamp(date) + pandas.DateOffset(days=1)).strftime('%Y-%m-%d') continue #Calculate portfolio close of day value from close of day stock prices if not holdings_date.empty: for index, holding in holdings_date.iterrows(): if holding.symbol == "USD": closeValue = closeValue + (holding.endOfDayQuantity) else: closeValue = closeValue + (holding.endOfDayQuantity * holding.close) #Get any deposits or withdrawals deposits = pandas.DataFrame() withdrawals = pandas.DataFrame() if not transactions.empty: deposits = transactions[(transactions.date == date) & (transactions.type == "deposit")] withdrawals = transactions[(transactions.date == date) & (transactions.type == "withdrawal")] #print( deposits ) #print( withdrawals ) #Adjust close or previous close for withdrawals/deposits adjPrevCloseValue = prevCloseValue adjCloseValue = closeValue if not deposits.empty: for index, deposit in deposits.iterrows(): adjPrevCloseValue = adjPrevCloseValue + (deposit.volume * deposit.price) if not withdrawals.empty: for index, withdrawal in withdrawals.iterrows(): adjCloseValue = adjCloseValue + (withdrawal.volume * withdrawal.price) #If portfolio has no holdings or deposits yet then continue if adjPrevCloseValue == 0: date = (pandas.Timestamp(date) + pandas.DateOffset(days=1)).strftime('%Y-%m-%d') continue #Build portfolio performance table perf_table = { "portfolioID": portfolio, "date": date, "prevCloseValue": prevCloseValue, "closeValue": closeValue, "adjPrevCloseValue": adjPrevCloseValue, "adjCloseValue": adjCloseValue, "percentReturn": 100.*((adjCloseValue-adjPrevCloseValue)/adjPrevCloseValue) } perf_tables.append( perf_table ) #Reset previous close values prevCloseValue = closeValue adjPrevCloseValue = closeValue #Increment date date = (pandas.Timestamp(date) + pandas.DateOffset(days=1)).strftime('%Y-%m-%d') #Insert performance table insert_pf_performance = True #print( perf_tables ) if insert_pf_performance and len(perf_tables)>0: #print( perf_tables ) self.db.pf_performance.insert_many( perf_tables )
def insert_holdings(self): print( "Calculate portfolio holdings" ) mdb_query = Query() #Get current date currDate = datetime.datetime.now().strftime("%Y-%m-%d") #currDate = "2019-12-30" #Get existing portfolios portfolios = mdb_query.get_portfolios(currDate)[["portfolioID","inceptionDate"]] #Loop through portfolios for portfolio_index, portfolio_row in portfolios.iterrows(): #Get portfolioID and inceptionDate portfolio = portfolio_row['portfolioID'] inceptionDate = portfolio_row['inceptionDate'] print( 'Calculating holdings for ', portfolio ) #Default to calculating holdings from inception date = inceptionDate #Get current holdings table holdings = mdb_query.get_holdings(portfolio, currDate, "on") #print( holdings ) #If holdings exist then calculate holdings from next date if not holdings.empty: date = holdings['lastUpdated'].max() date = (pandas.Timestamp(date) + pandas.DateOffset(days=1)).strftime('%Y-%m-%d') #If no existing holdings create 0 dollar entry to create table if holdings.empty: holding_dict = { "portfolioID": portfolio, "symbol": "USD", "endOfDayQuantity": 0.0, "lastUpdated": inceptionDate } holdings = pandas.DataFrame.from_dict(holding_dict, orient='index').T #Get all new transactions transactions = mdb_query.get_transactions(portfolio, date, "after") #Loop through dates and update holdings table while date <= currDate: #Insert dividends to transactions database and update transactions table #Dividends will be ahead of time so will be ready to be added on the correct date #Get dividends for all holdings except USD div_holdings = holdings[ ~holdings["symbol"].isin(["USD"]) ] dividends = mdb_query.get_dividends(div_holdings["symbol"].unique().tolist(), date, "on") #Get rid of any non-USD dividends if not dividends.empty: dividends = dividends[ dividends['currency'] == 'USD' ] if not dividends.empty: #Get dividends with exDate = date div_date = dividends[ dividends.exDate == date ] #Loop through dividends for d_index, dividend in div_date.iterrows(): #print( dividend ) #Is dividend already in transactions? #If not insert it #Skip dividends with bad data entries if not dividend.amount: print( "Skipping dividend as amount is empty!" ) continue if dividend.paymentDate == None: print( "Skipping dividend as paymentDate is None!" ) continue transactions_paymentDate = transactions if not transactions.empty: transactions_paymentDate = transactions[ (transactions.date == dividend.paymentDate) & (transactions.symbol == dividend.symbol) & (transactions.type == 'dividend') ] holding_quantity = holdings[holdings["symbol"] == dividend.symbol]["endOfDayQuantity"] holding_quantity.reset_index(drop=True, inplace=True) if transactions_paymentDate.empty and (holding_quantity != 0).any(): transaction_table = { "portfolioID": portfolio, "symbol": dividend.symbol, "type": "dividend", "date": dividend.paymentDate, "price": float(dividend.amount), "volume": holding_quantity.iloc[0], "commission": 0.0 } transactions = transactions.append( pandas.DataFrame.from_dict(transaction_table, orient='index').T, ignore_index=True, sort=False ) insert_pf_transactions = True if insert_pf_transactions: print( "Inserting dividend: " + date ) print( transaction_table ) self.db.pf_transactions.insert_one( transaction_table ) #Now attend to transactions on date if transactions.empty: #Increment date date = (pandas.Timestamp(date) + pandas.DateOffset(days=1)).strftime('%Y-%m-%d') continue transactions_date = transactions[transactions.date == date] #Loop through transactions for t_index, transaction in transactions_date.iterrows(): print( "Inserting transaction:" ) print( transaction ) #Get any existing holding for the transaction symbol if transaction.type == "dividend": holding = holdings[holdings.symbol == "USD"] else: holding = holdings[holdings.symbol == transaction.symbol] holding.reset_index(drop=True, inplace=True) #print( holding ) #Remove that holding from holdings table if not holding.empty: holdings = holdings[ ~holdings["symbol"].isin([holding['symbol'].iloc[0]]) ] #print( holdings ) #Add any dividends to the holdings table if transaction.type == "dividend": holding["endOfDayQuantity"] = holding["endOfDayQuantity"] + (transaction.price * transaction.volume) holding["lastUpdated"] = date holdings = holdings.append( holding, ignore_index=True, sort=False ) #Add any deposits to the holdings table if transaction.type == "deposit": holding["endOfDayQuantity"] = holding["endOfDayQuantity"] + (transaction.price * transaction.volume) holding["lastUpdated"] = date holdings = holdings.append( holding, ignore_index=True, sort=False ) #Add any stocks purchased to the holdings table if transaction.type == "buy": holding_dict = {} if not holding.empty: holding_dict = { "portfolioID": transaction.portfolioID, "symbol": transaction.symbol, "endOfDayQuantity": holding["endOfDayQuantity"].iloc[0] + transaction.volume, "lastUpdated": date } else: holding_dict = { "portfolioID": transaction.portfolioID, "symbol": transaction.symbol, "endOfDayQuantity": transaction.volume, "lastUpdated": date } #print( holding_dict ) holdings = holdings.append( pandas.DataFrame.from_dict(holding_dict, orient='index').T, ignore_index=True, sort=False ) #Adjust cash entry accordingly cash = holdings[holdings.symbol == "USD"] holdings = holdings[ ~holdings["symbol"].isin(["USD"]) ] cash["endOfDayQuantity"] = cash["endOfDayQuantity"] - (transaction.price * transaction.volume) cash["lastUpdated"] = date holdings = holdings.append( cash, ignore_index=True, sort=False ) #print( holdings ) #Remove any stocks sold from the holdings table if transaction.type == "sell": if not holding.empty: holding["endOfDayQuantity"] = holding["endOfDayQuantity"] - transaction.volume holding["lastUpdated"] = date holdings = holdings.append( holding, ignore_index=True, sort=False ) else: raise Exception("Trying to sell unowned stock!") #Adjust cash entry accordingly cash = holdings[holdings.symbol == "USD"] holdings = holdings[ ~holdings["symbol"].isin(["USD"]) ] cash["endOfDayQuantity"] = cash["endOfDayQuantity"] + (transaction.price * transaction.volume) cash["lastUpdated"] = date holdings = holdings.append( cash, ignore_index=True, sort=False ) #print( holdings ) #Upload new holdings entries to MongoDB holdings_date = holdings[holdings.lastUpdated == date] if not holdings_date.empty: print( "New holdings:" ) print( holdings_date ) insert_holdings_tx = True if insert_holdings_tx: print( "Inserting holdings for " + portfolio ) self.db.pf_holdings.insert_many( holdings_date.to_dict('records') ) #Increment date date = (pandas.Timestamp(date) + pandas.DateOffset(days=1)).strftime('%Y-%m-%d')
def insert_prices(self): print( "Insert new charts" ) mdb_query = Query() iex = Iex() #Get all symbols in MongoDB mdb_symbols_full = mdb_query.get_active_companies() mdb_symbols_full = mdb_symbols_full[mdb_symbols_full == 'SPY'] #Get current date currDate = datetime.datetime.now().strftime("%Y-%m-%d") #Initial call to print 0% progress printProgressBar(0, len(mdb_symbols_full.index), prefix = 'Progress:', suffix = '', length = 50) idx_min = 0 query_num = 100 #flag = False while idx_min < len(mdb_symbols_full.index): #if idx_min > 1: # break idx_max = idx_min + query_num if idx_max > len(mdb_symbols_full.index): idx_max = len(mdb_symbols_full.index) mdb_symbols = mdb_symbols_full.iloc[ idx_min:idx_max ] mdb_symbols.reset_index(drop=True, inplace=True) #Get latest price in MongoDB for each symbol up to 50 days ago mdb_charts = mdb_query.get_chart( mdb_symbols.tolist(), currDate, "latest" ) #print( mdb_charts ) #break #Loop through symbols for index, mdb_symbol in mdb_symbols.iteritems(): #Get matching chart in MongoDB mdb_chart = mdb_charts[ mdb_charts['symbol'] == mdb_symbol ] #Get first date to insert date = mdb_chart.date.iloc[0] date = (pandas.Timestamp(date) + pandas.DateOffset(days=1)).strftime('%Y-%m-%d') iex_chart = pandas.DataFrame() while date <= currDate: #Get date in correct format for IEX query iex_date = (pandas.Timestamp(date)).strftime('%Y%m%d') #Get IEX chart iex_chart_day = iex.get_chart( mdb_symbol, ref_range=iex_date ) iex_chart = iex_chart.append(iex_chart_day, ignore_index=True, sort=False) #Increment date date = (pandas.Timestamp(date) + pandas.DateOffset(days=1)).strftime('%Y-%m-%d') #Get 1y of charts from IEX #print( mdb_symbol ) #print( mdb_symbol["symbol"] ) #if mdb_symbol["symbol"] == "ZZZZZZZZZ": # flag = True #if not flag: # continue #iex_chart = iex.get_chart( mdb_symbol, ref_range='5d' ) #Get matching chart in MongoDB #mdb_chart = mdb_charts[ mdb_charts['symbol'] == mdb_symbol ] #Select charts more recent than MongoDB #if not iex_chart.empty and not mdb_chart.empty: # mask = iex_chart['date'] > mdb_chart['date'].iloc[0] # iex_chart = iex_chart.loc[mask] #Insert if charts exist #print( iex_chart ) if not iex_chart.empty: #Update progress bar printProgressBar(idx_min+index+1, len(mdb_symbols_full.index), prefix = 'Progress:', suffix = "Inserting chart for " + mdb_symbol + " ", length = 50) #Print write error if couldn't insert charts try: print( iex_chart ) self.db.iex_charts.insert_many( iex_chart.to_dict('records') ) except BulkWriteError as bwe: print( bwe.details ) raise else: #Update progress bar printProgressBar(idx_min+index+1, len(mdb_symbols_full.index), prefix = 'Progress:', suffix = "No new data for " + mdb_symbol + " ", length = 50) idx_min = idx_min + query_num