def insert_company(self):
    """
    Insert company information for symbols that have none in MongoDB

    Reads all symbols stored in MongoDB, looks up which of them already
    have a company record, and for every remaining symbol fetches the
    company data from IEX and inserts it into the iex_company collection.
    A progress bar line is printed for every symbol.
    """
    print( "Insert new company information" )
    mdb_query = Query()
    iex = Iex()
    #Get all symbols in MongoDB
    mdb_symbols = mdb_query.get_symbols()
    #Get companies already in MongoDB
    mdb_companies = mdb_query.get_company( mdb_symbols['symbol'].tolist() )
    #Build the lookup of symbols with company data once instead of
    #filtering the whole DataFrame for every symbol. An empty result is
    #guarded separately because it may not carry a 'symbol' column.
    if mdb_companies.empty:
        known_symbols = set()
    else:
        known_symbols = set( mdb_companies['symbol'] )
    total = len(mdb_symbols.index)
    #Initial call to print 0% progress
    printProgressBar(0, total, prefix = 'Progress:', suffix = '', length = 50)
    #Loop through symbols
    for index, mdb_symbol in mdb_symbols.iterrows():
        ticker = mdb_symbol["symbol"]
        #Skip company if already in MongoDB
        if ticker in known_symbols:
            printProgressBar(index+1, total, prefix = 'Progress:', suffix = "No new data for " + ticker + " ", length = 50)
            continue
        #Get company data from IEX
        iex_company = iex.get_company( ticker )
        #Nothing to insert; insert_many rejects an empty document list,
        #so this check must also apply when MongoDB holds no companies yet
        if iex_company.empty:
            printProgressBar(index+1, total, prefix = 'Progress:', suffix = "No data for " + ticker + " ", length = 50)
            continue
        #Insert company data
        printProgressBar(index+1, total, prefix = 'Progress:', suffix = "Inserting company for " + ticker + " ", length = 50)
        self.db.iex_company.insert_many( iex_company.to_dict('records') )
def insert_symbols(self):
    """
    Insert symbols from IEX that are not yet stored in MongoDB

    Fetches all common stocks (ref_type "cs") plus SPY from IEX and inserts
    every symbol into the iex_symbols collection unless a stored symbol
    with the same iexId, isEnabled, name and type already exists. Symbols
    containing a forbidden character are skipped. A progress bar line is
    printed for every symbol.
    """
    print( "Insert new symbols" )
    mdb_query = Query()
    iex = Iex()
    #Get all common stocks from IEX
    symbols = iex.get_symbols(ref_type="cs")
    #Get SPY (S&P500 exchange traded index) from IEX
    symbols_spy = iex.get_symbols(ref_symbol="SPY")
    #Append SPY to stocks. DataFrame.append was removed in pandas 2.0;
    #concat with ignore_index=True also yields a fresh 0..n-1 index, which
    #the progress bar below relies on.
    symbols = pandas.concat( [symbols, symbols_spy], ignore_index=True, sort=False )
    #Get symbols already in MongoDB
    mdb_symbols = mdb_query.get_symbols()
    #Characters that exclude a symbol from being stored
    forbidden = ["#"]
    total = len(symbols.index)
    #Initial call to print 0% progress
    printProgressBar(0, total, prefix = 'Progress:', suffix = '', length = 50)
    #Loop through symbols
    for index, symbol in symbols.iterrows():
        ticker = symbol["symbol"]
        #Exclude forbidden characters
        if any( x in ticker for x in forbidden ):
            printProgressBar(index+1, total, prefix = 'Progress:', suffix = "Symbol contains forbidden character: " + ticker + " ", length = 50)
            continue
        #Is symbol already in MongoDB (an empty MongoDB has no columns to
        #compare against, so every symbol counts as new)
        already_stored = False
        if not mdb_symbols.empty:
            mask = (
                (mdb_symbols['iexId'] == symbol['iexId'])
                & (mdb_symbols['isEnabled'] == symbol['isEnabled'])
                & (mdb_symbols['name'] == symbol['name'])
                & (mdb_symbols['type'] == symbol['type'])
            )
            already_stored = bool( mask.any() )
        if already_stored:
            printProgressBar(index+1, total, prefix = 'Progress:', suffix = "Symbol " + ticker + " already exists ", length = 50)
            continue
        #Insert if not in MongoDB
        printProgressBar(index+1, total, prefix = 'Progress:', suffix = "Inserting new symbol: " + ticker + " ", length = 50)
        self.db.iex_symbols.insert_one( symbol.to_dict() )
def delete_duplicate_balancesheets(self, ref_date = "1990-01-01", when = "on"):
    """
    Find duplicate balance sheets in MongoDB

    For every symbol, loads the balance sheets with reportDate >= ref_date
    (sorted by reportDate, descending) and prints the rows whose reportDate
    repeats that of an earlier row.
    NOTE: the delete call is currently commented out, so duplicates are
    only reported, not removed.
    @params:
        ref_date   - Optional  : date YYYY-MM-DD (Str)
        when       - Optional  : on, latest (Str); currently unused
    """
    mdb_query = Query()
    mdb_symbols = mdb_query.get_symbols()
    #Make sure the index runs 0..n-1, the progress bar relies on it
    mdb_symbols.reset_index(drop=True, inplace=True)
    total = len(mdb_symbols.index)
    #Initial call to print 0% progress
    printProgressBar(0, total, prefix = 'Progress:', suffix = '', length = 50)
    #Loop through symbols
    for index, symbol in mdb_symbols.iterrows():
        ticker = symbol["symbol"]
        query = { "symbol": { "$in": [ticker] },
                  "reportDate": { "$gte": ref_date } }
        results = self.db.iex_balancesheets.find( query ).sort("reportDate", DESCENDING)
        #Build the DataFrame in one go instead of appending row by row
        docs = list( results )
        if docs:
            balancesheets = pandas.DataFrame( docs )
            duplicates = balancesheets[ balancesheets.duplicated(['reportDate']) ]
        else:
            #No balance sheets for this symbol: an empty DataFrame has no
            #reportDate column, so duplicated() would raise a KeyError
            duplicates = pandas.DataFrame()
        print( duplicates )
        # Remove duplicates if they exist
        if not duplicates.empty:
            #Update progress bar
            printProgressBar(index+1, total, prefix = 'Progress:', suffix = "Deleting duplicates for " + ticker + " ", length = 50)
            #NOTE: deletion is disabled; enable the next line to remove them
            #self.db.iex_balancesheets.delete_many({"_id":{"$in":duplicates['_id'].tolist()}})
        else:
            #Update progress bar
            printProgressBar(index+1, total, prefix = 'Progress:', suffix = "No duplicates for " + ticker + " ", length = 50)