def saveAnnualDict(index):
    """Build and store the annual dictionary for one stock index.

    Loads ``annualFinancialData.p`` from ``data/financials/<index>/``, hands it
    to ``processAndDump`` together with the ``annualData.txt`` path and, when
    that returns something other than None, tags the result with ``index`` and
    passes it to ``dumpData`` under ``annualDict.p``.
    """
    base = 'data/financials/' + str(index) + '/'
    processed = processAndDump(load(base + 'annualFinancialData.p'),
                               base + 'annualData.txt')
    if processed is None:
        # Nothing usable came back; leave any existing annualDict.p untouched.
        return
    processed['index'] = index
    dumpData(processed, base + 'annualDict.p')
def saveState(self):
    """Store the usable entries of ``self.filteredFinancialData``.

    Each entry maps a key to a ``(values, count)`` pair. An entry is kept only
    when ``values`` is not None, its first two elements are not None, and
    ``count`` is greater than 1. The surviving entries are handed to
    ``dumpData`` under ``storedState.p``.
    """
    kept = {}
    for key, (values, count) in self.filteredFinancialData.items():
        # Same short-circuit order as the checks on values[0] / values[1]
        # require: never index into ``values`` when it is None.
        if values is None or values[0] is None or values[1] is None:
            continue
        if count > 1:
            kept[key] = (values, count)
    dumpData(kept, 'storedState.p')
def removeFromExclude(self, symbol):
    """Remove every occurrence of a symbol's index from the exclude list.

    Looks ``symbol`` up in ``self.indexes``; when it is unknown, prints
    'Symbol not found' and leaves the list untouched. Otherwise all entries
    equal to that index are dropped from ``self.excludeSymbolList`` and the
    list is handed to ``dumpData`` under ``data/excludeSymbols.p``.

    Only the lookup is guarded: a failure while saving is no longer
    reported as a missing symbol but propagates to the caller.
    """
    try:
        index = self.indexes[symbol]
    except KeyError:
        print('Symbol not found')
        return
    # Slice assignment filters in one pass while keeping the same list object,
    # so other references to self.excludeSymbolList stay valid.
    self.excludeSymbolList[:] = [
        entry for entry in self.excludeSymbolList if entry != index
    ]
    dumpData(self.excludeSymbolList, 'data/excludeSymbols.p')
def updateDictsWithDEratio(a, b):
    """Add a ``DEratio`` entry to the annual dictionaries of indices a..b-1.

    For every index in ``range(a, b)`` a progress line is printed, the stored
    ``annualDict.p`` is loaded and, when it is not None, its ``'DEratio'`` key
    is set to ``getDebtList(index)`` before the dictionary is dumped back to
    the same path.
    """
    symbolRows = load('data/symbolsMCupdated.p')
    for index in range(a, b):
        row = symbolRows[index]
        # Progress line: index, the hard-coded 7630 (presumably the total
        # number of symbols -- TODO confirm), then columns 1 and 2 of the row.
        print('%s %s %s %s' % (index, 7630, row[1], row[2]))
        path = 'data/financials/' + str(index) + '/' + 'annualDict.p'
        annual = load(path)
        if annual is None:
            continue
        annual['DEratio'] = getDebtList(index)
        dumpData(annual, path)
def saveQuarterDict(index):
    """Build and store the combined quarterly dictionary for one stock index.

    Both ``quarterFinancialData1.p`` and ``quarterFinancialData2.p`` under
    ``data/financials/<index>/`` are run through ``processAndDump``. If the
    first one yields None the function stops before touching the second.
    Otherwise the two results are merged with ``combine`` and, when the merge
    is not None, tagged with ``index`` and dumped to ``quarterDict.p``.
    """
    base = 'data/financials/' + str(index) + '/'

    first = processAndDump(load(base + 'quarterFinancialData1.p'),
                           base + 'quarterData1.txt')
    if first is None:
        return

    second = processAndDump(load(base + 'quarterFinancialData2.p'),
                            base + 'quarterData2.txt')
    merged = combine(first, second)
    if merged is None:
        return

    merged['index'] = index
    dumpData(merged, base + 'quarterDict.p')
def download_split_data_from_URL(url, index):
    """Download the splits page for a stock and store its HTML.

    Only URLs that start with the moneycontrol stock-quote prefix are handled;
    any other URL is ignored. The last two path components of ``url`` are used
    as the symbol and the company name to build the splits URL on top of the
    module-level ``moneycontrolURL``. The fetched HTML is dumped to
    ``data/financials/<index>/splits.p``, creating the directory if needed.

    Any ordinary error is reported by printing 'error'; nothing is raised.
    KeyboardInterrupt and SystemExit are deliberately not swallowed so a long
    download run can still be stopped.
    """
    try:
        if not url.startswith('http://www.moneycontrol.com/india/stockpricequote/'):
            return
        mcURLsplit = url.split('/')
        mcSymbol = mcURLsplit[-1]
        mcName = mcURLsplit[-2]
        splitsURL = (moneycontrolURL + 'company-facts/' + mcName + '/splits/'
                     + mcSymbol + '#' + mcSymbol)
        splitsHTML = getHTML(splitsURL)
        directory = 'data/financials/' + str(index) + '/'
        if not os.path.exists(directory):
            os.makedirs(directory)
        dumpData(splitsHTML, directory + 'splits.p')
    except Exception:
        print('error')
def updateDicts(a, b):
    """Stamp the stored annual and quarter dictionaries with their index.

    For every index in ``range(a, b)`` a progress line is printed, then
    ``annualDict.p`` and ``quarterDict.p`` under ``data/financials/<index>/``
    are each loaded, given an ``'index'`` key and dumped back. A file that
    loads as None is skipped.

    Failures for one file are ignored so the remaining files and indices are
    still processed; only ordinary exceptions are suppressed, so Ctrl-C still
    interrupts the run.
    """
    for index in range(a, b):
        # 7630 is hard-coded in the progress output (presumably the total
        # number of indices -- TODO confirm).
        print('%s %s' % (index, 7630))
        directory = 'data/financials/' + str(index) + '/'
        for filename in ('annualDict.p', 'quarterDict.p'):
            path = directory + filename
            try:
                stored = load(path)
                if stored is not None:
                    stored['index'] = index
                    dumpData(stored, path)
            except Exception:
                # Best effort: a missing or unreadable file must not stop
                # the batch.
                continue
def insertSymbolinDatabase(symbol):
    """Add a symbol to the stored symbol tables unless it is already known.

    The index mapping is loaded from ``data/index.p``. When ``symbol`` is
    already a key, its position and a notice are printed and nothing changes.
    Otherwise ``searchSymbol(symbol)`` is consulted; its last element is passed
    to ``getSymbolFromMCurl`` to obtain the ``(nse, bse)`` pair. A new row is
    appended to both ``data/symbols.p`` and ``data/symbolsMCupdated.p`` (the
    latter also receives the search result fields), the non-empty exchange
    symbols are mapped to the new row position, and all three files are
    dumped back.

    Returns:
        True when a new row was inserted; None when the symbol already exists
        or could not be found.
    """
    index = load('data/index.p')

    # Only a missing key means "not in the database yet"; other errors are
    # not treated as a reason to insert.
    try:
        position = index[symbol]
    except KeyError:
        pass
    else:
        print(position)
        print('%s is already in our database' % (symbol,))
        return

    result = searchSymbol(symbol)
    if not result:
        print('symbol not found')
        return

    resolved = getSymbolFromMCurl(result[-1])
    if resolved is None:
        # Guard before unpacking -- assumes getSymbolFromMCurl signals an
        # unresolvable URL with None; TODO confirm.
        print('symbol not found')
        return
    nse, bse = resolved

    symbols = load('data/symbols.p')
    data = load('data/symbolsMCupdated.p')
    newPosition = len(data)

    symbols.append(['', nse, bse])
    newRow = ['', nse, bse]
    newRow.extend(result)
    data.append(newRow)

    if nse != "":
        index[nse] = newPosition
    if bse != "":
        index[bse] = newPosition

    dumpData(index, 'data/index.p')
    dumpData(symbols, 'data/symbols.p')
    dumpData(data, 'data/symbolsMCupdated.p')
    return True
def saveFinancialData(data, index):
    """Store the three raw financial payloads for one stock index.

    Ensures ``data/financials/<index>/`` exists, then dumps ``data[0]``,
    ``data[1]`` and ``data[2]`` to ``annualFinancialData.p``,
    ``quarterFinancialData1.p`` and ``quarterFinancialData2.p`` respectively.
    """
    target = 'data/financials/' + str(index) + '/'
    if not os.path.exists(target):
        os.makedirs(target)

    filenames = (
        'annualFinancialData.p',
        'quarterFinancialData1.p',
        'quarterFinancialData2.p',
    )
    for position, filename in enumerate(filenames):
        dumpData(data[position], target + filename)
def insertSymbolinDatabase(symbol):
    """Add a symbol to the stored symbol tables unless it is already known.

    The index mapping is loaded from ``data/index.p``. When ``symbol`` is
    already a key, its position and a notice are printed and nothing changes.
    Otherwise ``searchSymbol(symbol)`` is consulted; its last element is passed
    to ``getSymbolFromMCurl`` to obtain the ``(nse, bse)`` pair. A new row is
    appended to both ``data/symbols.p`` and ``data/symbolsMCupdated.p`` (the
    latter also receives the search result fields), the non-empty exchange
    symbols are mapped to the new row position, and all three files are
    dumped back.

    Returns:
        True when a new row was inserted; None when the symbol already exists
        or could not be found.
    """
    index = load('data/index.p')

    # Only a missing key means "not in the database yet"; other errors are
    # not treated as a reason to insert.
    try:
        position = index[symbol]
    except KeyError:
        pass
    else:
        print(position)
        print('%s is already in our database' % (symbol,))
        return

    result = searchSymbol(symbol)
    if not result:
        print('symbol not found')
        return

    resolved = getSymbolFromMCurl(result[-1])
    if resolved is None:
        # Guard before unpacking -- assumes getSymbolFromMCurl signals an
        # unresolvable URL with None; TODO confirm.
        print('symbol not found')
        return
    nse, bse = resolved

    symbols = load('data/symbols.p')
    data = load('data/symbolsMCupdated.p')
    newPosition = len(data)

    symbols.append(['', nse, bse])
    newRow = ['', nse, bse]
    newRow.extend(result)
    data.append(newRow)

    if nse != "":
        index[nse] = newPosition
    if bse != "":
        index[bse] = newPosition

    dumpData(index, 'data/index.p')
    dumpData(symbols, 'data/symbols.p')
    dumpData(data, 'data/symbolsMCupdated.p')
    return True
def download_other_fin_data_from_URL(url, index):
    """Download the remaining financial pages for a stock and store their HTML.

    Only URLs that start with the moneycontrol stock-quote prefix are handled;
    any other URL is ignored. The last two path components of ``url`` are used
    as the symbol and the company name. For each of the balance-sheet,
    cash-flow, ratios, profit-loss and capital-structure pages (in that order)
    the HTML is fetched from under the module-level ``moneycontrolURL`` and
    dumped into ``data/financials/<index>/``, creating the directory if needed.

    The first ordinary error stops the remaining downloads and is reported by
    printing 'error'; nothing is raised. KeyboardInterrupt and SystemExit are
    deliberately not swallowed so a long download run can still be stopped.
    """
    # (URL path segment, output file name), in download order.
    pages = (
        ('balance-sheet', 'balanceSheet.p'),
        ('cash-flow', 'cash_flow.p'),
        ('ratios', 'ratios.p'),
        ('profit-loss', 'profit_loss.p'),
        ('capital-structure', 'capital_structure.p'),
    )
    try:
        if not url.startswith('http://www.moneycontrol.com/india/stockpricequote/'):
            return
        mcURLsplit = url.split('/')
        mcSymbol = mcURLsplit[-1]
        mcName = mcURLsplit[-2]
        directory = 'data/financials/' + str(index) + '/'
        for page, filename in pages:
            pageURL = (moneycontrolURL + 'financials/' + mcName + '/' + page
                       + '/' + mcSymbol + '#' + mcSymbol)
            pageHTML = getHTML(pageURL)
            # Create the directory only after a successful fetch, so a failed
            # first download does not leave an empty folder behind.
            if not os.path.exists(directory):
                os.makedirs(directory)
            dumpData(pageHTML, directory + filename)
    except Exception:
        print('error')
def updateSymbols():
    """Rebuild the symbol tables and the symbol index from the listing pages.

    One listing page is fetched per uppercase letter plus an extra 'others'
    page. Every ``<a class="bl_12">`` link after the first one is turned into
    a row: links with an empty text, or whose URL ``getSymbolFromMCurl``
    cannot resolve (None), are skipped.

    Three structures are built and dumped at the end:
      * ``data/symbolsMCupdated.p`` -- rows ``[name, nse, bse]`` followed by
        the last three path components of the link URL and the URL itself;
      * ``data/symbols.p`` -- rows ``[name, nse, bse]`` only;
      * ``data/index.p`` -- mapping from each non-empty NSE symbol, non-empty
        BSE symbol and link URL to the row position.

    Errors on a single link print 'except 1' and skip that link; errors on a
    whole page print 'except 2' and skip that page.
    """
    DATA = []
    data = []
    INDEX = {}
    pages = list(ascii_uppercase)
    pages.append('others')
    for page in pages:  # 26 letters and one page for names starting with a numeral
        try:
            print('updating from page ' + page + ' out of z')
            parsedHTML = getParsedHTML(getURL(page))
            links = parsedHTML.findAll(
                lambda tag: tag.name == 'a' and tag.get('class') == ['bl_12'])
            size = len(links)
            pageIndex = {}
            for j in range(1, size):  # 0 is for header
                try:
                    print('%s %s out of %s' % (page, j, size))
                    row = links[j]
                    name = getText(row)
                    if name == '':
                        continue
                    mcURL = str(row['href'].encode('UTF-8'))
                    symbol = getSymbolFromMCurl(mcURL)
                    if symbol is None:
                        continue
                    nse, bse = symbol
                    # Build the full row before appending, and take the
                    # position from the list itself, so a failure can never
                    # leave the index pointing at the wrong row.
                    fullRow = [name, nse, bse]
                    fullRow.extend(mcURL.split('/')[-3:])
                    fullRow.append(mcURL)
                    position = len(DATA)
                    DATA.append(fullRow)
                    data.append([name, nse, bse])
                    if nse != "":
                        pageIndex[nse] = position
                    if bse != "":
                        pageIndex[bse] = position
                    pageIndex[mcURL] = position
                except Exception:
                    print('except 1')
            # dict.update mutates in place and returns None, so its result
            # must not be assigned back.
            INDEX.update(pageIndex)
        except Exception:
            print('except 2')
    # 0 is the name, 1 is NSE symbol, 2 is BSE symbol
    dumpData(DATA, 'data/symbolsMCupdated.p')
    dumpData(INDEX, 'data/index.p')
    dumpData(data, 'data/symbols.p')
def updateSymbols():
    """Rebuild the symbol tables and the symbol index from the listing pages.

    One listing page is fetched per uppercase letter plus an extra 'others'
    page. Every ``<a class="bl_12">`` link after the first one is turned into
    a row: links with an empty text, or whose URL ``getSymbolFromMCurl``
    cannot resolve (None), are skipped.

    Three structures are built and dumped at the end:
      * ``data/symbolsMCupdated.p`` -- rows ``[name, nse, bse]`` followed by
        the last three path components of the link URL and the URL itself;
      * ``data/symbols.p`` -- rows ``[name, nse, bse]`` only;
      * ``data/index.p`` -- mapping from each non-empty NSE symbol, non-empty
        BSE symbol and link URL to the row position.

    Errors on a single link print 'except 1' and skip that link; errors on a
    whole page print 'except 2' and skip that page.
    """
    DATA = []
    data = []
    INDEX = {}
    pages = list(ascii_uppercase)
    pages.append('others')
    for page in pages:  # 26 letters and one page for names starting with a numeral
        try:
            print('updating from page ' + page + ' out of z')
            parsedHTML = getParsedHTML(getURL(page))
            links = parsedHTML.findAll(
                lambda tag: tag.name == 'a' and tag.get('class') == ['bl_12'])
            size = len(links)
            pageIndex = {}
            for j in range(1, size):  # 0 is for header
                try:
                    print('%s %s out of %s' % (page, j, size))
                    row = links[j]
                    name = getText(row)
                    if name == '':
                        continue
                    mcURL = str(row['href'].encode('UTF-8'))
                    symbol = getSymbolFromMCurl(mcURL)
                    if symbol is None:
                        continue
                    nse, bse = symbol
                    # Build the full row before appending, and take the
                    # position from the list itself, so a failure can never
                    # leave the index pointing at the wrong row.
                    fullRow = [name, nse, bse]
                    fullRow.extend(mcURL.split('/')[-3:])
                    fullRow.append(mcURL)
                    position = len(DATA)
                    DATA.append(fullRow)
                    data.append([name, nse, bse])
                    if nse != "":
                        pageIndex[nse] = position
                    if bse != "":
                        pageIndex[bse] = position
                    pageIndex[mcURL] = position
                except Exception:
                    print('except 1')
            # dict.update mutates in place and returns None, so its result
            # must not be assigned back.
            INDEX.update(pageIndex)
        except Exception:
            print('except 2')
    # 0 is the name, 1 is NSE symbol, 2 is BSE symbol
    dumpData(DATA, 'data/symbolsMCupdated.p')
    dumpData(INDEX, 'data/index.p')
    dumpData(data, 'data/symbols.p')
def excludeSymbol(self, symbol):
    """Append a symbol's index to the exclude list and save the list.

    Looks ``symbol`` up in ``self.indexes``; when it is unknown, prints
    'Symbol not found' and changes nothing. Otherwise the index is appended to
    ``self.excludeSymbolList`` and the list is handed to ``dumpData`` under
    ``data/excludeSymbols.p``.

    Only the lookup is guarded: a failure while saving is no longer
    reported as a missing symbol but propagates to the caller.
    """
    try:
        index = self.indexes[symbol]
    except KeyError:
        print('Symbol not found')
        return
    self.excludeSymbolList.append(index)
    dumpData(self.excludeSymbolList, 'data/excludeSymbols.p')