def getCurrentAnd52WeekHighLow(url):
    """Scrape the current price and the 52-week high/low from a quote page.

    The page at ``url`` is parsed with ``getParsedHTML``. The current price
    is the text of the last element whose class is ``FL PR5 gD_30``,
    ``FL PR5 rD_30`` or ``FL PR5 bD_30`` (searched in that order). The
    52-week figures are read from the ``n_52high``/``n_52low`` elements,
    falling back to ``b_52high``/``b_52low`` when ``n_52high`` is absent.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``(current, high, low)`` tuple of floats, or ``None`` when the
        52-week elements are missing or any step of the scrape fails.
    """
    # NOTE(review): this file defines getCurrentAnd52WeekHighLow twice; the
    # later definition replaces the earlier one when the module is loaded.
    price_classes = ("FL PR5 gD_30", "FL PR5 rD_30", "FL PR5 bD_30")
    try:
        parsedHTML = getParsedHTML(url)

        price_nodes = []
        for css_class in price_classes:
            price_nodes.extend(parsedHTML.findAll(attrs={"class": css_class}))
        # An empty list raises IndexError here, which is reported as None.
        curr = float(getText(price_nodes[-1]))

        # presumably the n_/b_ prefixes are the NSE/BSE variants -- confirm
        high = parsedHTML.find(attrs={"id": "n_52high"})
        low = parsedHTML.find(attrs={"id": "n_52low"})
        if high is None:
            high = parsedHTML.find(attrs={"id": "b_52high"})
            low = parsedHTML.find(attrs={"id": "b_52low"})
            if high is None:
                return None

        return (curr, float(getText(high)), float(getText(low)))
    except Exception:
        # Best-effort scrape: any fetch/parse/convert failure yields None,
        # but KeyboardInterrupt and SystemExit are no longer swallowed.
        return None
def getCurrentAnd52WeekHighLow(url):
    """Scrape the current price and the 52-week high/low from a quote page.

    The page at ``url`` is parsed with ``getParsedHTML``. The current price
    is the text of the last element whose class is ``FL PR5 gD_30``,
    ``FL PR5 rD_30`` or ``FL PR5 bD_30`` (searched in that order). The
    52-week figures are read from the ``n_52high``/``n_52low`` elements,
    falling back to ``b_52high``/``b_52low`` when ``n_52high`` is absent.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``(current, high, low)`` tuple of floats, or ``None`` when the
        52-week elements are missing or any step of the scrape fails.
    """
    # NOTE(review): this file defines getCurrentAnd52WeekHighLow twice; the
    # later definition replaces the earlier one when the module is loaded.
    price_classes = ("FL PR5 gD_30", "FL PR5 rD_30", "FL PR5 bD_30")
    try:
        parsedHTML = getParsedHTML(url)

        price_nodes = []
        for css_class in price_classes:
            price_nodes.extend(parsedHTML.findAll(attrs={"class": css_class}))
        # An empty list raises IndexError here, which is reported as None.
        curr = float(getText(price_nodes[-1]))

        # presumably the n_/b_ prefixes are the NSE/BSE variants -- confirm
        high = parsedHTML.find(attrs={"id": "n_52high"})
        low = parsedHTML.find(attrs={"id": "n_52low"})
        if high is None:
            high = parsedHTML.find(attrs={"id": "b_52high"})
            low = parsedHTML.find(attrs={"id": "b_52low"})
            if high is None:
                return None

        return (curr, float(getText(high)), float(getText(low)))
    except Exception:
        # Best-effort scrape: any fetch/parse/convert failure yields None,
        # but KeyboardInterrupt and SystemExit are no longer swallowed.
        return None
def searchSymbol(symbol):
    """Look up a stock on moneycontrol.com through the site's search box.

    A Chrome webdriver types ``symbol`` into the ``search_str`` element and
    submits it. If the browser lands directly on a
    ``/india/stockpricequote/`` URL, that URL is used. Otherwise the rows of
    the ``srch_tbl`` results table are scanned: the second cell of each row
    is split on spaces and the first row holding a token ``":" + symbol``
    supplies the link taken from its first cell.

    Args:
        symbol: Text to search for; must be at least two characters long.

    Returns:
        ``[sector, name, mcSymbol, url]``, where the first three items are
        the last three ``/``-separated parts of the quote URL; ``[]`` when
        ``symbol`` is shorter than two characters or no row matches; ``None``
        when reading the results table raises.
    """
    # NOTE(review): this file defines searchSymbol twice; the later
    # definition replaces the earlier one when the module is loaded.

    # Reject unusable input before paying for (and leaking) a browser.
    if len(symbol) < 2:
        return []

    def quote_record(quote_url):
        # The last three path components are sector / name / symbol.
        parts = quote_url.split('/')
        return [parts[-3], parts[-2], parts[-1], quote_url]

    driver = webdriver.Chrome()
    try:
        time.sleep(2)
        driver.get("http://www.moneycontrol.com/")
        time.sleep(2)

        # Poll until the search box has been rendered.
        # NOTE(review): there is no timeout; this spins forever if the
        # element never appears.
        while True:
            search_boxes = driver.find_elements_by_id("search_str")
            if search_boxes:
                search_boxes[0].send_keys(symbol)
                search_boxes[0].submit()
                break
            time.sleep(2)

        mcURL = str(driver.current_url.encode('UTF-8'))
        if mcURL.startswith('http://www.moneycontrol.com/india/stockpricequote/'):
            return quote_record(mcURL)

        parsedHTML = getParsedHTML(driver.current_url)
        rows = parsedHTML.find(attrs={'class': 'srch_tbl'}).findAll('tr')
        try:
            for row in rows:
                tokens = getText(row.findAll('td')[1]).split(' ')
                for token in tokens:
                    # Match tokens of the form ":<symbol>". The original
                    # compared against symbol[j] with j undefined, so every
                    # candidate raised NameError and the search returned None.
                    if token[:1] == ':' and token[1:] == symbol:
                        href = str(row.find('td').a['href'].encode('UTF-8'))
                        return quote_record(href)
        except Exception:
            # Unexpected table layout: report failure as None, as before.
            return None
        return []
    finally:
        # Always release the browser, including on unexpected errors.
        driver.close()
def getSymbolFromMCurl(url):
    """Extract the two exchange codes shown on a quote page.

    The text of the element with class ``FL gry10`` has its spaces removed
    and is split on ``|``. The first field supplies ``bse`` and the second
    ``nse``, each with its first four characters dropped.

    Args:
        url: Address of the quote page, fetched through ``getParsedHTML``.

    Returns:
        ``(nse, bse)`` as strings (either may be ``""``), or ``None`` when
        the page cannot be fetched or does not have the expected layout.
    """
    # NOTE(review): this file defines getSymbolFromMCurl twice; the later
    # definition replaces the earlier one when the module is loaded.
    try:
        parsedHTML = getParsedHTML(url)
        info = getText(parsedHTML.find(attrs={"class": "FL gry10"}))
        fields = info.replace(' ', '').split('|')
        # presumably the four dropped characters are a "BSE:" / "NSE:"
        # label -- confirm. Slicing a field of four or fewer characters
        # already yields "", so no explicit length check is needed.
        bse = fields[0][4:]
        nse = fields[1][4:]
        return (nse, bse)
    except Exception:
        # Best-effort: failures yield None, but KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return None
def getSymbolFromMCurl(url):
    """Extract the two exchange codes shown on a quote page.

    The text of the element with class ``FL gry10`` has its spaces removed
    and is split on ``|``. The first field supplies ``bse`` and the second
    ``nse``, each with its first four characters dropped.

    Args:
        url: Address of the quote page, fetched through ``getParsedHTML``.

    Returns:
        ``(nse, bse)`` as strings (either may be ``""``), or ``None`` when
        the page cannot be fetched or does not have the expected layout.
    """
    # NOTE(review): this file defines getSymbolFromMCurl twice; the later
    # definition replaces the earlier one when the module is loaded.
    try:
        parsedHTML = getParsedHTML(url)
        info = getText(parsedHTML.find(attrs={"class": "FL gry10"}))
        fields = info.replace(' ', '').split('|')
        # presumably the four dropped characters are a "BSE:" / "NSE:"
        # label -- confirm. Slicing a field of four or fewer characters
        # already yields "", so no explicit length check is needed.
        bse = fields[0][4:]
        nse = fields[1][4:]
        return (nse, bse)
    except Exception:
        # Best-effort: failures yield None, but KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return None
def updateSymbols():
    """Rebuild the symbol tables by crawling the per-letter listing pages.

    For every page key (``A``-``Z`` plus ``'others'``) the page returned by
    ``getURL`` is parsed and each ``<a class="bl_12">`` link after the first
    is visited with ``getSymbolFromMCurl``. Three structures are built and
    written with ``dumpData``:

    * ``data/symbolsMCupdated.p`` -- rows of ``[name, nse, bse]`` followed
      by the last three ``/``-separated parts of the link and the link.
    * ``data/index.p`` -- maps each non-empty NSE code, non-empty BSE code
      and link to the row's position in the first table.
    * ``data/symbols.p`` -- rows of ``[name, nse, bse]`` only.

    Failures on a single link or a whole page are reported on stdout
    (``except 1`` / ``except 2``) and skipped.
    """
    # NOTE(review): this file defines updateSymbols twice; the later
    # definition replaces the earlier one when the module is loaded.
    DATA = []
    data = []
    INDEX = {}

    alphabets = list(ascii_uppercase)
    alphabets.append('others')
    # 26 alphabets and one where stocks start with numeral
    for i in alphabets:
        try:
            print('updating from page ' + i + ' out of z')
            parsedHTML = getParsedHTML(getURL(i))
            table = parsedHTML.findAll(
                lambda tag: tag.name == 'a' and tag.get('class') == ['bl_12'])
            size = len(table)
            tempInsert = {}
            for j in range(1, size):  # 0 is for header
                try:
                    print('%s %s out of %s' % (i, j, size))
                    row = table[j]
                    name = getText(row)
                    if name == '':
                        continue
                    mcURL = str(row['href'].encode('UTF-8'))
                    symbol = getSymbolFromMCurl(mcURL)
                    if symbol is None:
                        continue
                    nse, bse = symbol

                    # Build the full record before appending so a failure
                    # cannot leave DATA and the index positions out of step.
                    record = [name, nse, bse]
                    record.extend(mcURL.split('/')[-3:])
                    record.append(mcURL)

                    position = len(DATA)
                    DATA.append(record)
                    data.append([name, nse, bse])
                    if nse != "":
                        tempInsert[nse] = position
                    if bse != "":
                        tempInsert[bse] = position
                    tempInsert[mcURL] = position
                except Exception:
                    print('except 1')
            # The original assigned to an undefined name INSERT here, which
            # raised on every page and left INDEX empty.
            INDEX.update(tempInsert)
        except Exception:
            print('except 2')

    # 0 is the name, 1 is NSE symbol, 2 is BSE symbol
    dumpData(DATA, 'data/symbolsMCupdated.p')
    dumpData(INDEX, 'data/index.p')
    dumpData(data, 'data/symbols.p')
def searchSymbol(symbol):
    """Look up a stock on moneycontrol.com through the site's search box.

    A Chrome webdriver types ``symbol`` into the ``search_str`` element and
    submits it. If the browser lands directly on a
    ``/india/stockpricequote/`` URL, that URL is used. Otherwise the rows of
    the ``srch_tbl`` results table are scanned: the second cell of each row
    is split on spaces and the first row holding a token ``":" + symbol``
    supplies the link taken from its first cell.

    Args:
        symbol: Text to search for; must be at least two characters long.

    Returns:
        ``[sector, name, mcSymbol, url]``, where the first three items are
        the last three ``/``-separated parts of the quote URL; ``[]`` when
        ``symbol`` is shorter than two characters or no row matches; ``None``
        when reading the results table raises.
    """
    # NOTE(review): this file defines searchSymbol twice; the later
    # definition replaces the earlier one when the module is loaded.

    # Reject unusable input before paying for (and leaking) a browser.
    if len(symbol) < 2:
        return []

    def quote_record(quote_url):
        # The last three path components are sector / name / symbol.
        parts = quote_url.split('/')
        return [parts[-3], parts[-2], parts[-1], quote_url]

    driver = webdriver.Chrome()
    try:
        time.sleep(2)
        driver.get("http://www.moneycontrol.com/")
        time.sleep(2)

        # Poll until the search box has been rendered.
        # NOTE(review): there is no timeout; this spins forever if the
        # element never appears.
        while True:
            search_boxes = driver.find_elements_by_id("search_str")
            if search_boxes:
                search_boxes[0].send_keys(symbol)
                search_boxes[0].submit()
                break
            time.sleep(2)

        mcURL = str(driver.current_url.encode('UTF-8'))
        if mcURL.startswith('http://www.moneycontrol.com/india/stockpricequote/'):
            return quote_record(mcURL)

        parsedHTML = getParsedHTML(driver.current_url)
        rows = parsedHTML.find(attrs={'class': 'srch_tbl'}).findAll('tr')
        try:
            for row in rows:
                tokens = getText(row.findAll('td')[1]).split(' ')
                for token in tokens:
                    # Match tokens of the form ":<symbol>". The original
                    # compared against symbol[j] with j undefined, so every
                    # candidate raised NameError and the search returned None.
                    if token[:1] == ':' and token[1:] == symbol:
                        href = str(row.find('td').a['href'].encode('UTF-8'))
                        return quote_record(href)
        except Exception:
            # Unexpected table layout: report failure as None, as before.
            return None
        return []
    finally:
        # Always release the browser, including on unexpected errors.
        driver.close()
def updateSymbols():
    """Rebuild the symbol tables by crawling the per-letter listing pages.

    For every page key (``A``-``Z`` plus ``'others'``) the page returned by
    ``getURL`` is parsed and each ``<a class="bl_12">`` link after the first
    is visited with ``getSymbolFromMCurl``. Three structures are built and
    written with ``dumpData``:

    * ``data/symbolsMCupdated.p`` -- rows of ``[name, nse, bse]`` followed
      by the last three ``/``-separated parts of the link and the link.
    * ``data/index.p`` -- maps each non-empty NSE code, non-empty BSE code
      and link to the row's position in the first table.
    * ``data/symbols.p`` -- rows of ``[name, nse, bse]`` only.

    Failures on a single link or a whole page are reported on stdout
    (``except 1`` / ``except 2``) and skipped.
    """
    # NOTE(review): this file defines updateSymbols twice; the later
    # definition replaces the earlier one when the module is loaded.
    DATA = []
    data = []
    INDEX = {}

    alphabets = list(ascii_uppercase)
    alphabets.append('others')
    # 26 alphabets and one where stocks start with numeral
    for i in alphabets:
        try:
            print('updating from page ' + i + ' out of z')
            parsedHTML = getParsedHTML(getURL(i))
            table = parsedHTML.findAll(
                lambda tag: tag.name == 'a' and tag.get('class') == ['bl_12'])
            size = len(table)
            tempInsert = {}
            for j in range(1, size):  # 0 is for header
                try:
                    print('%s %s out of %s' % (i, j, size))
                    row = table[j]
                    name = getText(row)
                    if name == '':
                        continue
                    mcURL = str(row['href'].encode('UTF-8'))
                    symbol = getSymbolFromMCurl(mcURL)
                    if symbol is None:
                        continue
                    nse, bse = symbol

                    # Build the full record before appending so a failure
                    # cannot leave DATA and the index positions out of step.
                    record = [name, nse, bse]
                    record.extend(mcURL.split('/')[-3:])
                    record.append(mcURL)

                    position = len(DATA)
                    DATA.append(record)
                    data.append([name, nse, bse])
                    if nse != "":
                        tempInsert[nse] = position
                    if bse != "":
                        tempInsert[bse] = position
                    tempInsert[mcURL] = position
                except Exception:
                    print('except 1')
            # The original assigned to an undefined name INSERT here, which
            # raised on every page and left INDEX empty.
            INDEX.update(tempInsert)
        except Exception:
            print('except 2')

    # 0 is the name, 1 is NSE symbol, 2 is BSE symbol
    dumpData(DATA, 'data/symbolsMCupdated.p')
    dumpData(INDEX, 'data/index.p')
    dumpData(data, 'data/symbols.p')
def getDebtList(index):
    """Return the debt-equity ratios stored for one company.

    ``data/financials/<index>/ratios.p`` is loaded and parsed as HTML. The
    tables are scanned from last to first; a table qualifies when the first
    three characters of its first text line map to 1 in the module-level
    ``month`` lookup. In such a table the leading lines whose first word
    maps to 1 in ``month`` are counted as period columns, and the values
    following the ``Debt Equity Ratio`` line are read.

    Args:
        index: Identifier used to build the data directory name.

    Returns:
        A list of at most ``min(3, <number of period columns>)`` floats,
        cut short at the first ``--`` entry; or ``None`` when the data is
        marked ``Data Not Available``, the header cannot be read, or no
        table contains a ``Debt Equity Ratio`` line.

    Raises:
        ValueError: if a ratio entry is neither ``--`` nor a valid float.
    """
    DIR = 'data/financials/' + str(index) + '/'
    DEratioData = load(DIR + 'ratios.p')
    parsedData = getParsedSoupFromHTML(DEratioData)

    for table in reversed(parsedData.findAll('table')):
        data = getText(table).strip('\n')
        if data == '':
            continue
        lines = data.split('\n')
        if lines[0].startswith('Data Not Available'):
            return None
        # Skip tables whose first line does not begin with a known month.
        # NOTE(review): assumes ``month`` is a plain dict -- confirm.
        if month.get(lines[0][:3]) != 1:
            continue

        lines = [stripNewLine(line) for line in lines]

        # Count the leading period-header lines. The loop is bounded, so an
        # entry whose ``month`` value is not 1 ends the header instead of
        # spinning forever as the original ``while True`` did.
        months = 0
        while (months < len(lines)
               and month.get(lines[months].split(' ')[0]) == 1):
            months += 1
        if months == 0:
            return None

        for j in range(months, len(lines)):
            if lines[j] != 'Debt Equity Ratio':
                continue
            start = j + 1
            # An empty line may separate the label from its values.
            if start < len(lines) and lines[start] == "":
                start += 1
            DEratio = []
            # Slicing keeps a truncated table from raising IndexError.
            for value in lines[start:start + min(3, months)]:
                if value == '--':
                    break
                DEratio.append(float(value))
            return DEratio
    return None
def processAndDump(html, file):
    """Write the text of the last usable table in ``html`` and parse it.

    The tables of the parsed document are scanned from last to first and
    empty ones are skipped. If a table's first line starts with
    ``Data Not Available``, an empty string is written through ``write``
    and ``None`` is returned. If the first three characters of the first
    line map to 1 in the module-level ``month`` lookup, the table text is
    written and the result of ``processDict`` on its lines is returned.

    Args:
        html: Markup handed to ``getParsedSoupFromHTML``.
        file: Destination passed as the first argument to ``write``.

    Returns:
        Whatever ``processDict`` returns for the selected table, or ``None``
        when no table qualifies or the data is marked unavailable.
    """
    parsedData = getParsedSoupFromHTML(html)
    for table in reversed(parsedData.findAll('table')):
        data = getText(table).strip('\n')
        if data == '':
            continue
        lines = data.split('\n')
        if lines[0].startswith('Data Not Available'):
            write(file, "")
            return None
        try:
            if month[lines[0][:3]] == 1:
                write(file, data)
                return processDict(lines)
        except Exception:
            # Unknown header, or a write/parse failure: fall back to the
            # preceding table. KeyboardInterrupt and SystemExit are no
            # longer swallowed.
            continue
    return None