def searchSymbol(symbol):
    """Look up a stock on moneycontrol.com through the site's search box.

    Drives a Chrome browser via Selenium: opens the home page, types
    ``symbol`` into the ``search_str`` input and submits it.  If the browser
    lands directly on a stock quote URL, the result is built from that URL.
    Otherwise the ``srch_tbl`` results table is scanned for a row whose
    second cell contains a ``:``-prefixed token equal to ``symbol``, and the
    link in that row's first cell is used.

    Args:
        symbol: Text to search for (a string).

    Returns:
        ``[sector, name, mcSymbol, url]`` where the first three items are the
        last three path segments of the quote URL; ``[]`` when ``symbol`` is
        shorter than two characters or no row matched; ``None`` when scanning
        the results table raised an error.
    """
    # Reject too-short queries before launching a browser, so no Chrome
    # instance is started (and left open) for input that is never searched.
    if len(symbol) < 2:
        return []

    driver = webdriver.Chrome()
    try:
        time.sleep(2)
        driver.get("http://www.moneycontrol.com/")
        time.sleep(2)

        # Poll until the search box is present, then submit the query.
        while True:
            searchBoxes = driver.find_elements_by_id("search_str")
            if len(searchBoxes) > 0:
                searchBoxes[0].send_keys(symbol)
                searchBoxes[0].submit()
                break
            time.sleep(2)

        mcURL = str(driver.current_url.encode('UTF-8'))
        if mcURL.startswith(
                'http://www.moneycontrol.com/india/stockpricequote/'):
            parts = mcURL.split('/')
            return [parts[-3], parts[-2], parts[-1], mcURL]

        parsedHTML = getParsedHTML(driver.current_url)
        rows = parsedHTML.find(attrs={'class': 'srch_tbl'}).findAll('tr')
        try:
            for row in rows:
                tokens = getText(row.findAll('td')[1]).split(' ')
                for token in tokens:
                    # Compare against the whole query string.  The earlier
                    # ``symbol[j]`` used an undefined name, so every
                    # candidate raised and the function returned None.
                    if token.startswith(':') and token[1:] == symbol:
                        href = str(row.find('td').a['href'].encode('UTF-8'))
                        parts = href.split('/')
                        return [parts[-3], parts[-2], parts[-1], href]
        except Exception:
            # Malformed results table: keep the historical "None on error"
            # contract instead of propagating.
            return None
        return []
    finally:
        # Runs on every exit path, including errors raised while parsing.
        driver.close()
def getSymbolFromMCurl(url):
    """Fetch a quote page and extract its NSE and BSE codes.

    The text of the element with class ``FL gry10`` is stripped of spaces
    and split on ``|``.  The first field is used for the BSE code and the
    second for the NSE code, each with its first four characters dropped
    (presumably a ``BSE:`` / ``NSE:`` label -- confirm against the page).

    Args:
        url: Address of the quote page, passed to ``getParsedHTML``.

    Returns:
        ``(nse, bse)``, where either item is ``""`` if its field is four
        characters or shorter, or ``None`` if any step raises (fetch
        failure, missing element, fewer than two fields).
    """
    try:
        parsedHTML = getParsedHTML(url)
        info = getText(parsedHTML.find(attrs={"class": "FL gry10"}))
        fields = info.replace(' ', '').split('|')
        # Slicing past the end yields "", so fields of length <= 4 map to
        # the empty string without an explicit length check.
        bse = fields[0][4:]
        nse = fields[1][4:]
        return (nse, bse)
    except Exception:
        # Best-effort scrape: report failure as None, but let
        # KeyboardInterrupt/SystemExit through so a crawl can be stopped.
        return None
def getSymbolFromMCurl(url):
    """Fetch a quote page and extract its NSE and BSE codes.

    The text of the element with class ``FL gry10`` is stripped of spaces
    and split on ``|``.  The first field is used for the BSE code and the
    second for the NSE code, each with its first four characters dropped
    (presumably a ``BSE:`` / ``NSE:`` label -- confirm against the page).

    Args:
        url: Address of the quote page, passed to ``getParsedHTML``.

    Returns:
        ``(nse, bse)``, where either item is ``""`` if its field is four
        characters or shorter, or ``None`` if any step raises (fetch
        failure, missing element, fewer than two fields).
    """
    try:
        parsedHTML = getParsedHTML(url)
        info = getText(parsedHTML.find(attrs={"class": "FL gry10"}))
        fields = info.replace(' ', '').split('|')
        # Slicing past the end yields "", so fields of length <= 4 map to
        # the empty string without an explicit length check.
        bse = fields[0][4:]
        nse = fields[1][4:]
        return (nse, bse)
    except Exception:
        # Best-effort scrape: report failure as None, but let
        # KeyboardInterrupt/SystemExit through so a crawl can be stopped.
        return None
def updateSymbols():
    """Rebuild the stored symbol tables by crawling the listing pages.

    For each page key ('A'..'Z' plus 'others') the page returned by
    ``getParsedHTML(getURL(key))`` is scanned for ``<a>`` tags whose class
    is exactly ``bl_12``.  Each link is resolved to its (NSE, BSE) codes
    with ``getSymbolFromMCurl``; links with empty text or no resolvable
    codes are skipped.

    Three objects are written with ``dumpData``:
        data/symbolsMCupdated.p -- rows of
            ``[name, nse, bse, <last three URL path segments>, url]``
        data/index.p -- dict mapping each non-empty NSE code, non-empty BSE
            code and URL to the row's position in the list above
        data/symbols.p -- rows of ``[name, nse, bse]``

    Failures are reported on stdout ('except 1' for a single link,
    'except 2' for a whole page) and the crawl continues.
    """
    DATA = []
    data = []
    INDEX = {}
    alphabets = list(ascii_uppercase)
    alphabets.append('others')
    # 26 alphabets and one page where stocks start with a numeral
    for i in alphabets:
        try:
            print('updating from page ' + i + ' out of z')
            parsedHTML = getParsedHTML(getURL(i))
            table = parsedHTML.findAll(
                lambda tag: tag.name == 'a' and tag.get('class') == ['bl_12'])
            size = len(table)
            for j in range(1, size):  # 0 is for header
                try:
                    print('%s %s out of %s' % (i, j, size))
                    row = table[j]
                    name = getText(row)
                    if name == '':
                        continue
                    mcURL = str(row['href'].encode('UTF-8'))
                    symbol = getSymbolFromMCurl(mcURL)
                    if symbol is None:
                        continue
                    nse, bse = symbol

                    # Position the new row will occupy in DATA.
                    position = len(DATA)
                    DATA.append(
                        [name, nse, bse] + mcURL.split('/')[-3:] + [mcURL])
                    data.append([name, nse, bse])

                    # Record lookups straight into INDEX.  The earlier code
                    # staged them and then ran
                    # ``INSERT = INSERT.update(...)`` on an undefined name,
                    # so the index was always dumped empty.
                    if nse != "":
                        INDEX[nse] = position
                    if bse != "":
                        INDEX[bse] = position
                    INDEX[mcURL] = position
                except Exception:
                    print('except 1')
        except Exception:
            print('except 2')
    # 0 is the name, 1 is NSE symbol, 2 is BSE symbol
    dumpData(DATA, 'data/symbolsMCupdated.p')
    dumpData(INDEX, 'data/index.p')
    dumpData(data, 'data/symbols.p')
def searchSymbol(symbol):
    """Look up a stock on moneycontrol.com through the site's search box.

    Drives a Chrome browser via Selenium: opens the home page, types
    ``symbol`` into the ``search_str`` input and submits it.  If the browser
    lands directly on a stock quote URL, the result is built from that URL.
    Otherwise the ``srch_tbl`` results table is scanned for a row whose
    second cell contains a ``:``-prefixed token equal to ``symbol``, and the
    link in that row's first cell is used.

    Args:
        symbol: Text to search for (a string).

    Returns:
        ``[sector, name, mcSymbol, url]`` where the first three items are the
        last three path segments of the quote URL; ``[]`` when ``symbol`` is
        shorter than two characters or no row matched; ``None`` when scanning
        the results table raised an error.
    """
    # Reject too-short queries before launching a browser, so no Chrome
    # instance is started (and left open) for input that is never searched.
    if len(symbol) < 2:
        return []

    driver = webdriver.Chrome()
    try:
        time.sleep(2)
        driver.get("http://www.moneycontrol.com/")
        time.sleep(2)

        # Poll until the search box is present, then submit the query.
        while True:
            searchBoxes = driver.find_elements_by_id("search_str")
            if len(searchBoxes) > 0:
                searchBoxes[0].send_keys(symbol)
                searchBoxes[0].submit()
                break
            time.sleep(2)

        mcURL = str(driver.current_url.encode('UTF-8'))
        if mcURL.startswith(
                'http://www.moneycontrol.com/india/stockpricequote/'):
            parts = mcURL.split('/')
            return [parts[-3], parts[-2], parts[-1], mcURL]

        parsedHTML = getParsedHTML(driver.current_url)
        rows = parsedHTML.find(attrs={'class': 'srch_tbl'}).findAll('tr')
        try:
            for row in rows:
                tokens = getText(row.findAll('td')[1]).split(' ')
                for token in tokens:
                    # Compare against the whole query string.  The earlier
                    # ``symbol[j]`` used an undefined name, so every
                    # candidate raised and the function returned None.
                    if token.startswith(':') and token[1:] == symbol:
                        href = str(row.find('td').a['href'].encode('UTF-8'))
                        parts = href.split('/')
                        return [parts[-3], parts[-2], parts[-1], href]
        except Exception:
            # Malformed results table: keep the historical "None on error"
            # contract instead of propagating.
            return None
        return []
    finally:
        # Runs on every exit path, including errors raised while parsing.
        driver.close()
def updateSymbols():
    """Rebuild the stored symbol tables by crawling the listing pages.

    For each page key ('A'..'Z' plus 'others') the page returned by
    ``getParsedHTML(getURL(key))`` is scanned for ``<a>`` tags whose class
    is exactly ``bl_12``.  Each link is resolved to its (NSE, BSE) codes
    with ``getSymbolFromMCurl``; links with empty text or no resolvable
    codes are skipped.

    Three objects are written with ``dumpData``:
        data/symbolsMCupdated.p -- rows of
            ``[name, nse, bse, <last three URL path segments>, url]``
        data/index.p -- dict mapping each non-empty NSE code, non-empty BSE
            code and URL to the row's position in the list above
        data/symbols.p -- rows of ``[name, nse, bse]``

    Failures are reported on stdout ('except 1' for a single link,
    'except 2' for a whole page) and the crawl continues.
    """
    DATA = []
    data = []
    INDEX = {}
    alphabets = list(ascii_uppercase)
    alphabets.append('others')
    # 26 alphabets and one page where stocks start with a numeral
    for i in alphabets:
        try:
            print('updating from page ' + i + ' out of z')
            parsedHTML = getParsedHTML(getURL(i))
            table = parsedHTML.findAll(
                lambda tag: tag.name == 'a' and tag.get('class') == ['bl_12'])
            size = len(table)
            for j in range(1, size):  # 0 is for header
                try:
                    print('%s %s out of %s' % (i, j, size))
                    row = table[j]
                    name = getText(row)
                    if name == '':
                        continue
                    mcURL = str(row['href'].encode('UTF-8'))
                    symbol = getSymbolFromMCurl(mcURL)
                    if symbol is None:
                        continue
                    nse, bse = symbol

                    # Position the new row will occupy in DATA.
                    position = len(DATA)
                    DATA.append(
                        [name, nse, bse] + mcURL.split('/')[-3:] + [mcURL])
                    data.append([name, nse, bse])

                    # Record lookups straight into INDEX.  The earlier code
                    # staged them and then ran
                    # ``INSERT = INSERT.update(...)`` on an undefined name,
                    # so the index was always dumped empty.
                    if nse != "":
                        INDEX[nse] = position
                    if bse != "":
                        INDEX[bse] = position
                    INDEX[mcURL] = position
                except Exception:
                    print('except 1')
        except Exception:
            print('except 2')
    # 0 is the name, 1 is NSE symbol, 2 is BSE symbol
    dumpData(DATA, 'data/symbolsMCupdated.p')
    dumpData(INDEX, 'data/index.p')
    dumpData(data, 'data/symbols.p')
def getCurrentAnd52WeekHighLow(url):
    """Scrape the current price and 52-week high/low from a quote page.

    The current price is the text of the last element found when collecting
    matches for the classes ``FL PR5 gD_30``, ``FL PR5 rD_30`` and
    ``FL PR5 bD_30``, in that order.  The 52-week values come from the
    elements with ids ``n_52high`` / ``n_52low``, falling back to
    ``b_52high`` / ``b_52low`` when ``n_52high`` is absent (presumably the
    NSE and BSE variants -- confirm against the page).

    Args:
        url: Address of the quote page, passed to ``getParsedHTML``.

    Returns:
        ``(current, high, low)`` as floats, or ``None`` when neither
        52-week-high element exists or when any step raises.
    """
    try:
        parsedHTML = getParsedHTML(url)

        priceTags = []
        for cssClass in ("FL PR5 gD_30", "FL PR5 rD_30", "FL PR5 bD_30"):
            priceTags.extend(parsedHTML.findAll(attrs={"class": cssClass}))
        curr = float(getText(priceTags[-1]))

        high = parsedHTML.find(attrs={"id": "n_52high"})
        low = parsedHTML.find(attrs={"id": "n_52low"})
        if high is None:
            high = parsedHTML.find(attrs={"id": "b_52high"})
            low = parsedHTML.find(attrs={"id": "b_52low"})
        if high is None:
            return None

        return (curr, float(getText(high)), float(getText(low)))
    except Exception:
        # Best-effort scrape: any fetch/parse/convert failure yields None,
        # while KeyboardInterrupt/SystemExit still propagate.
        return None
def getCurrentAnd52WeekHighLow(url):
    """Scrape the current price and 52-week high/low from a quote page.

    The current price is the text of the last element found when collecting
    matches for the classes ``FL PR5 gD_30``, ``FL PR5 rD_30`` and
    ``FL PR5 bD_30``, in that order.  The 52-week values come from the
    elements with ids ``n_52high`` / ``n_52low``, falling back to
    ``b_52high`` / ``b_52low`` when ``n_52high`` is absent (presumably the
    NSE and BSE variants -- confirm against the page).

    Args:
        url: Address of the quote page, passed to ``getParsedHTML``.

    Returns:
        ``(current, high, low)`` as floats, or ``None`` when neither
        52-week-high element exists or when any step raises.
    """
    try:
        parsedHTML = getParsedHTML(url)

        priceTags = []
        for cssClass in ("FL PR5 gD_30", "FL PR5 rD_30", "FL PR5 bD_30"):
            priceTags.extend(parsedHTML.findAll(attrs={"class": cssClass}))
        curr = float(getText(priceTags[-1]))

        high = parsedHTML.find(attrs={"id": "n_52high"})
        low = parsedHTML.find(attrs={"id": "n_52low"})
        if high is None:
            high = parsedHTML.find(attrs={"id": "b_52high"})
            low = parsedHTML.find(attrs={"id": "b_52low"})
        if high is None:
            return None

        return (curr, float(getText(high)), float(getText(low)))
    except Exception:
        # Best-effort scrape: any fetch/parse/convert failure yields None,
        # while KeyboardInterrupt/SystemExit still propagate.
        return None