def getCurrentAnd52WeekHighLow(url):
    """Scrape the current price and the 52-week high/low from a quote page.

    The current price is the text of the last element carrying one of the
    classes ``FL PR5 gD_30``, ``FL PR5 rD_30`` or ``FL PR5 bD_30``.  The
    52-week figures come from the elements with ids ``n_52high`` and
    ``n_52low``; when ``n_52high`` is absent the ``b_52high`` / ``b_52low``
    pair is used instead (presumably the NSE and BSE variants -- confirm
    against the page markup).

    Args:
        url: Page address, handed unchanged to ``getParsedHTML``.

    Returns:
        A ``(current, high, low)`` tuple of floats, or ``None`` when the
        page cannot be processed or any value is missing or non-numeric.
    """
    try:
        parsedHTML = getParsedHTML(url)
        priceTags = []
        for cssClass in ("FL PR5 gD_30", "FL PR5 rD_30", "FL PR5 bD_30"):
            priceTags.extend(parsedHTML.findAll(attrs={"class": cssClass}))
        if not priceTags:
            return None
        curr = float(getText(priceTags[-1]))

        high = parsedHTML.find(attrs={"id": "n_52high"})
        low = parsedHTML.find(attrs={"id": "n_52low"})
        if high is None:
            high = parsedHTML.find(attrs={"id": "b_52high"})
            low = parsedHTML.find(attrs={"id": "b_52low"})
        if high is None or low is None:
            return None
        return (curr, float(getText(high)), float(getText(low)))
    except Exception:
        # Best-effort scraper: any fetch/parse/conversion failure means
        # "no data".  Unlike a bare ``except`` this lets KeyboardInterrupt
        # and SystemExit propagate.
        return None
def getCurrentAnd52WeekHighLow(url):
    """Return ``(current, high52, low52)`` scraped from a quote page.

    The current price is read from the last element whose class is one of
    ``FL PR5 gD_30``, ``FL PR5 rD_30`` or ``FL PR5 bD_30``.  The 52-week
    high/low are looked up by id, first as ``n_52high`` / ``n_52low`` and,
    if ``n_52high`` is not present, as ``b_52high`` / ``b_52low``.

    Args:
        url: Page address, passed to ``getParsedHTML``.

    Returns:
        A tuple of three floats, or ``None`` if the page cannot be
        processed or a value is missing or cannot be converted to float.
    """
    try:
        parsedHTML = getParsedHTML(url)

        candidates = parsedHTML.findAll(attrs={"class": "FL PR5 gD_30"})
        candidates.extend(parsedHTML.findAll(attrs={"class": "FL PR5 rD_30"}))
        candidates.extend(parsedHTML.findAll(attrs={"class": "FL PR5 bD_30"}))
        if len(candidates) == 0:
            return None
        curr = float(getText(candidates[-1]))

        high = low = None
        for prefix in ("n_", "b_"):
            high = parsedHTML.find(attrs={"id": prefix + "52high"})
            low = parsedHTML.find(attrs={"id": prefix + "52low"})
            if high is not None:
                break
        if high is None or low is None:
            return None

        return (curr, float(getText(high)), float(getText(low)))
    except Exception:
        # Failures are reported as "no data"; KeyboardInterrupt and
        # SystemExit are deliberately not intercepted.
        return None
Example #3
0
def searchSymbol(symbol):
    """Look up a stock on moneycontrol.com through the site's search box.

    A Chrome session is started, ``symbol`` is typed into the element with
    id ``search_str`` and the form is submitted.  If the browser ends up
    on a URL under ``/india/stockpricequote/`` the result is taken from
    that URL.  Otherwise the rows of the ``srch_tbl`` table are scanned
    for a second cell containing the token ``:<symbol>``, and the result
    is taken from the link in that row's first cell.

    Args:
        symbol: Text to search for; must be at least two characters long.

    Returns:
        ``[sector, name, mcSymbol, url]`` built from the last three path
        components of the matching URL, ``[]`` when ``symbol`` is too
        short or no row matches, or ``None`` when scanning the result
        rows fails.
    """
    if len(symbol) < 2:
        # Reject before launching a browser so no session is leaked.
        return []

    driver = webdriver.Chrome()
    try:
        time.sleep(2)
        driver.get("http://www.moneycontrol.com/")
        time.sleep(2)
        # Poll until the search box has been rendered.
        while True:
            searchBoxes = driver.find_elements_by_id("search_str")
            if len(searchBoxes) > 0:
                searchBoxes[0].send_keys(symbol)
                searchBoxes[0].submit()
                break
            time.sleep(2)

        mcURL = str(driver.current_url.encode('UTF-8'))
        if mcURL.startswith(
                'http://www.moneycontrol.com/india/stockpricequote/'):
            # The search redirected straight to a quote page.
            parts = mcURL.split('/')
            return [parts[-3], parts[-2], parts[-1], mcURL]

        parsedHTML = getParsedHTML(driver.current_url)
        rows = parsedHTML.find(attrs={'class': 'srch_tbl'}).findAll('tr')
        try:
            for row in rows:
                tokens = getText(row.findAll('td')[1]).split(' ')
                for token in tokens:
                    # The original compared against symbol[j] with an
                    # undefined j, so this branch could never match.
                    if token.startswith(':') and token[1:] == symbol:
                        mcURL = str(
                            row.find('td').a['href'].encode('UTF-8'))
                        parts = mcURL.split('/')
                        return [parts[-3], parts[-2], parts[-1], mcURL]
        except Exception:
            return None
        return []
    finally:
        # Runs on every exit path, including errors raised above.
        driver.close()
Example #4
0
def getSymbolFromMCurl(url):
    """Read the NSE and BSE symbols from a moneycontrol quote page.

    The text of the element with class ``FL gry10`` is stripped of spaces
    and split on ``|``.  The first field is used for BSE and the second
    for NSE; the first four characters of each field are dropped
    (presumably a ``BSE:`` / ``NSE:`` label -- confirm against the page).

    Args:
        url: Page address, passed to ``getParsedHTML``.

    Returns:
        A ``(nse, bse)`` tuple of strings, where a symbol is ``""`` if its
        field has four characters or fewer, or ``None`` when the page
        cannot be processed.
    """
    try:
        parsedHTML = getParsedHTML(url)
        info = getText(parsedHTML.find(attrs={"class": "FL gry10"}))
        fields = info.replace(' ', '').split('|')
        bse = fields[0][4:] if len(fields[0]) > 4 else ""
        nse = fields[1][4:] if len(fields[1]) > 4 else ""
        return (nse, bse)
    except Exception:
        # Any failure means "unknown symbol"; narrowed from a bare except
        # so Ctrl-C can still interrupt a long scraping run.
        return None
Example #5
0
def getSymbolFromMCurl(url):
    """Return the ``(nse, bse)`` symbols shown on a moneycontrol page.

    The element with class ``FL gry10`` supplies a ``|``-separated text;
    after removing spaces, field 0 is taken as the BSE entry and field 1
    as the NSE entry, each with its first four characters removed.

    Args:
        url: Page address, passed to ``getParsedHTML``.

    Returns:
        ``(nse, bse)`` as strings (``""`` for a field of four characters
        or fewer), or ``None`` if the page cannot be processed.
    """
    try:
        parsedHTML = getParsedHTML(url)
        rawInfo = getText(parsedHTML.find(attrs={"class": "FL gry10"}))
        bseField, nseField = rawInfo.replace(' ', '').split('|')[:2]
        bse = ""
        if len(bseField) > 4:
            bse = bseField[4:]
        nse = ""
        if len(nseField) > 4:
            nse = nseField[4:]
        return (nse, bse)
    except Exception:
        # Covers fetch errors, a missing element and a text with fewer
        # than two fields; KeyboardInterrupt/SystemExit pass through.
        return None
Example #6
0
def updateSymbols():
    """Rebuild the symbol tables from the per-letter listing pages.

    For every letter A-Z plus the page ``'others'`` the listing returned
    by ``getParsedHTML(getURL(page))`` is scanned for ``<a class="bl_12">``
    links.  Each link's page is queried with ``getSymbolFromMCurl`` and
    the results are written with ``dumpData``:

    * ``data/symbolsMCupdated.p`` -- ``[name, nse, bse, <last three URL
      path components>, url]`` per stock,
    * ``data/index.p`` -- maps each NSE symbol, BSE symbol and URL to the
      position of its record in the list above,
    * ``data/symbols.p`` -- ``[name, nse, bse]`` per stock.

    Failures on a single link or a whole page are reported on stdout and
    skipped.
    """
    DATA = []
    data = []
    INDEX = {}
    alphabets = list(ascii_uppercase)
    alphabets.append('others')
    for i in alphabets:  # 26 letters plus the page for names starting with a numeral
        try:
            print('updating from page ' + i + ' out of z')
            parsedHTML = getParsedHTML(getURL(i))
            table = parsedHTML.findAll(
                lambda tag: tag.name == 'a' and tag.get('class') == ['bl_12'])
            size = len(table)
            tempInsert = {}
            for j in range(1, size):  # 0 is for header
                try:
                    print('%s %s out of %s' % (i, j, size))
                    row = table[j]
                    name = getText(row)
                    if name == '':
                        continue
                    mcURL = str(row['href'].encode('UTF-8'))
                    mcURLsplit = mcURL.split('/')
                    symbol = getSymbolFromMCurl(mcURL)
                    if symbol is None:
                        continue
                    nse, bse = symbol
                    # Build the record before appending so the position
                    # stored in the index always matches the list.
                    # 0 is the name, 1 is NSE symbol, 2 is BSE symbol
                    record = [name, nse, bse]
                    record.extend(mcURLsplit[-3:])
                    record.append(mcURL)
                    position = len(DATA)
                    DATA.append(record)
                    data.append([name, nse, bse])
                    if nse != "":
                        tempInsert[nse] = position
                    if bse != "":
                        tempInsert[bse] = position
                    tempInsert[mcURL] = position
                except Exception:
                    print('except 1')

            # The original assigned to an unbound local ``INSERT`` here,
            # which always raised and left INDEX empty.
            INDEX.update(tempInsert)
        except Exception:
            print('except 2')

    dumpData(DATA, 'data/symbolsMCupdated.p')
    dumpData(INDEX, 'data/index.p')
    dumpData(data, 'data/symbols.p')
Example #7
0
def searchSymbol(symbol):
    """Search moneycontrol.com for ``symbol`` using a Chrome session.

    ``symbol`` is entered into the element with id ``search_str`` and
    submitted.  When the resulting URL starts with the stock-quote prefix
    it is used directly; otherwise every row of the ``srch_tbl`` table is
    inspected and the first row whose second cell holds the token
    ``:<symbol>`` supplies the URL (the ``href`` of the link in its first
    cell).

    Args:
        symbol: Text to search for; must be at least two characters long.

    Returns:
        ``[sector, name, mcSymbol, url]`` where the first three items are
        the last three ``/``-separated components of ``url``; ``[]`` if
        ``symbol`` is too short or nothing matched; ``None`` if reading
        the result rows raised an error.
    """
    # Validate first: the original opened Chrome and then returned
    # without closing it for short symbols.
    if len(symbol) < 2:
        return []

    output = []
    driver = webdriver.Chrome()
    try:
        time.sleep(2)
        driver.get("http://www.moneycontrol.com/")
        time.sleep(2)

        # Wait for the search box to appear, retrying every two seconds.
        boxes = driver.find_elements_by_id("search_str")
        while len(boxes) == 0:
            time.sleep(2)
            boxes = driver.find_elements_by_id("search_str")
        boxes[0].send_keys(symbol)
        boxes[0].submit()

        mcURL = str(driver.current_url.encode('UTF-8'))
        if mcURL.startswith('http://www.moneycontrol.com/india/stockpricequote/'):
            mcURLsplit = mcURL.split('/')
            output = [mcURLsplit[-3], mcURLsplit[-2], mcURLsplit[-1], mcURL]
        else:
            parsedHTML = getParsedHTML(driver.current_url)
            table = parsedHTML.find(attrs={'class': 'srch_tbl'}).findAll('tr')
            wanted = ':' + symbol
            try:
                for row in table:
                    entry = getText(row.findAll('td')[1]).split(' ')
                    # The original compared against symbol[j] with an
                    # undefined j; the whole symbol is what is searched.
                    if wanted in entry:
                        mcURL = str(row.find('td').a['href'].encode('UTF-8'))
                        mcURLsplit = mcURL.split('/')
                        output = [mcURLsplit[-3], mcURLsplit[-2],
                                  mcURLsplit[-1], mcURL]
                        break
            except Exception:
                return None
        return output
    finally:
        # Close the browser on every exit path, including exceptions.
        driver.close()
Example #8
0
def updateSymbols():
    """Scrape all listing pages and dump the symbol tables to disk.

    Pages are requested for each uppercase letter and for ``'others'``
    via ``getParsedHTML(getURL(page))``.  Every ``<a>`` tag whose class is
    exactly ``['bl_12']`` (except the first, treated as a header) is
    resolved with ``getSymbolFromMCurl``.  Three pickles are written with
    ``dumpData``:

    * ``data/symbolsMCupdated.p``: records ``[name, nse, bse]`` extended
      with the last three URL path components and the URL itself,
    * ``data/index.p``: dict from NSE symbol / BSE symbol / URL to the
      record's position in the first list,
    * ``data/symbols.p``: records ``[name, nse, bse]``.

    Errors are printed (``except 1`` per link, ``except 2`` per page) and
    the offending item is skipped.
    """
    DATA = []
    data = []
    INDEX = {}
    alphabets = list(ascii_uppercase) + ['others']
    for i in alphabets:  # 26 letters and one page where stocks start with a numeral
        try:
            print('updating from page ' + i + ' out of z')
            parsedHTML = getParsedHTML(getURL(i))
            table = parsedHTML.findAll(lambda tag: tag.name == 'a' and tag.get('class') == ['bl_12'])
            size = len(table)
            tempInsert = {}
            for j in range(1, size):  # 0 is for header
                try:
                    print('%s %s out of %s' % (i, j, size))
                    row = table[j]
                    name = getText(row)
                    if name == '':
                        continue
                    mcURL = str(row['href'].encode('UTF-8'))
                    symbol = getSymbolFromMCurl(mcURL)
                    if symbol is None:
                        continue
                    nse, bse = symbol
                    # 0 is the name, 1 is NSE symbol, 2 is BSE symbol
                    fullRecord = [name, nse, bse] + mcURL.split('/')[-3:] + [mcURL]
                    # Take the position from the list itself so index
                    # entries cannot drift from the stored records.
                    counter = len(DATA)
                    DATA.append(fullRecord)
                    data.append([name, nse, bse])
                    for key in (nse, bse):
                        if key != "":
                            tempInsert[key] = counter
                    tempInsert[mcURL] = counter
                except Exception:
                    print('except 1')

            # Was ``INSERT = INSERT.update(tempInsert)``: INSERT is never
            # defined, so the index was always dumped empty.
            INDEX.update(tempInsert)
        except Exception:
            print('except 2')

    dumpData(DATA, 'data/symbolsMCupdated.p')
    dumpData(INDEX, 'data/index.p')
    dumpData(data, 'data/symbols.p')
def getDebtList(index):
    """Extract recent debt/equity ratios from a stored ratios page.

    Loads ``data/financials/<index>/ratios.p`` with ``load``, parses it
    with ``getParsedSoupFromHTML`` and walks the ``<table>`` elements from
    last to first.  A table qualifies when the first three characters of
    its first text line look up to ``1`` in the module-level ``month``
    mapping (presumably month abbreviations -- confirm).  The leading
    lines whose first word is such a month are counted as column headers,
    then the values following the ``Debt Equity Ratio`` line are read.

    Args:
        index: Identifier used to build the data directory name.

    Returns:
        A list of at most ``min(3, number of header months)`` floats,
        cut short at the first ``--`` entry; ``None`` when a table says
        ``Data Not Available``, when no header month is recognised, or
        when no table contains a ``Debt Equity Ratio`` line.
    """
    DIR = 'data/financials/' + str(index) + '/'
    DEratioData = load(DIR + 'ratios.p')
    parsedData = getParsedSoupFromHTML(DEratioData)
    tables = parsedData.findAll('table')
    for table in reversed(tables):
        data = getText(table).strip('\n')
        if data == '':
            continue
        lines = data.split('\n')
        if lines[0].startswith('Data Not Available'):
            return None
        try:
            startsWithMonth = month[lines[0][:3]] == 1
        except Exception:
            # First line is not a month header: not a ratios table.
            continue
        if not startsWithMonth:
            continue

        # A real list (not a lazy map object) so it can be indexed.
        lines = [stripNewLine(line) for line in lines]

        # Count the header lines.  The loop normally ends when the lookup
        # fails on the first non-month line (or the lines run out); it
        # also stops on a non-1 value instead of spinning forever.
        months = 0
        try:
            while month[lines[months].split(' ')[0]] == 1:
                months = months + 1
        except Exception:
            pass
        if months == 0:
            return None

        for j in range(months, len(lines)):
            if lines[j] != 'Debt Equity Ratio':
                continue
            start = j + 1
            # An empty line may separate the label from its values.
            if start < len(lines) and lines[start] == "":
                start = start + 1
            # Never read past the end of the table text.
            stop = min(start + 3, start + months, len(lines))
            DEratio = []
            for value in lines[start:stop]:
                if value == '--':
                    break
                DEratio.append(float(value))
            return DEratio
    return None
Example #10
0
def processAndDump(html, file):
    """Store the text of the last usable table in ``html`` and parse it.

    The tables found by ``getParsedSoupFromHTML(html)`` are examined from
    last to first, skipping tables with no text.  If a table's first line
    starts with ``Data Not Available`` an empty string is written to
    ``file`` and the search stops.  If the first three characters of the
    first line look up to ``1`` in the module-level ``month`` mapping, the
    table text is written to ``file`` and the lines are handed to
    ``processDict``.

    Args:
        html: Markup passed to ``getParsedSoupFromHTML``.
        file: Destination passed as the first argument of ``write``.

    Returns:
        The result of ``processDict(lines)`` for the first qualifying
        table, or ``None`` when data is unavailable or no table qualifies.
    """
    parsedData = getParsedSoupFromHTML(html)
    for table in reversed(parsedData.findAll('table')):
        data = getText(table).strip('\n')
        if data == '':
            continue
        lines = data.split('\n')
        if lines[0].startswith('Data Not Available'):
            write(file, "")
            return None
        try:
            if month[lines[0][:3]] == 1:
                write(file, data)
                return processDict(lines)
        except Exception:
            # Best effort: an unknown header or a failure while writing or
            # parsing moves on to the next table.  KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            continue
    return None