def gethandiTime(soccerid=0):
    """Check the first cell of the last row of a match's handicap table.

    Loads ``http://www.310win.com/handicap/<soccerid>.html``, looks up the
    tables with ``width="860"`` and ``class="socai"``, and inspects the last
    ``tr`` of the first such table.

    Args:
        soccerid: match id inserted into the page URL.

    Returns:
        True when the stripped text of that row's first child is neither
        empty nor u'澳门'; False in every other case, including when the
        table or row lookup raises IndexError (the URL and the error are
        printed in that case).
    """
    page_url = 'http://www.310win.com/handicap/' + str(soccerid) + '.html'
    soup = SoupHelper(page_url)
    tables = soup.gethtmllistwithlabel('table', {'width': '860', 'class': 'socai'})
    try:
        rows = getelementlistwithlabel(tables[0], 'tr')
        last_row = rows[len(rows) - 1]
        if not isTagClass(last_row):
            return False
        cells = last_row.contents
        if len(cells) == 0:
            return False
        label = cells[0].get_text().strip()
        return label != u'' and label != u'澳门'
    except IndexError as err:
        # No matching table, or a table without rows.
        print(page_url)
        print(err)
        return False
def parserOnePageChannel(self, url):
    """Print the link target of every thumbnail on one channel page.

    The element with id ``post_container`` is fetched from ``url``; for each
    ``li`` below it, the ``href`` of the first ``a`` inside the
    ``div.thumbnail`` element is printed.

    Args:
        url: address of the channel page to load.
    """
    container = SoupHelper(url).getOneTagObjWithId('post_container')
    for item in container.find_all('li'):
        thumbnail = item.find('div', {'class': 'thumbnail'})
        anchor = thumbnail.find('a')
        print(anchor.get('href'))
def getImagePages(self):
    """Append every link found in the page-list container to ``self.pageList``.

    Loads ``self.url``, takes the ``div`` with class ``pagelist`` and appends
    the UTF-8 encoded ``href`` of each ``a`` element inside it, in document
    order.
    """
    soup = SoupHelper(self.url)
    container = soup.getOneTagObjWithClass('div', 'pagelist')
    for anchor in container.find_all('a'):
        href = anchor.get('href')
        self.pageList.append(href.encode('utf-8'))
def getAllImages(self):
    """Rebuild ``self.imageList`` from the images on every page in ``self.pageList``.

    ``self.imageList`` is reset to an empty list first. Each page is then
    loaded, and the UTF-8 encoded ``src`` of every ``img`` inside the element
    with id ``post_content`` is appended.
    """
    self.imageList = []
    for page_url in self.pageList:
        content = SoupHelper(page_url).getOneTagObjWithId("post_content")
        for image in content.find_all('img'):
            source = image.get('src')
            self.imageList.append(source.encode('utf-8'))
def parserAllPages(self, url):
    """Collect pagination links into ``self.pageList``, following the pager recursively.

    Every ``a`` inside the ``div`` with class ``pagination`` on ``url`` is
    added to ``self.pageList`` (UTF-8 encoded, duplicates skipped). If one of
    the links is labelled '下一页', the method calls itself on the last link
    that was *not* so labelled; otherwise the collected list is printed.

    When the page has no pagination element, the URL is printed with the
    ' 不合法' suffix, followed by the list collected so far.

    Args:
        url: address of the page whose pager should be read.
    """
    pagination = SoupHelper(url).getOneTagObjWithClass('div', 'pagination')
    if not pagination:
        print(url + ' 不合法')
        print(self.pageList)
        return

    last_regular_link = None
    has_next_link = False
    for anchor in pagination.find_all('a'):
        href = anchor.get('href').encode('utf-8')
        if href not in self.pageList:
            self.pageList.append(href)
        if anchor.get_text().encode('utf-8') == '下一页':
            has_next_link = True
        else:
            last_regular_link = href

    if not has_next_link:
        print(self.pageList)
        return
    # More pages exist beyond the ones listed here: continue from the last
    # regular link seen on this page.
    self.parserAllPages(last_regular_link)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from BEAUTIFUL_SOUP_HELPER import SoupHelper, isTagClass

# Page that is loaded when this script runs.
ALL_SCORE_URL = 'http://www.310win.com/info/match/AllScore.aspx'

instance = SoupHelper(ALL_SCORE_URL)
# Look up the table(s) whose id is 'live'.
# NOTE(review): the name `list` shadows the builtin for the rest of this
# module and the result is not used in the lines shown here -- confirm
# whether it can be renamed.
list = instance.gethtmllistwithlabel('table', options={'id': 'live'})
print(instance)
def getexchange(soccerid=0):
    """Scrape the odds-to-handicap exchange table of a match and render it as HTML rows.

    The page ``1x2exchange.aspx`` is requested for ``soccerid`` with a fixed
    company filter (``cids``). Every ``tr`` element with ``bgcolor="#FFFFFF"``
    that ``isTagClass`` accepts is flattened to UTF-8 text with ``^`` between
    the pieces, and the resulting fields are used by position:

    * field 0          -- company title
    * fields 1-3       -- win / draw / lose odds
    * fields 5, 6, 7   -- converted handicap line (printed as "转换后")
    * fields 9, 10, 11 -- actual handicap line (printed as "实际")

    A row is rendered white-on-red when field 6 differs from field 10, and
    black-on-white otherwise. A plain-text summary of all rows is printed
    wrapped in an ANSI colour escape sequence.

    Rows with fewer than 12 fields are skipped instead of raising IndexError.

    Args:
        soccerid: match id inserted into the request URL.

    Returns:
        The concatenated ``<tr ...>`` markup of all rendered rows (no closing
        ``</tr>`` is emitted), or ``''`` when the row lookup returns ``None``.
    """
    # The company filter is fixed; an earlier variant built `cids` from a
    # single company id.
    url = ('http://www.310win.com/info/1x2exchange.aspx?id=' + str(soccerid)
           + '&cids=,16,18,80,81,90,281,517,545,&type=3')
    print(url)

    trlist = SoupHelper(url).gethtmllistwithlabel('tr', {'bgcolor': '#FFFFFF'})
    if trlist is None:
        return ''

    row_template = ("<tr bgcolor=\"%s\" style=\"color:%s\"><td>%s</td> <td>%s</td><td>%s</td><td>%s</td>"
                    "<td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td>")
    summary_lines = []
    html_rows = []

    for tr in trlist:
        if not isTagClass(tr):
            continue

        unitStrList = tr.get_text('^').encode('utf-8').split('^')
        if len(unitStrList) < 12:
            # Index 11 is the highest field read below; a shorter row cannot
            # be rendered, so skip it rather than abort the whole table.
            continue

        oneCompany = BetCompany()
        oneCompany.companyTitle = unitStrList[0]

        summary_lines.append('%s 转换后: %s %s %s 实际: %s %s %s \n' % (
            unitStrList[0],
            unitStrList[5], unitStrList[6], unitStrList[7],
            unitStrList[9], unitStrList[10], unitStrList[11]))

        # Highlight rows whose converted and actual handicap fields differ.
        if unitStrList[6] != unitStrList[10]:
            tdColor, titleColor = 'red', 'white'
        else:
            tdColor, titleColor = 'white', 'black'

        # NOTE(review): companyTitle is a UTF-8 byte string here, so [:5]
        # keeps 5 bytes and may cut a multi-byte character -- confirm whether
        # 5 characters were intended.
        html_rows.append(row_template % (
            tdColor, titleColor, oneCompany.companyTitle[:5],
            '', unitStrList[5], unitStrList[6], unitStrList[7],
            '', unitStrList[9], unitStrList[10], unitStrList[11]))

        # The numeric fields are parsed only to report malformed values; the
        # populated object is not returned or stored.
        try:
            # Win / draw / lose odds.
            oneCompany.winOdd = float(unitStrList[1])
            oneCompany.drawOdd = float(unitStrList[2])
            oneCompany.loseOdd = float(unitStrList[3])
            # Converted Asian handicap.
            oneCompany.exchange_top = float(unitStrList[5])
            oneCompany.exchange_bottom = float(unitStrList[7])
            # Current Asian handicap.
            # NOTE(review): `now_top` is assigned twice, so the first value is
            # overwritten; the second assignment presumably targets a different
            # attribute, and the indices differ from the 9/10/11 used for
            # display above -- confirm against BetCompany.
            oneCompany.now_top = float(unitStrList[8])
            oneCompany.now_top = float(unitStrList[10])
        except Exception as e:
            # Exception rather than BaseException, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            print(e)

    print("\033[1;31;40m%s\033[0m" % ''.join(summary_lines))
    return ''.join(html_rows)


# Disabled command-line entry point, kept for reference:
# if sys.argv.__len__()==1:
#     sys.exit('\033[0;36;40mUsage:\n1 argument:\nExample: python TodaySoccer.pyc 12344\033[0m')
#
# if __name__ == '__main__':
#     getexchange(sys.argv[1])
#     getexchange(1444073)