# XPath probes for the three row icons that mark a table row as a goal event.
_GOAL_ICON_XPATHS = (
    './td/img[@src="/images/row/1.gif"]',
    './td/img[@src="/images/row/2.gif"]',
    './td/img[@src="/images/row/3.gif"]',
)


def _goal_team_and_minute(row):
    """Return ``(team_label, minute_text)`` for a single goal row.

    The team label is '客队' (away team) when the first text cell of the row
    is a non-breaking space, otherwise '主队' (home team).  The minute text
    is the relevant cell with its last character stripped.

    Raises:
        IndexError: if the row has fewer text cells than the layout needs.
    """
    texts = row.xpath('./td/text()')
    if texts[0] == '\xa0':
        return '客队', texts[2][:-1]
    # The layout check uses this row's own cell count: a three-cell row keeps
    # the minute in the first cell, any other layout keeps it in the second.
    if len(texts) == 3:
        return '主队', texts[0][:-1]
    return '主队', texts[1][:-1]


def goal_detail(url):
    """Fetch a match detail page and summarise its goal events.

    Args:
        url: address of the detail page, passed to
            ``http_to_char.get_content_by_url``.

    Returns:
        A 3-tuple ``(first_goal_team, first_goal_minute, goal_log)``:

        * ``first_goal_team`` -- '主队' (home) or '客队' (away) for the first
          goal row found in the ``mtable`` table.
        * ``first_goal_minute`` -- minute text of that first goal row.
        * ``goal_log`` -- one ``"<minute>分 <home>-<away>,"`` entry per goal
          row, concatenated in page order (each entry keeps its trailing
          comma), showing the running score after that goal.

        When the page has no goal rows the result is ``("", -1, "")``.

    Raises:
        IndexError: if a goal row has fewer text cells than expected.
    """
    content = http_to_char.get_content_by_url(url)
    root = etree.HTML(content)

    # Keep only rows that carry at least one of the goal icons.
    goal_rows = [
        row
        for row in root.xpath('//table[@class="mtable"]//tr')
        if any(row.xpath(icon) for icon in _GOAL_ICON_XPATHS)
    ]
    if not goal_rows:
        return "", -1, ""

    home_goals = 0
    away_goals = 0
    first_goal = None
    entries = []
    for row in goal_rows:
        team, minute = _goal_team_and_minute(row)
        if first_goal is None:
            first_goal = (team, minute)
        if team == '客队':
            away_goals += 1
        else:
            home_goals += 1
        entries.append(
            str(minute) + "分 " + str(home_goals) + "-" + str(away_goals) + ','
        )

    return first_goal[0], first_goal[1], "".join(entries)
def get_game_end(url):
    """Collect one summary tuple for every finished match listed on a page.

    A row (``<tr>`` carrying a ``yy`` attribute) counts as finished when the
    text of its red status span is '完'.

    Args:
        url: address of the match list page, passed to
            ``http_to_char.get_content_by_url``.

    Returns:
        A list of tuples
        ``(num, game_name, changci, zhu_name, ke_name, str_quan, str_ban,
        num_goal, first_goal_team, firt_goal_time, goal_decs)`` where

        * ``num`` is the row ``id`` attribute without its first character,
        * ``game_name``, ``zhu_name``, ``ke_name`` are the first three
          comma-separated fields of the row's ``gy`` attribute,
        * ``changci`` is the first text of the third cell,
        * ``str_quan`` is ``"<home>-<away>"`` built from the first and third
          link texts of the ``pk`` div,
        * ``str_ban`` is the first text of the ninth cell,
        * ``num_goal`` is the integer sum of the two scores,
        * the last three items come from ``goal_detail`` and stay ``None``
          when no goal was scored.

    Raises:
        IndexError: if a finished row lacks one of the expected cells.
        ValueError: if a score text is not an integer.
    """
    content = http_to_char.get_content_by_url(url)
    root = etree.HTML(content)

    result = []
    for data_game in root.xpath('//tr[@yy]'):
        status = data_game.xpath('./td/span[@class="red"]/text()')
        # A row with no red status text cannot be '完'; skip it instead of
        # failing on an empty XPath result.
        if not status or status[0] != '完':
            continue

        num = data_game.xpath('./@id')[0][1:]
        game = str(data_game.xpath('./@gy')[0]).split(',')
        game_name = game[0]
        changci = data_game.xpath('./td[3]/text()')[0]
        zhu_name = game[1]
        ke_name = game[2]

        # Evaluate the score links once; index 0 is the home score and
        # index 2 the away score.
        pk_texts = data_game.xpath('./td/div[@class="pk"]/a/text()')
        num_zhu = pk_texts[0]
        num_ke = pk_texts[2]
        str_quan = str(num_zhu) + '-' + str(num_ke)
        str_ban = data_game.xpath('./td[9]/text()')[0]
        num_goal = int(num_zhu) + int(num_ke)

        first_goal_team = None
        firt_goal_time = None
        goal_decs = None
        # Only goalless matches skip the extra detail-page request.
        if num_goal != 0:
            first_goal_team, firt_goal_time, goal_decs = goal_detail(
                "http://live.500.com/detail.php?fid=" + num + "&r=1"
            )

        result.append((
            num, game_name, changci, zhu_name, ke_name, str_quan, str_ban,
            num_goal, first_goal_team, firt_goal_time, goal_decs,
        ))
    return result
def get_game_list(url, page, count):
    """Append one CSV line per finished match on a page to ``oupei<page>.csv``.

    For every finished row (red status text '完') the function reads the
    score and the middle ``pk`` link text, asks ``tongji.tongji_yapan`` for
    an outcome value, fetches the odds list via ``oupei.get_oupei`` and
    appends the outcome to it.  The resulting values are written as one
    comma-joined line.

    Args:
        url: address of the match list page.
        page: current output file number; used in the file name.
        count: number of records written so far.

    Returns:
        ``(page, count)`` after processing, so the caller can pass them on
        to the next call.

    Raises:
        IndexError: if a finished row lacks an expected attribute or link.
        ValueError: if a score text is not an integer.
    """
    content = http_to_char.get_content_by_url(url)
    root = etree.HTML(content)

    for data_game in root.xpath('//tr[@yy]'):
        status = data_game.xpath('./td/span[@class="red"]/text()')
        # A row with no red status text cannot be '完'; skip it instead of
        # failing on an empty XPath result.
        if not status or status[0] != '完':
            continue

        game_id = str(data_game.xpath('./@id')[0][1:])
        pk_texts = data_game.xpath('./td/div[@class="pk"]/a/text()')
        num_zhu = int(pk_texts[0])
        pankou = str(pk_texts[1])
        num_ke = int(pk_texts[2])

        # Win/loss outcome for this score and line (the original comment
        # called this the "European odds" result).
        out = tongji.tongji_yapan(num_zhu, num_ke, pankou)
        result_list = oupei.get_oupei(game_id)
        result_list.append(out)

        # Start a new output file after every 50 records, but not before
        # the very first one.
        if count != 0 and count % 50 == 0:
            page = page + 1

        with open("oupei" + str(page) + ".csv", 'a', encoding="utf-8") as f:
            f.write(','.join(str(i) for i in result_list) + '\n')
        count = count + 1

    return page, count
def get_oupei(game_id):
    """Build the odds value list for one match from its odds page.

    Args:
        game_id: match identifier as a string; it is concatenated into the
            page address.

    Returns:
        The list filled by ``get_data`` / ``get_data_simple``, one call per
        company name, in iteration order.
    """
    content = http_to_char.get_content_by_url(
        'http://odds.500.com/fenxi/ouzhi-' + game_id + '.shtml'
    )
    root = etree.HTML(content)

    result_list = []
    # NOTE(review): `list` is not defined in this function; it is presumably
    # a module-level iterable of company titles that shadows the builtin --
    # confirm against the rest of the file.
    for bet_company in list:
        # Parent element(s) of whatever node carries this title.
        matches = root.xpath('//*[@title="' + bet_company + '"]/..')
        if matches:
            get_data(matches[0], result_list)
        else:
            # Company not present on the page.
            get_data_simple(result_list)
    return result_list
def get_game(url):
    """Walk the match rows of a page; currently produces no result.

    Args:
        url: address of the match list page, passed to
            ``http_to_char.get_content_by_url``.

    Returns:
        None.  The values parsed inside the loop are never stored or
        returned.

    Raises:
        IndexError: if a row has no red status text.
    """
    content = http_to_char.get_content_by_url(url)
    root = etree.HTML(content)

    for data_game in root.xpath('//tr[@yy]'):
        status_text = data_game.xpath('./td/span[@class="red"]/text()')[0]
        # NOTE(review): the status text is compared with the integer 0, which
        # a string never equals, so everything below the guard is skipped for
        # every row.  Confirm the intended status value.
        if status_text != 0:
            continue

        num = data_game.xpath('./@id')[0][1:]
        duiwu = data_game.xpath('./@gy')[0]
        game = str(duiwu).split(',')
        game_name = game[0]
        zhu_name = game[1]
        ke_name = game[2]
        pk_texts = data_game.xpath('./td/div[@class="pk"]/a/text()')
        num_zhu = pk_texts[0]
        num_ke = pk_texts[2]
        str_ban = data_game.xpath('./td[9]/text()')
        num_goal = int(num_zhu) + int(num_ke)
def get_yazhi(url):
    """Read two triples of cell text from the element with ``id="3"``.

    Args:
        url: address of the page, passed to
            ``http_to_char.get_content_by_url``.

    Returns:
        ``[[a3, a1, a0], [b3, b1, b0]]`` -- the first triple comes from the
        nested table in the fifth cell, the second from the nested table in
        the third cell.  (The original local names called them "born" and
        "dead" values; presumably opening and closing figures -- confirm.)

    Raises:
        IndexError: if any of the six paths matches nothing.
    """
    content = http_to_char.get_content_by_url(url)
    root = etree.HTML(content)

    # NOTE(review): every path goes through `tbody`; lxml only matches it
    # when the markup really contains that element -- confirm for this page.
    triple_paths = (
        (
            '//*[@id="3"]/td[5]/table/tbody/tr/td[1]/text()',
            '//*[@id="3"]/td[5]/table/tbody/tr/td[2]/text()',
            '//*[@id="3"]/td[5]/table/tbody/tr/td[3]/text()',
        ),
        (
            '//*[@id="3"]/td[3]/table/tbody/tr/td[1]/text()',
            '//*[@id="3"]/td[3]/table/tbody/tr/td[2]/text()',
            '//*[@id="3"]/td[3]/table/tbody/tr/td[3]/text()',
        ),
    )
    return [[root.xpath(path)[0] for path in triple] for triple in triple_paths]
def get_ouzhi(url):
    """Read four triples of cell text from the element with ``id="3"``.

    Args:
        url: address of the page, passed to
            ``http_to_char.get_content_by_url``.

    Returns:
        A list of four three-item lists, in this order:

        1. third cell, first row of its nested table,
        2. third cell, second row,
        3. sixth cell, first row,
        4. sixth cell, second row.

        (The original local names called rows one and two "born" and "dead",
        and the sixth-cell values "ceri"; their exact meaning is not visible
        here -- confirm against the page.)

    Raises:
        IndexError: if any of the twelve paths matches nothing.
    """
    content = http_to_char.get_content_by_url(url)
    root = etree.HTML(content)

    # NOTE(review): every path goes through `tbody`; lxml only matches it
    # when the markup really contains that element -- confirm for this page.
    triple_paths = (
        (
            '//*[@id="3"]/td[3]/table/tbody/tr[1]/td[1]/text()',
            '//*[@id="3"]/td[3]/table/tbody/tr[1]/td[2]/text()',
            '//*[@id="3"]/td[3]/table/tbody/tr[1]/td[3]/text()',
        ),
        (
            '//*[@id="3"]/td[3]/table/tbody/tr[2]/td[1]/text()',
            '//*[@id="3"]/td[3]/table/tbody/tr[2]/td[2]/text()',
            '//*[@id="3"]/td[3]/table/tbody/tr[2]/td[3]/text()',
        ),
        (
            '//*[@id="3"]/td[6]/table/tbody/tr[1]/td[1]/text()',
            '//*[@id="3"]/td[6]/table/tbody/tr[1]/td[2]/text()',
            '//*[@id="3"]/td[6]/table/tbody/tr[1]/td[3]/text()',
        ),
        (
            '//*[@id="3"]/td[6]/table/tbody/tr[2]/td[1]/text()',
            '//*[@id="3"]/td[6]/table/tbody/tr[2]/td[2]/text()',
            '//*[@id="3"]/td[6]/table/tbody/tr[2]/td[3]/text()',
        ),
    )

    ouzhi = []
    for triple in triple_paths:
        ouzhi.append([root.xpath(path)[0] for path in triple])
    return ouzhi