def query_game_info_day(query_date, query_type="DAY_NEXT"): # 查询某一天之后的数据 if query_type == 'DAY_NEXT': result = DBUtils.executeOne( "SELECT * FROM football_game_info where start_datetime > %s " "order by start_datetime", query_date) else: # 查询某一天的数据 result = DBUtils.executeOne( "SELECT * FROM football_game_info where start_datetime like %s " "order by start_datetime", query_date + '%') if result is None: result = [] return result
def cra_main_page(): print("主页抓取:") # 先用测试数据 # content = testData.str content = cra_data_url("http://odds.500.com/yazhi_jczq.shtml") # print(soup.select("#main-tbody input[type='checkbox']")) selector = etree.HTML(content) # 编号 elemtsNo = selector.xpath('//tbody[@id="main-tbody"]' '//input[@type="checkbox"]/@value') # 第一列 场次 elemts1 = selector.xpath('//tbody[@id="main-tbody"]' '//input[@type="checkbox"]/../text()') # 比赛时间 elemtsTime = selector.xpath( '//tbody[@id="main-tbody"]//input[@type="checkbox"]' '/../../../@date-dtime') # 隐藏的第4列 比赛队名1 elemtsTeam1 = selector.xpath( '//tbody[@id="main-tbody"]//input[@type="checkbox"]' '/../../following-sibling::*[4]/a/text()') # 隐藏的第六列 比赛队名2 elemtsTeam2 = selector.xpath( '//tbody[@id="main-tbody"]//input[@type="checkbox"]' '/../../following-sibling::*[6]/a/text()') # elNo, el1, elTime, name1, name2 # 可以开始存库了 if elemtsNo and len(elemtsNo) > 0: exe_result = DBUtils.executeMany( "replace into football_game_info values (%s, %s, %s, %s, %s)", zip(elemtsNo, elemts1, elemtsTime, elemtsTeam1, elemtsTeam2)) print("主页保存条数:%s" % exe_result) else: print("主页抓取失败:%s" % content)
def query_record2(game_id=729379): cids = query_had_comps(game_id) sql = "" for i in range(cids.__len__()): sql += """ select * from ( SELECT cid, concat(com_name,'初赔') as com_name,insert_datetime ,`m1_0`, `m2_0`, `m2_1`, `m3_0`, `m3_1`, `m3_2`, `m4_0`, `m4_1`, `m4_2`, `m4_3`, `m0_0`, `m1_1`, `m2_2`, `m3_3`, `m4_4` ,order_num ,next.* FROM bifen_index as main ,( SELECT cid as nid,`m1_0` as m0_1,`m2_0` as m0_2,`m2_1` as m1_2,`m3_0` as m0_3, `m3_1` as m1_3, `m3_2`as m2_3, `m4_0` as m0_4, `m4_1` as m1_4, `m4_2` as m2_4, `m4_3` as m3_4 FROM bifen_index WHERE game_id={game_id} and cid ={cid} AND win_type='客胜' order by insert_datetime limit 1 ) as next WHERE game_id={game_id} and main.cid={cid} AND win_type='主胜' order by insert_datetime limit 1) as a1 union all select * from ( SELECT cid, concat(com_name,'即赔') as com_name,insert_datetime ,`m1_0`, `m2_0`, `m2_1`, `m3_0`, `m3_1`, `m3_2`, `m4_0`, `m4_1`, `m4_2`, `m4_3`, `m0_0`, `m1_1`, `m2_2`, `m3_3`, `m4_4` ,order_num ,next.* FROM bifen_index as main ,( SELECT cid as nid,`m1_0` as m0_1, `m2_0` as m0_2, `m2_1` as m1_2, `m3_0` as m0_3, `m3_1` as m1_3, `m3_2`as m2_3, `m4_0` as m0_4, `m4_1` as m1_4, `m4_2` as m2_4, `m4_3` as m3_4 FROM bifen_index WHERE game_id={game_id} and cid ={cid} AND win_type='客胜' order by insert_datetime desc limit 1 ) as next WHERE game_id={game_id} and main.cid={cid} AND win_type='主胜' order by insert_datetime desc limit 1) as a1 """.format(game_id=game_id, cid=cids[i]['cid']) if i != cids.__len__() - 1: sql += " UNION ALL " return DBUtils.execute(sql)
def query_echart_data(game_id): """ 查询比分的echart 数据 """ return DBUtils.execute( "select * from bifen_index_std where game_id=%s and win_type = 'main'" " union all " "select * from bifen_index_std where game_id=%s and win_type = 'next'", (game_id, game_id))
def query_record(game_id=729379): # 查询主胜 客胜,主胜 拼接 客胜 return DBUtils.execute( """ SELECT *,substr(insert_datetime,6,11) as dd from ( SELECT cid, concat(com_name,'即赔') as com_name,insert_datetime ,`m1_0`, `m2_0`, `m2_1`, `m3_0`, `m3_1`, `m3_2`, `m4_0`, `m4_1`, `m4_2`, `m4_3`, `m0_0`, `m1_1`, `m2_2`, `m3_3`, `m4_4` ,order_num ,next.* FROM bifen_index as main ,( SELECT cid as nid,`m1_0` as m0_1, `m2_0` as m0_2, `m2_1` as m1_2, `m3_0` as m0_3, `m3_1` as m1_3, `m3_2`as m2_3, `m4_0` as m0_4, `m4_1` as m1_4, `m4_2` as m2_4, `m4_3` as m3_4 FROM bifen_index WHERE game_id=%(game_id)s AND win_type='客胜' and insert_datetime = (select max(insert_datetime) FROM bifen_index WHERE game_id=%(game_id)s) ) as next WHERE game_id=%(game_id)s AND win_type='主胜' and main.cid=next.nid and insert_datetime = (select max(insert_datetime) FROM bifen_index WHERE game_id=%(game_id)s) union all SELECT cid,concat(com_name,'初赔') as com_name,insert_datetime ,`m1_0`, `m2_0`, `m2_1`, `m3_0`, `m3_1`, `m3_2`, `m4_0`, `m4_1`, `m4_2`, `m4_3`, `m0_0`, `m1_1`, `m2_2`, `m3_3`, `m4_4`,order_num, next.* FROM bifen_index as main ,( SELECT cid as nid,`m1_0` as m0_1, `m2_0` as m0_2, `m2_1` as m1_2, `m3_0` as m0_3, `m3_1` as m1_3, `m3_2`as m2_3, `m4_0` as m0_4, `m4_1` as m1_4, `m4_2` as m2_4, `m4_3` as m3_4 FROM bifen_index WHERE game_id=%(game_id)s AND win_type='客胜' and insert_datetime = (select min(insert_datetime) FROM bifen_index WHERE game_id=%(game_id)s) ) as next WHERE game_id=%(game_id)s AND win_type='主胜' and main.cid=next.nid and insert_datetime = (select min(insert_datetime) FROM bifen_index WHERE game_id=%(game_id)s) ) as all_data ORDER BY order_num,insert_datetime """, {"game_id": game_id})
def query_had_comps(game_id): """ 查询已经抓取的公司 :return: """ return DBUtils.execute( "SELECT cid,order_num FROM `bifen_index` WHERE game_id=%(game_id)s " "GROUP BY cid,order_num ORDER BY order_num", {"game_id": game_id})
def query_jishioupei_echart3(gameId): """ :param gameId: :return: 返回即时让球欧赔凯里 """ sql = """select kelly_win_num,kelly_avg_num,kelly_lost_num from pay_info where game_id=%s and insert_datetime=(select max(insert_datetime) from pay_info where game_id=%s)""" return DBUtils.executeOne(sql,(gameId,gameId))
def queryMatchCount(): """ 查询所有日期 :return: """ return DBUtils.execute( "SELECT left(start_datetime,10) dd FROM football_game_info " "GROUP BY left(start_datetime,10) ORDER BY dd desc")
def insert_bodan_std(bodan_std): """ 插入波胆标准差 :param bodan_std: :return: 插入条数 """ return DBUtils.executeOne( "INSERT INTO bifen_bodan_std(game_id,win_bodan_std,dogfall_bodan_std" ",lose_bodan_std) values (%s,%s,%s,%s)", bodan_std)
def query_oupei_echart3(gameId): """ 查询对应比赛的初始凯利值 :param gameId: 比赛编号 :return: """ sql = """select kelly_win_num,kelly_avg_num,kelly_lost_num from rangqiu_startvalue_info where game_id=%s and insert_datetime=(select max(insert_datetime) from rangqiu_startvalue_info where game_id=%s)""" return DBUtils.executeOne(sql,(gameId,gameId))
def query_bodan_std(game_id): """ 查询波胆平方差 :param game_id: :return: """ return DBUtils.queryNoDict( "select win_bodan_std, dogfall_bodan_std, lose_bodan_std, " " substr(insert_datetime,6,11) as insert_datetime " "from bifen_bodan_std where game_id=%s", game_id)
def query_game_dates(before_days=12): """ 查询12天之后的日期 """ dd = (datetime.today() + timedelta(days=-before_days)).strftime('%Y-%m-%d') all_date = DBUtils.executeOne( "SELECT substr(start_datetime,1,10) as start_datetime FROM football_game_info where" " start_datetime>%s group by substr(start_datetime,1,10) " "order by substr(start_datetime,1,10) desc", dd) if all_date is None: all_date = [] return all_date
def query_oupei_gameId(): """获取game_id列表""" sql = """select game_id from oupei_startvalue_info""" game_id = DBUtils.executeOne(sql) gameId_list = [] for gameId in game_id: gameId_id = gameId["game_id"] gameId_list.append(gameId_id) # print(gameId_list) return set(gameId_list)
def query_oupei_echart2(gameId): """ 查询对应比赛的初始凯利值 :param gameId: 比赛编号 :return: """ sql="""select kelly_win_num,kelly_avg_num,kelly_lost_num from oupei_startvalue_info where game_id=%s and insert_datetime=(select max(insert_datetime) from oupei_startvalue_info where game_id=%s)""" #select * from oupei_startvalue_info where game_id='807259' and insert_datetime=(select max(insert_datetime) from oupei_startvalue_info where game_id='807259'); try: return DBUtils.executeOne(sql, (gameId,gameId)) except Exception as e: return None
def query_oupei_echart(dd, gameId): """ 查询指定日期到现在的数据库 :param dd: 日期 :param gameId: 比赛编号 :return: 结果 """ sql = """select game_id, GROUP_CONCAT( eu_win_num SEPARATOR ',' ) AS eu_win_num, GROUP_CONCAT( eu_avg_num SEPARATOR ',' ) AS eu_avg_num, GROUP_CONCAT( eu_lost_num SEPARATOR ',' ) AS eu_lost_num, GROUP_CONCAT( kelly_win_num SEPARATOR ',' ) AS kelly_win_num, GROUP_CONCAT( kelly_avg_num SEPARATOR ',' ) AS kelly_avg_num, GROUP_CONCAT( kelly_lost_num SEPARATOR ',' ) AS kelly_lost_num, insert_datetime FROM oupei_info where insert_datetime > %s and game_id=%s GROUP BY game_id,insert_datetime""" return DBUtils.executeOne(sql, (dd, gameId))
def process_sub_page3( testElemNo=None, first_tmp_url="http://odds.500.com/fenxi/rangqiu-%s.shtml?lot=jczq", next_tmp_url="http://odds.500.com/fenxi1/rangqiu.php?id=%s&ctype=1&" "start=%d&r=1&style=0&guojia=0&chupan=0&lot=jczq", table_name="rangqiu_startvalue_info"): startTotalTime = time.time() if testElemNo is not None: elemNoList = testElemNo else: elemNoList = DBUtils.execute( "select game_no,start_datetime from football_game_info " "where start_datetime>now() order by start_datetime ") all_sub_data = [] str_sql = "insert into " + table_name + "(kelly_win_num, kelly_avg_num, " \ "kelly_lost_num,game_id) values(%s,%s,%s,%s) " # 设置解析列索引 params = {} if table_name != "pay_info": params = {"col_index": 2} if not isinstance(elemNoList, list): print("不是数组:", end="") print(elemNoList) return [] """ 解析让球指数页面 """ for elObj in elemNoList: startTime = time.time() time.sleep(ConstantVal.TIMER_INTERVAL) elNo = elObj["game_no"] url = first_tmp_url % elNo # 子页面第一页的数据,存在多页的情况 all_grade_data = analysis_all_sub_page3(cra_data_url(url, "utf-8"), **params) # 子页面其他页的数据 start = 30 while True: url = next_tmp_url % (elNo, start) htmlData = cra_data_url(url, "utf-8") if htmlData and len(htmlData.strip()) > 100: result = analysis_all_sub_page3(htmlData, 'tr', **params) all_grade_data = concat_array(all_grade_data, result) start += 30 else: break # 计算平方差 if len(all_grade_data) > 0: avg_data = [ str(float('%.3f' % cal_std2(bb))) for bb in all_grade_data ] avg_data.append(elNo) all_sub_data.append(avg_data) # count = DBUtils.executeOne(str_sql, avg_data) print( str(elNo) + " " + table_name + " 子页总共抓取条数:%s 耗时:%s" % (start, utils.float_num(time.time() - startTime))) count = DBUtils.executeMany(str_sql, all_sub_data) print(all_sub_data) print("子页面保存数量:%s 耗时:%s" % (count, utils.float_num(time.time() - startTotalTime))) return elemNoList
def parse_bifen(game_id=729379): time.sleep(ConstantVal.TIMER_INTERVAL) startTime = time.time() bifen_url = "http://odds.500.com/fenxi/bifen-%s.shtml" % game_id bifen_html = cra_data.cra_data_url(bifen_url) # 只解析class 为pub_table的table 即可 only_table = SoupStrainer("table", class_="pub_table") soup = BeautifulSoup(bifen_html, "lxml", parse_only=only_table) if not soup: print("%s 内容无效" % game_id) return # 获取比分,find搜索到一个就会停止搜索,搜索全部要使用 find_all scores_th = soup.find('tr').contents[5:] scores = [text.get_text() for text in scores_th if type(text) is bs4.element.Tag] # 0:0的位置 dogfall_index = scores.index("0:0") # 所有赔率 # 威廉希尔:293、Bwin:5、Bet365:3、Eurobet:15 1Bet:671、金宝博:348、澳门:11、18Bet:863 # 用于排序 老的 ["威廉希尔", "Bet365", "Bwin", "澳门"] + ["Eurobet", "1Bet", "金宝博"] specify_cids = [293, 5, 3, 15]+[863, 348, 11] main_comp = ["威廉希尔", "Bwin", "Bet365", "Eurobet"] + ["18Bet", "金宝博", "澳门"] # 用于在网页上查找所有 有class属性的tr all_trs = soup.find_all("tr", {"class": re.compile(".*")}) find_table_bifen_result = [] main_win_data = [] next_win_data = [] col_pay_data = [] for tr in all_trs: # 获取编号 cid = parse_qs(urlparse(tr.a.get('href')).query)['cid'] # 获取横着的整行数据 temp_list = list(tr.stripped_strings) # 平局赔付 avg_pay_list = temp_list[(dogfall_index + 2) * 2:] com_name = temp_list[1] order_num = [(main_comp.index(com_name) + 1) * 10 if com_name in main_comp else 100] # 主胜的赔率 main_win_pay = [ps for ps in temp_list[2:(dogfall_index + 2) * 2][::2]] # 客胜的赔率 next_win_pay = [ps for ps in temp_list[2:(dogfall_index + 2) * 2][1::2]] # 主胜 temp_list_main = [game_id] + cid + temp_list[:2] + order_num + main_win_pay + avg_pay_list # 客胜 temp_list_next = [game_id] + cid + temp_list[:2] + order_num + next_win_pay + avg_pay_list # 将客胜、平局、主胜的赔率存入数组 col_pay_data.append(main_win_pay[1:] + avg_pay_list + next_win_pay[1:]) main_win_data.append(temp_list_main) next_win_data.append(temp_list_next) # if com_name in com_name and cid and int(cid[0]) in specify_cids: find_table_bifen_result.append(temp_list_main) find_table_bifen_result.append(temp_list_next) # 比分存入数据库 if find_table_bifen_result.__len__() > 0: result_count = DBUtils.executeMany( "insert into bifen_index(game_id,`cid`,`html_index`,`com_name`,order_num,win_type,`m1_0`,`m2_0`,`m2_1`" ",`m3_0`,`m3_1`," "`m3_2`,`m4_0`,`m4_1`,`m4_2`,`m4_3`,`m0_0`,`m1_1`,`m2_2`,`m3_3`,`m4_4`) values (%s,%s,%s,%s,%s," "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", find_table_bifen_result) print("%s bifen存库条数:%s 耗时:%s" % (game_id, result_count, utils.float_num(time.time() - startTime))) # 计算波胆的标准差,然后存入到标准差表 if col_pay_data and len(col_pay_data) > 0: # 计算波胆平方差 calc_bodan_std_result = bodan_std(col_pay_data) result = bifenImpl.insert_bodan_std([game_id] + calc_bodan_std_result) print("%s 保存波胆标准差数据条数:%s" % (game_id, result))
def insert_std_data(std_data): """ 保存比分的标准差 """ return DBUtils.executeMany( "INSERT INTO bifen_index_std(win_type,m1_0,m2_0,m2_1,m3_0,m3_1,m3_2" ",m4_0,m4_1,m4_2,m4_3,m0_0,m1_1,m2_2,m3_3,m4_4," "game_id)VALUES(" + "%s," * 16 + "%s" + ") ", std_data)