Beispiel #1
0
def query_game_info_day(query_date, query_type="DAY_NEXT"):
    # 查询某一天之后的数据
    if query_type == 'DAY_NEXT':
        result = DBUtils.executeOne(
            "SELECT * FROM football_game_info where start_datetime > %s "
            "order by start_datetime", query_date)
    else:
        # 查询某一天的数据
        result = DBUtils.executeOne(
            "SELECT * FROM football_game_info where start_datetime like %s "
            "order by start_datetime", query_date + '%')
    if result is None:
        result = []
    return result
Beispiel #2
0
def cra_main_page():
    print("主页抓取:")
    # 先用测试数据
    # content = testData.str
    content = cra_data_url("http://odds.500.com/yazhi_jczq.shtml")
    # print(soup.select("#main-tbody input[type='checkbox']"))
    selector = etree.HTML(content)

    # 编号
    elemtsNo = selector.xpath('//tbody[@id="main-tbody"]'
                              '//input[@type="checkbox"]/@value')
    # 第一列 场次
    elemts1 = selector.xpath('//tbody[@id="main-tbody"]'
                             '//input[@type="checkbox"]/../text()')
    # 比赛时间
    elemtsTime = selector.xpath(
        '//tbody[@id="main-tbody"]//input[@type="checkbox"]'
        '/../../../@date-dtime')
    # 隐藏的第4列 比赛队名1
    elemtsTeam1 = selector.xpath(
        '//tbody[@id="main-tbody"]//input[@type="checkbox"]'
        '/../../following-sibling::*[4]/a/text()')
    # 隐藏的第六列 比赛队名2
    elemtsTeam2 = selector.xpath(
        '//tbody[@id="main-tbody"]//input[@type="checkbox"]'
        '/../../following-sibling::*[6]/a/text()')
    # elNo, el1, elTime, name1, name2
    # 可以开始存库了
    if elemtsNo and len(elemtsNo) > 0:
        exe_result = DBUtils.executeMany(
            "replace into football_game_info values (%s, %s, %s, %s, %s)",
            zip(elemtsNo, elemts1, elemtsTime, elemtsTeam1, elemtsTeam2))
        print("主页保存条数:%s" % exe_result)
    else:
        print("主页抓取失败:%s" % content)
Beispiel #3
0
def query_record2(game_id=729379):
    cids = query_had_comps(game_id)
    sql = ""
    for i in range(cids.__len__()):
        sql += """
        select * from (
SELECT  cid, concat(com_name,'初赔') as com_name,insert_datetime ,`m1_0`,  `m2_0`,  `m2_1`, 
        `m3_0`,  `m3_1`,  `m3_2`,  `m4_0`,  `m4_1`,  `m4_2`,  `m4_3`,  `m0_0`,  `m1_1`,  `m2_2`,  `m3_3`,  `m4_4`
        ,order_num ,next.* 
        FROM bifen_index as main ,(
      SELECT cid as nid,`m1_0` as m0_1,`m2_0` as m0_2,`m2_1` as m1_2,`m3_0` as m0_3, `m3_1` as m1_3,
      `m3_2`as m2_3,  `m4_0` as m0_4,  `m4_1` as m1_4,  `m4_2` as m2_4,  `m4_3` as m3_4 FROM bifen_index
        WHERE game_id={game_id} and cid ={cid} AND win_type='客胜'  order by insert_datetime limit 1
        ) as next
                WHERE game_id={game_id} and main.cid={cid} AND win_type='主胜'  order by insert_datetime limit 1) as a1
        union all
select * from (
SELECT  cid, concat(com_name,'即赔') as com_name,insert_datetime ,`m1_0`,  `m2_0`,  `m2_1`, 
        `m3_0`,  `m3_1`,  `m3_2`,  `m4_0`,  `m4_1`,  `m4_2`,  `m4_3`,  `m0_0`,  `m1_1`,  `m2_2`,  `m3_3`,  `m4_4`
        ,order_num ,next.* 
        FROM bifen_index as main ,(
            SELECT  cid as nid,`m1_0` as m0_1,  `m2_0` as m0_2,  `m2_1` as m1_2,  `m3_0` as m0_3,  `m3_1` as m1_3,
            `m3_2`as m2_3,  `m4_0` as m0_4,  `m4_1` as m1_4,  `m4_2` as m2_4,  `m4_3` as m3_4 FROM bifen_index
        WHERE game_id={game_id} and cid ={cid}  AND win_type='客胜' order by insert_datetime desc limit 1
        ) as next
                WHERE game_id={game_id} and main.cid={cid} AND win_type='主胜'  order by insert_datetime desc limit 1) as a1
        """.format(game_id=game_id, cid=cids[i]['cid'])
        if i != cids.__len__() - 1:
            sql += " UNION ALL "
    return DBUtils.execute(sql)
Beispiel #4
0
def query_echart_data(game_id):
    """ 查询比分的echart 数据 """
    return DBUtils.execute(
        "select * from bifen_index_std where game_id=%s and win_type = 'main'"
        " union all "
        "select * from bifen_index_std where game_id=%s and win_type = 'next'",
        (game_id, game_id))
Beispiel #5
0
def query_record(game_id=729379):
    # 查询主胜 客胜,主胜 拼接 客胜
    return DBUtils.execute(
        """
     SELECT *,substr(insert_datetime,6,11) as dd from (
         SELECT  cid, concat(com_name,'即赔') as com_name,insert_datetime ,`m1_0`,  `m2_0`,  `m2_1`, 
        `m3_0`,  `m3_1`,  `m3_2`,  `m4_0`,  `m4_1`,  `m4_2`,  `m4_3`,  `m0_0`,  `m1_1`,  `m2_2`,  `m3_3`,  `m4_4`
        ,order_num ,next.* 
        FROM bifen_index as main ,(
				SELECT  cid as nid,`m1_0` as m0_1,  `m2_0` as m0_2,  `m2_1` as m1_2,  `m3_0` as m0_3,  `m3_1` as m1_3,
				`m3_2`as m2_3,  `m4_0` as m0_4,  `m4_1` as m1_4,  `m4_2` as m2_4,  `m4_3` as m3_4 FROM bifen_index
        WHERE game_id=%(game_id)s AND win_type='客胜' and insert_datetime =
				(select max(insert_datetime) FROM bifen_index WHERE game_id=%(game_id)s)
        ) as next
                WHERE game_id=%(game_id)s AND win_type='主胜' and main.cid=next.nid and insert_datetime =
                        (select max(insert_datetime) FROM bifen_index WHERE game_id=%(game_id)s)
        union all
        SELECT  cid,concat(com_name,'初赔') as com_name,insert_datetime ,`m1_0`,  `m2_0`,  `m2_1`,  
                `m3_0`,  `m3_1`,  `m3_2`,  `m4_0`,  `m4_1`,  `m4_2`,  `m4_3`,  `m0_0`,  `m1_1`,  `m2_2`,  `m3_3`,
                `m4_4`,order_num, next.* FROM bifen_index as main ,(
                        SELECT  cid as nid,`m1_0` as m0_1,  `m2_0` as m0_2,  `m2_1` as m1_2,  `m3_0` as m0_3,
                        `m3_1` as m1_3,  `m3_2`as m2_3,  `m4_0` as m0_4,  `m4_1` as m1_4,  `m4_2` as m2_4, 
                        `m4_3` as m3_4 FROM bifen_index
                WHERE game_id=%(game_id)s AND win_type='客胜' and insert_datetime =
                        (select min(insert_datetime) FROM bifen_index WHERE game_id=%(game_id)s)
        ) as next
                WHERE game_id=%(game_id)s AND win_type='主胜' and main.cid=next.nid and insert_datetime =
                        (select min(insert_datetime) FROM bifen_index WHERE game_id=%(game_id)s)
              ) as all_data ORDER BY order_num,insert_datetime
    """, {"game_id": game_id})
Beispiel #6
0
def query_had_comps(game_id):
    """
    查询已经抓取的公司
    :return:
    """
    return DBUtils.execute(
        "SELECT cid,order_num FROM `bifen_index` WHERE game_id=%(game_id)s "
        "GROUP BY cid,order_num ORDER BY order_num", {"game_id": game_id})
def query_jishioupei_echart3(gameId):
    """
    :param gameId:
    :return: 返回即时让球欧赔凯里
    """
    sql = """select kelly_win_num,kelly_avg_num,kelly_lost_num from pay_info 
        where game_id=%s and insert_datetime=(select max(insert_datetime) from pay_info where game_id=%s)"""
    return DBUtils.executeOne(sql,(gameId,gameId))
def queryMatchCount():
    """
    查询所有日期
    :return:
    """
    return DBUtils.execute(
        "SELECT left(start_datetime,10) dd FROM football_game_info "
        "GROUP BY left(start_datetime,10) ORDER BY dd desc")
Beispiel #9
0
def insert_bodan_std(bodan_std):
    """
    插入波胆标准差
    :param bodan_std:
    :return: 插入条数
    """
    return DBUtils.executeOne(
        "INSERT INTO bifen_bodan_std(game_id,win_bodan_std,dogfall_bodan_std"
        ",lose_bodan_std) values (%s,%s,%s,%s)", bodan_std)
def query_oupei_echart3(gameId):
    """
    查询对应比赛的初始凯利值
    :param gameId: 比赛编号
    :return:
    """
    sql = """select kelly_win_num,kelly_avg_num,kelly_lost_num from rangqiu_startvalue_info 
        where game_id=%s and insert_datetime=(select max(insert_datetime) from rangqiu_startvalue_info where game_id=%s)"""
    return DBUtils.executeOne(sql,(gameId,gameId))
Beispiel #11
0
def query_bodan_std(game_id):
    """
    查询波胆平方差
    :param game_id:
    :return:
    """
    return DBUtils.queryNoDict(
        "select win_bodan_std, dogfall_bodan_std, lose_bodan_std, "
        " substr(insert_datetime,6,11) as insert_datetime "
        "from bifen_bodan_std where game_id=%s", game_id)
Beispiel #12
0
def query_game_dates(before_days=12):
    """ 查询12天之后的日期 """
    dd = (datetime.today() + timedelta(days=-before_days)).strftime('%Y-%m-%d')
    all_date = DBUtils.executeOne(
        "SELECT substr(start_datetime,1,10) as start_datetime FROM football_game_info where"
        " start_datetime>%s group by substr(start_datetime,1,10) "
        "order by substr(start_datetime,1,10) desc", dd)
    if all_date is None:
        all_date = []
    return all_date
Beispiel #13
0
def query_oupei_gameId():
    """获取game_id列表"""
    sql = """select game_id from oupei_startvalue_info"""
    game_id = DBUtils.executeOne(sql)
    gameId_list = []
    for gameId in game_id:
        gameId_id = gameId["game_id"]
        gameId_list.append(gameId_id)
    # print(gameId_list)
    return set(gameId_list)
def query_oupei_echart2(gameId):
    """
    查询对应比赛的初始凯利值
    :param gameId: 比赛编号
    :return:
    """
    sql="""select kelly_win_num,kelly_avg_num,kelly_lost_num from oupei_startvalue_info 
    where game_id=%s and insert_datetime=(select max(insert_datetime) from oupei_startvalue_info where game_id=%s)"""
    #select * from oupei_startvalue_info where game_id='807259' and insert_datetime=(select max(insert_datetime) from oupei_startvalue_info where game_id='807259');
    try:
        return DBUtils.executeOne(sql, (gameId,gameId))
    except Exception as e:
        return None
def query_oupei_echart(dd, gameId):
    """
    查询指定日期到现在的数据库
    :param dd: 日期
    :param gameId: 比赛编号
    :return: 结果
    """
    sql = """select game_id,
                GROUP_CONCAT( eu_win_num SEPARATOR ',' ) AS eu_win_num,
                GROUP_CONCAT( eu_avg_num SEPARATOR ',' ) AS eu_avg_num,
                GROUP_CONCAT( eu_lost_num SEPARATOR ',' ) AS eu_lost_num,
                GROUP_CONCAT( kelly_win_num SEPARATOR ',' ) AS kelly_win_num,
                GROUP_CONCAT( kelly_avg_num SEPARATOR ',' ) AS kelly_avg_num,
                GROUP_CONCAT( kelly_lost_num SEPARATOR ',' ) AS kelly_lost_num,
                insert_datetime
        FROM oupei_info where insert_datetime > %s and game_id=%s GROUP BY game_id,insert_datetime"""
    return DBUtils.executeOne(sql, (dd, gameId))
Beispiel #16
0
def process_sub_page3(
        testElemNo=None,
        first_tmp_url="http://odds.500.com/fenxi/rangqiu-%s.shtml?lot=jczq",
        next_tmp_url="http://odds.500.com/fenxi1/rangqiu.php?id=%s&ctype=1&"
    "start=%d&r=1&style=0&guojia=0&chupan=0&lot=jczq",
        table_name="rangqiu_startvalue_info"):
    startTotalTime = time.time()
    if testElemNo is not None:
        elemNoList = testElemNo
    else:
        elemNoList = DBUtils.execute(
            "select game_no,start_datetime from football_game_info "
            "where start_datetime>now() order by start_datetime ")
    all_sub_data = []
    str_sql = "insert into " + table_name + "(kelly_win_num, kelly_avg_num, " \
                                            "kelly_lost_num,game_id) values(%s,%s,%s,%s) "

    # 设置解析列索引
    params = {}
    if table_name != "pay_info":
        params = {"col_index": 2}
    if not isinstance(elemNoList, list):
        print("不是数组:", end="")
        print(elemNoList)
        return []
    """  解析让球指数页面 """
    for elObj in elemNoList:
        startTime = time.time()
        time.sleep(ConstantVal.TIMER_INTERVAL)
        elNo = elObj["game_no"]

        url = first_tmp_url % elNo
        # 子页面第一页的数据,存在多页的情况
        all_grade_data = analysis_all_sub_page3(cra_data_url(url, "utf-8"),
                                                **params)

        # 子页面其他页的数据
        start = 30
        while True:
            url = next_tmp_url % (elNo, start)
            htmlData = cra_data_url(url, "utf-8")
            if htmlData and len(htmlData.strip()) > 100:
                result = analysis_all_sub_page3(htmlData, 'tr', **params)
                all_grade_data = concat_array(all_grade_data, result)
                start += 30
            else:
                break

        # 计算平方差
        if len(all_grade_data) > 0:
            avg_data = [
                str(float('%.3f' % cal_std2(bb))) for bb in all_grade_data
            ]
            avg_data.append(elNo)
            all_sub_data.append(avg_data)
            # count = DBUtils.executeOne(str_sql, avg_data)
        print(
            str(elNo) + " " + table_name + " 子页总共抓取条数:%s  耗时:%s" %
            (start, utils.float_num(time.time() - startTime)))
    count = DBUtils.executeMany(str_sql, all_sub_data)
    print(all_sub_data)
    print("子页面保存数量:%s 耗时:%s" %
          (count, utils.float_num(time.time() - startTotalTime)))
    return elemNoList
Beispiel #17
0
def parse_bifen(game_id=729379):
    time.sleep(ConstantVal.TIMER_INTERVAL)
    startTime = time.time()
    bifen_url = "http://odds.500.com/fenxi/bifen-%s.shtml" % game_id
    bifen_html = cra_data.cra_data_url(bifen_url)
    # 只解析class 为pub_table的table 即可
    only_table = SoupStrainer("table", class_="pub_table")
    soup = BeautifulSoup(bifen_html, "lxml", parse_only=only_table)
    if not soup:
        print("%s 内容无效" % game_id)
        return
    # 获取比分,find搜索到一个就会停止搜索,搜索全部要使用 find_all
    scores_th = soup.find('tr').contents[5:]
    scores = [text.get_text() for text in scores_th if type(text) is bs4.element.Tag]

    # 0:0的位置
    dogfall_index = scores.index("0:0")
    # 所有赔率
    #  威廉希尔:293、Bwin:5、Bet365:3、Eurobet:15   1Bet:671、金宝博:348、澳门:11、18Bet:863
    # 用于排序 老的 ["威廉希尔", "Bet365", "Bwin", "澳门"] + ["Eurobet", "1Bet", "金宝博"]
    specify_cids = [293, 5, 3, 15]+[863, 348, 11]
    main_comp = ["威廉希尔", "Bwin", "Bet365", "Eurobet"] + ["18Bet", "金宝博", "澳门"]

    # 用于在网页上查找所有 有class属性的tr
    all_trs = soup.find_all("tr", {"class": re.compile(".*")})

    find_table_bifen_result = []
    main_win_data = []
    next_win_data = []
    col_pay_data = []
    for tr in all_trs:
        # 获取编号
        cid = parse_qs(urlparse(tr.a.get('href')).query)['cid']
        # 获取横着的整行数据
        temp_list = list(tr.stripped_strings)

        # 平局赔付
        avg_pay_list = temp_list[(dogfall_index + 2) * 2:]
        com_name = temp_list[1]
        order_num = [(main_comp.index(com_name) + 1) * 10 if com_name in main_comp else 100]
        # 主胜的赔率
        main_win_pay = [ps for ps in temp_list[2:(dogfall_index + 2) * 2][::2]]
        # 客胜的赔率
        next_win_pay = [ps for ps in temp_list[2:(dogfall_index + 2) * 2][1::2]]
        # 主胜
        temp_list_main = [game_id] + cid + temp_list[:2] + order_num + main_win_pay + avg_pay_list
        # 客胜
        temp_list_next = [game_id] + cid + temp_list[:2] + order_num + next_win_pay + avg_pay_list

        # 将客胜、平局、主胜的赔率存入数组
        col_pay_data.append(main_win_pay[1:] + avg_pay_list + next_win_pay[1:])

        main_win_data.append(temp_list_main)
        next_win_data.append(temp_list_next)
        #
        if com_name in com_name and cid and int(cid[0]) in specify_cids:
            find_table_bifen_result.append(temp_list_main)
            find_table_bifen_result.append(temp_list_next)

    # 比分存入数据库
    if find_table_bifen_result.__len__() > 0:
        result_count = DBUtils.executeMany(
            "insert into bifen_index(game_id,`cid`,`html_index`,`com_name`,order_num,win_type,`m1_0`,`m2_0`,`m2_1`"
            ",`m3_0`,`m3_1`,"
            "`m3_2`,`m4_0`,`m4_1`,`m4_2`,`m4_3`,`m0_0`,`m1_1`,`m2_2`,`m3_3`,`m4_4`) values (%s,%s,%s,%s,%s,"
            "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", find_table_bifen_result)
        print("%s bifen存库条数:%s 耗时:%s" % (game_id, result_count, utils.float_num(time.time() - startTime)))
    # 计算波胆的标准差,然后存入到标准差表
    if col_pay_data and len(col_pay_data) > 0:
        # 计算波胆平方差
        calc_bodan_std_result = bodan_std(col_pay_data)
        result = bifenImpl.insert_bodan_std([game_id] + calc_bodan_std_result)
        print("%s 保存波胆标准差数据条数:%s" % (game_id, result))
Beispiel #18
0
def insert_std_data(std_data):
    """ 保存比分的标准差 """
    return DBUtils.executeMany(
        "INSERT INTO bifen_index_std(win_type,m1_0,m2_0,m2_1,m3_0,m3_1,m3_2"
        ",m4_0,m4_1,m4_2,m4_3,m0_0,m1_1,m2_2,m3_3,m4_4,"
        "game_id)VALUES(" + "%s," * 16 + "%s" + ") ", std_data)