Beispiel #1
0
def fb_gid_getExt010(x10):
    """Fetch the az / oz / tz pages of one match and refresh their data files.

    ``x10`` is wrapped in a string-typed ``pd.Series`` indexed by
    ``tfsys.gidSgn``; its ``'gid'`` entry identifies the match.  The three
    pages are fetched through ``zweb.web_get001txtFg`` (URL first, local
    ``.htm`` path second).  Afterwards each page is passed to its
    ``fb_gid_getExt_*4htm`` parser when the size reported by ``zt.f_size``
    for the matching ``.dat`` file is below 1000, or when
    ``tfsys.xnday_down`` is below 10.

    Returns:
        Tuple ``(az_dat, oz_dat, tz_dat)`` of the ``.dat`` file paths; the
        paths are returned whether or not the parsers were run.
    """
    row = pd.Series(x10, index=tfsys.gidSgn, dtype=str)
    match_id = row['gid']

    # (tag, local html prefix, remote url prefix) for each page kind.
    page_specs = (
        ('az', tfsys.rhtmYazhi, tfsys.us0_extYazhi),
        ('oz', tfsys.rhtmOuzhi, tfsys.us0_extOuzhi),
        ('tz', tfsys.rhtmTouzhu, tfsys.us0_extTouzhu),
    )

    # Build every (url, local path) pair before the first download starts.
    targets = [
        (url_root + match_id + '.shtml',
         html_root + match_id + ('_' + tag + '.htm'))
        for tag, html_root, url_root in page_specs
    ]
    page_az, page_oz, page_tz = [
        zweb.web_get001txtFg(url, html_path) for url, html_path in targets
    ]

    # Data files are sized only after all three pages have been fetched.
    dat_az, dat_oz, dat_tz = [
        tfsys.rxdat + match_id + ('_' + tag + '.dat')
        for tag, _, _ in page_specs
    ]
    size_az, size_oz, size_tz = [
        zt.f_size(path) for path in (dat_az, dat_oz, dat_tz)
    ]

    if size_az < 1000 or tfsys.xnday_down < 10:
        fb_gid_getExt_az4htm(page_az, row, ftg=dat_az)

    if size_oz < 1000 or tfsys.xnday_down < 10:
        fb_gid_getExt_oz4htm(page_oz, row, ftg=dat_oz)

    if size_tz < 1000 or tfsys.xnday_down < 10:
        fb_gid_getExt_tz4htm(page_tz, row, ftg=dat_tz)

    return dat_az, dat_oz, dat_tz
Beispiel #2
0
def fb_gid_getExt010(x10):
    """Fetch the 'oz' page of one match and refresh its data file if needed.

    ``x10`` is wrapped in a string-typed ``pd.Series`` indexed by
    ``tfsys.gidSgn``; its ``'gid'`` entry identifies the match.  The page is
    fetched through ``zweb.web_get001txtFg`` (URL first, local ``.htm`` path
    second).  It is handed to ``fb_gid_getExt_oz4htm`` when the size reported
    by ``zt.f_size`` for the ``_oz.dat`` file is below 1000, or when
    ``tfsys.xnday_down`` is below 10.

    Returns:
        Path of the ``_oz.dat`` file, whether or not the parser was run.
    """
    row = pd.Series(x10, index=tfsys.gidSgn, dtype=str)
    match_id = row['gid']

    # Only the Ouzhi ('_oz') page is handled by this variant.
    html_path = tfsys.rhtmOuzhi + match_id + '_oz.htm'
    page_url = tfsys.us0_extOuzhi + match_id + '.shtml'
    page = zweb.web_get001txtFg(page_url, html_path)

    dat_path = tfsys.rxdat + match_id + '_oz.dat'
    needs_refresh = zt.f_size(dat_path) < 1000 or tfsys.xnday_down < 10
    if needs_refresh:
        fb_gid_getExt_oz4htm(page, row, ftg=dat_path)

    return dat_path
Beispiel #3
0
def fb_gid_get_nday(xtfb,timStr,fgExt=False):
    """Collect the daily match listings for the last ``tfsys.xnday_down`` days.

    Starting at ``timStr`` (or ``xtfb.tim_now`` when ``timStr`` is empty) and
    stepping back one day at a time, the listing page of each day is fetched
    with ``zweb.web_get001txtFg`` and parsed by ``fb_gid_get4htm``.  Parsed
    rows are merged into ``tfsys.gids``, keeping the last row per ``gid``.
    The walk stops early once a day falls before ``xtfb.tim0_gid``.

    Args:
        xtfb: object supplying ``tim_now`` and ``tim0_gid``.
        timStr: start date accepted by ``arrow.get``; ``''`` selects
            ``xtfb.tim_now``.
        fgExt: when true, ``fb_gid_getExtPool`` is called with each day's
            parsed frame.

    Returns:
        None.  Side effects: one ``zt.f_addLog`` entry per visited day,
        ``tfsys.gids`` is rebound to the merged frame, and when
        ``tfsys.gidsFN`` is non-empty the frame is written there as CSV
        (gb18030 encoded, without the index).
    """
    # Function-scope import: this function did not previously need pandas by
    # name, so it brings the module in itself.
    import pandas as pd

    ktim = xtfb.tim_now if timStr == '' else arrow.get(timStr)

    for tc in range(tfsys.xnday_down):
        xtim = ktim.shift(days=-tc)
        xtimStr = xtim.format('YYYY-MM-DD')

        zt.f_addLog(str(tc) + '#,' + xtimStr + ',@' + zt.get_fun_nam())
        if xtim < xtfb.tim0_gid:
            print('#brk;')
            break

        fss = tfsys.rghtm + xtimStr + '.htm'
        uss = tfsys.us0_gid + xtimStr
        print(timStr,tc,'# update--',fss)

        htm = zweb.web_get001txtFg(uss, fss)
        if len(htm) <= 5000:
            # Pages of 5000 characters or fewer are not parsed.
            continue

        df = fb_gid_get4htm(htm)
        if len(df['gid']) > 0:
            # DataFrame.append was removed in pandas 2.0; pd.concat with its
            # default arguments yields the same frame on old and new pandas.
            tfsys.gids = pd.concat([tfsys.gids, df])
            tfsys.gids.drop_duplicates(subset='gid', keep='last', inplace=True)
            if fgExt:
                fb_gid_getExtPool(df)

    if tfsys.gidsFN != '':
        print('+++++')
        print(tfsys.gids.tail())
        tfsys.gids.to_csv(tfsys.gidsFN, index=False, encoding='gb18030')
Beispiel #4
0
def fb_gid_get_nday(xtfb, timStr, fgExt=False):
    """Download league data, then collect the daily match listings.

    Step 1 calls ``fb_download_league_data`` for every entry of
    ``tfsys.league`` and concatenates each result onto ``tfsys.league_sc``.

    Step 2 starts at ``timStr`` (or ``xtfb.tim_now`` when ``timStr`` is
    empty) and steps back one day at a time for ``tfsys.xnday_down`` days.
    Each day's listing page is fetched with ``zweb.web_get001txtFg`` and
    parsed by ``fb_gid_get4htm``; parsed rows are merged into ``tfsys.gids``,
    keeping the last row per ``gid``.  The walk stops early once a day falls
    before ``xtfb.tim0_gid``.

    Args:
        xtfb: object supplying ``tim_now`` and ``tim0_gid``.
        timStr: start date accepted by ``arrow.get``; ``''`` selects
            ``xtfb.tim_now``.
        fgExt: when true, ``fb_gid_getExt`` is called with each day's
            parsed frame.

    Returns:
        None.  Side effects: ``tfsys.league_sc`` and ``tfsys.gids`` are
        rebound, one ``zt.f_addLog`` entry is written per visited day, and
        when ``tfsys.gidsFN`` is non-empty ``tfsys.gids`` is written there as
        CSV (gb18030 encoded, without the index).
    """
    # 1. Download the data of every league.
    for league in tfsys.league:
        sc = fb_download_league_data(league)
        tfsys.league_sc = pd.concat([tfsys.league_sc, sc], ignore_index=True)

    # 2. Download the matches of the current period.
    ktim = xtfb.tim_now if timStr == '' else arrow.get(timStr)

    for tc in range(tfsys.xnday_down):
        xtim = ktim.shift(days=-tc)
        xtimStr = xtim.format('YYYY-MM-DD')

        zt.f_addLog(str(tc) + '#,' + xtimStr + ',@' + zt.get_fun_nam())
        if xtim < xtfb.tim0_gid:
            print('#brk;')
            break

        fss = tfsys.rghtm + xtimStr + '.htm'
        uss = tfsys.us0_gid + xtimStr
        print(timStr, tc, '#', fss)

        htm = zweb.web_get001txtFg(uss, fss)
        if len(htm) <= 5000:
            # Pages of 5000 characters or fewer are not parsed.
            continue

        df = fb_gid_get4htm(htm)  # extract the matches of that day
        if len(df['gid']) > 0:
            # DataFrame.append was removed in pandas 2.0; pd.concat with its
            # default arguments yields the same frame on old and new pandas.
            tfsys.gids = pd.concat([tfsys.gids, df])
            tfsys.gids.drop_duplicates(subset='gid',
                                       keep='last',
                                       inplace=True)
            if fgExt:
                # Single-threaded path; the original author's note names
                # fb_gid_getExtPool as the multi-threaded alternative.
                fb_gid_getExt(df)

    if tfsys.gidsFN != '':
        print('+++++')
        print(tfsys.gids.tail())
        tfsys.gids.to_csv(tfsys.gidsFN, index=False, encoding='gb18030')
Beispiel #5
0
def fb_download_league_data(league, season, fgSample=False):
    """Download one league/season data file and merge its teams into ``tfsys.teamIds``.

    The file is fetched with ``zweb.web_get001txtFg`` from
    ``tfsys.us0_league + season + '/s' + leagueId + subleagueId + '.js'``
    using ``tfsys.lghtm + season + '_' + leagueId + '.js'`` as the local path.
    ``fb_get_team_dataset`` turns the text into a team mapping and a scores
    frame; only the team mapping is used here.

    Args:
        league: key into ``tfsys.leagueId`` and ``tfsys.subleagueId``.
        season: season string placed in both the URL and the local file name
            (an original author's note gives '2018-2019' as an example).
        fgSample: when true, ``fb_league_gids(htm, league)`` is also called.

    Returns:
        None.  ``tfsys.teamIds`` is rebound to a new dict holding the old
        entries updated with the newly parsed ones.
    """
    leagueId = tfsys.leagueId[league]
    subleagueId = tfsys.subleagueId[league]

    fss = tfsys.lghtm + season + '_' + leagueId + '.js'
    uss = tfsys.us0_league + season + '/s' + leagueId + subleagueId + '.js'
    htm = zweb.web_get001txtFg(uss, fss)

    # 1. Per the original note: current fundamentals of every team in the
    #    league (goals for/against, points, ranking, ...).
    teamDict, _scoresDf = fb_get_team_dataset(htm)

    # Copy-then-update instead of dict(a, **b): keyword expansion raises
    # TypeError as soon as a key of teamDict is not a str (e.g. int team ids).
    mergedTeams = dict(tfsys.teamIds)
    mergedTeams.update(teamDict)
    tfsys.teamIds = mergedTeams

    # 2. Per the original note: match ids of past seasons, odds and the
    #    head-to-head record of both teams at that time (training samples).
    if fgSample:
        fb_league_gids(htm, league)
Beispiel #6
0
def fb_gid_getExt010(x10):
    """Download the pages of one match, build its feature row and save it.

    ``x10`` is wrapped in a string-typed ``pd.Series`` indexed by
    ``tfsys.gidSgn``; its ``'gid'`` entry identifies the match.

    Steps:
        1. Fetch the Touzhu page (up to 5 attempts, each with a fresh
           timestamp as query string) and derive the volume features.
        2. Fetch the Ouzhi ``.js`` file and derive the odds features.
        3. Fetch the Fenxi page and derive the score features.
        4. Concatenate the three feature frames column-wise, insert
           ``bars['kwin']`` as first column ``'FTR'`` and write the result to
           ``tfsys.rxdat + gid + '_ftr.dat'`` (CSV, gb18030, no index).

    Returns:
        * the string ``'######### can not download the html ##########'``
          when all 5 attempts for the Touzhu page returned ``'404'``;
        * ``None`` when the volume features are empty;
        * otherwise ``fss_oz``, the local path used for the Ouzhi ``.js``
          file (note: not the path of the saved feature file).
    """
    bars = pd.Series(x10, index=tfsys.gidSgn, dtype=str)
    gid = bars['gid']
    print('gid:', gid)

    # 1. Betting-volume page and the volume-ratio features.
    uss_tz_template = tfsys.us0_extTouzhu + gid + '.htm?%s'
    fss_tz = tfsys.rhtmTouzhu + gid + '.htm'
    fxdat_tz = tfsys.rxdat + gid + '_tz.dat'

    for _attempt in range(5):
        # Format from the untouched template on every attempt.  Re-formatting
        # an already formatted URL raises TypeError because the '%s'
        # placeholder is gone after the first substitution.
        stamp = time.mktime(datetime.datetime.now().timetuple())
        htm_tz = zweb.web_get001txtFg(uss_tz_template % (stamp,), fss_tz)
        if htm_tz != '404':
            break
    else:
        # Every attempt came back as '404'.
        return '######### can not download the html ##########'

    volumes_feature = fb_get_volumes_features(htm_tz, bars, ftg=fxdat_tz)
    # len() rather than truthiness: the result is later passed to pd.concat,
    # and pandas objects do not support plain truth testing.
    if len(volumes_feature) == 0:
        print('投注数据不足,不预测该场比赛')
        return None

    # 2. Odds file and the odds features.
    uss_oz = tfsys.us0_extOuzhi + gid + '.js'
    fss_oz = tfsys.rhtmOuzhi + gid + '.js'
    fxdat_oz_1 = tfsys.rxdat + gid + '_oz_1.dat'
    htm_oz = zweb.web_get001txtFg(uss_oz, fss_oz)
    odds_feature = fb_get_odds_features(htm_oz, bars, ftg=fxdat_oz_1)

    # 3. Analysis page; per the original note: head-to-head and recent
    #    home/away form features.
    uss_fx = tfsys.us0_extFenxi + gid + '.htm'
    fss_fx = tfsys.rhtmFenxi + gid + '.htm'
    fxdat_fx = tfsys.rxdat + gid + '_fx.dat'
    htm_fx = zweb.web_get001txtFg(uss_fx, fss_fx)
    score_feature = fb_get_score_features(htm_fx, bars, ftg=fxdat_fx)

    # 4. Join all features side by side and persist them.
    features = pd.concat([score_feature, odds_feature, volumes_feature],
                         axis=1)
    features.insert(0, 'FTR', bars['kwin'])
    fss_features = tfsys.rxdat + gid + '_ftr.dat'
    features.to_csv(fss_features, index=False, encoding='gb18030')

    return fss_oz