def fb_gid_get_nday(xtfb, timStr, fgExt=False):
    """Fetch the daily match-list pages for the last N days and merge them into ``tfsys.gids``.

    Starting from ``timStr`` (or ``xtfb.tim_now`` when ``timStr`` is empty) the
    function walks backwards one day at a time for ``tfsys.xnday_down`` days,
    stopping early once a day earlier than ``xtfb.tim0_gid`` is reached.  Each
    day's page is fetched with ``zweb.web_get001txtFg``, parsed with
    ``fb_gid_get4htm`` and merged into ``tfsys.gids`` (rows are de-duplicated
    on ``gid``, keeping the most recently added row).

    NOTE(review): this file defines ``fb_gid_get_nday`` a second time further
    down; in Python the later definition replaces this one at import time.

    Args:
        xtfb: object providing ``tim_now`` and ``tim0_gid``.
        timStr: start date, parsed with ``arrow.get``; ``''`` means
            ``xtfb.tim_now``.
        fgExt: when true, ``fb_gid_getExtPool`` is called with each day's
            parsed matches.

    Returns:
        None.  Results are stored in ``tfsys.gids`` and, when ``tfsys.gidsFN``
        is non-empty, written to that file as CSV (``gb18030`` encoding).
    """
    # Local import so this block is self-contained; other functions in this
    # file already reference ``pd``.
    import pandas as pd

    ktim = xtfb.tim_now if timStr == '' else arrow.get(timStr)

    for tc in range(tfsys.xnday_down):
        xtim = ktim.shift(days=-tc)
        xtimStr = xtim.format('YYYY-MM-DD')

        # Keep get_fun_nam() in this function body: it presumably reports the
        # name of the calling function -- TODO confirm.
        zt.f_addLog(str(tc) + '#,' + xtimStr + ',@' + zt.get_fun_nam())

        # Days before the configured lower bound are not processed.
        if xtim < xtfb.tim0_gid:
            print('#brk;')
            break

        fss = tfsys.rghtm + xtimStr + '.htm'
        uss = tfsys.us0_gid + xtimStr
        print(timStr, tc, '# update--', fss)

        htm = zweb.web_get001txtFg(uss, fss)
        # Short responses are skipped (presumably failed or empty downloads).
        if len(htm) <= 5000:
            continue

        df = fb_gid_get4htm(htm)
        if len(df['gid']) == 0:
            continue

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # equivalent that works on both old and new pandas versions.
        tfsys.gids = pd.concat([tfsys.gids, df])
        tfsys.gids.drop_duplicates(subset='gid', keep='last', inplace=True)

        if fgExt:
            fb_gid_getExtPool(df)

    if tfsys.gidsFN != '':
        print('+++++')
        print(tfsys.gids.tail())
        tfsys.gids.to_csv(tfsys.gidsFN, index=False, encoding='gb18030')
def fb_gid_get_nday(xtfb, timStr, fgExt=False):
    """Download league data, then the daily match lists for the last N days.

    Step 1 calls ``fb_download_league_data`` for every entry of
    ``tfsys.league`` and appends each result to ``tfsys.league_sc``.

    Step 2 starts from ``timStr`` (or ``xtfb.tim_now`` when ``timStr`` is
    empty) and walks backwards one day at a time for ``tfsys.xnday_down``
    days, stopping early once a day earlier than ``xtfb.tim0_gid`` is reached.
    Each day's page is fetched with ``zweb.web_get001txtFg``, parsed with
    ``fb_gid_get4htm`` and merged into ``tfsys.gids`` (rows are de-duplicated
    on ``gid``, keeping the most recently added row).

    Args:
        xtfb: object providing ``tim_now`` and ``tim0_gid``.
        timStr: start date, parsed with ``arrow.get``; ``''`` means
            ``xtfb.tim_now``.
        fgExt: when true, ``fb_gid_getExt`` is called with each day's parsed
            matches.

    Returns:
        None.  Results are stored in ``tfsys.league_sc`` and ``tfsys.gids``;
        when ``tfsys.gidsFN`` is non-empty, ``tfsys.gids`` is also written to
        that file as CSV (``gb18030`` encoding).
    """
    # 1. Download the data for every league.
    for league in tfsys.league:
        sc = fb_download_league_data(league)
        tfsys.league_sc = pd.concat([tfsys.league_sc, sc], ignore_index=True)

    # 2. Download the current matches, newest day first.
    ktim = xtfb.tim_now if timStr == '' else arrow.get(timStr)

    for tc in range(tfsys.xnday_down):
        xtim = ktim.shift(days=-tc)
        xtimStr = xtim.format('YYYY-MM-DD')

        # Keep get_fun_nam() in this function body: it presumably reports the
        # name of the calling function -- TODO confirm.
        zt.f_addLog(str(tc) + '#,' + xtimStr + ',@' + zt.get_fun_nam())

        # Days before the configured lower bound are not processed.
        if xtim < xtfb.tim0_gid:
            print('#brk;')
            break

        fss = tfsys.rghtm + xtimStr + '.htm'
        uss = tfsys.us0_gid + xtimStr
        print(timStr, tc, '#', fss)

        htm = zweb.web_get001txtFg(uss, fss)
        # Short responses are skipped (presumably failed or empty downloads).
        if len(htm) <= 5000:
            continue

        # Extract that day's matches.
        df = fb_gid_get4htm(htm)
        if len(df['gid']) == 0:
            continue

        # DataFrame.append was removed in pandas 2.0; use pd.concat, as step 1
        # of this function already does.
        tfsys.gids = pd.concat([tfsys.gids, df])
        tfsys.gids.drop_duplicates(subset='gid', keep='last', inplace=True)

        if fgExt:
            # Single-threaded variant; the original comments name
            # fb_gid_getExtPool as the multi-threaded alternative.
            fb_gid_getExt(df)

    if tfsys.gidsFN != '':
        print('+++++')
        print(tfsys.gids.tail())
        tfsys.gids.to_csv(tfsys.gidsFN, index=False, encoding='gb18030')
def fb_gid_get4htm(htm):
    """Parse one match-list HTML page into a DataFrame with one row per match.

    The columns are ``tfsys.gidSgn``; every row starts from the defaults in
    ``tfsys.gidNil`` and is then filled in five passes:

    1. basic match attributes, read from the tags selected by
       ``zweb.bs_get_ktag``;
    2. scores (``qj``/``qs``) and the derived result (``kwin``), read from
       ``<a class="score">`` links and matched to rows by ``gid``;
    3. home-team id (``mtid``) from ``<td class="left_team">`` cells;
    4. away-team id (``gtid``) from ``<td class="right_team">`` cells;
    5. rows whose ``gid`` is ``'-1'`` are dropped.

    Args:
        htm: HTML text of the page.

    Returns:
        pandas.DataFrame with columns ``tfsys.gidSgn`` (possibly empty).
    """
    bs = BeautifulSoup(htm, 'html5lib')  # 'lxml' is the commented-out alternative

    # ---1# basic attributes, one row per matching tag
    # Presumably zweb.bs_get_ktag reads this attribute name -- TODO confirm.
    zsys.bs_get_ktag_kstr = 'isend'
    x10 = bs.find_all(zweb.bs_get_ktag)

    # Template row: fields not assigned below keep their tfsys.gidNil default.
    ds = pd.Series(tfsys.gidNil, index=tfsys.gidSgn, dtype=str)
    rows = []
    for x in x10:
        ds['gid'], ds['gset'] = x['fid'], zstr.str_fltHtmHdr(x['lg'])
        ds['mplay'] = zstr.str_fltHtmHdr(x['homesxname'])
        ds['gplay'] = zstr.str_fltHtmHdr(x['awaysxname'])
        ds['kend'] = x['isend']
        ds['tweek'] = fb_tweekXed(x['gdate'].split(' ')[0])
        ds['tplay'], ds['tsell'] = x['pdate'], x['pendtime']
        # Snapshot the row: ``ds`` is reused on the next iteration.
        rows.append(ds.to_dict())

    # Build the frame once.  DataFrame.append was removed in pandas 2.0 and
    # appending row by row is quadratic.  dtype=object keeps the values as-is.
    df = pd.DataFrame(rows, columns=tfsys.gidSgn, dtype=object)

    # ---2# scores: the match id is embedded in the link between 'ju-' and '.sh'
    for x in bs.find_all('a', class_='score'):
        kss = zstr.str_xmid(x['href'], 'ju-', '.sh')
        clst = x.text.split(':')
        hit = df.index[df['gid'] == kss]
        if len(hit) != 1:
            continue
        # .loc writes into ``df`` itself; the former chained form
        # df['qj'][inx] = ... may write to a temporary copy.
        df.loc[hit, 'qj'] = clst[0]
        df.loc[hit, 'qs'] = clst[1]
        df.loc[hit, 'kwin'] = str(fb_kwin4qnum(int(clst[0]), int(clst[1])))

    # ---3# / ---4# team ids: home team from left_team, away team from right_team
    for td_class, col in (('left_team', 'mtid'), ('right_team', 'gtid')):
        cells = bs.find_all('td', class_=td_class)
        # Cells are matched to rows by position, so only use them when there
        # is exactly one cell per match tag.
        if len(cells) != len(x10):
            continue
        for xc, x in enumerate(cells):
            xss = x.a['href']
            # Links containing '/team//' carry no team id; skip them.
            if xss.find('/team//') >= 0:
                continue
            xid = zstr.str_xmid(xss, '/team/', '/')
            df.at[xc, col] = xid
            if xid == '':
                zt.f_addLog('tid-' + col + ',nil,' + xss + ',gid,' + df.at[xc, 'gid'])

    # ---5# drop rows whose gid is '-1' (presumably the tfsys.gidNil placeholder)
    return df[df['gid'] != '-1']