def parse_button(self, all_nhan_dinh_link=None):
    all_nhan_dinh_link = 'http://bongdaplus.vn/nhan-dinh-bong-da/trang-1.html'
    range_page = self.gen_range(all_nhan_dinh_link=all_nhan_dinh_link)
    for link in range_page:
        if link is None:
            continue
        html = request_html(link)
        soup = BeautifulSoup(html, 'html.parser')
        # Collect every href on the page, skipping anchors without one.
        hrefs = []
        for a in soup.select('a'):
            if a.has_attr('href'):
                hrefs.append(a['href'])
        hrefs = [h for h in hrefs if 'nhan-dinh-bong-da-' in h]
        for h in hrefs:
            link = 'http://bongdaplus.vn/' + h
            # nhan_dinh_a_match_bondaplus fetches and parses the article itself.
            self.nhan_dinh_a_match_bondaplus(link=link)
    self.map_match_id()
    self.map_predict_id()

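# A minimal standalone sketch of the href-filtering pattern used in
# parse_button (the HTML snippet is made up for illustration):
#
#   from bs4 import BeautifulSoup
#   sample = '<a href="/nhan-dinh-bong-da-abc.html">x</a><a name="n">y</a>'
#   s = BeautifulSoup(sample, 'html.parser')
#   hrefs = [a['href'] for a in s.select('a') if a.has_attr('href')]
#   hrefs = [h for h in hrefs if 'nhan-dinh-bong-da-' in h]
#   # -> ['/nhan-dinh-bong-da-abc.html']
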
def nhan_dinh_a_match_aegoal(self, *arg, **karg):
    link = karg.get('link')
    if not link:
        print('No link ***********')
        # Offline debug path: parse a previously saved page from disk.
        # NOTE: ngay/dt/team1_2 below are only set on the atuple path,
        # so this branch cannot reach the create call on its own.
        with open('/media/sf_C_DRIVE/D4/dl/testfile_link1.html', 'r') as f:
            html = f.read()
        soup = BeautifulSoup(html, 'html.parser')
    else:
        atuple = karg.get('atuple')
        link = atuple[0]
        team_1_2_date = atuple[2]
        team1_2 = team_1_2_date[0]
        dt = team_1_2_date[1]
        ngay = dt.date()
        dt = fields.Datetime.to_string(dt)
        print('Got link ************')
        rs = request_html(link)
        soup = BeautifulSoup(rs, 'html.parser')
    rs = soup.select('div.box-text-detail')[0].get_text()
    ti_so = du_doan_ti_so(rs)
    update_dict = {'nd_id': self.id}
    if ti_so:
        update_dict.update({'score1': ti_so[0], 'score2': ti_so[1],
                            'state': 'tu_dong'})
    else:
        update_dict.update({'state': 'can_read_du_doan'})
    ndlline = get_or_create_object_sosanh(
        self, 'tsbd.ndlline',
        {'link': link, 'ngay': ngay, 'ngay_gio': dt,
         'team1': team1_2[0], 'team2': team1_2[1]},
        update_dict)
    return ti_so

def nhan_dinh_bongdanet(self, all_nhan_dinh_link=None):
    all_nhan_dinh_link = 'http://bongdanet.vn/nhan-dinh/p2'
    range_page = list(self.gen_range(
        patern=r'http://bongdanet.vn/nhan-dinh/p\d+',
        replacement='http://bongdanet.vn/nhan-dinh/p%s',
        all_nhan_dinh_link=all_nhan_dinh_link))
    for link in range_page:
        html = request_html(link)
        soup = BeautifulSoup(html, 'html.parser')
        a_s = soup.select('div.news-item div.detail-news-item a')
        hrefs = []
        for a in a_s:
            if a.has_attr('href'):
                hrefs.append(['http://bongdanet.vn' + a['href'], a.get_text()])
        hrefs = [h for h in hrefs if 'nhan-dinh' in h[0] or 'phan-tich' in h[0]]
        # Parse team names and kick-off time out of each article title.
        for atuple in hrefs:
            atuple.append(parse_title_bongdanet(atuple[1]))
        for at in hrefs:
            if at[2] is not None:
                ti_so = self.nhan_dinh_a_match_bongdanet(link=at[0], atuple=at)
    self.map_match_id()
    self.map_predict_id()

def test(self):
    away = u'Việt Nam'
    away = quote(away)
    link = ('http://bongdaso.com/_CastingInfo.aspx?FixtureID=56032'
            '&SeasonID=112&Flags=&Home=Bournemouth&Away=%s') % away
    print('link', link)
    raise UserError(u'%s' % (request_html(link)))

def leech_button(self):
    link = self.link if self.link_select == 'link1' else self.all_nhan_dinh_link
    rs = request_html(link)
    # Dump the raw HTML to disk so it can be replayed offline.
    with open('/media/sf_C_DRIVE/D4/dl/testfile_%s.html' % self.link_select, 'w') as f:
        f.write(rs)
    self.log = rs

def get_soup_of_events(fix_id, home, away):
    # Team names may contain non-ASCII characters, so URL-encode them.
    home = quote(home)
    away = quote(away)
    link = ('http://bongdaso.com/_CastingInfo.aspx?FixtureID={}'
            '&SeasonID=112&Flags=&Home={}&Away={}').format(fix_id, home, away)
    html = request_html(link)
    soup = BeautifulSoup(html, 'html.parser')
    return soup

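# Hypothetical usage of get_soup_of_events (the fixture id is the same
# sample value used in test() above; the team names are illustrative):
#
#   soup = get_soup_of_events(56032, 'Bournemouth', u'Việt Nam')
#   rows = soup.select('tr')
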
def get_soup(link):
    try:
        html = request_html(link)
        soup = BeautifulSoup(html, 'html.parser')
    except GethtmlError:
        raise GethtmlError(u'Error while getting soup')
    return soup

def get_soup(link):
    # NOTE: this definition shadows the get_soup above; only this
    # retrying variant is in effect at runtime.
    soup = ''
    count_try = 0
    while not str(soup) and count_try < 2:
        html = request_html(link)
        soup = BeautifulSoup(html, 'html.parser')
        count_try += 1
    return soup

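# A merged sketch combining the two get_soup variants above: retry on
# empty parses and still surface fetch errors. The helper name and the
# max_tries default are assumptions for illustration, not part of the
# original module.
def get_soup_with_retry(link, max_tries=2):
    for _ in range(max_tries):
        try:
            html = request_html(link)
        except GethtmlError:
            continue  # transient fetch failure: try again
        soup = BeautifulSoup(html, 'html.parser')
        if str(soup):  # non-empty parse
            return soup
    raise GethtmlError(u'Error while getting soup for %s' % link)
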
def get_team_and_date(self, match_link, add_update_dict, is_set_must_get_time=True):
    soup = None
    html = None
    # Only fetch the page when a needed field is missing from add_update_dict.
    if is_set_must_get_time:
        check_time_for_get_soup = 'time' not in add_update_dict
    else:
        check_time_for_get_soup = ('time' not in add_update_dict
                                   and 'date' not in add_update_dict)
    is_get_soup = any(['home' not in add_update_dict,
                       'away' not in add_update_dict,
                       check_time_for_get_soup])
    if is_get_soup:
        html = request_html(match_link)
        soup = BeautifulSoup(html, 'html.parser')
    if 'home' not in add_update_dict:
        home = soup.select('div#scr_home a')[0].get_text()
        # Strip the league-position suffix, e.g. "Arsenal [4]".
        home = re.sub(r'\s+\[\d+\]', '', home)
    else:
        home = add_update_dict['home']
    if 'away' not in add_update_dict:
        away = soup.select('div#scr_away a')[0].get_text()
        away = re.sub(r'\[\d+\]\s+', '', away)
    else:
        away = add_update_dict['away']
    home = home.strip()
    away = away.strip()
    if 'time' in add_update_dict:
        begin_time = add_update_dict['time']
        # Site times are local (UTC+7); convert to UTC for storage.
        dtime = datetime.strptime(begin_time, '%d/%m/%Y %H:%M') - timedelta(hours=7)
        str_time = fields.Datetime.to_string(dtime)
        match_date = dtime.date()
        str_date = fields.Date.to_string(match_date)
    else:
        if is_set_must_get_time:
            begin_time = soup.select('div#scr_start')[0].get_text()
            begin_time = begin_time[9:]  # drop the fixed-width label prefix
            dtime = datetime.strptime(begin_time, '%d/%m/%Y %H:%M') - timedelta(hours=7)
            str_time = fields.Datetime.to_string(dtime)
            match_date = dtime.date()
            str_date = fields.Date.to_string(match_date)
        else:
            match_date = datetime.strptime(add_update_dict['date'], '%d/%m/%Y')
            str_date = fields.Date.to_string(match_date)
            str_time = None
    team1_id = get_or_create_object_sosanh(self, 'tsbd.team', {'name': home})
    team2_id = get_or_create_object_sosanh(self, 'tsbd.team', {'name': away})
    team_dict = {
        'team1': team1_id.id,
        'team2': team2_id.id,
        'date': str_date,
    }
    return team_dict, match_date, str_time, home, away, soup, html

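# Worked example of the local-time conversion above (the sample timestamp
# is made up): '25/12/2023 19:00' parsed with '%d/%m/%Y %H:%M' minus
# timedelta(hours=7) gives 2023-12-25 12:00 UTC, so `date` stays 25/12
# here but can roll back a day for kick-offs before 07:00 local time.
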
def nhan_dinh_a_match_bondaplus(self, *arg, **karg):
    link = karg.get('link')
    if not link:
        print('No link ***********')
        # Offline debug path: parse a previously saved page from disk.
        with open('/media/sf_C_DRIVE/D4/dl/testfile_link1.html', 'r') as f:
            html = f.read()
        soup = BeautifulSoup(html, 'html.parser')
    else:
        print('Got link ************')
        rs = request_html(link)
        soup = BeautifulSoup(rs, 'html.parser')
    title = soup.select('h1.tit')[0].get_text()
    print('title **', title)
    # Titles come in a few shapes: "... Arsenal vs Chelsea, ...",
    # "... Arsenal và Chelsea, ...", or with a colon before the pairing.
    rs = re.search(r'Nhận định bóng đá (.+?) vs (.+?),', title)
    if not rs:
        rs = re.search(r'Nhận định bóng đá (.+?) và (.+?),', title)
    if not rs:
        rs = re.search(r'Nhận định bóng đá.*?: (.+?) vs (.+?)$', title)
    if not rs:
        rs = re.search(r'Nhận định bóng đá.*?: (.+?) và (.+?)$', title)
    team1 = rs.group(1).strip()
    team2 = rs.group(2).strip()
    rs_search = re.search(r'(\d+)h(\d*).*?ngày\s+(\d+)/(\d+)', title)
    rs = (rs_search.group(1), rs_search.group(2),
          rs_search.group(3), rs_search.group(4))
    rs = list(map(lambda i: int_a_minute(i), rs))
    # Site times are local (UTC+7); convert to UTC once.
    dt = datetime(year=datetime.now().year, month=rs[3], day=rs[2],
                  hour=rs[0], minute=rs[1]) - timedelta(hours=7)
    ngay = dt.date()
    dt = fields.Datetime.to_string(dt)
    update_dict = {'ngay': ngay, 'ngay_gio': dt, 'nd_id': self.id}
    try:
        score1, score2 = self.du_doan(soup)
        update_dict.update({'score1': score1, 'score2': score2,
                            'state': 'tu_dong'})
    except FETCHERROR:
        update_dict.update({'state': 'can_read_du_doan'})
    ndlline = get_or_create_object_sosanh(
        self, 'tsbd.ndlline',
        {'link': link, 'team1': team1, 'team2': team2},
        update_dict)

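# Example of the title patterns matched above (the sample title is made
# up for illustration):
#
#   title = u'Nhận định bóng đá Arsenal vs Chelsea, 19h30 ngày 25/12'
#   re.search(r'Nhận định bóng đá (.+?) vs (.+?),', title).groups()
#   # -> (u'Arsenal', u'Chelsea')
#   re.search(r'(\d+)h(\d*).*?ngày\s+(\d+)/(\d+)', title).groups()
#   # -> (u'19', u'30', u'25', u'12')
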
def nhan_dinh_aegoal(self, all_nhan_dinh_link=None):
    all_nhan_dinh_link = 'https://aegoal.net/nhan-dinh-bong-da.html?trang=1'
    range_page = self.gen_range(patern=r'trang-\d+', replacement='trang-%s',
                                all_nhan_dinh_link=all_nhan_dinh_link)
    for link in range_page:
        html = request_html(link)
        soup = BeautifulSoup(html, 'html.parser')
        hrefs = []
        for a in soup.select('div.list-item-new a'):
            hrefs.append([a['href'], a.get_text()])
        hrefs = [h for h in hrefs if 'nhan-dinh' in h[0] or 'phan-tich' in h[0]]
        # The bongdanet title parser is reused for aegoal titles as well.
        for atuple in hrefs:
            atuple.append(parse_title_bongdanet(atuple[1]))
        for at in hrefs:
            if at[2] is not None:
                ti_so = self.nhan_dinh_a_match_aegoal(link=at[0], atuple=at)
    self.map_match_id()
    self.map_predict_id()

def gen_lineup_new(self, match_link, search_dict, match_id):
    # Normalise the link so it points at the line-up tab.
    match_link = match_link.replace('Data=Odds', 'Data=lineup').replace(
        'Data=Casting', 'Data=lineup')
    if 'Data=lineup' not in match_link:
        match_link = match_link + '&Data=lineup'
    html = request_html(match_link)
    lineup_dict = {}
    playerlines = []
    for patern in [("'_HomeLineup_','(.*?)'", search_dict['team1'], 'home'),
                   ("'_AwayLineup_','(.*?)'", search_dict['team2'], 'away')]:
        rs = re.search(patern[0], html)
        rs = 'http://bongdaso.com/' + rs.group(1)
        rs = request_html(rs)
        soup = BeautifulSoup(rs, 'html.parser')
        rows = soup.select('div.squad_table table tr')
        da_chinhs = []  # starters
        da_phus = []    # substitutes
        alist = da_chinhs
        for count, tr in enumerate(rows):
            if count == 0:
                continue  # header row
            # A separator row marks the start of the substitutes list.
            if tr.get('class') == ['fixture_separator']:
                alist = da_phus
                continue
            number = tr.select('td:nth-of-type(1)')[0].get_text()
            try:
                number = int(number)
            except ValueError:
                number = False
            player_name_tr = tr.select('td:nth-of-type(2)')[0]
            name = player_name_tr.get_text()
            if number:
                adict_search = {'number': number, 'name': name}
            else:
                adict_search = {'name': name}
            adict_update = {}
            # The player cell's id links to a tooltip div that holds the
            # photo and birthday.
            player_id = player_name_tr.get('id')
            if player_id:
                player_id = player_id.replace('player_', 'player_tip_')
                player_id_soup = soup.select('div#%s' % player_id)[0]
                image_soup = player_id_soup.select(
                    'div.boxBody > table > tr:nth-of-type(1) > td:nth-of-type(1) img')
                if image_soup:
                    image_link = image_soup[0]['src'].replace('&amp;', '&')
                    adict_update['image_link'] = 'http://bongdaso.com/' + image_link
                info_trs = player_id_soup.select(
                    'div.boxBody > table > tr:nth-of-type(1) > td:nth-of-type(2) tr')
                for info_count, info_tr in enumerate(info_trs):
                    if info_count == 1:  # second row carries the birthday
                        td2 = info_tr.select('td:nth-of-type(2)')[0].get_text()
                        dt = datetime.strptime(td2, '%d/%m/%Y')
                        adict_update['birthday'] = fields.Date.to_string(dt)
            alist.append((adict_search, adict_update))
        for da_chinh_or_du_bi in [(da_chinhs, 'da_chinh'), (da_phus, 'du_bi')]:
            players = map(
                lambda i: get_or_create_object_sosanh(
                    self, 'tsbd.player', i[0], i[1]).id,
                da_chinh_or_du_bi[0])
            a_playerlines = map(
                lambda i: get_or_create_object_sosanh(
                    self, 'tsbd.playerline', {
                        'player_id': i,
                        'team_id': patern[1],
                        'home_or_away': patern[2],
                        'da_chinh_hay_du_bi': da_chinh_or_du_bi[1],
                        'match_id': match_id,
                    }).id,
                players)
            playerlines += list(a_playerlines)
    lineup_dict['playerline_ids'] = [(6, 0, playerlines)]
    return lineup_dict

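# Note on the return value above: [(6, 0, playerlines)] is Odoo's standard
# x2many "replace" command tuple, i.e. write exactly these playerline ids
# onto the match's playerline_ids relation.
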
def get_soup_ajax_link(fix_id, template_link):
    score_link = template_link % fix_id
    html = request_html(score_link)
    soup = BeautifulSoup(html, 'html.parser')
    return soup

def leech_button(self):
    rs = request_html(self.link)
    # Dump the raw HTML to disk for offline replay.
    with open('/media/sf_C_DRIVE/D4/dl/testfile.html', 'w') as f:
        f.write(rs)
    self.log = rs