def worst_picks(self, group):
    '''Return the golfers with the worst net score in a group.

    Net score is utils.formatRank(rank) minus handicap, taken from each
    golfer entry in self.score_dict (the 'info' entry is skipped).

    group: int group number to look at.

    Returns a tuple (worst_list, worst_score) where worst_list maps each
    worst golfer's 'pga_num' to the golfer name (the score_dict key) and
    worst_score is the highest net score found in the group.

    Raises ValueError when no golfer in score_dict belongs to the group.
    '''
    print('worst picks calc: ', group)

    # Net score per golfer, computed once; 'info' is tournament metadata,
    # not a golfer, so it is excluded here for both steps below.
    net_scores = {
        name: utils.formatRank(data.get('rank')) - data.get('handicap')
        for name, data in self.score_dict.items()
        if name != 'info' and data.get('group') == group
    }

    # max() raises ValueError on an empty group, same as before.
    worst_score = max(net_scores.values())

    # Ties are all reported, keyed by the golfer's pga_num.
    worst_list = {
        self.score_dict[name]['pga_num']: name
        for name, net in net_scores.items()
        if net == worst_score
    }

    print('worst: ', worst_list, worst_score)
    return worst_list, worst_score
def cut_data(self):
    '''Compute the cut number (and a cut-line label) for self.score_dict.

    Writes 'cut_num' and, when it can be derived, 'cut_line' into
    self.score_dict['info']. If anything in the calculation raises, the
    tournament's saved_cut_num is used instead. A missing cut line ends up
    as the text 'no cut line'.

    Returns self.score_dict['info'].
    '''
    print ('starting cut calc info: ', self.score_dict.get('info'))
    timer_start = datetime.now()

    def sitting_out(value):
        # True when value is one of the tournament's not-playing markers.
        return value in self.tournament.not_playing_list()

    try:
        info = self.score_dict.get('info')
        # Every entry except the 'info' metadata is a golfer record.
        golfers = [entry for name, entry in self.score_dict.items() if name != 'info']

        if info.get('round_status') == 'Not Started' and info.get('round') == 1 and self.tournament.has_cut:
            # Nothing played yet: fall back on the stored cut number.
            cut_num = self.tournament.saved_cut_num
        elif self.tournament.has_cut:
            # Golfers marked not playing who still have an r3 score.
            post_cut_wd = sum(
                1 for g in golfers
                if sitting_out(g.get('total_score')) and g.get('r3') != '--')

            if any(g.get('rank') == "CUT" for g in golfers):
                print ('cuts exists, inside if')
                still_ranked = sum(1 for g in golfers if not sitting_out(g.get('rank')))
                cut_num = still_ranked + post_cut_wd + 1
                if self.tournament.pga_tournament_num == '018':
                    cut_num = int(((cut_num - 1) / 2) + 1)
                print ('caclulated cut num', cut_num)

                if not info.get('cut_line'):
                    # Original author's note: this will be the wrong number, fix at some point.
                    cut_totals = (
                        int(utils.score_as_int(g.get('total_score')))
                        for g in golfers if g.get('rank') == "CUT")
                    cut_line = min(cut_totals) - 1
                    info['cut_line'] = 'Actual Cut Line: ' + str(utils.format_score(cut_line))
            else:
                print ('no cuts in leaderboadr, in else')
                cut_num = min(
                    utils.formatRank(g.get('rank')) for g in golfers
                    if int(utils.formatRank(g.get('rank'))) > self.tournament.saved_cut_num)
                print (cut_num)
                # NOTE(review): this looks up 'cut_line' on the top-level
                # score dict, not on 'info', so it is presumably always
                # None and the projected line always overwrites - confirm
                # whether info['cut_line'] was intended.
                if self.score_dict.get('cut_line') is None:
                    print ('in cut line none')
                    cut_line = max(
                        int(utils.score_as_int(g.get('total_score'))) for g in golfers
                        if int(utils.formatRank(g.get('rank'))) < cut_num
                        and not sitting_out(g.get('total_score')))
                    print ('2 ', cut_line)
                    info['cut_line'] = 'Projected Cut Line: ' + str(utils.format_score(cut_line))
        else:
            # No cut: one past the number of golfers still playing.
            cut_num = sum(1 for g in golfers if not sitting_out(g.get('total_score'))) + 1

        info['cut_num'] = cut_num
    except Exception as e:
        # Best effort: any failure falls back to the stored cut number.
        print ('cut nun calc issue: ', e)
        cut_num = self.tournament.saved_cut_num
        self.score_dict['info']['cut_num'] = cut_num

    if self.score_dict.get('info').get('cut_line') is None:
        self.score_dict['info']['cut_line'] = 'no cut line'

    print ('cut num duration: ', datetime.now() - timer_start)
    print ('info: ', self.score_dict['info'])
    return self.score_dict['info']
def get_data(self):
    '''Scrape the ESPN leaderboard at self.url and return the score dict.

    The dict maps 'info' to tournament-level data (round, status, playoff
    flag, cut number and cut line) and each golfer name (the text of the
    row's link) to that golfer's leaderboard values, plus 'handicap' and
    'group' when a matching Field record exists.

    The saved ScoreDict is returned without scraping when the tournament is
    complete, or when the saved copy is less than a minute old; a truthy
    self.setup always forces a scrape.

    Returns:
        sd.sorted_dict() when served from the database; an OrderedDict of
        the freshly scraped data (also saved on the ScoreDict row) after a
        scrape; {} if anything fails while scraping.
    '''
    start = datetime.now()
    print('scraping golf espn com')
    sd, created = ScoreDict.objects.get_or_create(tournament=self.tournament)

    if self.setup:
        print('set up mode, scraping')
    elif self.tournament.complete:
        print('T Complete returning saved score dict ', 'sd saved time: ',
              sd.updated, 'current time: ',
              datetime.utcnow().replace(tzinfo=pytz.utc))
        sd.data.get('info').update({'dict_status': 'from_db'})
        return sd.sorted_dict()
    elif not created and (sd.updated + timedelta(minutes=1)) > datetime.utcnow().replace(tzinfo=pytz.utc):
        print('returning saved score dict ', 'sd saved time: ', sd.updated,
              'current time: ', datetime.utcnow().replace(tzinfo=pytz.utc))
        sd.data.get('info').update({'dict_status': 'from_db'})
        return sd.sorted_dict()

    try:
        score_dict = {}

        # Parse inside the context manager so the HTTP response is closed.
        with urllib.request.urlopen(self.url) as html:
            soup = BeautifulSoup(html, 'html.parser')

        leaderboard = soup.find_all('tbody', {'class': 'Table__TBODY'})
        # attrs must be a dict; the earlier set literals only worked because
        # BeautifulSoup treats a non-dict attrs value as a class search.
        status = soup.find('div', {'class': 'status'}).span.text
        t_name = soup.find('h1', {'class': 'Leaderboard__Event__Title'}).text

        # Timing restarts here, so the durations printed below exclude the
        # download and initial parse.
        start = datetime.now()

        print('espn T Name: ', t_name)
        print('status: ', status, status[0:5])

        # Statuses shaped like "<word> <number> ..." carry the round number
        # as their second word; anything else is matched by text.
        try:
            info = {
                'round': int(status.split(' ')[1]),
                'complete': False,
                'round_status': status
            }
        except (IndexError, ValueError):
            if status in ["Final", "Medal Official"]:
                info = {'round': 4, 'complete': True, 'round_status': status}
            elif status == "Playoff - Play Complete":
                info = {'round': 4, 'complete': False, 'round_status': status}
            elif status == "Tournament Field":
                info = {'round': 1, 'complete': False, 'round_status': "Not Started"}
            elif status[0:5] == "First":
                info = {'round': 1, 'complete': False, 'round_status': status}
            elif status[0:5] == "Secon":
                info = {'round': 2, 'complete': False, 'round_status': status}
            elif status[0:5] == "Third":
                info = {'round': 3, 'complete': False, 'round_status': status}
            elif status.startswith("Final Round"):
                # Previously compared a 5-character slice with "Final Round",
                # which could never match, so these fell through to round 0.
                info = {'round': 4, 'complete': False, 'round_status': status}
            else:
                info = {'round': 0, 'complete': False, 'round_status': status}

        score_dict['info'] = info
        info.update({'source': 'espn'})

        # When a playoff table is present the main leaderboard is the
        # second tbody rather than the first.
        playoff = soup.find('div', {'class': 'leaderboard__playoff--table'}) is not None
        info.update({'playoff': playoff})
        table = leaderboard[1 if playoff else 0].find_all('tr')

        for i, row in enumerate(table):
            td = row.find_all('td')

            if len(td) == 1 and 'cutline' in row['class']:
                # Cut-line marker row, not a golfer.
                if "Projected" in td[0].text:
                    info.update({'cut_line': td[0].text})
                else:
                    info.update({'cut_line': 'Cut Line: ' + td[0].text[-2:]})

            elif len(td) == 3:
                # before start (changed from 2 to 3 columns for Sentry 2022)
                if self.tournament.pga_tournament_num == '999':
                    score_dict[row.a.text] = {
                        'pga_num': row.a['href'].split('/')[7],
                        'pos': td[2].text,
                        'flag': td[0].img.get('src'),
                        'change': '',
                    }
                else:
                    score_dict[row.a.text] = {
                        'pga_num': row.a['href'].split('/')[7],
                        'pos': td[2].text,
                        'change': ''
                    }

            elif len(td) == 11 and not info.get('complete'):
                # A not-playing marker in the total column replaces the rank.
                if td[3].text in self.tournament.not_playing_list():
                    rank = td[3].text
                else:
                    rank = td[1].text
                score_dict[row.a.text] = {
                    'pga_num': row.a['href'].split('/')[7],
                    'rank': rank,
                    'change': '',
                    'round_score': td[4].text,
                    'total_score': td[3].text,
                    'thru': td[5].text,
                    'r1': td[6].text,
                    'r2': td[7].text,
                    'r3': td[8].text,
                    'r4': td[9].text,
                    'tot_strokes': td[10].text,
                }

            elif len(td) == 12 and not info.get('complete'):
                if td[4].text in self.tournament.not_playing_list():
                    rank = td[4].text
                else:
                    rank = td[1].text
                score_dict[row.a.text] = {
                    'pga_num': row.a['href'].split('/')[7],
                    'rank': rank,
                    'change': str(td[2].span),
                    'round_score': td[5].text,
                    'total_score': td[4].text,
                    'thru': td[6].text,
                    'r1': td[7].text,
                    'r2': td[8].text,
                    'r3': td[9].text,
                    'r4': td[10].text,
                    'tot_strokes': td[11].text,
                }

            elif len(td) == 10 and info.get('round') != 1:
                # tournament complete - original note: doesn't work when complete
                print('espn scrape in len 10 logic')
                if td[2].text in self.tournament.not_playing_list():
                    rank = td[2].text
                else:
                    rank = td[0].text
                score_dict[row.a.text] = {
                    'pga_num': row.a['href'].split('/')[7],
                    'rank': rank,
                    'change': '',
                    'round_score': '',
                    'total_score': td[2].text,
                    'thru': "F",
                    'r1': td[3].text,
                    'r2': td[4].text,
                    'r3': td[5].text,
                    'r4': td[6].text,
                    'tot_strokes': td[7].text,
                }

            elif len(td) == 8 and info.get('round') != 1 and self.tournament.pga_tournament_num == '999':
                # olympics complete: the first three rows get ranks 1-3
                # instead of the text in the position column.
                rank = i + 1 if i < 3 else td[0].text
                score_dict[row.a.text] = {
                    'pga_num': row.a['href'].split('/')[7],
                    'rank': rank,
                    'change': '',
                    'total_score': td[2].text,
                    'thru': "F",
                    'r1': td[3].text,
                    'r2': td[4].text,
                    'r3': td[5].text,
                    'r4': td[6].text,
                    'tot_strokes': td[7].text,
                }

            elif info.get('complete'):
                if td[3].text in self.tournament.not_playing_list():
                    rank = td[3].text
                else:
                    rank = td[1].text
                score_dict[row.a.text] = {
                    'pga_num': row.a['href'].split('/')[7],
                    'rank': rank,
                    'change': '',
                    'round_score': '',
                    'total_score': td[3].text,
                    'thru': "F",
                    'r1': td[4].text,
                    'r2': td[5].text,
                    'r3': td[6].text,
                    'r4': td[7].text,
                    'tot_strokes': td[8].text,
                }

            else:
                # round 1 layout
                score_dict[row.a.text] = {
                    'pga_num': row.a['href'].split('/')[7],
                    'rank': td[0].text,
                    'change': '',
                    'round_score': td[3].text,
                    'total_score': td[2].text,
                    'thru': td[4].text,
                    'r1': td[5].text,
                    'r2': td[6].text,
                    'r3': td[7].text,
                    'r4': td[8].text,
                    'tot_strokes': td[9].text,
                }

            # Attach handicap and group for golfers that are in this
            # tournament's field (skipped for the one-cell cut-line row).
            if len(td) > 1 and Field.objects.filter(
                    golfer__espn_number=score_dict[row.a.text]['pga_num'],
                    tournament=self.tournament).exists():
                f = Field.objects.get(
                    golfer__espn_number=score_dict[row.a.text]['pga_num'],
                    tournament=self.tournament)
                score_dict[row.a.text].update({
                    'handicap': f.handicap(),
                    'group': f.group.number
                })

        print('info before cut num calc: ', score_dict.get('info'),
              'scrape duration: ', datetime.now() - start)
        cut_calc_start = datetime.now()

        try:
            golfers = [v for k, v in score_dict.items() if k != 'info']

            if info.get('round_status') == 'Not Started' and info.get('round') == 1 and self.tournament.has_cut:
                cut_num = self.tournament.saved_cut_num
            elif self.tournament.has_cut:
                # Golfers marked not playing who still have a score for the
                # round after the cut round.
                next_round_key = 'r' + str(self.tournament.saved_cut_round + 1)
                post_cut_wd = len([
                    v for v in golfers
                    if v.get('total_score') in self.tournament.not_playing_list()
                    and v.get(next_round_key) != '--'
                ])
                print('post cut WD ', post_cut_wd)

                if any(v.get('total_score') == "CUT" for v in golfers):
                    print('cuts exists, inside if')
                    cut_num = len([
                        v for v in golfers
                        if v.get('total_score') not in self.tournament.not_playing_list()
                    ]) + post_cut_wd + 1
                else:
                    print('no cuts in leaderboadr, in else')
                    cut_num = min(
                        utils.formatRank(v.get('rank')) for v in golfers
                        if int(utils.formatRank(v.get('rank'))) > self.tournament.saved_cut_num)
                    print(cut_num)
                    # NOTE(review): this checks the top-level dict, not
                    # 'info', so a cut line scraped above is presumably
                    # always overwritten here - confirm the intent before
                    # changing it.
                    if score_dict.get('cut_line') is None:
                        print('in cut line none')
                        cut_line = max(
                            int(utils.score_as_int(v.get('total_score')))
                            for v in golfers
                            if int(utils.formatRank(v.get('rank'))) < cut_num
                            and v.get('total_score') not in self.tournament.not_playing_list())
                        print('2 ', cut_line)
                        info.update({
                            'cut_line': 'Projected Cut Line: ' + str(utils.format_score(cut_line))
                        })
            else:
                cut_num = len([
                    v for v in golfers
                    if v.get('total_score') not in self.tournament.not_playing_list()
                ]) + 1

            info.update({'cut_num': cut_num})
        except Exception as e:
            # Best effort: any failure falls back to the stored cut number.
            print('cut nun calc issue: ', e)
            cut_num = self.tournament.saved_cut_num
            info.update({'cut_num': cut_num})

        if info.get('cut_line') is None:
            info.update({'cut_line': 'no cut line'})

        print('cut num duration: ', datetime.now() - cut_calc_start)
        print('info: ', score_dict['info'])

        sd.data = OrderedDict(score_dict)
        sd.save()

        print('espn scrape duration: ', datetime.now() - start)
        return OrderedDict(score_dict)
    except Exception as e:
        # Any scrape or parse failure yields an empty dict for the caller.
        print('issue scraping espn', e)
        return {}
def update_scores(self, optimal_picks=None):
    '''Recalculate and store the score for every picked golfer.

    Does nothing when self.tournament.complete is already set. Otherwise,
    for each distinct picked golfer in the tournament, the golfer's entry
    in self.score_dict is turned into a score and written to all matching
    Picks and ScoreDetails rows with queryset updates, then
    self.pick_bonuses is called for the pick.

    optimal_picks: optional dict keyed by group number (as a string); when
        None it is built here from self.optimal_picks for every Group of
        the tournament.

    Returns None.
    '''
    start = datetime.now()
    #print (self.score_dict)
    if self.tournament.complete:
        return
    cut_num = self.score_dict.get('info').get('cut_num')
    print('after cut num', datetime.now() - start)
    if optimal_picks == None:
        optimal_picks = {}
        for g in Group.objects.filter(tournament=self.tournament):
            opt = self.optimal_picks(g.number)
            optimal_picks[str(g.number)] = {
                'golfer': opt[0],
                'rank': opt[1],
                'cuts': opt[2],
                'total_golfers': g.playerCnt
            }
    print('after optimal', datetime.now() - start)
    #print (optimal_picks)
    #curr_round = self.tournament.saved_round
    curr_round = self.score_dict.get('info').get('round')
    print('after round', datetime.now() - start)
    #BonusDetails.objects.filter(tournament=self.tournament).update(best_in_group_bonus=0)
    # Zero the bonus_type '5' points before the loop
    # (presumably pick_bonuses re-awards them - verify there).
    BonusDetails.objects.filter(tournament=self.tournament, bonus_type='5').update(bonus_points=0)
    print('starting pick loop time to here', datetime.now() - start)
    loop_start = datetime.now()
    # One pass per distinct picked golfer; the queryset updates below then
    # apply the result to every Picks / ScoreDetails row for that golfer.
    for p in Picks.objects.filter(playerName__tournament=self.tournament).values('playerName').distinct():
        pick_loop_start = datetime.now()
        #print ('PICK ', p)
        # Any one pick of this golfer serves as the representative.
        pick = Picks.objects.filter(playerName__pk=p.get('playerName')).first()
        sd, sd_created = ScoreDetails.objects.get_or_create(user=pick.user, pick=pick)
        #if sd.filter(pick=pick, today_score__in=self.not_playing_list).exists() and self.score_dict.get('info').get('round') > 2:
        try:
            # Find the golfer's leaderboard entry by ESPN number; an empty
            # result makes temp[0] raise and lands in the except below.
            temp = [
                x for x in self.score_dict.values()
                if x.get('pga_num') == pick.playerName.golfer.espn_number
            ]
            #print ('temp', temp)
            data = temp[0]
            #print ('data', data)
            #print ('SD: ', sd, sd.user, sd.gross_score, int(utils.formatRank(data.get('rank'))))
            # Skip the writes when no stored ScoreDetails row for this
            # golfer differs from the current rank / thru / to-par values.
            if ScoreDetails.objects.filter(pick__playerName__tournament=self.tournament, pick__playerName__golfer__espn_number=pick.playerName.golfer.espn_number) \
                    .exclude(gross_score=utils.formatRank(data.get('rank')), thru=data.get('thru'), toPar=data.get('total_score')).count() == 0:
                print('skipping no change', pick.playerName, datetime.now() - pick_loop_start)
                self.pick_bonuses(sd, pick, optimal_picks, data)
                continue
            print('thru skip checks')
            if data.get('rank') == "CUT":
                score = cut_num + self.cut_penalty(pick)
            elif data.get('rank') in ["WD", "DQ"] or (data.get('rank') in self.cut_indicators and data.get('total_score') in ['WD', 'DQ']):
                print('WD/DQ: ', pick, data)
                score = self.get_wd_score(pick) + self.cut_penalty(pick)
            else:
                # A rank beyond the cut number is scored like a missed cut
                # when the tournament has a cut.
                if self.tournament.has_cut and int(utils.formatRank(data.get('rank'))) > cut_num:
                    score = cut_num + self.cut_penalty(pick)
                else:
                    score = utils.formatRank(data.get('rank'))
            Picks.objects.filter(
                playerName__tournament=self.tournament,
                playerName=pick.playerName).update(score=score)
            #this doesn't work, only creates one SD record rather than all
            #sd, sd_created = ScoreDetails.objects.get_or_create(user=pick.user, pick=pick)
            sd.score = score - pick.playerName.handicap()
            # NOTE(review): pick.score is the value already on the in-memory
            # pick, not the score just written above; the queryset update
            # below stores gross_score=score instead.
            sd.gross_score = pick.score
            # `and` binds tighter than `or`: CUT always shows as "CUT",
            # WD/DQ shows as "CUT" only while curr_round < 3.
            if data.get('rank') == "CUT" or \
                    data.get('rank') in ["WD", 'DQ'] and curr_round < 3:
                sd.today_score = "CUT"
                sd.thru = "CUT"
            elif data.get('rank') in ["WD", 'DQ']:
                sd.today_score = "WD"
                sd.thru = "WD"
            else:
                sd.today_score = data.get('round_score')
                sd.thru = data.get('thru')
            sd.toPar = data.get('total_score')
            sd.sod_position = data.get('change')
            # sd itself is never saved; its attributes only carry the values
            # into this update of every row for the golfer.
            ScoreDetails.objects.filter(
                pick__playerName__tournament=self.tournament,
                pick__playerName=pick.playerName).update(
                    score=sd.score,
                    gross_score=score,
                    today_score=sd.today_score,
                    thru=sd.thru,
                    toPar=sd.toPar,
                    sod_position=sd.sod_position)
        except Exception as e:
            #### fix this, doesn't work.
            # Any failure above (including a golfer missing from the score
            # dict) is handled by recording the pick as a withdrawal.
            print('withdraw?', pick, e)
            pick.score = cut_num
            #pick.save()
            Picks.objects.filter(
                playerName__tournament=self.tournament,
                playerName=pick.playerName).update(
                    score=cut_num - pick.playerName.handicap())
            #sd, sd_created = ScoreDetails.objects.get_or_create(user=pick.user, pick=pick)
            #sd.score=pick.score - pick.playerName.handicap()
            sd.score = pick.score
            #sd.gross_score = score
            sd.gross_score = self.get_wd_score(pick)
            sd.today_score = "WD"
            sd.thru = "WD"
            sd.toPar = "WD"
            sd.sod_position = '-'
            #sd.save() comment for bulk update
            ScoreDetails.objects.filter(
                pick__playerName__tournament=self.tournament,
                pick__playerName=pick.playerName).update(
                    #score=cut_num - pick.playerName.handicap(),
                    score=cut_num,
                    gross_score=sd.gross_score,
                    today_score=sd.today_score,
                    thru=sd.thru,
                    sod_position=sd.sod_position,
                    toPar=sd.toPar)
            # No leaderboard entry is available for pick_bonuses here.
            data = {}
        self.pick_bonuses(sd, pick, optimal_picks, data)
        print(
            'pick loop: ', pick.playerName, ' ',
            Picks.objects.filter(
                playerName__pk=p.get('playerName')).count(), ' ',
            datetime.now() - pick_loop_start)
    ## end of bulk update section
    # Mark the tournament complete once the score dict reports it.
    if self.score_dict.get('info').get('complete') == True:
        self.tournament.complete = True
    self.tournament.score_update_time = datetime.now(tz=timezone.utc)
    self.tournament.save()
    print('score loop duration', datetime.now() - loop_start)
    print('update_scores duration', datetime.now() - start)
    return
def scrape(self):
    '''Scrape the leaderboard at self.url with headless Chrome.

    Waits up to 60 seconds for the element with id
    "leaderBoardPlayersTraditionalContent", then reads every 'playerRow'
    div. Each row is matched to a Field record of self.tournament, first by
    the player number in the row's 'pr<number>' class and then by a name
    fragment; rows that cannot be matched or parsed are skipped.

    Side effects: sets self.tournament.cut_score to 'Cut Number <n>' and
    saves the tournament.

    Returns a dict mapping the Field playerName to rank, thru, round score,
    total score and r1-r4, or None when the scrape fails.
    '''
    start = datetime.now()
    options = ChromeOptions()
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument(f'user-agent={user_agent}')

    def stat_text(row, attrs):
        # Text of the 'data' div inside the stat cell selected by attrs.
        return row.find('div', {'class': 'playerStatContainer'}) \
                  .find('div', attrs) \
                  .find('div', {'class': 'data'}).text

    def round_text(row, round_class):
        # Round score text, with '--' standing in for an empty cell.
        return stat_text(row, {'class': round_class}) or '--'

    driver = Chrome(options=options)
    try:
        print('driver pre url: ', datetime.now() - start)
        driver.get(self.url)
        print('driver after url: ', datetime.now() - start)

        try:
            # Block until the leaderboard has been rendered.
            WebDriverWait(driver, 60).until(
                EC.presence_of_element_located(
                    (By.ID, "leaderBoardPlayersTraditionalContent")))
            print('a')
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            print('b')
            print('c')
            player_rows = soup.find_all('div', {'class': 'playerRow'})

            score_dict = {}
            for row in player_rows:
                masters_name = row.find('div', {
                    'class': 'playerName'
                }).find('div', {
                    'class': 'data'
                }).text

                # Reset per row so a row without a 'pr<number>' class cannot
                # reuse the previous row's player number.
                player_num = None
                for c in row['class']:
                    if c[:2] == 'pr':
                        player_num = c[2:]

                try:
                    field = None
                    if player_num is not None:
                        try:
                            golfer = Golfer.objects.get(golfer_pga_num=player_num)
                            field = Field.objects.get(tournament=self.tournament,
                                                      golfer=golfer)
                        except Exception:
                            # Fall through to the name-based lookup.
                            field = None

                    if field is None:
                        name_fragment = masters_name.split(',')[0].split(' ')[0].capitalize()
                        matches = Field.objects.filter(
                            tournament=self.tournament,
                            playerName__contains=name_fragment)
                        if matches.exists():
                            field = matches.get()
                        else:
                            # Skip the row rather than store its scores
                            # under another golfer's name.
                            print('cant find player', masters_name)
                            continue

                    player_name = field.playerName

                    pos = row.find('div', {
                        'class': 'pos'
                    }).find('div', {
                        'class': 'data'
                    }).text

                    if pos != "WD":
                        total = stat_text(row, {'attr': 'topar'})
                        today = stat_text(row, {'attr': 'today3'})
                        thru = stat_text(row, {'class': 'thru'})
                    else:
                        # Withdrawn golfers only keep their round scores.
                        total = ''
                        today = ''
                        thru = ''

                    r1 = round_text(row, 'r1')
                    r2 = round_text(row, 'r2')
                    r3 = round_text(row, 'r3')
                    r4 = round_text(row, 'r4')

                    score_dict[player_name] = {
                        'rank': pos,
                        'change': 'n/a',
                        'thru': thru,
                        'round_score': today,
                        'total_score': total,
                        'r1': r1,
                        'r2': r2,
                        'r3': r3,
                        'r4': r4
                    }
                except Exception as e:
                    # One bad row must not abort the whole scrape.
                    print('row execptino', e)

            # One past the number of playing golfers ranked 50th or better.
            cut_num = len([
                x for x in score_dict.values()
                if int(utils.formatRank(x['rank'])) <= 50
                and x['rank'] not in self.tournament.not_playing_list()
            ]) + 1

            self.tournament.cut_score = 'Cut Number ' + str(cut_num)
            self.tournament.save()
            return score_dict
        except Exception as e:
            print('scrape issues', e)
    finally:
        # Always shut the browser down, including when the scrape fails.
        driver.quit()