def get_concentration(driver, url):
    """Build the concentration row for the match page at ``url``.

    The returned list holds the names from ``uf.get_names`` followed by one
    coefficient per team, ``1 - x / 5``.  ``x`` stays 0 unless one of the
    team's recent lost matches (as reported by ``get_loses``) has a statto
    position difference below -7; in that case ``x`` is
    ``6 - match['total'] + match['match']`` of the last such match.

    ``driver`` is only forwarded to ``uf.get_statto_soup``.
    """
    page = uf.get_soup(url)
    names = uf.get_names(page)

    #: one "tournirs" element per team, paired with the names in page order
    blocks = page.findAll(attrs={'class': "match__team__tournirs"})
    lost_by_team = [get_loses(name, block) for name, block in zip(names, blocks)]

    #: per team, keep the value derived from the last qualifying loss
    penalties = [0, 0]
    for idx, lost_matches in zip(range(2), lost_by_team):
        for lost in lost_matches:
            table = uf.get_statto_soup(driver, lost['date'])
            own = uf.championat_statto[names[idx]]
            rival = uf.championat_statto[lost['rival']]
            own_info, rival_info = uf.get_statto_teams_info(own, rival, table)[:2]
            if uf.get_statto_teams_pos_diff(own_info, rival_info) < -7:
                penalties[idx] = 6 - lost['total'] + lost['match']

    #: turn the penalties into coefficients and append them to the names
    return list(names) + [1 - penalty / 5 for penalty in penalties]
def get_all_concentration(path="./extracted_concentration_13_14.txt"):
    """Write one tab-separated concentration row per match to ``path``.

    Matches are the ``_res`` elements of the page returned by
    ``uf.get_soup()``.  Each match is tried up to five times; after the
    fifth failure its URL is printed and the match is skipped.  The file is
    written in windows-1251 and flushed after every fifth match.
    """
    max_tries = 5
    with uf.ChromeDriver() as driver, \
            open(path, 'w', encoding='windows-1251') as out:
        listing = uf.get_soup()
        out.write("name1\tname2\tconcentration1\tconcentration2\n")
        matches = listing.findAll(attrs={'class': '_res'})
        print("Starting extracting concentraion")
        for index, match in enumerate(matches):
            print(index + 1)
            for attempt in range(1, max_tries + 1):
                try:
                    #: the link lookup stays inside the try so it is retried too
                    row = get_concentration(
                        driver,
                        'http://www.championat.com' + match.findAll('a')[0]['href'])
                    break
                except Exception as exc:
                    print('On try {0} smth went wrong: {1}'.format(attempt, exc))
            else:
                #: every attempt failed: report the URL and move on
                print('Oh, well:\n\t',
                      'http://www.championat.com' + match.findAll('a')[0]['href'])
                continue
            out.write('\t'.join(str(item) for item in row) + '\n')
            if index % 5 == 4:
                out.flush()
        print("Extraction completed")
def get_all_motivation(path="./extracted_motivation_13_14.txt"):
    """Write one tab-separated motivation row per match to ``path``.

    Matches are the ``_res`` elements of the page returned by
    ``uf.get_soup()``.  ``get_motivation`` is tried up to five times per
    match; after the fifth failure the last error is printed and the match
    is skipped.  The file is written in windows-1251 and flushed after every
    fifth match and once more at the end.
    """
    max_tries = 5
    with uf.ChromeDriver() as driver, \
            open(path, 'w', encoding='windows-1251') as out:
        listing = uf.get_soup()
        out.write("name1\tname2\tmotivation1\tmotivation2\n")
        matches = listing.findAll(attrs={'class': '_res'})
        for index, match in enumerate(matches):
            print(index + 1)
            last_error = None
            for attempt in range(1, max_tries + 1):
                try:
                    #: the link lookup stays inside the try so it is retried too
                    row = get_motivation(
                        'http://www.championat.com' + match.findAll('a')[0]['href'],
                        driver)
                    break
                except Exception as exc:
                    last_error = exc
                    print('On try {0} smth went wrong: {1}'.format(attempt, exc))
            else:
                #: every attempt failed: report the last error and move on
                print('I give up; shit happens. Check it out!')
                print(last_error)
                continue
            out.write('\t'.join(str(item) for item in row) + '\n')
            if index % 5 == 4:
                out.flush()
        print("Extraction completed")
        out.flush()
def get_all_motivation(path="./extracted_motivation.txt"):
    """Write one tab-separated motivation row per match to ``path``.

    Matches are the ``_res`` elements of the page returned by
    ``uf.get_soup()``; each is passed to ``get_motivation`` with up to five
    attempts.  The file is written in windows-1251 and flushed after every
    fifth match.

    NOTE(review): this variant sounds ``winsound`` alarms in endless loops,
    both after a fifth failed attempt and after the extraction finishes, so
    as laid out here the function never returns normally.  The nesting was
    reconstructed from a collapsed line -- confirm against the original.
    """
    with uf.ChromeDriver() as driver, open(path, 'w', encoding='windows-1251') as handle:
        soup = uf.get_soup()
        handle.write("name1\tname2\tmotivation1\tmotivation2\n")
        matches = soup.findAll(attrs={'class': '_res'})
        for cnt, match in enumerate(matches):
            print(cnt + 1)
            trying = 0
            error = False
            #: retry loop: leaves on success or once the attempts are used up
            while True:
                try:
                    motivation = get_motivation('http://www.championat.com' + match.findAll('a')[0]['href'], driver)
                    break
                except Exception as e:
                    trying += 1
                    print('On try {0} smth went wrong: {1}'.format(trying, e))
                    if trying == 5:
                        print('I give up; shit happens. Check it out!')
                        print(e)
                        #: alarm: beeps once a second with no exit condition
                        while True:
                            time.sleep(1)
                            winsound.Beep(800, 1000)
                        #: NOTE(review): unreachable after the endless loop above
                        error = True
                        break
                    continue
            if error:
                continue
            handle.write('\t'.join(str(e) for e in motivation) + '\n')
            #: flush after every fifth match (cnt is 0-based)
            if cnt % 5 == 4:
                handle.flush()
        print("Extraction completed")
        #: completion alarm; presumably still inside the ``with`` -- TODO confirm
        while True:
            winsound.Beep(800, 10000)
def get_history(url):
    """Build the history row for the match page at ``url``.

    The row is the text of every ``match__team__name`` element, then the
    history value, then whatever ``uf.get_results`` returns for the page.
    The history value is the sum of ``get_winner`` over at most the first
    two rows of the history table, divided by 4.

    Returns ``None`` when the page has no
    ``table match__history__table`` element.
    """
    page = uf.get_soup(url)

    #: team names
    row = [tag.text for tag in page.findAll(attrs={'class': 'match__team__name'})]

    #: the logo is looked up before the table check, as a missing icon must
    #: still raise rather than return None
    icon = page.find(attrs={'class': re.compile('match__timeline__team__icon')})
    logo = icon['src']
    table = page.find(attrs={'class': 'table match__history__table'})
    if table is None:
        return None

    #: history over the two most recent table rows
    recent_rows = table.findAll('tr', limit=2)
    score = sum(get_winner(logo, table_row) for table_row in recent_rows)
    row.append(score / 4)

    #: match result
    row += uf.get_results(page)
    return row
def get_all_goal_pos_diff(path="./extracted_goal_score_diff_13_14.txt"):
    """Write one tab-separated goal/score difference row per match to ``path``.

    Matches are the ``_res`` elements of the page returned by
    ``uf.get_soup()``.  ``get_goal_pos_diff`` is tried up to five times per
    match; after the fifth failure the match is skipped.  The file is opened
    with the platform default encoding and flushed after every fifth match.
    """
    max_tries = 5
    with uf.ChromeDriver() as driver, open(path, 'w') as out:
        listing = uf.get_soup()
        out.write("name1\tname2\tgoal_diff\tscore_diff\n")
        matches = listing.findAll(attrs={'class': '_res'})
        print("Starting extracting goal and score diffs")
        for index, match in enumerate(matches):
            print(index + 1)
            for attempt in range(1, max_tries + 1):
                try:
                    #: the link lookup stays inside the try so it is retried too
                    row = get_goal_pos_diff(
                        driver,
                        'http://www.championat.com' + match.findAll('a')[0]['href'])
                    break
                except Exception as exc:
                    print('On try {0} smth went wrong: {1}'.format(attempt, exc))
            else:
                #: every attempt failed: skip this match
                print('I give up; date is probably too early')
                continue
            out.write('\t'.join(str(item) for item in row) + '\n')
            if index % 5 == 4:
                out.flush()
        print("Extraction completed")
def get_goal_pos_diff(driver, url):
    """Build the goal/position difference row for the match page at ``url``.

    The row is the two team names from ``uf.get_names`` followed by
    ``goal_diff`` and ``pos_diff``.  Each is 0.5 plus the difference between
    the two teams' values, divided by twice the spread between the "first"
    and "last" reference rows returned by ``uf.get_statto_teams_info``
    (presumably the top and bottom of the table -- verify in ``uf``).

    ``driver`` is only forwarded to ``uf.get_statto_soup``.  A zero spread
    raises ``ZeroDivisionError``.
    """
    page = uf.get_soup(url)

    #: match date and team names
    match_date = uf.get_date(page)
    names = uf.get_names(page)

    #: statto table for that date, looked up by the statto team names
    table = uf.get_statto_soup(driver, match_date)
    statto_names = [uf.championat_statto[name] for name in names]

    #: raw strings for both teams plus the four reference rows
    team_a, team_b, first, last, first_goals, last_goals = \
        uf.get_statto_teams_info(statto_names[0], statto_names[1], table)

    #: parse the numbers out of the strings
    team_a, team_b, first, last, first_goals, last_goals = [
        get_values(raw)
        for raw in (team_a, team_b, first, last, first_goals, last_goals)
    ]

    #: index 0 feeds the goal value, index 1 the position value
    goal_spread = 2 * (first_goals[0] - last_goals[0])
    goal_diff = 0.5 + (team_a[0] - team_b[0]) / goal_spread
    pos_spread = 2 * (first[1] - last[1])
    pos_diff = 0.5 + (team_a[1] - team_b[1]) / pos_spread

    return list(names) + [goal_diff, pos_diff]
def get_motivation(url, driver):
    """Build the motivation row for the match page at ``url``.

    The row is the team names from ``uf.get_names`` followed by one
    motivation value per team:

    * ``1, 1`` when either team lists the other in ``derbies``;
    * ``1, 1`` when the tour number is above 33;
    * ``0, 0`` when the tour number is below 16;
    * otherwise ``1 - (dist / 3) / (TOURS - tour)``, where ``dist`` is the
      smallest non-zero gap between the team's score and the key position
      scores; values outside ``[0, 1]`` are replaced by 0.

    ``driver`` is only forwarded to ``uf.get_statto_soup``.
    """
    page = uf.get_soup(url)
    names = list(uf.get_names(page))

    #: derby check, in both directions
    if any(names[i] in derbies and names[1 - i] in derbies[names[i]]
           for i in (0, 1)):
        return names + [1, 1]

    #: season end or start
    tour = uf.get_tour_number(page)
    if tour > 33:
        return names + [1, 1]
    if tour < 16:
        return names + [0, 0]

    #: statto table for the match date
    match_date = uf.get_date(page)
    table = uf.get_statto_soup(driver, match_date)
    table_rows = table.findAll('form')[1].findAll('tr')
    statto_names = [uf.championat_statto[name] for name in names]

    #: both teams' scores and the scores at the key positions
    info = uf.get_statto_teams_info(statto_names[0], statto_names[1], table)
    score_a = uf.get_statto_score(info[0])
    score_b = uf.get_statto_score(info[1])
    key_scores = get_key_pos_scores(table_rows)

    def nearest_gap(score):
        """Smallest non-zero distance from ``score`` to a key score."""
        gaps = [abs(key - score) for key in key_scores]
        return min([gap for gap in gaps if gap])

    #: both distances are computed before any division happens
    gap_a = nearest_gap(score_a)
    gap_b = nearest_gap(score_b)

    tours_left = TOURS - tour

    def motivation(gap):
        """Motivation for one team; out-of-range values become 0."""
        value = 1 - (gap / 3) / tours_left
        return 0 if (value < 0 or value > 1) else value

    first = motivation(gap_a)
    second = motivation(gap_b)
    return names + [first, second]
def get_motivation(url, driver):
    """Return ``[name1, name2, motivation1, motivation2]`` for a match page.

    Shortcuts: a derby (either team listed under the other in ``derbies``)
    or a tour above 33 gives ``1, 1``; a tour below 16 gives ``0, 0``.
    Otherwise each motivation is ``1 - (dist / 3) / (TOURS - tour)``, with
    ``dist`` the smallest non-zero gap between the team's score and the
    key position scores; a value outside ``[0, 1]`` is stored as 0.

    ``driver`` is only forwarded to ``uf.get_statto_soup``.
    """
    page = uf.get_soup(url)
    row = list(uf.get_names(page))

    #: derbies always count as fully motivated
    for me, other in ((0, 1), (1, 0)):
        if row[me] in derbies and row[other] in derbies[row[me]]:
            return row + [1, 1]

    #: late tours are fully motivated, early tours not at all
    tour = uf.get_tour_number(page)
    if tour > 33:
        return row + [1, 1]
    if tour < 16:
        return row + [0, 0]

    #: switch to the statto table for the match date
    table = uf.get_statto_soup(driver, uf.get_date(page))
    all_rows = table.findAll('form')[1].findAll('tr')
    statto_first, statto_second = [uf.championat_statto[name] for name in row]

    #: team scores first, key position scores second
    info = uf.get_statto_teams_info(statto_first, statto_second, table)
    team_scores = [uf.get_statto_score(info[0]), uf.get_statto_score(info[1])]
    key_scores = get_key_pos_scores(all_rows)

    #: smallest non-zero gap to a key position, for each team in turn
    gaps = []
    for team_score in team_scores:
        candidates = [abs(key - team_score) for key in key_scores]
        gaps.append(min([gap for gap in candidates if gap]))

    #: scale each gap by the tours still to play
    tours_left = TOURS - tour
    motivations = []
    for gap in gaps:
        value = 1 - (gap / 3) / tours_left
        if value < 0 or value > 1:
            value = 0
        motivations.append(value)

    return row + motivations
def get_all_forms(path="./extracted_form_13_14.txt"):
    """Write one tab-separated form row per match to ``path``.

    Matches are the ``_res`` elements of the page returned by
    ``uf.get_soup()``.  Matches for which ``get_form`` returns ``None`` are
    left out.  The file is written in windows-1251 and flushed after every
    fifth match.
    """
    with open(path, "w", encoding='windows-1251') as out:
        listing = uf.get_soup()
        print("Starting extracting forms")
        out.write('name1\tname2\tform1\tform2\tresult\n')
        cells = listing.findAll(attrs={'class': '_res'})
        for number, cell in enumerate(cells, start=1):
            print(number)
            link = 'http://www.championat.com' + cell.findAll('a')[0]['href']
            row = get_form(link)
            if row is not None:
                out.write('\t'.join(str(item) for item in row) + '\n')
            #: periodic flush so a crash loses at most a few rows
            if number % 5 == 0:
                out.flush()
        print("Forms extraction finished")
def get_all_forms(path="./extracted_form_13_14.txt"):
    """Write one tab-separated form row per match to ``path``.

    Matches are the ``norm`` elements of the page returned by
    ``uf.get_soup()``; each element's own ``href`` is the match link.
    Matches for which ``get_form`` returns ``None`` are left out.  The file
    is opened with the platform default encoding and flushed after every
    fifth match.
    """
    with open(path, "w") as out:
        listing = uf.get_soup()
        print("Starting extracting forms")
        out.write('name1\tname2\tform1\tform2\tresult\n')
        links = listing.findAll(attrs={'class': 'norm'})
        for number, link in enumerate(links, start=1):
            print(number)
            row = get_form('http://www.championat.com' + link['href'])
            if row is not None:
                out.write('\t'.join(str(item) for item in row) + '\n')
            #: periodic flush so a crash loses at most a few rows
            if number % 5 == 0:
                out.flush()
        print("Forms extraction finished")
def get_all_forms(path="./extracted_form_13_14.txt"):
    """Dump the form of both teams for every listed match into ``path``.

    The match list is every ``_res`` element of the page returned by
    ``uf.get_soup()``.  A match is written only when ``get_form`` returns a
    row.  Output is windows-1251, tab-separated, with a header line, and is
    flushed after every fifth match.
    """
    site = "http://www.championat.com"
    with open(path, "w", encoding="windows-1251") as out:
        listing = uf.get_soup()
        print("Starting extracting forms")
        out.write("name1\tname2\tform1\tform2\tresult\n")
        processed = 0
        for cell in listing.findAll(attrs={"class": "_res"}):
            processed += 1
            print(processed)
            row = get_form(site + cell.findAll("a")[0]["href"])
            if row is not None:
                line = "\t".join(str(item) for item in row)
                out.write(line + "\n")
            #: periodic flush so a crash loses at most a few rows
            if processed % 5 == 0:
                out.flush()
        print("Forms extraction finished")
def get_all_history(path="./extracted_history_13_14.txt"):
    """Write one tab-separated history row per match to ``path``.

    Matches are the ``_res`` elements of the page returned by
    ``uf.get_soup()``.  Matches for which ``get_history`` returns ``None``
    are left out.  The file is written in windows-1251 and flushed on the
    first match and every fifth match after it.
    """
    with open(path, 'w', encoding='windows-1251') as out:
        listing = uf.get_soup()
        print("Starting extracting history")
        out.write('name1\tname2\thistory\tresult\n')
        for index, match in enumerate(listing.findAll(attrs={'class': '_res'})):
            #: the link is resolved before the progress number is printed
            link = 'http://www.championat.com' + match.findAll('a')[0]['href']
            print(index + 1)
            row = get_history(link)
            if row is not None:
                out.write('\t'.join(str(item) for item in row) + '\n')
            #: index is 0-based, so this fires on matches 1, 6, 11, ...
            if index % 5 == 0:
                out.flush()
        print("History extracting finished")
def get_all_history(path="./extracted_history.txt"):
    """Write one tab-separated history row per match to ``path``.

    Matches are the ``norm`` elements of the page returned by
    ``uf.get_soup()``; each element's own ``href`` is the match link.
    Matches for which ``get_history`` returns ``None`` are left out.  The
    file is opened with the platform default encoding and flushed after
    every fifth match.
    """
    with open(path, 'w') as out:
        listing = uf.get_soup()
        print("Starting extracting history")
        out.write('name1\tname2\thistory\tresult\n')
        links = listing.findAll(attrs={'class': 'norm'})
        for number, link in enumerate(links, start=1):
            print(number)
            row = get_history('http://www.championat.com' + link['href'])
            if row is not None:
                out.write('\t'.join(str(item) for item in row) + '\n')
            #: periodic flush so a crash loses at most a few rows
            if number % 5 == 0:
                out.flush()
        print("History extracting finished")
def get_form(url):
    """Build the form row for the match page at ``url``.

    The row is the names from ``uf.get_names``, the two form values, then
    whatever ``uf.get_results`` returns.  A form value is computed over five
    consecutive outcome markers (2 per ``_win``, 1 per ``_tie``) and divided
    by 10.  With fewer than 12 markers the two windows start at 0 and 5,
    otherwise at 1 and 7.

    Returns ``None`` when the page has fewer than 10 outcome markers.
    """
    page = uf.get_soup(url)

    #: team names
    names = uf.get_names(page)

    #: class attribute of every win/tie/lose marker, in page order
    marker = re.compile('(_win)|(_tie)|(_lose)')
    outcomes = [tag['class'] for tag in page.findAll(attrs={'class': marker})]

    total = len(outcomes)
    if total < 10:
        return None
    first_start, second_start = (0, 5) if total < 12 else (1, 7)

    def form_from(start):
        """Form over the five markers beginning at ``start``."""
        points = 0
        for outcome in outcomes[start:start + 5]:
            if outcome == ['_win']:
                points += 2
            elif outcome == ['_tie']:
                points += 1
        return points / 10

    row = list(names) + [form_from(first_start), form_from(second_start)]

    #: match result
    row += uf.get_results(page)
    return row
def get_form(url):
    """Return ``names + [form1, form2] + results`` for a match page.

    Outcome markers are the elements whose class matches ``_win``, ``_tie``
    or ``_lose``.  Each form covers five consecutive markers, scoring 2 for
    a marker equal to ``["_win"]`` and 1 for ``["_tie"]``, divided by 10.
    The windows start at 0 and 5 when there are 10 or 11 markers and at 1
    and 7 when there are 12 or more.  With fewer than 10 markers the
    function returns ``None``.
    """
    page = uf.get_soup(url)
    row = list(uf.get_names(page))

    #: collect the class attribute of every outcome marker
    pattern = re.compile("(_win)|(_tie)|(_lose)")
    outcomes = [tag["class"] for tag in page.findAll(attrs={"class": pattern})]

    if len(outcomes) < 10:
        return None
    if len(outcomes) < 12:
        window_starts = (0, 5)
    else:
        window_starts = (1, 7)

    #: one form value per window
    for start in window_starts:
        window = outcomes[start:start + 5]
        points = sum(
            2 if outcome == ["_win"] else 1 if outcome == ["_tie"] else 0
            for outcome in window
        )
        row.append(points / 10)

    #: match result goes last
    row += uf.get_results(page)
    return row
import useful_functions as uf

#: championat.com table: the text of every 7th link inside the
#: "sport__tables" element is collected (presumably the team names)
champ = uf.get_soup(
    "http://www.championat.com/football/_england/548/table/all.html")
champ = champ.find(attrs={'class': 'sport__tables'})
champ = champ.findAll('a')
res_champ = [link.text for link in champ[::7]]

#: statto.com table: the text of every "team" element except the first
statto = uf.get_soup(
    "http://www.statto.com/football/stats/england/premier-league/2012-2013/table"
)
statto = statto.findAll(attrs={'class': 'team'})[1:]
res_statto = [cell.text for cell in statto]

#: pair the two lists by position: championat name -> statto name
res = {name: res_statto[pos] for pos, name in enumerate(res_champ)}
print(res)
import useful_functions as uf

#: names as spelled on championat.com: text of links 0, 7, 14, ... inside
#: the "sport__tables" element
champ = uf.get_soup("http://www.championat.com/football/_england/548/table/all.html")
champ = champ.find(attrs={'class': 'sport__tables'}).findAll('a')
res_champ = [champ[pos].text for pos in range(0, len(champ), 7)]

#: names as spelled on statto.com: every "team" element after the first
statto = uf.get_soup("http://www.statto.com/football/stats/england/premier-league/2012-2013/table")
statto = statto.findAll(attrs={'class': 'team'})[1:]
res_statto = [entry.text for entry in statto]

#: map each championat name to the statto name at the same position
res = {}
for pos, champ_name in enumerate(res_champ):
    res[champ_name] = res_statto[pos]
print(res)