Python get_soup Examples, useful_functions.get_soup Python Examples

Example #1

0

Show file

def get_concentration(driver, url):
    """Build the "concentration" feature row for the match page at ``url``.

    Args:
        driver: Browser driver forwarded to ``uf.get_statto_soup``.
        url: Address of the match page, handed to ``uf.get_soup``.

    Returns:
        A list made of the names returned by ``uf.get_names`` followed by
        one coefficient per team, ``1 - x / 5``, where ``x`` is the value
        recorded for that team in the loop below (0 when nothing qualified).
    """
    soup = uf.get_soup(url)
    names = uf.get_names(soup)

    #: one "match__team__tournirs" element per team, in page order
    history_blocks = list(soup.findAll(attrs={'class': "match__team__tournirs"}))
    #: get_loses lives elsewhere in the module; the loop below reads the keys
    #: 'date', 'rival', 'total' and 'match' from every item it yields
    lost_matches = [get_loses(name, block)
                    for name, block in zip(names, history_blocks)]

    #: only the first two teams get a value
    penalties = [0, 0]
    for idx, team_loses in enumerate(lost_matches[:2]):
        for match in team_loses:
            statto = uf.get_statto_soup(driver, match['date'])
            own = uf.championat_statto[names[idx]]
            rival = uf.championat_statto[match['rival']]
            info = uf.get_statto_teams_info(own, rival, statto)[:2]
            #: NOTE(review): -7 looks like a table-position gap threshold;
            #: confirm against uf.get_statto_teams_pos_diff
            if uf.get_statto_teams_pos_diff(info[0], info[1]) < -7:
                #: a later qualifying match overwrites an earlier one
                penalties[idx] = 6 - match['total'] + match['match']

    #: names first, then the two coefficients
    return list(names) + [1 - penalty / 5 for penalty in penalties]

Example #2

0

Show file

def get_all_concentration(path="./extracted_concentration_13_14.txt"):
    """Write one tab-separated concentration row per listed match to ``path``.

    The listing comes from ``uf.get_soup()`` called without arguments; every
    element with class ``_res`` is treated as one match and the ``href`` of
    its first link is appended to the championat.com host.  Each match is
    attempted up to five times; after the fifth failure its URL is printed
    and the match is skipped.

    Args:
        path: Output file, opened for writing with windows-1251 encoding.
    """
    max_tries = 5
    with uf.ChromeDriver() as driver, open(path, 'w', encoding='windows-1251') as handle:
        soup = uf.get_soup()
        handle.write("name1\tname2\tconcentration1\tconcentration2\n")
        matches = soup.findAll(attrs={'class': '_res'})
        print("Starting extracting concentraion")

        for cnt, match in enumerate(matches):
            print(cnt + 1)
            for attempt in range(1, max_tries + 1):
                try:
                    row = get_concentration(driver, 'http://www.championat.com' + match.findAll('a')[0]['href'])
                    break
                except Exception as exc:
                    print('On try {0} smth went wrong: {1}'.format(attempt, exc))
                    if attempt == max_tries:
                        print('Oh, well:\n\t', 'http://www.championat.com' + match.findAll('a')[0]['href'])
            else:
                #: every attempt failed -- nothing to write for this match
                continue

            handle.write('\t'.join(str(value) for value in row) + '\n')
            #: push rows to disk after every fifth listed match
            if cnt % 5 == 4:
                handle.flush()

        print("Extraction completed")

Example #3

0

Show file

File: motivation_extraction.py Project: Albinutte/football-prediction

def get_all_motivation(path="./extracted_motivation_13_14.txt"):
    """Write one tab-separated motivation row per listed match to ``path``.

    Matches are the ``_res`` elements of the page returned by
    ``uf.get_soup()``.  ``get_motivation`` is tried up to five times per
    match; a match that fails all five attempts is reported and skipped.

    Args:
        path: Output file, opened for writing with windows-1251 encoding.
    """
    with uf.ChromeDriver() as driver, open(path, 'w', encoding='windows-1251') as handle:
        soup = uf.get_soup()
        handle.write("name1\tname2\tmotivation1\tmotivation2\n")
        matches = soup.findAll(attrs={'class': '_res'})
        for number, match in enumerate(matches, start=1):
            print(number)
            failures = 0
            fetched = False
            while not fetched and failures < 5:
                try:
                    motivation = get_motivation('http://www.championat.com' + match.findAll('a')[0]['href'], driver)
                    fetched = True
                except Exception as exc:
                    failures += 1
                    print('On try {0} smth went wrong: {1}'.format(failures, exc))
                    if failures == 5:
                        print('I give up; shit happens. Check it out!')
                        print(exc)
            if not fetched:
                continue
            handle.write('\t'.join(str(value) for value in motivation) + '\n')
            #: flush after every fifth listed match
            if number % 5 == 0:
                handle.flush()
        print("Extraction completed")
        handle.flush()

Example #4

0

Show file

def get_all_motivation(path="./extracted_motivation.txt"):
    """Write one tab-separated motivation row per listed match to ``path``.

    Matches are the ``_res`` elements of the page returned by
    ``uf.get_soup()``.  ``get_motivation`` is tried up to five times per
    match; when all attempts fail the match is reported, an audible alarm
    is played and the match is skipped.

    Unlike the earlier version, both alarms are bounded: the failure alarm
    no longer loops forever (which made the skip logic unreachable and hung
    the run on the first bad match), and the completion beep is played once
    after the output file and the driver have been closed.  Buffered rows
    are flushed before the failure alarm so they survive if the operator
    stops the process there.

    Relies on the module-level ``time`` and ``winsound`` imports;
    ``winsound`` is available on Windows only.

    Args:
        path: Output file, opened for writing with windows-1251 encoding.
    """
    max_tries = 5
    alarm_beeps = 3
    with uf.ChromeDriver() as driver, open(path, 'w', encoding='windows-1251') as handle:
        soup = uf.get_soup()
        handle.write("name1\tname2\tmotivation1\tmotivation2\n")
        matches = soup.findAll(attrs={'class': '_res'})
        for cnt, match in enumerate(matches):
            print(cnt + 1)
            for attempt in range(1, max_tries + 1):
                try:
                    motivation = get_motivation('http://www.championat.com' + match.findAll('a')[0]['href'], driver)
                    break
                except Exception as e:
                    print('On try {0} smth went wrong: {1}'.format(attempt, e))
                    if attempt == max_tries:
                        print('I give up; shit happens. Check it out!')
                        print(e)
                        #: make sure everything gathered so far is on disk
                        #: before drawing the operator's attention
                        handle.flush()
                        for _ in range(alarm_beeps):
                            time.sleep(1)
                            winsound.Beep(800, 1000)
            else:
                #: all attempts failed -- skip this match
                continue
            handle.write('\t'.join(str(e) for e in motivation) + '\n')
            if cnt % 5 == 4:
                handle.flush()
        print("Extraction completed")
    #: file and driver are closed at this point; signal completion once
    winsound.Beep(800, 10000)

Example #5

0

Show file

def get_history(url):
    """Collect the head-to-head history row for the match page at ``url``.

    Returns:
        ``None`` when the page has no ``table match__history__table``
        element.  Otherwise a list: the texts of the ``match__team__name``
        elements, then the summed ``get_winner`` value of the first two
        history rows divided by 4, then whatever ``uf.get_results`` yields.
    """
    soup = uf.get_soup(url)

    #: team names
    row = [tag.text for tag in soup.findAll(attrs={'class': 'match__team__name'})]

    #: the icon source is looked up before the table check, so a page
    #: without that icon fails here regardless of the table
    logo = soup.find(attrs={'class': re.compile('match__timeline__team__icon')})['src']
    table = soup.find(attrs={'class': 'table match__history__table'})
    if table is None:
        return None

    #: get_winner is defined elsewhere; it is handed the icon source and one
    #: table row at a time
    total = sum(get_winner(logo, tr) for tr in table.findAll('tr', limit=2))
    row.append(total / 4)

    #: match result
    row.extend(uf.get_results(soup))
    return row

Example #6

0

Show file

def get_all_goal_pos_diff(path="./extracted_goal_score_diff_13_14.txt"):
    """Write one tab-separated goal/score difference row per match to ``path``.

    Matches are the ``_res`` elements of the page returned by
    ``uf.get_soup()``.  ``get_goal_pos_diff`` is tried up to five times per
    match; a match that fails five times is reported and skipped.

    Args:
        path: Output file, opened for writing with the platform's default
            text encoding.
    """
    with uf.ChromeDriver() as driver, open(path, 'w') as handle:
        soup = uf.get_soup()
        handle.write("name1\tname2\tgoal_diff\tscore_diff\n")
        matches = soup.findAll(attrs={'class': '_res'})
        print("Starting extracting goal and score diffs")

        for index, match in enumerate(matches):
            print(index + 1)
            succeeded = False
            for attempt in range(1, 6):
                try:
                    row = get_goal_pos_diff(driver, 'http://www.championat.com' + match.findAll('a')[0]['href'])
                except Exception as exc:
                    print('On try {0} smth went wrong: {1}'.format(attempt, exc))
                    if attempt == 5:
                        print('I give up; date is probably too early')
                else:
                    succeeded = True
                    break
            if not succeeded:
                continue

            handle.write('\t'.join(str(value) for value in row) + '\n')
            #: flush after every fifth listed match
            if index % 5 == 4:
                handle.flush()

        print("Extraction completed")

Example #7

0

Show file

def get_goal_pos_diff(driver, url):
    """Compute the normalised goal and score differences for one match.

    Args:
        driver: Browser driver forwarded to ``uf.get_statto_soup``.
        url: Address of the match page, handed to ``uf.get_soup``.

    Returns:
        The names from ``uf.get_names`` followed by ``goal_diff`` and
        ``pos_diff``.  Each is ``0.5 + (team1 - team2) / (2 * (first - last))``
        taken over the values ``get_values`` extracts from the six items
        returned by ``uf.get_statto_teams_info``.

    Raises:
        ZeroDivisionError: when the "first" and "last" reference values
            coincide.
    """
    soup = uf.get_soup(url)

    #: match date and team names
    date = uf.get_date(soup)
    teams = uf.get_names(soup)

    #: the statto page for that date, with names translated via the
    #: uf.championat_statto mapping
    statto = uf.get_statto_soup(driver, date)
    statto_teams = [uf.championat_statto[name] for name in teams]

    #: six raw entries: both teams, then first/last, then first/last "goals"
    raw_team1, raw_team2, raw_first, raw_last, raw_first_goals, raw_last_goals = \
        uf.get_statto_teams_info(statto_teams[0], statto_teams[1], statto)

    #: get_values is defined elsewhere; its results are indexed with [0]
    #: and [1] below
    team1, team2, first, last, first_goals, last_goals = [
        get_values(raw)
        for raw in (raw_team1, raw_team2, raw_first, raw_last,
                    raw_first_goals, raw_last_goals)
    ]

    goal_span = 2 * (first_goals[0] - last_goals[0])
    goal_diff = 0.5 + (team1[0] - team2[0]) / goal_span
    pos_span = 2 * (first[1] - last[1])
    pos_diff = 0.5 + (team1[1] - team2[1]) / pos_span

    return list(teams) + [goal_diff, pos_diff]

Example #8

0

Show file

File: motivation_extraction.py Project: updatedennismwangi/football-prediction

def get_motivation(url, driver):
    """Compute the motivation feature row for the match page at ``url``.

    Args:
        url: Address of the match page, handed to ``uf.get_soup``.
        driver: Browser driver forwarded to ``uf.get_statto_soup``.

    Returns:
        The team names followed by two values:

        * ``1, 1`` when either team lists the other in ``derbies`` or the
          tour number is above 33;
        * ``0, 0`` when the tour number is below 16;
        * otherwise, per team, ``1 - (dist / 3) / (TOURS - tour)`` where
          ``dist`` is the smallest non-zero gap between the team's score and
          the scores from ``get_key_pos_scores``; values outside ``[0, 1]``
          are replaced by 0.

    Raises:
        ValueError: when a team has no non-zero gap to any key score.
    """
    soup = uf.get_soup(url)
    teams = list(uf.get_names(soup))

    #: derby check, in both directions
    if any(teams[i] in derbies and teams[1 - i] in derbies[teams[i]]
           for i in (0, 1)):
        return teams + [1, 1]

    #: late and early tours get fixed values
    tour = uf.get_tour_number(soup)
    if tour > 33:
        return teams + [1, 1]
    if tour < 16:
        return teams + [0, 0]

    #: statto table for the match date
    date = uf.get_date(soup)
    statto = uf.get_statto_soup(driver, date)
    statto_rows = statto.findAll('form')[1].findAll('tr')
    statto_teams = [uf.championat_statto[name] for name in teams]

    #: team scores and the scores at the key positions
    info = uf.get_statto_teams_info(statto_teams[0], statto_teams[1], statto)
    team1_score = uf.get_statto_score(info[0])
    team2_score = uf.get_statto_score(info[1])
    key_pos_scores = get_key_pos_scores(statto_rows)

    def nearest_gap(score):
        #: zero gaps are dropped before taking the minimum
        return min([gap for gap in [abs(key - score) for key in key_pos_scores]
                    if gap])

    dist1 = nearest_gap(team1_score)
    dist2 = nearest_gap(team2_score)

    left = TOURS - tour

    def to_motivation(dist):
        val = 1 - (dist / 3) / left
        return 0 if val < 0 or val > 1 else val

    return teams + [to_motivation(dist1), to_motivation(dist2)]

Example #9

0

Show file

File: motivation_extraction.py Project: Albinutte/football-prediction

def get_motivation(url, driver):
    """Return team names plus one motivation value per team for a match.

    Short-circuits to ``[..., 1, 1]`` for pairs found in ``derbies`` and for
    tours above 33, and to ``[..., 0, 0]`` for tours below 16.  Otherwise
    each value is ``1 - (dist / 3) / (TOURS - tour)``, with ``dist`` the
    smallest non-zero distance between the team's statto score and the
    scores from ``get_key_pos_scores``; results outside ``[0, 1]`` become 0.

    Args:
        url: Address of the match page, handed to ``uf.get_soup``.
        driver: Browser driver forwarded to ``uf.get_statto_soup``.
    """
    soup = uf.get_soup(url)

    #: names open the result row
    row = []
    row.extend(uf.get_names(soup))

    #: derbies: either team may be the key of the lookup
    for idx in range(2):
        home, guest = row[idx], row[1 - idx]
        if home in derbies and guest in derbies[home]:
            return row + [1, 1]

    #: season end or start
    tour = uf.get_tour_number(soup)
    if tour > 33:
        row.extend([1, 1])
        return row
    if tour < 16:
        row.extend([0, 0])
        return row

    #: statto page for the date of the match
    statto = uf.get_statto_soup(driver, uf.get_date(soup))
    table_rows = statto.findAll('form')[1].findAll('tr')
    statto_names = [uf.championat_statto[name] for name in row]

    #: scores of both teams and of the key positions
    info = uf.get_statto_teams_info(statto_names[0], statto_names[1], statto)
    scores = [uf.get_statto_score(info[0]), uf.get_statto_score(info[1])]
    key_pos_scores = get_key_pos_scores(table_rows)

    #: smallest non-zero distance to a key position, per team
    gaps = []
    for score in scores:
        distances = [abs(key - score) for key in key_pos_scores]
        gaps.append(min([d for d in distances if d]))

    #: scale by the number of tours still to play
    left = TOURS - tour
    for gap in gaps:
        val = 1 - (gap / 3) / left
        row.append(0 if (val < 0 or val > 1) else val)
    return row

Example #10

0

Show file

File: form_extraction.py Project: updatedennismwangi/football-prediction

def get_all_forms(path="./extracted_form_13_14.txt"):
    """Write one tab-separated form row per listed match to ``path``.

    Matches are the ``_res`` elements of the page returned by
    ``uf.get_soup()``; matches for which ``get_form`` returns ``None`` are
    left out of the file.

    Args:
        path: Output file, opened for writing with windows-1251 encoding.
    """
    with open(path, "w", encoding='windows-1251') as handle:
        soup = uf.get_soup()
        print("Starting extracting forms")
        handle.write('name1\tname2\tform1\tform2\tresult\n')
        for cnt, node in enumerate(soup.findAll(attrs={'class': '_res'}), start=1):
            print(cnt)
            form = get_form('http://www.championat.com' + node.findAll('a')[0]['href'])
            if form is not None:
                handle.write('\t'.join(map(str, form)) + '\n')
            #: flush after every fifth match, written or not
            if cnt % 5 == 0:
                handle.flush()
        print("Forms extraction finished")

Example #11

0

Show file

File: form_extraction.py Project: Albinutte/football-prediction

def get_all_forms(path="./extracted_form_13_14.txt"):
    """Dump the form of every listed match into ``path``, one row per match.

    Every element with class ``norm`` on the page from ``uf.get_soup()`` is
    treated as a match link; its ``href`` is appended to the championat.com
    host and passed to ``get_form``.  ``None`` results are skipped.

    Args:
        path: Output file, opened for writing with the platform's default
            text encoding.
    """
    with open(path, "w") as handle:
        soup = uf.get_soup()
        print("Starting extracting forms")
        handle.write('name1\tname2\tform1\tform2\tresult\n')
        for index, link in enumerate(soup.findAll(attrs={'class': 'norm'})):
            position = index + 1
            print(position)
            form = get_form('http://www.championat.com' + link['href'])
            if form is not None:
                fields = [str(value) for value in form]
                handle.write('\t'.join(fields) + '\n')
            #: every fifth link triggers a flush
            if not position % 5:
                handle.flush()
        print("Forms extraction finished")

Example #12

0

Show file

File: form_extraction.py Project: elshobaky/football-prediction

def get_all_forms(path="./extracted_form_13_14.txt"):
    """Extract the form of each listed match and store it in ``path``.

    The match list is every ``_res`` element of the page returned by
    ``uf.get_soup()``.  A row is written only when ``get_form`` returns
    something other than ``None``.

    Args:
        path: Output file, opened for writing with windows-1251 encoding.
    """
    host = "http://www.championat.com"
    with open(path, "w", encoding="windows-1251") as handle:
        soup = uf.get_soup()
        print("Starting extracting forms")
        handle.write("name1\tname2\tform1\tform2\tresult\n")
        for number, entry in enumerate(soup.findAll(attrs={"class": "_res"}), 1):
            print(number)
            form = get_form(host + entry.findAll("a")[0]["href"])
            line = None if form is None else "\t".join(str(item) for item in form) + "\n"
            if line is not None:
                handle.write(line)
            #: flush on every fifth entry regardless of the outcome
            if number % 5 == 0:
                handle.flush()
        print("Forms extraction finished")

Example #13

0

Show file

File: history_extraction.py Project: updatedennismwangi/football-prediction

def get_all_history(path="./extracted_history_13_14.txt"):
    """Write one tab-separated history row per listed match to ``path``.

    Matches are the ``_res`` elements of the page returned by
    ``uf.get_soup()``; matches for which ``get_history`` returns ``None``
    are left out.

    Args:
        path: Output file, opened for writing with windows-1251 encoding.
    """
    with open(path, 'w', encoding='windows-1251') as handle:
        soup = uf.get_soup()
        print("Starting extracting history")
        handle.write('name1\tname2\thistory\tresult\n')
        for number, match in enumerate(soup.findAll(attrs={'class': '_res'}), start=1):
            ref = 'http://www.championat.com' + match.findAll('a')[0]['href']
            print(number)
            history = get_history(ref)
            if history is not None:
                handle.write('\t'.join(map(str, history)) + '\n')
            #: flushes on the 1st, 6th, 11th, ... match
            if number % 5 == 1:
                handle.flush()
        print("History extracting finished")

Example #14

0

Show file

def get_all_history(path="./extracted_history.txt"):
    """Dump the history of every listed match into ``path``.

    Every element with class ``norm`` on the page from ``uf.get_soup()`` is
    treated as a match link whose ``href`` is appended to the championat.com
    host.  ``None`` results from ``get_history`` are skipped.

    Args:
        path: Output file, opened for writing with the platform's default
            text encoding.
    """
    with open(path, 'w') as handle:
        soup = uf.get_soup()
        print("Starting extracting history")
        handle.write('name1\tname2\thistory\tresult\n')
        for position, link in enumerate(soup.findAll(attrs={'class': 'norm'}), start=1):
            print(position)
            history = get_history('http://www.championat.com' + link['href'])
            if history is not None:
                fields = [str(value) for value in history]
                handle.write('\t'.join(fields) + '\n')
            #: flush after every fifth link
            if position % 5 == 0:
                handle.flush()
        print("History extracting finished")

Example #15

0

Show file

File: form_extraction.py Project: updatedennismwangi/football-prediction

def get_form(url):
    """Return names, both teams' form and the result for the match at ``url``.

    Form is read from the elements whose class matches ``_win``, ``_tie`` or
    ``_lose``: over a window of five such elements a win counts 2 and a tie
    counts 1, and the sum is divided by 10.

    Returns:
        ``None`` when fewer than ten markers are found; otherwise the names
        from ``uf.get_names``, the two form values and whatever
        ``uf.get_results`` yields, in that order.
    """
    soup = uf.get_soup(url)
    names = uf.get_names(soup)

    #: class attribute of every result marker, in page order
    marker_pattern = re.compile('(_win)|(_tie)|(_lose)')
    marks = [tag['class'] for tag in soup.findAll(attrs={'class': marker_pattern})]
    if len(marks) < 10:
        return None

    #: with 12+ markers the windows start at 1 and 7 -- presumably each team
    #: then carries one extra leading marker; TODO confirm against the page
    start1, start2 = (0, 5) if len(marks) < 12 else (1, 7)

    def window_form(start):
        points = 0
        for mark in marks[start:start + 5]:
            #: only an element whose class list is exactly the marker counts
            if mark == ['_win']:
                points += 2
            elif mark == ['_tie']:
                points += 1
        return points / 10

    row = list(names) + [window_form(start1), window_form(start2)]

    #: match result goes last
    row.extend(uf.get_results(soup))
    return row

Example #16

0

Show file

File: form_extraction.py Project: elshobaky/football-prediction

def get_form(url):
    """Gather team names, their recent form and the result of one match.

    The page is scanned for elements whose class matches ``_win``, ``_tie``
    or ``_lose``.  Two five-marker windows are scored (win = 2, tie = 1) and
    each total is divided by 10.

    Returns:
        ``None`` if the page holds fewer than ten markers; otherwise a list
        with the names from ``uf.get_names``, ``form1``, ``form2`` and the
        items from ``uf.get_results``.
    """
    soup = uf.get_soup(url)
    row = list(uf.get_names(soup))

    #: collect the class lists of all result markers
    marks = []
    for tag in soup.findAll(attrs={"class": re.compile("(_win)|(_tie)|(_lose)")}):
        marks.append(tag["class"])

    total = len(marks)
    if total < 10:
        return None
    #: window offsets depend on how many markers the page shows
    #: NOTE(review): the reason for the 1/7 offsets is not visible here
    if total < 12:
        offsets = (0, 5)
    else:
        offsets = (1, 7)

    for offset in offsets:
        window = marks[offset:offset + 5]
        points = sum(
            2 if mark == ["_win"] else 1 if mark == ["_tie"] else 0
            for mark in window
        )
        row.append(points / 10)

    #: the match result closes the row
    row += uf.get_results(soup)
    return row

Example #17

0

Show file

File: championat-statto__names.py Project: updatedennismwangi/football-prediction

import useful_functions as uf

#: Prints a dict that pairs the team names read from the championat.com table
#: with the names read from the statto.com table, matched by position.

#: all links inside the "sport__tables" element of the championat table
champ = uf.get_soup(
    "http://www.championat.com/football/_england/548/table/all.html"
).find(attrs={'class': 'sport__tables'}).findAll('a')
#: every seventh link is used -- presumably one team-name link per table
#: row; TODO confirm against the page layout
res_champ = [link.text for link in champ[::7]]

#: elements with class "team" on the statto table; the first one is dropped
statto = uf.get_soup(
    "http://www.statto.com/football/stats/england/premier-league/2012-2013/table"
).findAll(attrs={'class': 'team'})[1:]
res_statto = [cell.text for cell in statto]

#: indexing (not zip) keeps the IndexError when statto has fewer entries
res = {name: res_statto[pos] for pos, name in enumerate(res_champ)}
print(res)

Example #18

0

Show file

File: championat-statto__names.py Project: Albinutte/football-prediction

import useful_functions as uf


#: Builds and prints the championat.com -> statto.com team-name mapping by
#: pairing both league tables position by position.

CHAMPIONAT_TABLE_URL = "http://www.championat.com/football/_england/548/table/all.html"
STATTO_TABLE_URL = "http://www.statto.com/football/stats/england/premier-league/2012-2013/table"

#: links of the "sport__tables" element; only indices 0, 7, 14, ... are read
champ = uf.get_soup(CHAMPIONAT_TABLE_URL)
champ = champ.find(attrs={'class': 'sport__tables'}).findAll('a')
res_champ = []
for position in range(0, len(champ), 7):
    res_champ.append(champ[position].text)

#: "team" elements of the statto table without the first one
statto = uf.get_soup(STATTO_TABLE_URL).findAll(attrs={'class': 'team'})[1:]
res_statto = [entry.text for entry in statto]

#: pair the two lists by index; a shorter statto list raises IndexError
res = {}
for position, champ_name in enumerate(res_champ):
    res[champ_name] = res_statto[position]
print(res)