コード例 #1
0
ファイル: whoscored.py プロジェクト: srgfrlv/whoscored
def get_seasons(tournament_id, overwrite=False):
    """Scrape a tournament page and upsert its seasons into ``seasons``.

    Besides the seasons, the same page is used to back-fill an empty
    tournament name from the page header and to upsert every tournament
    listed in the page's tournament drop-down.

    Args:
        tournament_id: Value matched against ``tournamentId`` in the
            ``tournaments`` and ``seasons`` collections.
        overwrite: When false and at least one season document already
            exists for the tournament, nothing is requested.

    Returns:
        True when the work is skipped because seasons already exist, False
        when the HTTP response status is not 200, otherwise None.
    """
    if seasons.find_one({'tournamentId': tournament_id}) and not overwrite:
        print('Seasons already exist')
        return True

    tournament = tournaments.find_one({'tournamentId': tournament_id})
    page = SITE+'/Regions/{regionId}/Tournaments/{tournamentId}'.format(**tournament)
    r = requests.get(page, headers=HEADERS)
    print(r.url)

    if r.status_code != 200:
        return False

    content = html.fromstring(r.text)
    season_links = content.xpath('//select[@id="seasons"]/option/@value')
    season_names = content.xpath('//select[@id="seasons"]/option/text()')

    for season_link, season_name in zip(season_links, season_names):
        # The season id is the last path segment of the option's link.
        seasons.update_one({'seasonId': int(season_link.split('/')[-1])},
                           {'$setOnInsert': {
                               'name': season_name,
                               'regionId': tournament['regionId'],
                               'tournamentId': tournament['tournamentId']}},
                           upsert=True)

    # Sometimes the tournament doesn't have a name in the main menu - use the title on the page
    if tournament['name'] == '':
        tournament_name = content.xpath('//h1[@class="tournament-header"]/text()')[0].strip()
        # '$set' is required here: the tournament document already exists
        # (it was just read above) and its empty name has to be overwritten.
        tournaments.update_one({'tournamentId': tournament['tournamentId']},
                               {'$set': {'name': tournament_name}})

    # Some tournaments don't show up in the main menu - take a fuller list from the dropdown menu
    tournament_links = content.xpath('//select[@id="tournaments"]/option/@value')
    tournament_names = content.xpath('//select[@id="tournaments"]/option/text()')

    for tournament_link, tournament_name in zip(tournament_links, tournament_names):
        # '$setOnInsert' leaves tournaments that are already stored untouched.
        tournaments.update_one({'tournamentId': int(tournament_link.split('/')[-1])},
                               {'$setOnInsert': {
                                   'name': tournament_name,
                                   'regionId': tournament['regionId']}},
                               upsert=True)

    wait()
コード例 #2
0
ファイル: whoscored.py プロジェクト: srgfrlv/whoscored
def get_stages(season_id, overwrite=False):
    """Scrape a season page and upsert its stages into ``stages``.

    When the page has no stage drop-down, a single stage is derived from the
    "Fixtures" navigation link and named after the page's first ``<h1>``.

    Args:
        season_id: Value matched against ``seasonId`` in the ``seasons`` and
            ``stages`` collections.
        overwrite: When false and a stage document already exists for the
            season, nothing is requested.

    Returns:
        True when skipped because stages already exist, False when the HTTP
        response status is not 200, otherwise None.
    """
    existing = stages.find_one({'seasonId': season_id})
    if existing and not overwrite:
        print('Stages already exist')
        return True

    season = seasons.find_one({'seasonId': season_id})
    path = '/Regions/{regionId}/Tournaments/{tournamentId}/Seasons/{seasonId}'.format(**season)
    response = requests.get(SITE + path, headers=HEADERS)
    print(response.url)

    if response.status_code != 200:
        return False

    def insert_fields(name):
        # Fields written only when the stage document is first created.
        return {
            'name': name,
            'regionId': season['regionId'],
            'tournamentId': season['tournamentId'],
            'seasonId': season['seasonId'],
        }

    tree = html.fromstring(response.text)
    option_links = tree.xpath("//select[@id='stages']/option/@value")
    option_names = tree.xpath("//select[@id='stages']/option/text()")

    if option_links:
        for link, name in zip(option_links, option_names):
            # The stage id is the last path segment of the option's link.
            stage_id = int(link.split('/')[-1])
            stages.update_one({'stageId': stage_id},
                              {'$setOnInsert': insert_fields(name)},
                              upsert=True)
    else:
        # No drop-down: take the stage id from the "Fixtures" link, where it
        # is the third path segment from the end.
        fixture_link = tree.xpath("//div[@id='sub-navigation']/ul/li/a[text()='Fixtures']/@href")[0]
        stage_id = int(fixture_link.split("/")[-3])
        stages.update_one({'stageId': stage_id},
                          {'$setOnInsert': insert_fields(tree.xpath('//h1/text()')[0].strip())},
                          upsert=True)

    wait()
コード例 #3
0
def load_data(limit=0):
    """Build a DataFrame with one row per shot event stored in ``events``.

    Own goals are excluded by the query. Each row carries the shot's scaled
    coordinates, distance and angle relative to fixed reference points, the
    recognised qualifiers, and names looked up from the related collections.
    Every lookup that finds no document yields ``None`` for the fields that
    depend on it instead of aborting the load.

    Args:
        limit: Passed to the cursor's ``limit``; the default of 0 is passed
            through unchanged.

    Returns:
        A ``DataFrame`` built from the list of shot dictionaries.
    """
    BODYPARTS = ['RightFoot', 'LeftFoot', 'Head', 'OtherBodyPart']
    PATTERNOFPLAY = ['RegularPlay', 'FastBreak', 'SetPiece', 'FromCorner', 'Penalty', 'DirectFreekick', 'ThrowinSetPiece']
    SHOTLOCATION = ['SmallBoxLeft', 'SmallBoxCentre', 'SmallBoxRight',
                    'DeepBoxLeft', 'BoxLeft', 'BoxCentre', 'BoxRight', 'DeepBoxRight',
                    'OutOfBoxDeepLeft', 'OutOfBoxLeft', 'OutOfBoxCentre', 'OutOfBoxRight', 'OutOfBoxDeepRight',
                    'ThirtyFivePlusLeft', 'ThirtyFivePlusCentre', 'ThirtyFivePlusRight']

    # Reference points in the scaled coordinate system used below: distance
    # is measured to ``c``, the angle is the one subtended by ``l`` and ``r``.
    # NOTE(review): presumably the goal centre and the two posts - confirm.
    l, c, r = array([104, 34]), array([104, 38]), array([104, 42])
    shots = []
    for event in events.find({'isShot': True, 'isOwnGoal': {'$exists': False}}).limit(limit):
        shot = {}
        shot['id'] = int(event['id'])
        shot['Goal'] = event.get('isGoal') is True
        shot['X'] = 1.04 * event['x']
        shot['Y'] = 0.76 * event['y']

        p = array([shot['X'], shot['Y']])
        shot['Distance'] = norm(p - c)
        shot['Angle'] = arccos(dot(p - l, p - r) / norm(p - l) / norm(p - r))

        shot_qualifiers = {q['type']['displayName']: q.get('value') for q in event['qualifiers']}
        for qualifier, value in shot_qualifiers.items():
            if qualifier in BODYPARTS:
                shot['BodyPart'] = qualifier
            elif qualifier in PATTERNOFPLAY:
                shot['PatternOfPlay'] = qualifier
            elif qualifier in SHOTLOCATION:
                shot['ShotLocation'] = qualifier
            elif qualifier == 'Zone':
                shot['Zone'] = value
            elif qualifier == 'RelatedEventId':
                related_event = events.find_one({'eventId': event['relatedEventId'],
                                                 'matchId': event['matchId'],
                                                 'teamId': event['teamId']})
                shot['RelatedEventType'] = related_event['type']['displayName'] if related_event else None

        region = regions.find_one({'regionId': event['regionId']})
        shot['Region'] = region['name'] if region else None

        tournament = tournaments.find_one({'tournamentId': event['tournamentId']})
        shot['Tournament'] = tournament['name'] if tournament else None

        season = seasons.find_one({'seasonId': event['seasonId']})
        shot['Season'] = season['name'] if season else None

        stage = stages.find_one({'stageId': event.get('stageId')})
        shot['Stage'] = stage['name'] if stage else None

        team = teams.find_one({'teamId': event['teamId']})
        shot['Team'] = team['name'] if team else None

        player = players.find_one({'playerId': event['playerId']})
        shot['Player'] = player['name'] if player else None

        match = matches.find_one({'matchId': event['matchId']})
        if team and match:
            # The side is decided by comparing the team name with the
            # match's home team name.
            is_home = team['name'] == match['home']['name']
            shot['Side'] = 'home' if is_home else 'away'
            shot['Opponent'] = match['away']['name'] if is_home else match['home']['name']
        else:
            # Without both documents the side cannot be determined; follow
            # the same None convention as the lookups above.
            shot['Side'] = None
            shot['Opponent'] = None
        shot['Date'] = match['startDate'] if match else None

        shot['Period'] = event['period']['displayName']
        shot['Minute'] = event['minute']

        shots.append(shot)

        # Progress output every ten shots.
        if len(shots) % 10 == 0:
            print('{0} shots in data set'.format(len(shots)))

    print('{0} shots in data set'.format(len(shots)))

    return DataFrame(shots)
コード例 #4
0
def get_fixtures(tournament_id, season_id):
    """Scrape a season's full fixture list and upsert its teams and matches.

    For every fixture row the home and away teams are inserted into ``teams``
    if missing, and the match document in ``matches`` is created or updated
    with its date, kick-off time, team ids and score text.

    Args:
        tournament_id: Value matched against ``tournament`` in the
            ``tournaments`` and ``seasons`` collections.
        season_id: Value matched against ``season`` in ``seasons``.

    Returns:
        False when the HTTP response status is not 200, otherwise None.
    """
    season = seasons.find_one({
        'tournament': tournament_id,
        'season': season_id
    })
    tournament = tournaments.find_one({'tournament': tournament_id})
    # Tournaments flagged as cups use a different URL segment.
    if tournament.get('cup') == 1:
        url = '{0}/spielplan/gesamtspielplan/pokalwettbewerb/{tournament}/saison_id/{season}'.format(
            SITE, **season)
    else:
        url = '{0}/spielplan/gesamtspielplan/wettbewerb/{tournament}/saison_id/{season}'.format(
            SITE, **season)
    r = requests.get(url, headers=HEADERS)
    print(r.url, tournament['name'], season['name'])

    if r.status_code != 200:
        wait()
        return False

    content = html.fromstring(r.text)
    # The date must always be a ``datetime`` (never a plain ``date``) because
    # ``datestamp.date()`` is called when the match document is built.
    datestamp, timestamp = datetime.min, time.min
    for row in content.xpath(
            '//div[@class="box"]/table/tbody/tr[not(td/@colspan)]'):
        # Column 3 holds the home team link, column 7 the away team link.
        team_ids = {}
        for side, column in (('home', 3), ('away', 7)):
            link = 'td[{0}]/a'.format(column)
            team_ids[side] = int(row.xpath(link + '/@id')[0])
            teams.update_one({'team': team_ids[side]}, {
                '$setOnInsert': {
                    'name': row.xpath(link + '/text()')[0],
                    'region': tournament['region'],
                    'national': False
                }
            },
                             upsert=True)

        # A blank time cell keeps the time parsed from an earlier row.
        kickoff = row.xpath('td[2]/text()')[0].strip()
        if kickoff:
            timestamp = datetime.strptime(kickoff, '%I:%M %p').time()

        date_links = row.xpath('td[1]/a/@href')
        if date_links:
            datestring = date_links[0].split('/')[-1]
            if datestring == '0000-00-00':
                # Placeholder date on the page: store the minimum datetime.
                datestamp = datetime.min
            else:
                datestamp = datetime.strptime(datestring, '%Y-%m-%d')
        else:
            # No link: the date is the last space-separated token of the cell.
            datestamp = datetime.strptime(
                row.xpath('td[1]/text()')[0].strip().split(' ')[-1],
                '%m/%d/%y')

        matches.update_one(
            {'match': int(row.xpath('td[5]/a/@href')[0].split('/')[-1])}, {
                '$setOnInsert': {
                    'season': season['season'],
                    'tournament': tournament['tournament'],
                    'region': tournament['region']
                },
                '$set': {
                    'date': datestamp,
                    'time': datetime.combine(datestamp.date(), timestamp),
                    'home': {
                        'team': team_ids['home']
                    },
                    'away': {
                        'team': team_ids['away']
                    },
                    'score': row.xpath('td[5]/a/text()')[0]
                }
            },
            upsert=True)

    wait()