def extract_data(filename='shots.csv'):
    """Export shot events to a CSV file.

    Selects every document in ``events`` with ``isShot`` set to True and no
    ``isOwnGoal`` field, ordered by ``matchId`` descending, and writes one row
    per event using the columns ``FIELDNAMES + QUALIFIERS`` (any other event
    keys are ignored by the writer).

    Before it is written, each event is enriched in place:

    * ``isGoal`` is normalised to 1 or 0.
    * ``Region``, ``Tournament``, ``Season``, ``Stage``, ``Team`` and
      ``Player`` receive the ``name`` stored for the corresponding id, or
      None when the event carries no (truthy) id.
    * every entry of ``QUALIFIERS`` becomes a column holding the qualifier's
      ``value`` (1 when the qualifier has no ``value`` key, 0 when the
      qualifier is absent from the event).

    Args:
        filename: Path of the CSV file to create or overwrite.

    Raises:
        KeyError: If an event references a region, tournament, season, stage
            or team id that has no stored name, or has no ``qualifiers`` key.
    """
    # id -> display name lookups, built once so the event loop never has to
    # query the reference collections.
    region_names = {doc['regionId']: doc['name'] for doc in regions.find()}
    tournament_names = {
        doc['tournamentId']: doc['name'] for doc in tournaments.find()
    }
    season_names = {doc['seasonId']: doc['name'] for doc in seasons.find()}
    stage_names = {doc['stageId']: doc['name'] for doc in stages.find()}
    team_names = {doc['teamId']: doc['name'] for doc in teams.find()}
    player_names = {doc['playerId']: doc['name'] for doc in players.find()}

    # (output column, id field on the event, lookup table)
    labelled_ids = (
        ('Region', 'regionId', region_names),
        ('Tournament', 'tournamentId', tournament_names),
        ('Season', 'seasonId', season_names),
        ('Stage', 'stageId', stage_names),
        ('Team', 'teamId', team_names),
    )

    shot_query = {'isShot': True, 'isOwnGoal': {'$exists': False}}

    # The context manager guarantees the file is flushed and closed, even if
    # a lookup below raises part-way through the export.
    with open(filename, 'w', newline='\n') as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES + QUALIFIERS,
                                extrasaction='ignore')
        writer.writeheader()

        # Disabled summary statistics, kept for reference:
        # goals = events.count({'isGoal': True, 'isOwnGoal': {'$exists': False}})
        # shots = events.count({'isShot': True, 'isOwnGoal': {'$exists': False}})
        # print('{0:,} goals from {1:,} shots ({2:.1%} shot rate)'.format(goals, shots, goals / shots))

        for event in events.find(shot_query).sort('matchId', -1):
            event['isGoal'] = 1 if event.get('isGoal') else 0

            for label, id_field, names in labelled_ids:
                event[label] = (
                    names[event[id_field]] if event.get(id_field) else None
                )

            # Unknown players are tolerated: report the id, hand it to
            # get_player(), and leave the name blank for this row.
            player_id = event.get('playerId')
            event['Player'] = None
            if player_id:
                try:
                    event['Player'] = player_names[player_id]
                except KeyError:
                    print('Missing playerId: {}'.format(player_id))
                    get_player(player_id)

            event_qualifiers = {
                q['type']['displayName']: q.get('value', 1)
                for q in event['qualifiers']
            }
            for qualifier in QUALIFIERS:
                event[qualifier] = event_qualifiers.get(qualifier, 0)

            writer.writerow(event)
def update_matches(status_code=None):
    """Re-fetch stored matches that have already started.

    Queries ``matches`` for documents with the requested ``statusCode``, no
    ``error`` field, and ``startDate`` / ``startTime`` not later than now,
    ordered by ``statusCode`` ascending then ``startDate`` descending. Each
    hit is printed and passed to ``get_match(..., overwrite=True)``.

    Known status codes:
        0: Error
        1: Pending
        2: Postponed
        3: In-Play
        4: (Not seen)
        5: Abandoned
        6: Complete
        7: Cancelled
        8: Ignore

    Args:
        status_code: A single integer status code to refresh. Anything that
            is not exactly an ``int`` (including None) selects codes 1 and 3.
    """
    # None is not an int either, so this one check covers both fallbacks.
    if type(status_code) is not int:
        status_code = {'$in': [1, 3]}

    query = {
        'statusCode': status_code,
        'error': {'$exists': False},
        'startDate': {'$lte': datetime.today()},
        'startTime': {'$lte': datetime.today()},
    }
    ordering = [('statusCode', 1), ('startDate', -1)]

    for match in matches.find(query).sort(ordering):
        match_id = match['matchId']
        print(match_id, match['statusCode'], match['startTime'])
        get_match(match_id, overwrite=True)


if __name__ == "__main__":
    # Refresh the tournament list, then walk tournament 2 level by level:
    # seasons, then stages, then fixtures.
    get_all_tournaments()

    for tournament in tournaments.find({'tournamentId': 2}):
        get_seasons(tournament['tournamentId'], overwrite=True)

    for season in seasons.find({'tournamentId': 2}):
        get_stages(season['seasonId'], overwrite=True)

    for stage in stages.find({'tournamentId': 2}):
        get_fixtures(stage['stageId'], overwrite=True)
def extract_data(filename='shots.csv'):
    """Export shot events to a CSV file.

    Selects every document in ``events`` with ``isShot`` set to True and no
    ``isOwnGoal`` field, ordered by ``matchId`` descending, and writes one row
    per event using the columns ``FIELDNAMES + QUALIFIERS`` (any other event
    keys are ignored by the writer).

    Before it is written, each event is enriched in place:

    * ``isGoal`` is normalised to 1 or 0.
    * ``Region``, ``Tournament``, ``Season``, ``Stage``, ``Team`` and
      ``Player`` receive the ``name`` stored for the corresponding id, or
      None when the event carries no (truthy) id.
    * every entry of ``QUALIFIERS`` becomes a column holding the qualifier's
      ``value`` (1 when the qualifier has no ``value`` key, 0 when the
      qualifier is absent from the event).

    Args:
        filename: Path of the CSV file to create or overwrite.

    Raises:
        KeyError: If an event references a region, tournament, season, stage
            or team id that has no stored name, or has no ``qualifiers`` key.
    """
    # id -> display name lookups, built once so the event loop never has to
    # query the reference collections.
    region_names = {doc['regionId']: doc['name'] for doc in regions.find()}
    tournament_names = {
        doc['tournamentId']: doc['name'] for doc in tournaments.find()
    }
    season_names = {doc['seasonId']: doc['name'] for doc in seasons.find()}
    stage_names = {doc['stageId']: doc['name'] for doc in stages.find()}
    team_names = {doc['teamId']: doc['name'] for doc in teams.find()}
    player_names = {doc['playerId']: doc['name'] for doc in players.find()}

    # (output column, id field on the event, lookup table)
    labelled_ids = (
        ('Region', 'regionId', region_names),
        ('Tournament', 'tournamentId', tournament_names),
        ('Season', 'seasonId', season_names),
        ('Stage', 'stageId', stage_names),
        ('Team', 'teamId', team_names),
    )

    shot_query = {'isShot': True, 'isOwnGoal': {'$exists': False}}

    # The context manager guarantees the file is flushed and closed, even if
    # a lookup below raises part-way through the export.
    with open(filename, 'w', newline='\n') as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES + QUALIFIERS,
                                extrasaction='ignore')
        writer.writeheader()

        # Disabled summary statistics, kept for reference:
        # goals = events.count({'isGoal': True, 'isOwnGoal': {'$exists': False}})
        # shots = events.count({'isShot': True, 'isOwnGoal': {'$exists': False}})
        # print('{0:,} goals from {1:,} shots ({2:.1%} shot rate)'.format(goals, shots, goals / shots))

        for event in events.find(shot_query).sort('matchId', -1):
            event['isGoal'] = 1 if event.get('isGoal') else 0

            for label, id_field, names in labelled_ids:
                event[label] = (
                    names[event[id_field]] if event.get(id_field) else None
                )

            # Unknown players are tolerated: report the id, hand it to
            # get_player(), and leave the name blank for this row.
            player_id = event.get('playerId')
            event['Player'] = None
            if player_id:
                try:
                    event['Player'] = player_names[player_id]
                except KeyError:
                    print('Missing playerId: {}'.format(player_id))
                    get_player(player_id)

            event_qualifiers = {
                q['type']['displayName']: q.get('value', 1)
                for q in event['qualifiers']
            }
            for qualifier in QUALIFIERS:
                event[qualifier] = event_qualifiers.get(qualifier, 0)

            writer.writerow(event)
for attendance in content.xpath( '//div[@class="sb-spieldaten"]/p[3]/span/strong/text()'): match['attendance'] = int( attendance.replace(' Spectators', '').replace('.', '')) for referee in content.xpath('//div[@class="sb-spieldaten"]/p[3]/a'): referees.update_one( {'referee': int(referee.xpath('@href')[0].split('/')[-1])}, {'$setOnInsert': { 'name': referee.xpath('@title')[0] }}, upsert=True) match['referee'] = int(referee.xpath('@href')[0].split('/')[-1]) matches.save(match) wait() if __name__ == '__main__': get_regions() for region in regions.find().sort('name'): get_tournaments(region['region']) for tournament in tournaments.find().sort('tournament'): get_seasons(tournament['tournament']) for season in seasons.find().sort([('season', -1), ('tournament', 1)]).batch_size(1): get_fixtures(season['tournament'], season['season']) for match in matches.find().sort('date', -1): get_lineups(match['match'])