Ejemplo n.º 1
0
def _parse_file(file_path):
    m = re.search(r'(\d{4}-\d{2}-\d{2})\.html$', file_path)
    grab_date_str = m.group(1)

    grab_date = datetime.datetime.strptime(grab_date_str, '%Y-%m-%d').date()
    tomorrow_grab_date = grab_date + datetime.timedelta(days=1)

    with open(file_path, 'rt', encoding='utf-8') as f_in:
        for raw_match_data in handle(f_in):
            if not _is_betcity_tournament_name_valid(
                    raw_match_data['tournament']):
                if raw_match_data['tournament'].startswith('Футбол.'):
                    _unknown_tournaments.add(raw_match_data['tournament'])
                continue
            if 'Статистика игрового дня' in raw_match_data[
                    'tournament'] or 'Статистика турнира' in raw_match_data[
                        'tournament']:
                continue

            if not is_value_valid(teams_data, 'betcityName',
                                  raw_match_data['home']):
                _unknown_teams.add(raw_match_data['home'])
            if not is_value_valid(teams_data, 'betcityName',
                                  raw_match_data['away']):
                _unknown_teams.add(raw_match_data['away'])

            match_date = datetime.datetime.strptime(raw_match_data['date'],
                                                    '%d.%m.%Y').date()
            if match_date != grab_date and match_date != tomorrow_grab_date:
                continue
            match_date_str = match_date.strftime('%Y-%m-%d')

            betcity_match_uuid = get_identifier()

            match_data = {
                'uuid': betcity_match_uuid,
                'tournament': raw_match_data['tournament'],
                'date': match_date_str,
                'time': raw_match_data['time'],
                'home': raw_match_data['home'],
                'away': raw_match_data['away'],
                'specialWord': raw_match_data['special_word'],
                'bets': raw_match_data['bets']
            }

            out_dir_path = os.path.join('tmp', 'update', 'betcity',
                                        'matchesJson', match_date_str)
            os.makedirs(out_dir_path, exist_ok=True)
            out_file_path = os.path.join(out_dir_path,
                                         '%s.json' % (betcity_match_uuid, ))
            with open(out_file_path, 'wt', encoding='utf-8') as f_out:
                json.dump(match_data, f_out, ensure_ascii=False)
Ejemplo n.º 2
0
def _parse_file(file_path):
    with open(file_path, 'rt', encoding='utf-8') as f:
        data = handle_date(f)

    for item in data:
        (intelbet_country, intelbet_tournament, intelbet_home, intelbet_away,
         url, match_time_str) = item

        if not ( is_value_valid(countries_data, 'intelbetCountryName', intelbet_country) and \
          is_value_valid(tournaments_data, 'intelbetTournamentName', intelbet_tournament) ):
            continue

        print(url)
        match_names_automatically(url)
Ejemplo n.º 3
0
def _parse_file(file_path):
    m = re.search(r'(\d{4}-\d{2}-\d{2})\.html$', file_path)
    date_str = m.group(1)

    with open(file_path, 'rt', encoding='utf-8') as f:
        data = handle_date(f)

    for item in data:
        (intelbet_country, intelbet_tournament, intelbet_home, intelbet_away,
         url, match_time_str) = item

        if not ( is_value_valid(countries_data, 'intelbetCountryName', intelbet_country) and \
          is_value_valid(tournaments_data, 'intelbetTournamentName', intelbet_tournament) ):
            continue

        intelbet_match_uuid = get_identifier()

        intelbet_match_header = {
            'uuid': intelbet_match_uuid,
            'date': date_str,
            'home': intelbet_home,
            'away': intelbet_away,
            'contry': intelbet_country,
            'tournament': intelbet_tournament,
            'url': url,
            'time': match_time_str
        }

        print(url)
        match_html = intelbet_get(url, delay=0.5)

        out_dir_path = os.path.join('tmp', 'update', 'intelbet', 'matchesHtml',
                                    date_str)
        os.makedirs(out_dir_path, exist_ok=True)

        header_out_file_path = os.path.join(
            out_dir_path, '%s.json' % (intelbet_match_uuid, ))
        with open(header_out_file_path, 'wt',
                  encoding='utf-8') as header_f_out:
            json.dump(intelbet_match_header, header_f_out, ensure_ascii=False)

        out_file_path = os.path.join(out_dir_path,
                                     '%s.html' % (intelbet_match_uuid, ))
        with open(out_file_path, 'wt', encoding='utf-8') as f_out:
            f_out.write(match_html)
Ejemplo n.º 4
0
def _is_betcity_tournament_name_valid(betcity_tournament_name):
    """Tell whether a Betcity tournament name maps to a known tournament.

    The candidates returned by ``_get_possible_tournament_names`` are tried
    in order; the result is True as soon as one of them passes
    ``is_value_valid`` against the ``betcityTournamentName`` field of
    ``tournaments_data``, and False when none does.
    """
    candidates = _get_possible_tournament_names(betcity_tournament_name)
    return any(
        is_value_valid(tournaments_data, 'betcityTournamentName', candidate)
        for candidate in candidates)
Ejemplo n.º 5
0
def _parse_file(file_path):
    with open(file_path, 'rt', encoding='utf-8') as f:
        data = json.load(f)

    m = re.search('(\d{4}-\d{2}-\d{2})\.json$', file_path)
    if m is None:
        raise RuntimeError('invalid file name')
    match_date_str = m.group(1)

    (main_data, raw_tournaments_data, raw_matches_data) = data

    stages_data = {}
    for raw_stage_data in raw_tournaments_data:
        if not is_value_valid(tournaments_data, 'whoscoredTournamentId',
                              raw_stage_data[4]):
            continue

        stage_id = raw_stage_data[0]
        stages_data[stage_id] = {
            'region_id': int(raw_stage_data[1]),
            'region_name': raw_stage_data[3],
            'tournament_id': raw_stage_data[4],
            'tournament_name': raw_stage_data[7],
            'season_id': raw_stage_data[6],
            'stage_id': raw_stage_data[0],
        }

    for raw_match_data in raw_matches_data:
        stage_id = raw_match_data[0]
        if stage_id not in stages_data:
            continue
        stage_data = stages_data[stage_id]

        if not is_value_valid(teams_data, 'whoscoredId',
                              raw_match_data[4]) or not is_value_valid(
                                  teams_data, 'whoscoredName',
                                  raw_match_data[5]):
            _unknown_teams.add(raw_match_data[4])

        whoscored_match_uuid = get_identifier()

        whoscored_match_id = raw_match_data[1]
        whoscored_header = {
            'uuid': whoscored_match_uuid,
            'matchId': whoscored_match_id,
            'date': match_date_str,
            'home': raw_match_data[5],
            'homeId': raw_match_data[4],
            'away': raw_match_data[9],
            'awayId': raw_match_data[8],
            'regionId': stage_data['region_id'],
            'tournamentId': stage_data['tournament_id'],
            'seasonId': stage_data['season_id'],
            'stageId': stage_data['stage_id']
        }

        # WARNNING: Бывают и другие страницы
        url = 'https://www.whoscored.com/Matches/%d/Live' % (
            whoscored_match_id, )
        print(url)
        match_html = whoscored_get(url, delay=0.5)

        out_dir_path = os.path.join('tmp', 'update', 'whoscored',
                                    'matchesHtml', match_date_str)
        os.makedirs(out_dir_path, exist_ok=True)

        whoscored_header_out_file_path = os.path.join(
            out_dir_path, '%s.json' % (whoscored_match_uuid, ))
        with open(whoscored_header_out_file_path, 'wt',
                  encoding='utf-8') as whoscored_header_f_out:
            json.dump(whoscored_header,
                      whoscored_header_f_out,
                      ensure_ascii=False)

        html_out_file_path = os.path.join(out_dir_path,
                                          '%s.html' % (whoscored_match_uuid, ))
        with open(html_out_file_path, 'wt', encoding='utf-8') as html_f_out:
            html_f_out.write(match_html)