Ejemplo n.º 1
0
    def create_league(self, league_link, district, season):
        abbreviation = league_link.text
        bhv_id = parsing.parse_league_bhv_id(league_link)

        if self.options['leagues'] and bhv_id not in self.options['leagues']:
            self.stdout.write('SKIPPING League (options): {} {}'.format(
                bhv_id, abbreviation))
            return

        if abbreviation[:1] in ['m', 'w', 'g', 'u'
                                ] and not self.options['youth']:
            self.stdout.write('SKIPPING League (youth league): {} {}'.format(
                bhv_id, abbreviation))
            return

        url = League.build_source_url(bhv_id)
        dom = logic.get_html(url)

        name = parsing.parse_league_name(dom)

        if League.is_youth_league(name) and not self.options['youth']:
            self.stdout.write('SKIPPING League (youth league): {} {}'.format(
                bhv_id, name))
            return

        team_links = dom.xpath('//table[@class="scoretable"]/tr[position() > 1]/td[3]/a') or \
                     dom.xpath('//table[@class="scoretable"]/tr[position() > 1]/td[2]/a')
        if not team_links:
            self.stdout.write('SKIPPING League: {} {} (no team table)'.format(
                bhv_id, name))
            return

        game_rows = parsing.parse_game_rows(dom)
        if not game_rows:
            self.stdout.write('SKIPPING League (no games): {} {}'.format(
                bhv_id, name))
            return
        if len(game_rows) < len(team_links) * (len(team_links) - 1):
            self.stdout.write('SKIPPING League (few games): {} {}'.format(
                bhv_id, abbreviation))
            return

        league, league_created = League.objects.get_or_create(
            name=name,
            abbreviation=abbreviation,
            district=district,
            season=season,
            bhv_id=bhv_id)
        if league_created:
            self.stdout.write('CREATED League: {}'.format(league))
        else:
            self.stdout.write('EXISTING League: {}'.format(league))

        for team_link in team_links:
            self.create_team(team_link, league)
Ejemplo n.º 2
0
def scrape_league(league_link, district, season, options):
    abbreviation = league_link.text
    bhv_id = parsing.parse_league_bhv_id(league_link)

    if bhv_id in BUGGED_LEAGUES:
        LOGGER.debug('SKIPPING League (ignore list): %s %s', bhv_id, abbreviation)
        return

    if options['leagues'] and bhv_id not in options['leagues']:
        LOGGER.debug('SKIPPING League (options): %s %s', bhv_id, abbreviation)
        return

    if abbreviation == 'TEST':
        LOGGER.debug('SKIPPING League (test league): %s %s', bhv_id, abbreviation)
        return

    url = League.build_source_url(bhv_id)
    html = http.get_text(url)
    dom = parsing.html_dom(html)
    name = parsing.parse_league_name(dom)

    irrelevant_league_name_indicators = [
        'Platzierungsrunde',
        'Kreisvergleichsspiele',
        'pokal', 'Pokal', 'Trophy',
        'Vorbereitung', 'F-FS', 'M-FS', 'Quali',
        'Freiwurf', 'Maxi', 'turnier', 'wettbewerb',
        'Test', 'Planung', 'planung',
    ]

    if any(n in name for n in irrelevant_league_name_indicators):
        LOGGER.debug('SKIPPING League (name): %s %s', bhv_id, name)
        return

    team_links = parsing.parse_team_links(dom)
    if not team_links:
        LOGGER.debug('SKIPPING League (no team table): %s %s', bhv_id, name)
        return

    game_rows = parsing.parse_game_rows(dom)
    if not game_rows:
        LOGGER.debug('SKIPPING League (no games): %s %s', bhv_id, name)
        return

    try:
        name = LeagueName.objects.get(bhv_id=bhv_id).name
    except LeagueName.DoesNotExist:
        pass

    if League.is_youth(abbreviation, name) and not options['youth']:
        LOGGER.debug('SKIPPING League (youth league): %s %s %s', bhv_id, abbreviation, name)
        return

    league, league_created = League.objects.get_or_create(
        name=name, abbreviation=abbreviation, district=district, season=season, bhv_id=bhv_id)
    if league_created:
        LOGGER.info('CREATED League: %s', league)
    else:
        LOGGER.info('EXISTING League: %s', league)

    if options['skip_teams']:
        return

    for team_link in team_links:
        scrape_team(team_link, league)

    retirements = parsing.parse_retirements(dom)
    Team.check_retirements(retirements, league, LOGGER)
Ejemplo n.º 3
0
    def create_league(self, league_link, district, season):
        abbreviation = league_link.text
        bhv_id = parsing.parse_league_bhv_id(league_link)

        if self.options['leagues'] and bhv_id not in self.options['leagues']:
            LOGGER.debug('SKIPPING League (options): %s %s', bhv_id,
                         abbreviation)
            return

        if abbreviation == 'TEST':
            LOGGER.debug('SKIPPING League (test league): %s %s', bhv_id,
                         abbreviation)
            return

        url = League.build_source_url(bhv_id)
        dom = logic.get_html(url)
        name = parsing.parse_league_name(dom)

        if any(n in name for n in [
                'Platzierungsrunde', 'Meister', 'Freiwurf', 'Maxi', 'turnier',
                'wettbewerb', 'pokal', 'Test'
        ]):
            LOGGER.debug('SKIPPING League (name): %s %s', bhv_id, name)
            return

        team_links = parsing.parse_team_links(dom)
        if not team_links:
            LOGGER.debug('SKIPPING League (no team table): %s %s', bhv_id,
                         name)
            return

        game_rows = parsing.parse_game_rows(dom)
        if not game_rows:
            LOGGER.debug('SKIPPING League (no games): %s %s', bhv_id, name)
            return

        if len(game_rows) < len(team_links) * (len(team_links) - 1):
            LOGGER.debug('SKIPPING League (few games): %s %s', bhv_id,
                         abbreviation)
            return

        name = {
            5380: "Männer Kreisliga 2-1",
            5381: "Männer Kreisliga 2-2",
            7424: "Männer Kreisliga C Staffel 3",
            50351: "gemischte Jugend D Kreisliga A Staffel 1",
            52853: "männliche Jugend C Bezirksliga Staffel 2",
            58111: "Frauen Oberliga Rheinland-Pfalz/Saar 1",
            58116: "Frauen Oberliga Rheinland-Pfalz/Saar 2",
        }.get(bhv_id, name)

        if League.is_youth(abbreviation, name) and not self.options['youth']:
            LOGGER.debug('SKIPPING League (youth league): %s %s %s', bhv_id,
                         abbreviation, name)
            return

        league, league_created = League.objects.get_or_create(
            name=name,
            abbreviation=abbreviation,
            district=district,
            season=season,
            bhv_id=bhv_id)
        if league_created:
            LOGGER.info('CREATED League: %s', league)
        else:
            LOGGER.info('EXISTING League: %s', league)

        if self.options['skip_teams']:
            return

        for team_link in team_links:
            create_team(team_link, league)

        retirements = parsing.parse_retirements(dom)
        check_retirements(retirements, league)