def test_another_retired_team(self):
    """Retirement parsing on the 28454 fixture yields TSG Stuttgart on 2018-03-01."""
    league_dom = read_html('league_with_retired_team_28454.html')

    parsed = parsing.parse_retirements(league_dom)

    # Exactly one (team name, retirement date) pair is expected.
    self.assertEqual([('TSG Stuttgart', date(2018, 3, 1))], parsed)
def test_retired_team(self):
    """Retirement parsing on the retired-team fixture yields TV 1893 Neuhausen/E. on 2018-06-29."""
    league_dom = read_html('league_with_retired_team.html')

    parsed = parsing.parse_retirements(league_dom)

    # Exactly one (team name, retirement date) pair is expected.
    self.assertEqual([('TV 1893 Neuhausen/E.', date(2018, 6, 29))], parsed)
def scrape_league(league_link, district, season, options):
    """Scrape one league page and store the league, its teams and retirements.

    The league is skipped (with a debug log entry and an early return) when
    it is on the ignore list, is excluded by ``options['leagues']``, is the
    ``TEST`` league, has a name containing one of the irrelevant-league
    markers, has no team links, has no game rows, or is a youth league
    while ``options['youth']`` is falsy.

    Args:
        league_link: Link element for the league; its ``text`` is used as
            the league abbreviation and it is handed to
            ``parsing.parse_league_bhv_id`` to obtain the id.
        district: Passed through to ``League.objects.get_or_create``.
        season: Passed through to ``League.objects.get_or_create``.
        options: Mapping read with the keys ``'leagues'``, ``'youth'`` and
            ``'skip_teams'``.

    Returns:
        None.
    """
    abbreviation = league_link.text
    bhv_id = parsing.parse_league_bhv_id(league_link)

    # Cheap filters first: none of these need the league page itself.
    if bhv_id in BUGGED_LEAGUES:
        LOGGER.debug('SKIPPING League (ignore list): %s %s', bhv_id, abbreviation)
        return

    selected_leagues = options['leagues']
    if selected_leagues and bhv_id not in selected_leagues:
        LOGGER.debug('SKIPPING League (options): %s %s', bhv_id, abbreviation)
        return

    if abbreviation == 'TEST':
        LOGGER.debug('SKIPPING League (test league): %s %s', bhv_id, abbreviation)
        return

    # Download and parse the league page.
    source_url = League.build_source_url(bhv_id)
    dom = parsing.html_dom(http.get_text(source_url))
    name = parsing.parse_league_name(dom)

    # Substrings (case-sensitive) that mark a league name as irrelevant.
    irrelevant_league_name_indicators = [
        'Platzierungsrunde',
        'Kreisvergleichsspiele',
        'pokal',
        'Pokal',
        'Trophy',
        'Vorbereitung',
        'F-FS',
        'M-FS',
        'Quali',
        'Freiwurf',
        'Maxi',
        'turnier',
        'wettbewerb',
        'Test',
        'Planung',
        'planung',
    ]
    for indicator in irrelevant_league_name_indicators:
        if indicator in name:
            LOGGER.debug('SKIPPING League (name): %s %s', bhv_id, name)
            return

    team_links = parsing.parse_team_links(dom)
    if not team_links:
        LOGGER.debug('SKIPPING League (no team table): %s %s', bhv_id, name)
        return

    game_rows = parsing.parse_game_rows(dom)
    if not game_rows:
        LOGGER.debug('SKIPPING League (no games): %s %s', bhv_id, name)
        return

    # A stored LeagueName for this id replaces the name parsed from the page.
    try:
        name = LeagueName.objects.get(bhv_id=bhv_id).name
    except LeagueName.DoesNotExist:
        pass

    if League.is_youth(abbreviation, name) and not options['youth']:
        LOGGER.debug('SKIPPING League (youth league): %s %s %s', bhv_id, abbreviation, name)
        return

    league, league_created = League.objects.get_or_create(
        name=name,
        abbreviation=abbreviation,
        district=district,
        season=season,
        bhv_id=bhv_id,
    )
    LOGGER.info(
        'CREATED League: %s' if league_created else 'EXISTING League: %s',
        league,
    )

    if options['skip_teams']:
        return

    for link in team_links:
        scrape_team(link, league)

    # Retirements are read from the same league page, after the teams.
    retired = parsing.parse_retirements(dom)
    Team.check_retirements(retired, league, LOGGER)
def test_empty_retirement(self):
    """Retirement parsing on a fixture without retired teams yields an empty list."""
    league_dom = read_html('league_without_retired_team.html')

    parsed = parsing.parse_retirements(league_dom)

    self.assertEqual(parsed, [])
def create_league(self, league_link, district, season):
    """Create (or fetch) the league behind ``league_link`` and set up its teams.

    The league is skipped (with a debug log entry and an early return) when
    it is excluded by ``self.options['leagues']``, is the ``TEST`` league,
    has a name containing one of the irrelevant-league markers, has no team
    links, has no game rows, has fewer than ``n * (n - 1)`` game rows for
    ``n`` team links, or is a youth league while ``self.options['youth']``
    is falsy.

    Args:
        league_link: Link element for the league; its ``text`` is used as
            the league abbreviation and it is handed to
            ``parsing.parse_league_bhv_id`` to obtain the id.
        district: Passed through to ``League.objects.get_or_create``.
        season: Passed through to ``League.objects.get_or_create``.

    Returns:
        None.
    """
    options = self.options
    abbreviation = league_link.text
    bhv_id = parsing.parse_league_bhv_id(league_link)

    # Cheap filters first: none of these need the league page itself.
    selected_leagues = options['leagues']
    if selected_leagues and bhv_id not in selected_leagues:
        LOGGER.debug('SKIPPING League (options): %s %s', bhv_id, abbreviation)
        return

    if abbreviation == 'TEST':
        LOGGER.debug('SKIPPING League (test league): %s %s', bhv_id, abbreviation)
        return

    source_url = League.build_source_url(bhv_id)
    dom = logic.get_html(source_url)
    name = parsing.parse_league_name(dom)

    # Substrings (case-sensitive) that mark a league name as irrelevant.
    irrelevant_markers = [
        'Platzierungsrunde',
        'Meister',
        'Freiwurf',
        'Maxi',
        'turnier',
        'wettbewerb',
        'pokal',
        'Test',
    ]
    for marker in irrelevant_markers:
        if marker in name:
            LOGGER.debug('SKIPPING League (name): %s %s', bhv_id, name)
            return

    team_links = parsing.parse_team_links(dom)
    if not team_links:
        LOGGER.debug('SKIPPING League (no team table): %s %s', bhv_id, name)
        return

    game_rows = parsing.parse_game_rows(dom)
    if not game_rows:
        LOGGER.debug('SKIPPING League (no games): %s %s', bhv_id, name)
        return

    # n * (n - 1) rows -- presumably a full home-and-away schedule; leagues
    # with fewer game rows than that are ignored.
    team_count = len(team_links)
    if len(game_rows) < team_count * (team_count - 1):
        LOGGER.debug('SKIPPING League (few games): %s %s', bhv_id, abbreviation)
        return

    # Hard-coded names for specific league ids, replacing the parsed name.
    name_overrides = {
        5380: "Männer Kreisliga 2-1",
        5381: "Männer Kreisliga 2-2",
        7424: "Männer Kreisliga C Staffel 3",
        50351: "gemischte Jugend D Kreisliga A Staffel 1",
        52853: "männliche Jugend C Bezirksliga Staffel 2",
        58111: "Frauen Oberliga Rheinland-Pfalz/Saar 1",
        58116: "Frauen Oberliga Rheinland-Pfalz/Saar 2",
    }
    name = name_overrides.get(bhv_id, name)

    if League.is_youth(abbreviation, name) and not options['youth']:
        LOGGER.debug('SKIPPING League (youth league): %s %s %s', bhv_id, abbreviation, name)
        return

    league, league_created = League.objects.get_or_create(
        name=name,
        abbreviation=abbreviation,
        district=district,
        season=season,
        bhv_id=bhv_id,
    )
    LOGGER.info(
        'CREATED League: %s' if league_created else 'EXISTING League: %s',
        league,
    )

    if options['skip_teams']:
        return

    for link in team_links:
        create_team(link, league)

    # Retirements are read from the same league page, after the teams.
    retired = parsing.parse_retirements(dom)
    check_retirements(retired, league)