def create_association(self, bhv_id):
    """Import the association identified by ``bhv_id`` and its districts.

    The association page is fetched and its name parsed from it. The
    association is skipped when no abbreviation is known for that name
    (``KeyError`` from the lookup) or when the ``associations`` option is
    non-empty and does not contain ``bhv_id``. Otherwise the record is
    fetched or created and every district option found on the page is
    handed to ``create_district``.
    """
    page = logic.get_html(Association.build_source_url(bhv_id))
    name = parsing.parse_league_name(page)

    try:
        abbreviation = Association.get_association_abbreviation(name)
    except KeyError:
        LOGGER.warning("No abbreviation for association '%s'", name)
        return

    selected = self.options['associations']
    if selected and bhv_id not in selected:
        LOGGER.debug('SKIPPING Association (options): %s %s', bhv_id, name)
        return

    association, created = Association.objects.get_or_create(
        name=name, abbreviation=abbreviation, bhv_id=bhv_id)
    LOGGER.info(
        'CREATED Association: %s' if created else 'EXISTING Association: %s',
        association)

    # The first <option> of the select is skipped by the XPath itself.
    for district_option in page.xpath(
            '//select[@name="orgID"]/option[position()>1]'):
        try:
            self.create_district(district_option, association)
        except Exception:
            # Best effort: a failing district is reported through the
            # 'mail' logger and the remaining districts are still tried.
            logging.getLogger('mail').exception(
                "Could not create District")
def get_sports_hall(self, game_row):
    """Return the sports hall referenced by ``game_row``, creating it if needed.

    Returns ``None`` when the fourth cell of the row does not contain
    exactly one child (the sports hall link). An existing ``SportsHall``
    with the same number and ``bhv_id`` is returned as is; otherwise the
    hall page is fetched, parsed and stored.
    """
    hall_cell = game_row[3]
    if len(hall_cell) != 1:
        return None

    link = hall_cell[0]
    number = int(link.text)
    bhv_id = parsing.parse_sports_hall_bhv_id(link)

    if SportsHall.objects.filter(number=number, bhv_id=bhv_id).exists():
        return SportsHall.objects.get(number=number, bhv_id=bhv_id)

    tree = logic.get_html(SportsHall.build_source_url(bhv_id))
    gym_table = tree.xpath('//table[@class="gym"]')[0]
    name = gym_table[0][1][0].text
    city = gym_table[1][1].text
    street = gym_table[2][1].text
    # Only prefix the street when the page actually provides one.
    if street:
        address = street + ", " + city
    else:
        address = city
    phone_number = gym_table[3][1].text
    latitude, longitude = parsing.parse_coordinates(tree)

    sports_hall = SportsHall.objects.create(
        number=number,
        name=name,
        address=address,
        phone_number=phone_number,
        latitude=latitude,
        longitude=longitude,
        bhv_id=bhv_id,
    )
    self.stdout.write('CREATED Sports Hall: {}'.format(sports_hall))
    return sports_hall
def create_district(self, district_item, association):
    """Import one district and all seasons listed on its page.

    ``district_item`` supplies the district name (its text) and the
    ``bhv_id`` (its ``value`` attribute). The district is skipped when the
    ``districts`` option is non-empty and does not contain the id. The
    district is always linked to ``association``, but its seasons are only
    processed the first time the id is seen in ``self.processed_districts``.
    """
    name = district_item.text
    bhv_id = int(district_item.get('value'))

    selected = self.options['districts']
    if selected and bhv_id not in selected:
        self.stdout.write(
            'SKIPPING District (options): {} {}'.format(bhv_id, name))
        return

    district, created = District.objects.get_or_create(
        name=name, bhv_id=bhv_id)
    # Link before the "already processed" check so a district reached via
    # several associations is attached to each of them.
    district.associations.add(association)

    if bhv_id in self.processed_districts:
        self.stdout.write(
            'SKIPPING District: {} {} (already processed)'.format(
                bhv_id, name))
        return

    template = 'CREATED District: {}' if created else 'EXISTING District: {}'
    self.stdout.write(template.format(district))
    self.processed_districts.add(bhv_id)

    seasons_dom = logic.get_html(
        District.build_source_url(district.bhv_id, '1990-01-01'))
    headings = seasons_dom.xpath(
        '//div[@id="results"]/div/a[@name]/h4/text()')
    links = seasons_dom.xpath('//div[@id="results"]/div/a[@href]')
    # Headings and links are paired positionally.
    for heading, link in zip(headings, links):
        self.create_season(heading, link, district)
def create_season(self, district_season_heading, district_season_link,
                  district):
    """Import one season of ``district`` and the leagues listed for it.

    The start year is parsed from the heading. The season is skipped when
    no start year could be parsed, or when the ``seasons`` option is
    non-empty and does not contain it. Otherwise the ``Season`` is fetched
    or created and every league link on the district page for the date
    parsed from ``district_season_link`` is passed to ``create_league``.
    """
    write = self.stdout.write

    start_year = parsing.parse_district_season_start_year(
        district_season_heading)
    if start_year is None:
        write('SKIPPING District Season (irrelevant): {} {}'.format(
            district, district_season_heading))
        return

    selected = self.options['seasons']
    if selected and start_year not in selected:
        write('SKIPPING District Season (options): {}'.format(start_year))
        return

    season, season_created = Season.objects.get_or_create(
        start_year=start_year)
    template = ('CREATED Season: {}' if season_created
                else 'EXISTING Season: {}')
    write(template.format(season))

    link_date = parsing.parse_district_link_date(district_season_link)
    dom = logic.get_html(
        District.build_source_url(district.bhv_id, link_date))
    for league_link in dom.xpath(
            '//div[@id="results"]/div/table[2]/tr/td[1]/a'):
        self.create_league(league_link, district, season)
def create_season(self, district, start_year):
    """Import the season starting in ``start_year`` for ``district``.

    Skipped when the ``seasons`` option is non-empty and does not contain
    ``start_year``. The district page is probed for four dates (1 October
    of ``start_year`` plus 0, 10, 20 and 30 days); the first page that
    lists league links is used. If none does, a warning is logged and
    nothing is imported. Failures while creating a single league are
    reported through the 'mail' logger and do not stop the loop.
    """
    selected = self.options['seasons']
    if selected and start_year not in selected:
        LOGGER.debug('SKIPPING Season (options): %s', start_year)
        return

    season, season_created = Season.objects.get_or_create(
        start_year=start_year)
    LOGGER.info(
        'CREATED Season: %s' if season_created else 'EXISTING Season: %s',
        season)

    first_probe = date(start_year, 10, 1)
    probe_dates = [first_probe + timedelta(days=10 * step)
                   for step in range(4)]

    league_links = []
    for start_date in probe_dates:
        LOGGER.debug('trying District Season: %s %s %s',
                     district, season, start_date)
        dom = logic.get_html(
            District.build_source_url(district.bhv_id, start_date))
        league_links = dom.xpath(
            '//div[@id="results"]/div/table[2]/tr/td[1]/a')
        if league_links:
            break

    if not league_links:
        # None of the probed dates produced a league table.
        LOGGER.warning('District Season without Leagues: %s %s',
                       district, season)
        return

    for league_link in league_links:
        try:
            self.create_league(league_link, district, season)
        except Exception:
            logging.getLogger('mail').exception("Could not create League")
def update_league_names(*_):
    """Re-parse every league's name from its source page and store changes.

    All positional arguments are accepted and ignored. A league is saved
    (and the rename logged) only when the freshly parsed name differs from
    the stored one.
    """
    for league in League.objects.all():
        page = logic.get_html(league.source_url())
        parsed_name = parsing.parse_league_name(page)
        if parsed_name == league.name:
            continue
        league.name = parsed_name
        league.save()
        LOGGER.info('RENAMED LEAGUE: %s', league)
def create_league(self, league_link, district, season):
    """Import the league behind ``league_link`` and all of its teams.

    The league is skipped (with a message on ``self.stdout``) when:

    * the ``leagues`` option is non-empty and does not contain its id,
    * it looks like a youth league (abbreviation starting with ``m``,
      ``w``, ``g`` or ``u``, or ``League.is_youth_league(name)``) and the
      ``youth`` option is not set,
    * its page has no team table or no game rows,
    * it has fewer game rows than ``teams * (teams - 1)``.

    Otherwise the ``League`` is fetched or created and each team link is
    passed to ``self.create_team``.
    """
    write = self.stdout.write
    abbreviation = league_link.text
    bhv_id = parsing.parse_league_bhv_id(league_link)

    selected = self.options['leagues']
    if selected and bhv_id not in selected:
        write('SKIPPING League (options): {} {}'.format(
            bhv_id, abbreviation))
        return

    # Cheap youth check on the abbreviation, before any page is fetched.
    if (abbreviation[:1] in ('m', 'w', 'g', 'u')
            and not self.options['youth']):
        write('SKIPPING League (youth league): {} {}'.format(
            bhv_id, abbreviation))
        return

    dom = logic.get_html(League.build_source_url(bhv_id))
    name = parsing.parse_league_name(dom)

    if League.is_youth_league(name) and not self.options['youth']:
        write('SKIPPING League (youth league): {} {}'.format(bhv_id, name))
        return

    # Team links are looked up in the third column first, then the second.
    team_links = dom.xpath(
        '//table[@class="scoretable"]/tr[position() > 1]/td[3]/a')
    if not team_links:
        team_links = dom.xpath(
            '//table[@class="scoretable"]/tr[position() > 1]/td[2]/a')
    if not team_links:
        write('SKIPPING League: {} {} (no team table)'.format(bhv_id, name))
        return

    game_rows = parsing.parse_game_rows(dom)
    if not game_rows:
        write('SKIPPING League (no games): {} {}'.format(bhv_id, name))
        return

    team_count = len(team_links)
    if len(game_rows) < team_count * (team_count - 1):
        write('SKIPPING League (few games): {} {}'.format(
            bhv_id, abbreviation))
        return

    league, league_created = League.objects.get_or_create(
        name=name,
        abbreviation=abbreviation,
        district=district,
        season=season,
        bhv_id=bhv_id,
    )
    template = ('CREATED League: {}' if league_created
                else 'EXISTING League: {}')
    write(template.format(league))

    for team_link in team_links:
        self.create_team(team_link, league)
def import_league(self, league):
    """Import every game listed on the source page of ``league``.

    Skipped when the ``leagues`` option is non-empty and does not contain
    the league's ``bhv_id``. Each row of the game table after the first
    one is passed to ``self.import_game``.
    """
    selected = self.options['leagues']
    if selected and league.bhv_id not in selected:
        self.stdout.write('SKIPPING League: {} (options)'.format(league))
        return

    page = logic.get_html(league.source_url())
    for game_row in page.xpath(
            "//table[@class='gametable']/tr[position() > 1]"):
        self.import_game(game_row, league)
def create_associations(self):
    """Discover all association portals and import each association.

    Portal links are read from the root page at
    ``settings.NEW_ROOT_SOURCE_URL``. Links that do not start with
    ``http`` are treated as paths and appended to the root URL. For each
    portal the association id is resolved via ``get_association_bhv_id``
    and the association is imported via ``create_association``.

    Both steps run inside the same guarded region: a portal whose id
    cannot be resolved is reported through the 'mail' logger and skipped,
    exactly like a failing ``create_association``, instead of aborting the
    import of all remaining associations.
    """
    root_url = settings.NEW_ROOT_SOURCE_URL
    dom = logic.get_html(root_url)
    portal_paths = dom.xpath(
        '//div[@id="main-content"]//table[@summary]/tbody/tr/td[1]/a/@href'
    )
    for portal_path in portal_paths:
        if portal_path.startswith('http'):
            portal_url = portal_path
        else:
            portal_url = root_url + portal_path
        try:
            bhv_id = self.get_association_bhv_id(portal_url)
            self.create_association(bhv_id)
        except Exception:
            # Best effort per association; the traceback goes to the
            # 'mail' logger and the loop continues with the next portal.
            logging.getLogger('mail').exception(
                "Could not create Association")
def import_league(self, league: League):
    """Import every game listed on the source page of ``league``.

    Skipped when the ``leagues`` option is non-empty and does not contain
    the league's ``bhv_id``, or when the league is a youth league and the
    ``youth`` option is not set. A failure while importing a single game
    is reported through the 'mail' logger and does not stop the loop.
    """
    selected = self.options['leagues']
    if selected and league.bhv_id not in selected:
        LOGGER.debug('SKIPPING League: %s (options)', league)
        return

    if league.youth and not self.options['youth']:
        LOGGER.debug('SKIPPING League (youth league): %s', league)
        return

    page = logic.get_html(league.source_url())
    for game_row in page.xpath(
            "//table[@class='gametable']/tr[position() > 1]"):
        try:
            self.import_game(game_row, league)
        except Exception:
            logging.getLogger('mail').exception("Could not import Game")
def create_team(link, league):
    """Fetch or create the ``Team`` behind ``link`` within ``league``.

    The team's short name is not on the link itself. It is taken as the
    most frequent text among the cells at indexes 4 and 6 of every game
    row on the team's page. Ties are resolved in favour of the name that
    appears first, so repeated imports of the same page always choose the
    same short name.

    Raises:
        ValueError: if the team page yields no candidate short names.
    """
    # Local import keeps this block self-contained.
    from collections import Counter

    bhv_id = parsing.parse_team_bhv_id(link)
    name = link.text
    url = Team.build_source_url(league.bhv_id, bhv_id)
    dom = logic.get_html(url)
    game_rows = parsing.parse_game_rows(dom)

    short_team_names = [
        cell.text
        for game_row in game_rows
        for cell in game_row.xpath('td')[4:7:2]
    ]
    if not short_team_names:
        # Same exception type the previous max()-based lookup raised on
        # empty input, but with a message that names the team.
        raise ValueError(
            'No short team name candidates found for team {} {}'.format(
                bhv_id, name))

    # Counter counts in a single pass and most_common() breaks ties by
    # first occurrence instead of by set iteration order.
    short_team_name, _ = Counter(short_team_names).most_common(1)[0]

    team, created = Team.objects.get_or_create(
        name=name,
        short_name=short_team_name,
        league=league,
        bhv_id=bhv_id,
    )
    if created:
        LOGGER.info('CREATED Team: %s', team)
    else:
        LOGGER.info('EXISTING Team: %s', team)
def parse_sports_hall(number, bhv_id):
    """Fetch the sports hall page for ``bhv_id`` and store it as ``SportsHall``.

    Name, city, street and phone number are read from fixed positions in
    the first table with class ``gym``; coordinates come from
    ``parsing.parse_coordinates``. Returns the newly created object.
    """
    tree = logic.get_html(SportsHall.build_source_url(bhv_id))
    gym_table = tree.xpath('//table[@class="gym"]')[0]

    name = gym_table[0][1][0].text
    city = gym_table[1][1].text
    street = gym_table[2][1].text
    # Only prefix the street when the page actually provides one.
    if street:
        address = street + ", " + city
    else:
        address = city
    phone_number = gym_table[3][1].text
    latitude, longitude = parsing.parse_coordinates(tree)

    hall = SportsHall.objects.create(
        number=number,
        name=name,
        address=address,
        phone_number=phone_number,
        latitude=latitude,
        longitude=longitude,
        bhv_id=bhv_id,
    )
    LOGGER.info('CREATED Sports Hall: %s', hall)
    return hall
def create_association(self, association_link):
    """Import the association behind ``association_link`` and its districts.

    The name is the link text; abbreviation and ``bhv_id`` are derived
    from it and from the link. The association is skipped when the
    ``associations`` option is non-empty and does not contain the id.
    Otherwise the record is fetched or created and every district option
    on its source page is passed to ``create_district``.
    """
    name = association_link.text
    abbreviation = Association.get_association_abbreviation(name)
    bhv_id = parsing.parse_association_bhv_id(association_link)

    selected = self.options['associations']
    if selected and bhv_id not in selected:
        self.stdout.write(
            'SKIPPING Association (options): {} {}'.format(bhv_id, name))
        return

    association, created = Association.objects.get_or_create(
        name=name, abbreviation=abbreviation, bhv_id=bhv_id)
    template = ('CREATED Association: {}' if created
                else 'EXISTING Association: {}')
    self.stdout.write(template.format(association))

    page = logic.get_html(association.source_url())
    # The first <option> of the select is skipped by the XPath itself.
    for district_option in page.xpath(
            '//select[@name="orgID"]/option[position()>1]'):
        self.create_district(district_option, association)
def create_associations(self):
    """Import every association linked from ``settings.ROOT_SOURCE_URL``."""
    root_page = logic.get_html(settings.ROOT_SOURCE_URL)
    for association_link in root_page.xpath(
            '//div[@id="main-content"]/div/ul/li/a'):
        self.create_association(association_link)
def get_association_bhv_id(self, association_portal_url: str) -> int:
    """Return the association id embedded in the given portal page.

    The id is read from the ``data-og-id`` attribute of the element with
    id ``app``. The XPath result is unpacked into exactly one name, so a
    result with zero or several matches makes the unpacking fail.
    """
    portal_page = logic.get_html(association_portal_url)
    matches = portal_page.xpath('//div[@id="app"]/@data-og-id')
    (raw_id,) = matches
    return int(raw_id)
def create_league(self, league_link, district, season):
    """Import the league behind ``league_link``, its teams and retirements.

    The league is skipped (logged at debug level) when:

    * the ``leagues`` option is non-empty and does not contain its id,
    * its abbreviation is exactly ``TEST``,
    * its parsed name contains one of a fixed set of excluded substrings,
    * its page has no team links or no game rows,
    * it has fewer game rows than ``teams * (teams - 1)``,
    * ``League.is_youth`` is true and the ``youth`` option is not set.

    For a handful of league ids the parsed name is replaced by a
    hard-coded one before the youth check and before storing. When the
    ``skip_teams`` option is set, the function returns right after the
    league has been fetched or created.
    """
    abbreviation = league_link.text
    bhv_id = parsing.parse_league_bhv_id(league_link)

    selected = self.options['leagues']
    if selected and bhv_id not in selected:
        LOGGER.debug('SKIPPING League (options): %s %s',
                     bhv_id, abbreviation)
        return

    if abbreviation == 'TEST':
        LOGGER.debug('SKIPPING League (test league): %s %s',
                     bhv_id, abbreviation)
        return

    dom = logic.get_html(League.build_source_url(bhv_id))
    name = parsing.parse_league_name(dom)

    excluded_fragments = [
        'Platzierungsrunde', 'Meister', 'Freiwurf', 'Maxi', 'turnier',
        'wettbewerb', 'pokal', 'Test',
    ]
    if any(fragment in name for fragment in excluded_fragments):
        LOGGER.debug('SKIPPING League (name): %s %s', bhv_id, name)
        return

    team_links = parsing.parse_team_links(dom)
    if not team_links:
        LOGGER.debug('SKIPPING League (no team table): %s %s',
                     bhv_id, name)
        return

    game_rows = parsing.parse_game_rows(dom)
    if not game_rows:
        LOGGER.debug('SKIPPING League (no games): %s %s', bhv_id, name)
        return

    team_count = len(team_links)
    if len(game_rows) < team_count * (team_count - 1):
        LOGGER.debug('SKIPPING League (few games): %s %s',
                     bhv_id, abbreviation)
        return

    # Hard-coded replacement names for specific league ids; every other
    # id keeps the name parsed from the page.
    name_overrides = {
        5380: "Männer Kreisliga 2-1",
        5381: "Männer Kreisliga 2-2",
        7424: "Männer Kreisliga C Staffel 3",
        50351: "gemischte Jugend D Kreisliga A Staffel 1",
        52853: "männliche Jugend C Bezirksliga Staffel 2",
        58111: "Frauen Oberliga Rheinland-Pfalz/Saar 1",
        58116: "Frauen Oberliga Rheinland-Pfalz/Saar 2",
    }
    name = name_overrides.get(bhv_id, name)

    if League.is_youth(abbreviation, name) and not self.options['youth']:
        LOGGER.debug('SKIPPING League (youth league): %s %s %s',
                     bhv_id, abbreviation, name)
        return

    league, league_created = League.objects.get_or_create(
        name=name,
        abbreviation=abbreviation,
        district=district,
        season=season,
        bhv_id=bhv_id,
    )
    LOGGER.info(
        'CREATED League: %s' if league_created else 'EXISTING League: %s',
        league)

    if self.options['skip_teams']:
        return

    for team_link in team_links:
        create_team(team_link, league)

    check_retirements(parsing.parse_retirements(dom), league)