def _get_show_ids(self):
    """Get the ``dict`` of show ids per series by querying the `shows.php` page.

    Each show title is lower-cased and passed through ``self.clean_punctuation``
    before being used as a key. When ``series_year_re`` reports a year for a
    title, the year-less series name is registered as well, unless that key
    already exists.

    Titles that ``series_year_re`` cannot match are still stored under their
    cleaned full title; only the year-less alias is skipped for them.

    :return: show id per series, keyed by the lower-cased, punctuation-cleaned title.
    :rtype: dict
    :raises: whatever ``raise_for_status()`` raises on the `shows.php` response,
        and ``ValueError`` if the text after the ``/show/`` prefix of a link is
        not an integer.

    # patch: add punctuation cleaning

    """
    # get the show page
    logger.info('Getting show ids')
    r = self.session.get(self.server_url + 'shows.php', timeout=10)
    r.raise_for_status()

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # populate the show ids
    show_ids = {}
    for show in soup.select('td.version > h3 > a[href^="/show/"]'):
        show_clean = self.clean_punctuation(show.text.lower())
        # the selector guarantees the href starts with "/show/" (6 characters);
        # what follows is parsed as the numeric id
        show_id = int(show['href'][6:])
        show_ids[show_clean] = show_id

        match = series_year_re.match(show_clean)
        if match is None:
            # the pattern rejected this title: keep the full-title entry above
            # and move on instead of failing on ``None.group``
            continue

        if match.group(2) and match.group(1) not in show_ids:
            # year found, also add it without year
            show_ids[match.group(1)] = show_id

    logger.debug('Found %d show ids', len(show_ids))

    return show_ids
def test_series_year_re():
    """Check that ``series_year_re`` separates a punctuated title from its trailing year."""
    # title mixing apostrophe, colon, dash, dot, "!?" and a parenthesised country tag
    candidate = "That's: A-series.name!? (US) (2016)"
    expected_series = "That's: A-series.name!? (US)"
    expected_year = 2016

    found = series_year_re.match(candidate)

    assert found
    assert found.group('series') == expected_series
    assert int(found.group('year')) == expected_year