コード例 #1
0
ファイル: utils.py プロジェクト: dskrypa/music_manager
def link_client_and_title(link: Link) -> tuple[MediaWikiClient, str]:
    """
    Resolve the client and page title that a link points to.

    :param link: The link to resolve.
    :return: A 2-tuple of (client, title).  For an interwiki link, the client is the interwiki client obtained from
      the client for the link's source site, and the title is the interwiki title.  Otherwise, the client is the one
      for the link's source site, and the title is the link's own title.
    :raises NoLinkSite: If the link has no source site.
    :raises NoLinkTarget: If the link is not an interwiki link and has no title.
    """
    if not link.source_site:
        raise NoLinkSite(link)

    source_client = MediaWikiClient(link.source_site)
    page_title = link.title
    if not link.interwiki:
        # A plain link is only usable when it actually names a page
        if not page_title:
            raise NoLinkTarget(link)
        return source_client, page_title

    interwiki_key, page_title = link.iw_key_title
    return source_client.interwiki_client(interwiki_key), page_title
コード例 #2
0
ファイル: base.py プロジェクト: dskrypa/music_manager
    def find_from_links(cls: Type[WE], links: Iterable[Link]) -> WE:
        """
        Fetch the pages that the given links point to, and build an entity from the first suitable page.

        :param links: An iterable that yields Link nodes.
        :return: The first instance of this class for a link that has a valid category for this class or a subclass
          thereof
        :raises EntityTypeError: The most recent such error, if no page produced an entity and at least one page was
          rejected with this error.
        :raises AmbiguousPageError: Re-raised immediately, after adding the originating link as context.
        :raises ValueError: If nothing was returned and no EntityTypeError was recorded.
        """
        links_by_client_and_title = site_titles_map(links)
        pages_by_site, _errors = MediaWikiClient.get_multi_site_pages(links_by_client_and_title)

        last_exc = None
        for site_pages in pages_by_site.values():
            for title, page in site_pages.items():
                try:
                    return cls._by_category(page)
                except EntityTypeError as e:
                    # Remember the failure, but keep trying the remaining pages
                    last_exc = e
                except AmbiguousPageError as e:
                    # NOTE: the local must stay named ``link`` - the ``{link=}`` specifier embeds the name in the message
                    link = links_by_client_and_title[page._client][title]
                    e.add_context(f'While processing {link=} from {link.root}')
                    raise

        if last_exc:
            raise last_exc
        raise ValueError(f'No pages were found')
コード例 #3
0
ファイル: base.py プロジェクト: dskrypa/music_manager
    def _from_site_title_map(
        cls: Type[WE],
        site_title_map: Mapping[Union[str, MediaWikiClient], Iterable[str]],
        search: bool = False,
        strict: int = 2,
        title_name_map=None,
    ) -> dict[Union[str, Name], WE]:
        """
        Fetch the pages for the given titles from the given sites, and build one entity per title.

        :param site_title_map: Mapping of site (or client) to the titles to retrieve from that site.
        :param search: Passed through to ``MediaWikiClient.get_multi_site_pages``.
        :param strict: Error handling strictness.  If greater than 1, EntityTypeError and AmbiguousPageError are
          propagated.  If 1, they are logged as warnings.  If 0, they are logged on debug level.
        :param title_name_map: Optional mapping of title to the name that should be used for that title.
        :return: Mapping of name (when a truthy one was provided for the title, otherwise the title) to the entity
          that was created for it.  Titles whose entity could not be created are omitted.
        """
        names_by_title = title_name_map or {}
        pages_by_site, fetch_errors = MediaWikiClient.get_multi_site_pages(site_title_map, search=search)
        # Fetch errors are reported, but do not prevent the successfully retrieved pages from being processed
        for title, error in fetch_errors.items():
            log.error(f'Error processing {title=!r}: {error}', extra={'color': 9})

        entities = {}
        for title, pages in multi_site_page_map(pages_by_site).items():
            name = names_by_title.get(title)
            try:
                entity = cls._from_multi_site_pages(pages, name, strict)
            except (EntityTypeError, AmbiguousPageError) as e:
                if strict > 1:
                    raise
                level = logging.WARNING if strict else logging.DEBUG
                log.log(level, e, extra={'color': 9})
            else:
                entities[name or title] = entity

        return entities
コード例 #4
0
    def from_name(cls, name: str) -> 'Soundtrack':
        """
        Search wiki.d-addicts.com for the given name, and build an entity from the first usable result.

        Each search result is first tried as a page for this class.  If it is rejected with an EntityTypeError, it is
        tried as a TVSeries page, and that series' soundtrack links are followed via :meth:`find_from_links`.

        :param name: The text to search for.
        :return: The entity for the first result that could be used.
        :raises ValueError: If none of the search results could be used.
        """
        wiki_client = MediaWikiClient('wiki.d-addicts.com')
        results = wiki_client.get_pages(name, search=True, gsrwhat='text')
        log.debug(f'Search results for {name=!r}: {results}')
        for page in results.values():
            try:
                return cls._by_category(page)
            except EntityTypeError:
                # Not a page for this class; the fallback stays inside this handler so that any error it raises
                # keeps the original EntityTypeError as its context
                try:
                    series = TVSeries._by_category(page)
                except EntityTypeError:
                    log.debug(
                        f'Found {page=!r} that is neither an OST or a TVSeries'
                    )
                    continue
                return cls.find_from_links(series.soundtrack_links())

        raise ValueError(f'No pages were found for OSTs matching {name!r}')
コード例 #5
0
 def _get_lang_from_artist_template(self):
     """
     Look up this page's language in the artist's template page.

     The first template found in this page's sections that has no value and whose name equals the artist's English
     name is used.  The ``Template:<name>`` page is fetched from that template's site, and each of its sections whose
     key starts with ``Korean`` or ``Japanese`` is checked for a Link to this page's title.

     :return: ``'Korean'`` or ``'Japanese'`` for the first matching section that links to this page.  None if there
       was no such section, or no matching template.
     """
     for tmpl in self.page.sections.find_all(Template, True):
         if not (tmpl.name == self.artist.name.english and tmpl.value is None):
             continue

         client = MediaWikiClient(tmpl.root.site)
         template_page = client.get_page(f'Template:{tmpl.name}')
         for section, values in template_page.sections.content.zipped.items():
             lang = next((prefix for prefix in ('Korean', 'Japanese') if section.startswith(prefix)), None)
             if not lang:
                 continue
             # A section may hold a single Link or an iterable of nodes; handle both the same way
             nodes = (values,) if isinstance(values, Link) else values
             if any(isinstance(node, Link) and self.page.title == node.title for node in nodes):
                 return lang
         # Only the first matching template is examined
         break
コード例 #6
0
ファイル: base.py プロジェクト: dskrypa/music_manager
 def from_title(
     cls: Type[WE],
     title: str,
     sites: StrOrStrs = None,
     search: bool = True,
     research: bool = False,
     name: Optional[Name] = None,
     strict: int = 2,
     **kwargs,
 ) -> WE:
     """
     Build an entity from the page(s) with the given title on the given sites.

     :param str title: A page title
     :param iterable sites: A list or other iterable that yields site host strings
     :param bool search: Whether the provided title should also be searched for, in case there is not an exact match.
     :param bool research: If only some of the sites returned a hit, and the resulting entity has an English name
       that differs from the given title, search the remaining sites again using that English name
     :param Name name: The Name of the entity to retrieve
     :param int strict: Error handling strictness.  If 2 (default), let all exceptions be propagated.  If 1, log
       EntityTypeError and AmbiguousPageError as a warning.  If 0, log those errors on debug level.
     :return: A WikiEntity (or subclass thereof) that represents the page(s) with the given title.  As written in
       this block, nothing is returned when no pages were retrieved.
     """
     sites = _sites(sites)
     pages, _errors = MediaWikiClient.get_multi_site_page(title, sites, search=search)
     if pages:
         entity = cls._from_multi_site_pages(pages.values(), name, strict=strict, **kwargs)
         partial_hit = search and research and 0 < len(entity._pages) < len(sites)
         if partial_hit:
             entity_name = entity.name
             # NOTE: the local must stay named ``eng`` - the ``{eng=!r}`` specifier embeds the name in the message
             eng = entity_name.english if entity_name else None
             if eng and eng != title:
                 log.debug(
                     f'Returning {cls.__name__}.from_title for {eng=!r}'
                 )
                 missing_sites = set(sites).difference(entity._pages)
                 # NOTE(review): ``name`` and ``strict`` are not forwarded to the follow-up lookup, so it runs
                 # with their defaults - confirm that this is intended
                 research_entity = cls.from_title(eng, missing_sites, search, False, **kwargs)
                 research_entity._add_pages(entity._pages)
                 return research_entity
         return entity
コード例 #7
0
ファイル: base.py プロジェクト: dskrypa/music_manager
 def from_name(cls, name: str, site: str) -> 'TemplateEntity':
     """
     Build an entity from the ``Template:<name>`` page on the given site.

     :param name: The template name, without the ``Template:`` prefix.
     :param site: The site to retrieve the template page from.
     :return: The result of ``cls._by_category`` for the retrieved page.
     """
     client = MediaWikiClient(site)
     return cls._by_category(client.get_page(f'Template:{name}'))
コード例 #8
0
ファイル: base.py プロジェクト: dskrypa/music_manager
 def from_url(cls: Type[WE], url: str, **kwargs) -> WE:
     """
     Build an entity from the page for the given article URL.

     :param url: The URL of the article.
     :param kwargs: Additional keyword arguments to pass to ``cls._by_category``.
     :return: The result of ``cls._by_category`` for the retrieved page.
     """
     page = MediaWikiClient.page_for_article(url)
     return cls._by_category(page, **kwargs)
コード例 #9
0
ファイル: abc.py プロジェクト: dskrypa/music_manager
 def __init_subclass__(cls, site: str, domain: Optional[str] = None):
     """
     Register the new subclass as the parser for a site, and give it a client for that site.

     :param site: The site that the subclass handles; used as the key in ``WikiParser._site_parsers`` and to
       create the client stored as ``cls.client``.
     :param domain: If provided, the subclass is also stored in ``WikiParser._domain_parsers`` under this value
       prefixed with a ``.``.
     """
     WikiParser._site_parsers[site] = cls
     if domain:
         domain_key = '.' + domain
         WikiParser._domain_parsers[domain_key] = cls
     cls.client = MediaWikiClient(site)
コード例 #10
0
ファイル: discography.py プロジェクト: dskrypa/music_manager
    def process_entries(self) -> Dict[str, List[DiscographyEntry]]:
        """
        Create DiscographyEntry objects for the collected discography entries, grouped by site.

        The pages for ``self.entries_by_site`` are fetched, and each retrieved page is matched back to its
        (disco entry, link) pair by title.  An entry is created from the page when possible.  When the page is
        rejected with an EntityTypeError or AmbiguousPageError, the entry has not already been created, and it has
        no remaining links to process, then the entry is created from the disco entry alone.  The same fallback is
        used for titles for which no page was retrieved, and for the entries in ``self.no_link_entries``.

        ``self.remaining`` and ``self.created_entry`` are updated along the way, and matched titles are removed from
        the per-site maps in ``self.entries_by_site``.

        :return: Mapping of site to the list of entries that were created for that site.
        """
        discography: Dict[str, List[DiscographyEntry]] = defaultdict(list)

        def add_without_page(target_site, disco_entry):
            """Create an entry from the disco entry alone; an EntityTypeError while doing so is ignored."""
            try:
                # The site's list is looked up before the entry is built, so the key exists even on failure
                discography[target_site].append(
                    DiscographyEntry.from_disco_entry(disco_entry, artist=self.artist)
                )
            except EntityTypeError:
                pass
            else:
                self.created_entry[disco_entry] = True

        pages_by_site, errors_by_site = MediaWikiClient.get_multi_site_pages(self.entries_by_site)
        for site_client, title_entry_map in self.entries_by_site.items():
            site = site_client.host
            for title, page in pages_by_site.get(site, {}).items():
                try:
                    disco_entry, link = title_entry_map.pop(title)
                except KeyError:
                    log.error(f'No disco entry was found for {title=!r} from {site=}', extra={'color': 9})
                    continue

                src_site = disco_entry.source.site
                try:
                    discography[src_site].append(
                        DiscographyEntry.from_page(page, disco_entry=disco_entry, artist=self.artist)
                    )
                except (EntityTypeError, AmbiguousPageError) as e:
                    self.remaining[disco_entry] -= 1
                    if self.created_entry[disco_entry]:
                        msg = 'Type mismatch' if isinstance(e, EntityTypeError) else 'Ambiguous page error'
                        log.log(8, f'{msg} for additional {link=} associated with {disco_entry}: {e}')
                    elif self.remaining[disco_entry]:
                        log.log(8, f'{e}, but {self.remaining[disco_entry]} associated links are pending processing')
                    else:
                        log.log(9, f'{e}, and no other links are available')
                        # Called from within the handler so that any error raised keeps ``e`` as its context
                        add_without_page(src_site, disco_entry)
                except Exception:
                    self.remaining[disco_entry] -= 1
                    log.error(
                        f'Unexpected error processing page={title!r} for {disco_entry=}:',
                        exc_info=True,
                        extra={'color': 9},
                    )
                else:
                    self.remaining[disco_entry] -= 1
                    self.created_entry[disco_entry] = True
                    disco_entry._link = link

            # Whatever is left in the map had no page retrieved for its title
            for title, (disco_entry, link) in title_entry_map.items():
                if self.created_entry[disco_entry]:
                    continue
                log.log(9, f'No page found for {title=!r} / {link=} / entry={disco_entry}')
                add_without_page(disco_entry.source.site, disco_entry)

        for site, disco_entries in self.no_link_entries.items():
            # Ensure the site is present in the results, even if none of its entries end up being created
            discography.setdefault(site, [])
            for disco_entry in disco_entries:
                if not self.created_entry[disco_entry]:
                    add_without_page(site, disco_entry)

        return discography