def _importer(self):
    """Build the OPDSImporter that serves this object's Collection.

    The importer shares this object's database session, takes its data
    source name from the Collection's data source, and is handed an
    AuthorNameCanonicalizer (bound to the same session) as its metadata
    client.
    """
    target_collection = self.collection
    name_canonicalizer = AuthorNameCanonicalizer(self._db)
    return OPDSImporter(
        self._db,
        target_collection,
        data_source_name=target_collection.data_source.name,
        metadata_client=name_canonicalizer,
    )
def __init__(self, _db=None, cmd_args=None):
    """Parse the command line and choose a canonicalizer.

    :param _db: Database session handed to the parent constructor.
    :param cmd_args: Command-line arguments passed through to
        ``parse_command_line``.

    When the parsed arguments have ``mock`` set, a
    MockAuthorNameCanonicalizer is used and ``base_path`` points at the
    "tests" directory next to this file. Otherwise a regular
    AuthorNameCanonicalizer is used and ``base_path`` is not set here.
    """
    super(CheckContributorNamesOnWeb, self).__init__(_db=_db)
    options = self.parse_command_line(_db=self._db, cmd_args=cmd_args)
    self.mock_mode = options.mock

    # Live mode: nothing else to configure.
    if not self.mock_mode:
        self.canonicalizer = AuthorNameCanonicalizer(self._db)
        return

    self.log.debug(
        "This is mocked run, with metadata coming from test files, rather than live OneClick connection."
    )
    # The test fixtures live in a "tests" directory beside this module.
    module_dir = os.path.split(__file__)[0]
    self.base_path = os.path.join(module_dir, "tests")
    self.canonicalizer = MockAuthorNameCanonicalizer(self._db)
def __init__(self, _db):
    """Keep the database session and build a canonicalizer on top of it.

    :param _db: Database session; stored as ``self._db`` and passed to
        the AuthorNameCanonicalizer constructor.
    """
    self._db = _db
    # The canonicalizer is created from the session just stored above.
    name_canonicalizer = AuthorNameCanonicalizer(self._db)
    self.canonicalizer = name_canonicalizer
def __init__(self, _db, canonicalizer=None):
    """Keep the database session and settle on a canonicalizer.

    :param _db: Database session; stored as ``self._db``.
    :param canonicalizer: Optional canonicalizer to use. Any falsy value
        (including the default ``None``) is replaced by a new
        AuthorNameCanonicalizer built on ``self._db``.
    """
    self._db = _db
    if canonicalizer:
        # The caller supplied one (e.g. a test double); use it as-is.
        self.canonicalizer = canonicalizer
    else:
        self.canonicalizer = AuthorNameCanonicalizer(self._db)
def process_item(self, identifier):
    """Gather OCLC metadata for an identifier and apply it to editions.

    Metadata is requested from ``self.api.info_for`` for the identifier
    itself and, when the identifier is an ISBN, for every equivalent
    OCLC Number as well. Each non-empty metadata record is cleaned up
    and applied to the editions primarily identified by its primary
    identifier, or to a newly created edition when there are none.

    :param identifier: The Identifier to process.
    :return: ``identifier`` on success. If an IOError is raised along
        the way, the result of ``self.failure`` instead; the failure is
        marked non-transient when the error message says the redirect
        location could not be found, transient otherwise.
    """
    try:
        new_info_counter = Counter()
        self.log.info("Processing identifier %r", identifier)
        metadatas = list(self.api.info_for(identifier))

        if identifier.type == Identifier.ISBN:
            # Currently info_for seeks the results of OCLC Work IDs only.
            # This segment will get the metadata of any equivalent OCLC
            # Numbers as well.
            equivalents = Identifier.recursively_equivalent_identifier_ids(
                self._db, [identifier.id])
            oclc_numbers = self._db.query(Identifier).\
                filter(Identifier.id.in_(equivalents)).\
                filter(Identifier.type == Identifier.OCLC_NUMBER).all()
            for oclc_number in oclc_numbers:
                metadatas.extend(self.api.info_for(oclc_number))

        # Drop empty results before doing any work on them.
        metadatas = [m for m in metadatas if m]
        for metadata in metadatas:
            other_identifier, ignore = metadata.primary_identifier.load(
                self._db)
            oclc_editions = other_identifier.primarily_identifies

            # Keep track of the number of editions OCLC associates
            # with this identifier.
            other_identifier.add_measurement(
                self.data_source, Measurement.PUBLISHED_EDITIONS,
                len(oclc_editions))

            # Clean up contributor information.
            self.apply_viaf_to_contributor_data(metadata)
            # Remove any empty ContributorData objects that may have
            # been created. A list is built explicitly so the result does
            # not depend on whether the builtin ``filter`` is lazy.
            metadata.contributors = [
                c for c in metadata.contributors
                if c.sort_name or c.display_name
            ]

            # When metadata is applied, it must be given a client that can
            # respond to 'canonicalize_author_name'. Usually this is an
            # OPDSImporter that reaches out to the Metadata Wrangler, but
            # in the case of being _on_ the Metadata Wrangler...:
            from canonicalize import AuthorNameCanonicalizer
            metadata_client = AuthorNameCanonicalizer(
                self._db, oclcld=self.api, viaf=self.viaf)

            num_new_isbns = self.new_isbns(metadata)
            new_info_counter['isbns'] += num_new_isbns
            if oclc_editions:
                # There are existing OCLC editions. Apply any new
                # information to them.
                for edition in oclc_editions:
                    metadata, new_info_counter = self.apply_metadata_to_edition(
                        edition, metadata, metadata_client, new_info_counter)
            else:
                # Create a new OCLC edition to hold the information.
                edition, ignore = get_one_or_create(
                    self._db, Edition, data_source=self.data_source,
                    primary_identifier=other_identifier)
                metadata, new_info_counter = self.apply_metadata_to_edition(
                    edition, metadata, metadata_client, new_info_counter)
                # Set the new OCLC edition's identifier equivalent to this
                # identifier so we know they're related.
                self.set_equivalence(identifier, metadata)

        self.log.info(
            "Total: %(editions)d editions, %(isbns)d ISBNs, "
            "%(descriptions)d descriptions, %(subjects)d classifications.",
            new_info_counter
        )
    except IOError as e:
        # ``e.message`` only exists on older interpreters; fall back to
        # the string form of the exception so the error handler itself
        # cannot raise AttributeError.
        message = getattr(e, "message", None) or str(e)
        if ", but couldn't find location" in message:
            exception = "OCLC doesn't know about this ISBN: %r" % e
            transient = False
        else:
            exception = "OCLC raised an error: %r" % e
            transient = True
        return self.failure(identifier, exception, transient=transient)
    return identifier
def setup(self):
    """Prepare a canonicalizer whose VIAF lookups go through a mock.

    After the parent ``setup`` runs, this creates a dedicated logger, a
    real AuthorNameCanonicalizer on the test database session, and a
    MockVIAFClientLookup that is installed as the canonicalizer's
    ``viaf`` attribute.
    """
    super(TestAuthorNameCanonicalizer, self).setup()
    self.log = logging.getLogger("Author Name Canonicalizer Test")
    self.canonicalizer = AuthorNameCanonicalizer(self._db)

    # Replace the canonicalizer's VIAF client with the mock lookup.
    mock_viaf = MockVIAFClientLookup(self._db, self.log)
    self.viaf_client = mock_viaf
    self.canonicalizer.viaf = mock_viaf