예제 #1
0
 def _importer(self):
     """Build the OPDSImporter used for this object's Collection.

     The importer receives the collection's data source name and, as its
     metadata client, an AuthorNameCanonicalizer created from the same
     database session.
     """
     target = self.collection
     name_canonicalizer = AuthorNameCanonicalizer(self._db)
     importer_options = dict(
         data_source_name=target.data_source.name,
         metadata_client=name_canonicalizer,
     )
     return OPDSImporter(self._db, target, **importer_options)
예제 #2
0
    def __init__(self, _db=None, cmd_args=None):
        """Parse the command line and pick the canonicalizer to use.

        :param _db: Database session handed to the parent constructor.
        :param cmd_args: Arguments forwarded to `parse_command_line`.
        """
        super(CheckContributorNamesOnWeb, self).__init__(_db=_db)

        options = self.parse_command_line(_db=self._db, cmd_args=cmd_args)
        self.mock_mode = options.mock

        if not self.mock_mode:
            self.canonicalizer = AuthorNameCanonicalizer(self._db)
        else:
            self.log.debug(
                "This is mocked run, with metadata coming from test files, rather than live OneClick connection."
            )
            # In mock mode base_path points at the "tests" directory that
            # sits next to this source file.
            script_dir = os.path.dirname(__file__)
            self.base_path = os.path.join(script_dir, "tests")
            self.canonicalizer = MockAuthorNameCanonicalizer(self._db)
예제 #3
0
 def __init__(self, _db):
     """Keep the database session and build an AuthorNameCanonicalizer on it.

     :param _db: Database session stored on the instance and passed to
         the canonicalizer.
     """
     self._db = _db
     self.canonicalizer = AuthorNameCanonicalizer(_db)
예제 #4
0
 def __init__(self, _db, canonicalizer=None):
     """Keep the database session and the canonicalizer to use.

     :param _db: Database session stored on the instance.
     :param canonicalizer: Canonicalizer to use. Any falsy value
         (including the default None) is replaced by a new
         AuthorNameCanonicalizer built on the stored session.
     """
     self._db = _db
     if not canonicalizer:
         canonicalizer = AuthorNameCanonicalizer(self._db)
     self.canonicalizer = canonicalizer
예제 #5
0
    def process_item(self, identifier):
        """Look up `identifier` through `self.api` and record the results.

        Every metadata object returned by `self.api.info_for` is applied to
        the editions its primary identifier already identifies, or to a
        newly created edition when there are none. For an ISBN, metadata
        for every equivalent OCLC Number is processed as well.

        :param identifier: The Identifier to process.
        :return: `identifier` on success. If an IOError is raised, the
            result of `self.failure`; the failure is marked transient
            unless the error message says no location could be found.
        """
        try:
            new_info_counter = Counter()
            self.log.info("Processing identifier %r", identifier)
            metadatas = list(self.api.info_for(identifier))

            if identifier.type == Identifier.ISBN:
                # Currently info_for seeks the results of OCLC Work IDs only.
                # This segment will get the metadata of any equivalent
                # OCLC Numbers as well.
                equivalents = Identifier.recursively_equivalent_identifier_ids(
                    self._db, [identifier.id])
                oclc_numbers = self._db.query(Identifier).filter(
                    Identifier.id.in_(equivalents)
                ).filter(
                    Identifier.type == Identifier.OCLC_NUMBER
                ).all()
                for oclc_number in oclc_numbers:
                    metadatas.extend(self.api.info_for(oclc_number))

            # Drop empty results for every identifier type. This used to
            # happen only inside the OCLC Number loop above, so a falsy
            # entry could reach the attribute accesses below whenever the
            # identifier was not an ISBN or had no equivalent OCLC Numbers.
            metadatas = [m for m in metadatas if m]

            for metadata in metadatas:
                other_identifier, ignore = metadata.primary_identifier.load(
                    self._db)
                oclc_editions = other_identifier.primarily_identifies

                # Keep track of the number of editions OCLC associates
                # with this identifier.
                other_identifier.add_measurement(
                    self.data_source, Measurement.PUBLISHED_EDITIONS,
                    len(oclc_editions))

                # Clean up contributor information.
                self.apply_viaf_to_contributor_data(metadata)
                # Remove any empty ContributorData objects that may have
                # been created. A list comprehension keeps this a real
                # list on Python 3, where filter() returns a one-shot
                # iterator.
                metadata.contributors = [
                    c for c in metadata.contributors
                    if c.sort_name or c.display_name
                ]

                # When metadata is applied, it must be given a client that can
                # respond to 'canonicalize_author_name'. Usually this is an
                # OPDSImporter that reaches out to the Metadata Wrangler, but
                # in the case of being _on_ the Metadata Wrangler...:
                from canonicalize import AuthorNameCanonicalizer
                metadata_client = AuthorNameCanonicalizer(self._db,
                                                          oclcld=self.api,
                                                          viaf=self.viaf)

                num_new_isbns = self.new_isbns(metadata)
                new_info_counter['isbns'] += num_new_isbns
                if oclc_editions:
                    # There are existing OCLC editions. Apply any new
                    # information to them.
                    for edition in oclc_editions:
                        metadata, new_info_counter = self.apply_metadata_to_edition(
                            edition, metadata, metadata_client,
                            new_info_counter)
                else:
                    # Create a new OCLC edition to hold the information.
                    edition, ignore = get_one_or_create(
                        self._db,
                        Edition,
                        data_source=self.data_source,
                        primary_identifier=other_identifier)
                    metadata, new_info_counter = self.apply_metadata_to_edition(
                        edition, metadata, metadata_client, new_info_counter)
                    # Set the new OCLC edition's identifier equivalent to this
                    # identifier so we know they're related.
                    self.set_equivalence(identifier, metadata)

                self.log.info(
                    "Total: %(editions)d editions, %(isbns)d ISBNs, "
                    "%(descriptions)d descriptions, %(subjects)d classifications.",
                    new_info_counter
                )
        except IOError as e:
            # Exceptions have no `.message` attribute on Python 3; prefer it
            # when present and non-empty, otherwise use the string form.
            message = getattr(e, "message", None) or str(e)
            if ", but couldn't find location" in message:
                exception = "OCLC doesn't know about this ISBN: %r" % e
                transient = False
            else:
                exception = "OCLC raised an error: %r" % e
                transient = True
            return self.failure(identifier, exception, transient=transient)
        return identifier
예제 #6
0
 def setup(self):
     """Prepare a canonicalizer whose VIAF client is a mock lookup.

     After the parent setup runs, this creates a named logger, an
     AuthorNameCanonicalizer and a MockVIAFClientLookup, then installs the
     mock as the canonicalizer's `viaf` attribute.
     """
     super(TestAuthorNameCanonicalizer, self).setup()
     logger = logging.getLogger("Author Name Canonicalizer Test")
     self.log = logger
     canonicalizer = AuthorNameCanonicalizer(self._db)
     self.canonicalizer = canonicalizer
     mock_viaf = MockVIAFClientLookup(self._db, logger)
     self.viaf_client = mock_viaf
     canonicalizer.viaf = mock_viaf