def book_info_to_metadata(self, subgraph, book_info):
    """Filters raw book information to exclude irrelevant or unhelpful data.

    :param subgraph: Passed through unchanged to ``extract_useful_data``.
    :param book_info: Raw book information; checked with
        ``_has_relevant_types`` and then handed to ``extract_useful_data``.
    :returns: None if information is unhelpful; metadata object otherwise.
    """
    if not self._has_relevant_types(book_info):
        # This book is not available in any format we're
        # interested in from a metadata perspective.
        return None

    (oclc_id_type,
     oclc_id,
     titles,
     descriptions,
     subjects,
     creator_uris,
     publisher_names,
     publication_dates,
     example_uris) = self.extract_useful_data(subgraph, book_info)

    if not oclc_id_type or not oclc_id:
        # Without both halves of the identifier there is nothing to
        # attach the metadata to.
        return None

    self.log.info("Processing edition %s: %r", oclc_id, titles)
    metadata = Metadata(self.source)
    metadata.primary_identifier = IdentifierData(
        type=oclc_id_type, identifier=oclc_id
    )
    if titles:
        metadata.title = titles[0]

    for d in publication_dates:
        # Only the first four characters (the year) are parsed. Every
        # parseable entry overwrites the previous one, so the last wins.
        try:
            metadata.published = datetime.datetime.strptime(d[:4], "%Y")
        except (TypeError, ValueError):
            # Best effort: an entry that is not sliceable text or does
            # not start with a four-digit year is skipped.
            continue

    # NOTE(review): the visible part of this method ends here without
    # returning `metadata`, and several unpacked values (descriptions,
    # subjects, ...) are unused -- the remainder may be missing. Confirm.
def book_info_to_metadata(self, subgraph, book_info):
    """Filters raw book information to exclude irrelevant or unhelpful data.

    :param subgraph: Forwarded as-is to ``extract_useful_data``.
    :param book_info: Raw book information; first screened by
        ``_has_relevant_types``, then given to ``extract_useful_data``.
    :returns: None if information is unhelpful; metadata object otherwise.
    """
    if not self._has_relevant_types(book_info):
        # This book is not available in any format we're
        # interested in from a metadata perspective.
        return None

    (oclc_id_type, oclc_id, titles, descriptions, subjects,
     creator_uris, publisher_names, publication_dates,
     example_uris) = self.extract_useful_data(subgraph, book_info)

    if not oclc_id_type or not oclc_id:
        # An identifier needs both a type and a value to be usable.
        return None

    self.log.info("Processing edition %s: %r", oclc_id, titles)
    metadata = Metadata(self.source)
    metadata.primary_identifier = IdentifierData(
        type=oclc_id_type, identifier=oclc_id)
    if titles:
        metadata.title = titles[0]

    for d in publication_dates:
        # Parse just the leading four characters as a year; later
        # parseable entries replace earlier ones.
        try:
            metadata.published = datetime.datetime.strptime(d[:4], "%Y")
        except (TypeError, ValueError):
            # Best effort: skip values that are not text or that do not
            # begin with a four-digit year.
            continue

    # NOTE(review): nothing visible here returns `metadata`, and several
    # unpacked values (descriptions, subjects, ...) go unused -- the rest
    # of the method may be missing from this view. Confirm.
def lookup_info_to_metadata(self, lookup_representation):
    """Transforms a NoveList JSON representation into a Metadata object.

    :param lookup_representation: Object whose ``content`` attribute holds
        the JSON document to parse.
    :returns: A Metadata object, or None when the content is empty, the
        document has no usable 'TitleInfo' (or no 'ui' identifier inside
        it), or nothing of interest could be extracted.
    """
    if not lookup_representation.content:
        return None

    lookup_info = json.loads(lookup_representation.content)
    # A document with no 'TitleInfo' key is handled the same way as one
    # whose 'TitleInfo' is null, instead of raising KeyError.
    book_info = lookup_info.get('TitleInfo')
    novelist_identifier = book_info.get('ui') if book_info else None
    if not novelist_identifier:
        # NoveList didn't know the ISBN.
        return None

    primary_identifier, ignore = Identifier.for_foreign_id(
        self._db, Identifier.NOVELIST_ID, novelist_identifier)
    metadata = Metadata(self.source, primary_identifier=primary_identifier)

    # Get the equivalent ISBN identifiers.
    metadata.identifiers += self._extract_isbns(book_info)

    author = book_info.get('author')
    if author:
        metadata.contributors.append(ContributorData(sort_name=author))

    description = book_info.get('description')
    if description:
        metadata.links.append(
            LinkData(rel=Hyperlink.DESCRIPTION, content=description,
                     media_type=Representation.TEXT_PLAIN))

    audience_level = book_info.get('audience_level')
    if audience_level:
        metadata.subjects.append(
            SubjectData(Subject.FREEFORM_AUDIENCE, audience_level))

    novelist_rating = book_info.get('rating')
    if novelist_rating:
        metadata.measurements.append(
            MeasurementData(Measurement.RATING, novelist_rating))

    # Extract feature content if it is available.
    series_info = None
    appeals_info = None
    lexile_info = None
    goodreads_info = None
    recommendations_info = None
    feature_content = lookup_info.get('FeatureContent')
    if feature_content:
        series_info = feature_content.get('SeriesInfo')
        appeals_info = feature_content.get('Appeals')
        lexile_info = feature_content.get('LexileInfo')
        goodreads_info = feature_content.get('GoodReads')
        recommendations_info = feature_content.get('SimilarTitles')

    # get_series_information also reports which book_info key holds the
    # title to use.
    metadata, title_key = self.get_series_information(
        metadata, series_info, book_info)
    metadata.title = book_info.get(title_key)
    subtitle = TitleProcessor.extract_subtitle(
        metadata.title, book_info.get('full_title'))
    metadata.subtitle = self._scrub_subtitle(subtitle)

    # TODO: How well do we trust this data? We could conceivably bump up
    # the weight here.
    if appeals_info:
        # Take the genres from the first appeal that lists any, then stop.
        for appeal in appeals_info:
            genres = appeal.get('genres')
            if genres:
                for genre in genres:
                    metadata.subjects.append(
                        SubjectData(Subject.TAG, genre['Name']))
                break

    if lexile_info:
        metadata.subjects.append(
            SubjectData(Subject.LEXILE_SCORE, lexile_info['Lexile']))

    if goodreads_info:
        metadata.measurements.append(
            MeasurementData(Measurement.RATING,
                            goodreads_info['average_rating']))

    metadata = self.get_recommendations(metadata, recommendations_info)

    # If nothing interesting comes from the API, ignore it.
    if not (metadata.measurements or metadata.series_position or
            metadata.series or metadata.subjects or metadata.links or
            metadata.subtitle or metadata.recommendations):
        metadata = None
    return metadata
def lookup_info_to_metadata(self, lookup_representation):
    """Transforms a NoveList JSON representation into a Metadata object.

    :param lookup_representation: Object whose ``content`` attribute holds
        the JSON document to parse.
    :returns: A Metadata object, or None when the content is empty, the
        document carries no usable 'TitleInfo' / 'ui' identifier, or
        nothing of interest could be extracted.
    """
    if not lookup_representation.content:
        return None

    lookup_info = json.loads(lookup_representation.content)
    # Use .get() so a document lacking 'TitleInfo' takes the same
    # "unknown title" path as a null entry rather than raising KeyError.
    book_info = lookup_info.get('TitleInfo')
    novelist_identifier = book_info.get('ui') if book_info else None
    if not novelist_identifier:
        # NoveList didn't know the ISBN.
        return None

    primary_identifier, ignore = Identifier.for_foreign_id(
        self._db, Identifier.NOVELIST_ID, novelist_identifier
    )
    metadata = Metadata(self.source, primary_identifier=primary_identifier)

    # Get the equivalent ISBN identifiers.
    metadata.identifiers += self._extract_isbns(book_info)

    author = book_info.get('author')
    if author:
        metadata.contributors.append(ContributorData(sort_name=author))

    description = book_info.get('description')
    if description:
        metadata.links.append(LinkData(
            rel=Hyperlink.DESCRIPTION, content=description,
            media_type=Representation.TEXT_PLAIN
        ))

    audience_level = book_info.get('audience_level')
    if audience_level:
        metadata.subjects.append(SubjectData(
            Subject.FREEFORM_AUDIENCE, audience_level
        ))

    novelist_rating = book_info.get('rating')
    if novelist_rating:
        metadata.measurements.append(MeasurementData(
            Measurement.RATING, novelist_rating
        ))

    # Extract feature content if it is available.
    series_info = None
    appeals_info = None
    lexile_info = None
    goodreads_info = None
    recommendations_info = None
    feature_content = lookup_info.get('FeatureContent')
    if feature_content:
        series_info = feature_content.get('SeriesInfo')
        appeals_info = feature_content.get('Appeals')
        lexile_info = feature_content.get('LexileInfo')
        goodreads_info = feature_content.get('GoodReads')
        recommendations_info = feature_content.get('SimilarTitles')

    # The series helper hands back the metadata together with the
    # book_info key under which the title should be read.
    metadata, title_key = self.get_series_information(
        metadata, series_info, book_info
    )
    metadata.title = book_info.get(title_key)
    subtitle = TitleProcessor.extract_subtitle(
        metadata.title, book_info.get('full_title')
    )
    metadata.subtitle = self._scrub_subtitle(subtitle)

    if appeals_info:
        # Only the first appeal that actually lists genres contributes
        # tags; later appeals are not consulted.
        for appeal in appeals_info:
            genres = appeal.get('genres')
            if genres:
                for genre in genres:
                    metadata.subjects.append(SubjectData(
                        Subject.TAG, genre['Name']
                    ))
                break

    if lexile_info:
        metadata.subjects.append(SubjectData(
            Subject.LEXILE_SCORE, lexile_info['Lexile']
        ))

    if goodreads_info:
        metadata.measurements.append(MeasurementData(
            Measurement.RATING, goodreads_info['average_rating']
        ))

    metadata = self.get_recommendations(metadata, recommendations_info)

    # If nothing interesting comes from the API, ignore it.
    if not (metadata.measurements or metadata.series_position or
            metadata.series or metadata.subjects or metadata.links or
            metadata.subtitle or metadata.recommendations):
        metadata = None
    return metadata