def test_create_item(self):
    """A DisplayItem and a Collection can be created and linked."""
    beet_beacon_major_id = '65370'
    beet_beacon_minor_id = '49339'
    beet_beacon_title = 'Beet Beacon'
    lemon_beacon_major_id = '48448'
    lemon_beacon_minor_id = '18544'
    lemon_beacon_title = 'Lemon Tart Beacon'

    # Create a brand-new DisplayItem.
    item, is_new = get_one_or_create(
        self._db, DisplayItem,
        create_method_kwargs=dict(
            beacon_major_id=beet_beacon_major_id,
            beacon_minor_id=beet_beacon_minor_id,
        ),
        id=self._id,
    )
    eq_(True, is_new)

    # Create a brand-new Collection.
    collection, is_new = get_one_or_create(
        self._db, Collection,
        create_method_kwargs=dict(
            name="Cool Books",
            curator="Jane Curator",
        ),
        id=self._id,
    )
    eq_(True, is_new)

    # The item can be attached to the collection.
    item.collections.append(collection)
    eq_(collection.id, item.collections[0].id)
def package_setup():
    """Make sure the database schema is initialized and initial
    data is in place.
    """
    engine, connection = DatabaseTest.get_database_connection()

    # First, recreate the schema.
    #
    # Base.metadata.drop_all(connection) doesn't work here, so we
    # approximate by dropping everything except the materialized
    # views.
    for table in reversed(Base.metadata.sorted_tables):
        # Tables are emptied (not dropped) in reverse dependency order
        # so foreign-key constraints are not violated.
        if not table.name.startswith('mv_'):
            engine.execute(table.delete())
    Base.metadata.create_all(connection)

    # Initialize basic database data needed by the application.
    _db = Session(connection)
    SessionManager.initialize_data(_db)

    # Create the patron used by the dummy authentication mechanism.
    # TODO: This can be probably be moved to circulation.
    get_one_or_create(
        _db, Patron, authorization_identifier="200",
        create_method_kwargs=dict(external_identifier="200200200")
    )
    _db.commit()

    # Release this setup connection; tests open their own.
    connection.close()
    engine.dispose()
def _collection(self, name=u"Faketown Public Library"):
    """Find or create a Collection (and its DataSource) with `name`."""
    source, _ = get_one_or_create(self._db, DataSource, name=name)
    collection, _ = get_one_or_create(
        self._db, Collection,
        name=name,
        data_source=source,
        client_id=u"abc",
        client_secret=u"def",
    )
    return collection
def to_collection(self, _db):
    """Find or create a Collection object for this Overdrive Advantage
    account.

    :return: a 2-tuple of Collections (primary Overdrive collection,
    Overdrive Advantage collection)
    """
    # First find the parent Collection.
    parent = get_one(
        _db, Collection,
        external_account_id=self.parent_library_id,
        protocol=Collection.OVERDRIVE,
    )
    if not parent:
        # Without the parent's credentials we can't access the child.
        raise ValueError(
            "Cannot create a Collection whose parent does not already exist."
        )

    name = "%s / %s" % (parent.name, self.name)
    child, _ = get_one_or_create(
        _db, Collection,
        parent_id=parent.id,
        protocol=Collection.OVERDRIVE,
        external_account_id=self.library_id,
        create_method_kwargs=dict(name=name),
    )
    # Set or update the name of the collection to reflect the name of
    # the library, just in case that name has changed.
    child.name = name
    return parent, child
def __init__(self, _db, collection=None, *args, **kwargs):
    """Set up a mock Overdrive API client.

    :param _db: A database session.
    :param collection: Collection to operate on. If None, a basic
        Overdrive Collection is created and attached to the Library.
    """
    # Records of simulated HTTP traffic, for test assertions.
    self.access_token_requests = []
    self.requests = []
    self.responses = []
    if not collection:
        # OverdriveAPI needs a Collection, but none was provided.
        # Just create a basic one.
        library = Library.instance(_db)
        collection, ignore = get_one_or_create(
            _db, Collection,
            name="Test Overdrive Collection",
            protocol=Collection.OVERDRIVE,
            create_method_kwargs=dict(external_account_id=u'c'))
        # Dummy credentials -- good enough for a mock.
        collection.external_integration.username = u'a'
        collection.external_integration.password = u'b'
        collection.external_integration.set_setting('website_id', 'd')
        library.collections.append(collection)
    # The constructor will always make a request for the collection token.
    self.queue_response(
        200, content=self.mock_collection_token("collection token"))
    self.access_token_response = self.mock_access_token_response(
        "bearer token")
    super(MockOverdriveAPI, self).__init__(_db, collection, *args, **kwargs)
def _customlist(self, foreign_identifier=None, name=None,
                data_source_name=DataSource.NYT, num_entries=1,
                entries_exist_as_works=True
                ):
    """Find or create a CustomList and populate it with entries.

    :param num_entries: Number of entries to add to the list.
    :param entries_exist_as_works: If True, each entry is backed by a
        full Work; otherwise only a bare Edition is created.
    :return: 2-tuple (CustomList, list of Editions added).
    """
    data_source = DataSource.lookup(self._db, data_source_name)
    foreign_identifier = foreign_identifier or self._str
    now = datetime.utcnow()
    customlist, ignore = get_one_or_create(
        self._db, CustomList,
        create_method_kwargs=dict(
            created=now,
            updated=now,
            name=name or self._str,
            description=self._str,
        ),
        data_source=data_source,
        foreign_identifier=foreign_identifier
    )

    editions = []
    for i in range(num_entries):
        if entries_exist_as_works:
            work = self._work(with_open_access_download=True)
            edition = work.presentation_edition
        else:
            edition = self._edition(
                data_source_name, title="Item %s" % i)
            edition.permanent_work_id="Permanent work ID %s" % self._str
        customlist.add_entry(
            edition, "Annotation %s" % i, first_appearance=now)
        editions.append(edition)
    return customlist, editions
def do_run(self, _db=None, cmd_args=None, output=sys.stdout):
    """Configure the Adobe Vendor ID integration from command-line
    arguments, then commit the new settings.

    :raises ValueError: if the node value is not hexadecimal or a
        delegate URL does not end with "/AdobeAuth/".
    """
    _db = _db or self._db
    parsed = self.parse_command_line(_db, cmd_args=cmd_args)
    integration, is_new = get_one_or_create(
        _db, ExternalIntegration,
        goal=ExternalIntegration.DRM_GOAL,
        protocol=ExternalIntegration.ADOBE_VENDOR_ID
    )
    c = Configuration

    # All node values are string representations of hexidecimal
    # numbers. The result is unused; the point of the conversion is
    # validation -- int() raises ValueError on a malformed node value
    # before any settings are written.
    hex_node = int(parsed.node_value, 16)

    integration.setting(c.ADOBE_VENDOR_ID).value = parsed.vendor_id
    integration.setting(c.ADOBE_VENDOR_ID_NODE_VALUE).value = parsed.node_value

    # Validate every delegate before storing any of them.
    delegates = parsed.delegate
    for delegate in delegates:
        if not delegate.endswith("/AdobeAuth/"):
            raise ValueError(
                'Invalid delegate: %s. Expected something ending with "/AdobeAuth/"' % delegate
            )
    # Consistency fix: use the `c` alias here as everywhere else in
    # this method (the original mixed `c` and `Configuration`).
    integration.setting(c.ADOBE_VENDOR_ID_DELEGATE_URL).value = (
        json.dumps(delegates)
    )
    _db.commit()
def _classification(self, identifier, subject, data_source, weight=1):
    """Find or create a Classification linking identifier to subject."""
    classification, _ = get_one_or_create(
        self._db, Classification,
        identifier=identifier,
        subject=subject,
        data_source=data_source,
        weight=weight,
    )
    return classification
def _integration_client(self, url=None, shared_secret=None):
    """Find or create an IntegrationClient test fixture."""
    secret = shared_secret or u"secret"
    target_url = url or self._url
    client, _ = get_one_or_create(
        self._db, IntegrationClient,
        shared_secret=secret,
        create_method_kwargs=dict(url=target_url),
    )
    return client
def _integration(self, _db, id, name, protocol, goal):
    """Find or create the ExternalIntegration referred to.

    :param id: Database ID of an existing integration, or None.
    :param name: Name of an existing integration, or None.
    :param protocol: Protocol for lookup/creation (with `goal`).
    :raises ValueError: if no identifying information was given, or a
        given ID/name matches nothing.
    """
    if not id and not name and not (protocol and goal):
        # Typo fix: "must by identified" -> "must be identified".
        raise ValueError(
            "An integration must be identified by either ID, name, or the combination of protocol and goal."
        )
    integration = None
    if id:
        # BUG FIX: the original passed the SQLAlchemy expression
        # `ExternalIntegration.id==id` as a positional argument;
        # get_one() takes its constraints as keyword arguments, as in
        # every other call in this method.
        integration = get_one(_db, ExternalIntegration, id=id)
        if not integration:
            raise ValueError("No integration with ID %s." % id)
    if name:
        integration = get_one(_db, ExternalIntegration, name=name)
        if not integration and not (protocol and goal):
            raise ValueError(
                'No integration with name "%s". To create it, you must also provide protocol and goal.' % name
            )
    if not integration and (protocol and goal):
        integration, is_new = get_one_or_create(
            _db, ExternalIntegration, protocol=protocol, goal=goal
        )
    if name:
        integration.name = name
    return integration
def to_customlist(self, _db, dictreader):
    """Turn the CSV file in `dictreader` into a CustomList.

    TODO: Keep track of the list's current members. If any item
    was on the list but is no longer on the list, set its
    last_appeared date to its most recent appearance.
    """
    data_source = DataSource.lookup(_db, self.data_source_name)
    now = datetime.datetime.utcnow()

    # Find or create the CustomList object itself.
    custom_list, was_new = get_one_or_create(
        _db,
        CustomList,
        data_source=data_source,
        foreign_identifier=self.foreign_identifier,
        create_method_kwargs = dict(
            created=now,
        )
    )
    custom_list.updated = now

    # Turn the rows of the CSV file into a sequence of Metadata
    # objects, then turn each Metadata into a CustomListEntry object.
    for metadata in self.to_metadata(dictreader):
        # Called for its side effect of attaching an entry to the
        # list; the original bound the result to an unused `entry`
        # local, removed here.
        self.metadata_to_list_entry(
            custom_list, data_source, now, metadata)
def validate_email(self):
    """Manually validate an email address, without the admin having to
    click on a confirmation link.

    :return: The updated library details, or a ProblemDetail.
    """
    uuid = flask.request.form.get("uuid")
    email = flask.request.form.get("email")
    library = self.library_for_request(uuid)
    if isinstance(library, ProblemDetail):
        return library
    # Map the form's email-type token to a hyperlink relation.
    email_types = {
        "contact_email": Hyperlink.INTEGRATION_CONTACT_REL,
        "help_email": Hyperlink.HELP_REL,
        "copyright_email": Hyperlink.COPYRIGHT_DESIGNATED_AGENT_REL
    }
    hyperlink = None
    if email_types.get(email):
        hyperlink = Library.get_hyperlink(library, email_types[email])
    # BUG FIX: check isinstance(..., ProblemDetail) before touching
    # .resource -- the original accessed hyperlink.resource first,
    # which would raise AttributeError when get_hyperlink returned a
    # ProblemDetail.
    if (not hyperlink or isinstance(hyperlink, ProblemDetail)
            or not hyperlink.resource):
        return INVALID_CONTACT_URI.detailed(
            "The contact URI for this library is missing or invalid")
    validation, is_new = get_one_or_create(
        self._db, Validation, resource=hyperlink.resource)
    validation.restart()
    validation.mark_as_successful()
    return self.library_details(uuid)
def run(self):
    """Keep calling run_once() until it reports that a full sweep is
    complete (by returning offset 0), persisting the current offset
    in a Timestamp between batches so a restart can resume.
    """
    self.timestamp, new = get_one_or_create(
        self._db, Timestamp,
        service=self.service_name,
        create_method_kwargs=dict(counter=self.default_counter))
    # Resume from the persisted offset, if there is one.
    offset = self.timestamp.counter or self.default_counter

    # NOTE(review): started_at is never read after this point.
    started_at = datetime.datetime.utcnow()
    while not self.stop_running:
        a = time.time()
        old_offset = offset
        try:
            new_offset = self.run_once(offset)
        except Exception, e:
            # Log and stop; progress committed so far is preserved.
            self.log.error("Error during run: %s", e, exc_info=e)
            break
        # NOTE(review): to_sleep is never reassigned, so the sleep
        # branch below is dead code -- confirm whether throttling
        # between batches was intended.
        to_sleep = 0
        if new_offset == 0:
            # We completed a sweep. We're done.
            self.stop_running = True
            self.cleanup()
        # Persist progress after every batch.
        self.counter = new_offset
        self.timestamp.counter = self.counter
        self._db.commit()
        if old_offset != new_offset:
            self.log.debug("Old offset: %s" % offset)
            self.log.debug("New offset: %s", new_offset)
        b = time.time()
        self.log.debug("Elapsed: %.2f sec" % (b - a))
        if to_sleep > 0:
            if old_offset != new_offset:
                self.log.debug("Sleeping for %.1f", to_sleep)
            time.sleep(to_sleep)
        offset = new_offset
def run(self):
    """Call run_once() over a time window ending now, optionally
    persisting the new high-water mark in a Timestamp.
    """
    if self.keep_timestamp:
        self.timestamp, new = get_one_or_create(
            self._db, Timestamp,
            service=self.service_name,
            create_method_kwargs=dict(
                timestamp=self.default_start_time
            )
        )
        # Resume from the stored timestamp, if any.
        start = self.timestamp.timestamp or self.default_start_time
    else:
        # Without a stored timestamp, every run starts from the default.
        start = self.default_start_time
        self.timestamp = None

    while not self.stop_running:
        cutoff = datetime.datetime.utcnow()
        # If run_once returns nothing, treat the cutoff as the new
        # high-water mark.
        new_timestamp = self.run_once(start, cutoff) or cutoff
        duration = datetime.datetime.utcnow() - cutoff
        to_sleep = self.interval_seconds-duration.seconds-1
        self.cleanup()
        if self.keep_timestamp:
            self.timestamp.timestamp = new_timestamp
        self._db.commit()

        # TODO: This could be a little nicer, but basically we now
        # want monitors to run through once and then stop.
        # NOTE(review): the `if True` makes the elif branch below
        # unreachable -- the monitor always stops after one pass.
        if True:
            self.stop_running = True
        elif to_sleep > 0:
            self.log.debug("Sleeping for %.1f", to_sleep)
            time.sleep(to_sleep)
        start = new_timestamp
def _integration_client(self, url=None, shared_secret=None):
    """Find or create an IntegrationClient for use in tests."""
    return get_one_or_create(
        self._db,
        IntegrationClient,
        shared_secret=(shared_secret or u"secret"),
        create_method_kwargs=dict(url=(url or self._url)),
    )[0]
def _integration_client(self, url=None):
    """Find or create an IntegrationClient with fixed key/secret."""
    client, _ = get_one_or_create(
        self._db, IntegrationClient,
        url=(url or self._url),
        key=u"abc",
        secret=u"def",
    )
    return client
def test_fetch_ignores_feeds_without_content(self):
    """A CachedFeed with no content is never returned by fetch()."""
    facets = Facets.default(self._default_library)
    pagination = Pagination.default()
    lane = self._lane(u"My Lane", languages=['eng', 'chi'])

    # Create a feed without content (i.e. don't update it)
    contentless_feed, _ = get_one_or_create(
        self._db, CachedFeed,
        lane_id=lane.id,
        type=CachedFeed.PAGE_TYPE,
        facets=unicode(facets.query_string),
        pagination=unicode(pagination.query_string),
    )

    # It's not returned because it hasn't been updated.
    args = (self._db, lane, CachedFeed.PAGE_TYPE, facets, pagination, None)
    feed, fresh = CachedFeed.fetch(*args)
    eq_(True, feed != contentless_feed)
    eq_(False, fresh)

    # But if the feed is updated, we get it back.
    feed.update(self._db, u"Just feedy things")
    result, fresh = CachedFeed.fetch(*args)
    eq_(True, fresh)
    eq_(feed, result)
def _lane(self, display_name=None, library=None, parent=None, genres=None,
          languages=None, fiction=None):
    """Find or create a Lane, optionally attaching genres and languages."""
    lane, is_new = get_one_or_create(
        self._db, Lane,
        library=(library or self._default_library),
        parent=parent,
        display_name=(display_name or self._str),
        create_method_kwargs=dict(fiction=fiction),
    )
    # A newly created sublane goes at the end of its parent's list.
    if is_new and parent:
        lane.priority = len(parent.sublanes) - 1

    if genres:
        genre_list = genres if isinstance(genres, list) else [genres]
        for genre in genre_list:
            if isinstance(genre, basestring):
                genre, _ = Genre.lookup(self._db, genre)
            lane.genres.append(genre)

    if languages:
        lane.languages = (
            languages if isinstance(languages, list) else [languages]
        )
    return lane
def _patron(self, external_identifier=None, library=None):
    """Find or create a Patron test fixture."""
    return get_one_or_create(
        self._db, Patron,
        external_identifier=(external_identifier or self._str),
        library=(library or self._default_library),
    )[0]
def to_custom_list_entry(self, custom_list, metadata_client,
                         overwrite_old_data=False):
    """Turn this object into a CustomListEntry with associated Edition.

    :param custom_list: The CustomList the entry belongs to.
    :param metadata_client: Passed to to_edition() and set_work().
    :param overwrite_old_data: Passed through to to_edition().
    :return: 2-tuple (CustomListEntry, was_newly_created).
    """
    _db = Session.object_session(custom_list)
    edition = self.to_edition(_db, metadata_client, overwrite_old_data)

    list_entry, is_new = get_one_or_create(
        _db, CustomListEntry, edition=edition, customlist=custom_list
    )

    # Move first_appearance earlier if this sighting predates it.
    if (not list_entry.first_appearance
        or list_entry.first_appearance > self.first_appearance):
        if list_entry.first_appearance:
            self.log.info(
                "I thought %s first showed up at %s, but then I saw it earlier, at %s!",
                self.metadata.title, list_entry.first_appearance,
                self.first_appearance
            )
        list_entry.first_appearance = self.first_appearance

    # Likewise move most_recent_appearance later if this sighting is newer.
    if (not list_entry.most_recent_appearance
        or list_entry.most_recent_appearance < self.most_recent_appearance):
        if list_entry.most_recent_appearance:
            self.log.info(
                "I thought %s most recently showed up at %s, but then I saw it later, at %s!",
                self.metadata.title, list_entry.most_recent_appearance,
                self.most_recent_appearance
            )
        list_entry.most_recent_appearance = self.most_recent_appearance

    list_entry.annotation = self.annotation

    list_entry.set_work(self.metadata, metadata_client)
    return list_entry, is_new
def _patron(self, external_identifier=None, library=None):
    """Find or create a Patron for use in tests."""
    identifier = external_identifier or self._str
    owning_library = library or self._default_library
    patron, _ = get_one_or_create(
        self._db, Patron,
        external_identifier=identifier,
        library=owning_library,
    )
    return patron
def _coverage_record(self, edition, coverage_source, operation=None,
                     status=CoverageRecord.SUCCESS, collection=None,
                     exception=None):
    """Find or create a CoverageRecord for an Edition or Identifier."""
    # An Identifier may be passed in directly in place of an Edition.
    identifier = (
        edition if isinstance(edition, Identifier)
        else edition.primary_identifier
    )
    defaults = dict(
        timestamp=datetime.utcnow(),
        status=status,
        exception=exception,
    )
    record, _ = get_one_or_create(
        self._db, CoverageRecord,
        identifier=identifier,
        data_source=coverage_source,
        operation=operation,
        collection=collection,
        create_method_kwargs=defaults,
    )
    return record
def _customlist(self, foreign_identifier=None, name=None,
                data_source_name=DataSource.NYT, num_entries=1,
                entries_exist_as_works=True):
    """Find or create a CustomList populated with `num_entries` entries."""
    source = DataSource.lookup(self._db, data_source_name)
    foreign_identifier = foreign_identifier or self._str
    now = datetime.utcnow()
    customlist, _ = get_one_or_create(
        self._db, CustomList,
        create_method_kwargs=dict(
            created=now,
            updated=now,
            name=(name or self._str),
            description=self._str,
        ),
        data_source=source,
        foreign_identifier=foreign_identifier,
    )

    editions = []
    for i in range(num_entries):
        if entries_exist_as_works:
            # Back the entry with a full Work.
            edition = self._work(
                with_open_access_download=True).presentation_edition
        else:
            # A bare Edition is enough.
            edition = self._edition(data_source_name, title="Item %s" % i)
            edition.permanent_work_id = "Permanent work ID %s" % self._str
        customlist.add_entry(
            edition, "Annotation %s" % i, first_appearance=now)
        editions.append(edition)
    return customlist, editions
def _library(self, name=None, short_name=None):
    """Find or create a Library with a freshly generated UUID."""
    library, _ = get_one_or_create(
        self._db, Library,
        name=(name or self._str),
        short_name=(short_name or self._str),
        create_method_kwargs=dict(uuid=str(uuid.uuid4())),
    )
    return library
def load(self, metadata, geometry):
    """Load one Place from a metadata JSON string and a GeoJSON geometry.

    :param metadata: JSON string containing id, type, parent_id, name,
        and optionally aliases and abbreviated_name.
    :param geometry: GeoJSON string describing the place's boundaries.
    :return: 2-tuple (Place, was_newly_created).
    """
    metadata = json.loads(metadata)
    external_id = metadata['id']
    type = metadata['type']
    parent_external_id = metadata['parent_id']
    name = metadata['name']
    aliases = metadata.get('aliases', [])
    abbreviated_name = metadata.get('abbreviated_name', None)
    if parent_external_id:
        # The parent must already have been loaded into the cache.
        parent = self.places_by_external_id[parent_external_id]
    else:
        parent = None

    # This gives us a Geometry object. Set its SRID so the database
    # knows it's using real-world latitude and longitude.
    geometry = GeometryUtility.from_geojson(geometry)

    place, is_new = get_one_or_create(
        self._db, Place, external_id=external_id, type=type, parent=parent,
        create_method_kwargs=dict(geometry=geometry))

    # Set these values, even the ones that were set in
    # create_method_kwargs, so that we can update any that have
    # changed.
    place.external_name = name
    place.abbreviated_name = abbreviated_name
    place.geometry = geometry

    # We only ever add aliases. If the database contains an alias
    # for this place that doesn't show up in the metadata, it
    # may have been created manually.
    for alias in aliases:
        # BUG FIX: the original reused `alias`, `name` and `is_new` as
        # loop variables, so the `is_new` returned below reflected the
        # last PlaceAlias rather than the Place itself.
        alias_obj, alias_is_new = get_one_or_create(
            self._db, PlaceAlias, place=place,
            name=alias['name'], language=alias['language'])

    self.places_by_external_id[external_id] = place
    return place, is_new
def make_publication(self, publisher, post):
    """Create a Publication for this Publisher and this Post.

    This is your chance to modify the content of the Post for
    different publishers.

    :return: 2-tuple (Publication, was_newly_created).
    """
    publication, is_new = get_one_or_create(
        self._db, Publication,
        service=publisher.service,
        post=post,
    )
    return publication, is_new
def imp(db, data_source, path, url): modified = datetime.datetime.fromtimestamp(os.stat(path).st_mtime) data = open(path).read() representation, ignore = get_one_or_create(db, Representation, url=url, data_source=data_source) representation.status_code = 200 representation.content = data representation.media_type = 'application/xml' representation.fetched_at = modified print url
def test_neglected_source_cannot_be_normalized(self):
    """Values from a DataSource with no reference data normalize to None."""
    neglected_source, _ = get_one_or_create(
        self._db, DataSource, name="Neglected source"
    )

    popularity = self._popularity(100, neglected_source)
    eq_(None, popularity.normalized_value)

    rating = self._rating(100, neglected_source)
    eq_(None, rating.normalized_value)
def _licensepool(self, edition, open_access=True,
                 data_source_name=DataSource.GUTENBERG,
                 with_open_access_download=False,
                 set_edition_as_presentation=False,
                 collection=None):
    """Find or create a LicensePool for `edition`.

    :param with_open_access_download: If True, attach an open-access
        download link, a no-DRM delivery mechanism, and a mirrored
        Representation. Otherwise attach an Adobe DRM mechanism.
    :param set_edition_as_presentation: If True, make `edition` the
        pool's presentation edition.
    """
    source = DataSource.lookup(self._db, data_source_name)
    if not edition:
        edition = self._edition(data_source_name)
    collection = collection or self._default_collection
    pool, ignore = get_one_or_create(
        self._db, LicensePool,
        create_method_kwargs=dict(
            open_access=open_access),
        identifier=edition.primary_identifier,
        data_source=source,
        collection=collection,
        availability_time=datetime.utcnow()
    )

    if set_edition_as_presentation:
        pool.presentation_edition = edition

    if with_open_access_download:
        pool.open_access = True
        url = "http://foo.com/" + self._str
        media_type = MediaTypes.EPUB_MEDIA_TYPE
        link, new = pool.identifier.add_link(
            Hyperlink.OPEN_ACCESS_DOWNLOAD, url, source, media_type
        )

        # Add a DeliveryMechanism for this download
        pool.set_delivery_mechanism(
            media_type,
            DeliveryMechanism.NO_DRM,
            RightsStatus.GENERIC_OPEN_ACCESS,
            link.resource,
        )

        representation, is_new = self._representation(
            url, media_type, "Dummy content", mirrored=True)
        link.resource.representation = representation
    else:
        # Add a DeliveryMechanism for this licensepool
        pool.set_delivery_mechanism(
            MediaTypes.EPUB_MEDIA_TYPE,
            DeliveryMechanism.ADOBE_DRM,
            RightsStatus.UNKNOWN,
            None
        )
        pool.licenses_owned = pool.licenses_available = 1

    return pool
def _work_coverage_record(self, work, operation=None,
                          status=CoverageRecord.SUCCESS):
    """Find or create a WorkCoverageRecord for `work`."""
    defaults = dict(timestamp=datetime.utcnow(), status=status)
    record, _ = get_one_or_create(
        self._db, WorkCoverageRecord,
        work=work,
        operation=operation,
        create_method_kwargs=defaults,
    )
    return record
def _collection(self, name=None, protocol=Collection.OPDS_IMPORT,
                external_account_id=None, url=None, username=None,
                password=None):
    """Find or create a Collection and configure its external integration."""
    collection, _ = get_one_or_create(
        self._db, Collection,
        name=(name or self._str),
        protocol=protocol,
    )
    collection.external_account_id = external_account_id
    integration = collection.external_integration
    integration.url = url
    integration.username = username
    integration.password = password
    return collection
def _representation(self, url=None, media_type=None, content=None,
                    mirrored=False):
    """Find or create a Representation, optionally marking it mirrored."""
    target_url = url or ("http://foo.com/" + self._str)
    representation, is_new = get_one_or_create(
        self._db, Representation, url=target_url)
    representation.media_type = media_type
    if media_type and content:
        # Only a representation with both a media type and content
        # counts as having been fetched.
        representation.content = content
        representation.fetched_at = datetime.utcnow()
    if mirrored:
        representation.mirror_url = "http://foo.com/" + self._str
        representation.mirrored_at = datetime.utcnow()
    return representation, is_new
def to_customlist(self, _db):
    """Turn this NYTBestSeller list into a CustomList object."""
    nyt = DataSource.lookup(_db, DataSource.NYT)
    custom_list, was_new = get_one_or_create(
        _db,
        CustomList,
        data_source=nyt,
        foreign_identifier=self.foreign_identifier,
        create_method_kwargs=dict(created=self.created),
    )
    # Keep the list's name and freshness date up to date.
    custom_list.name = self.name
    custom_list.updated = self.updated
    self.update_custom_list(custom_list)
    return custom_list
def make_default_library(cls, _db):
    """Ensure that the default library exists in the given database.

    This can be called by code intended for use in testing but not
    actually within a DatabaseTest subclass.
    """
    library, _ = get_one_or_create(
        _db, Library,
        short_name="default",
        create_method_kwargs=dict(
            uuid=unicode(uuid.uuid4()),
            name="default",
        ),
    )
    collection, _ = get_one_or_create(
        _db, Collection, name="Default Collection")
    integration = collection.create_external_integration(
        ExternalIntegration.OPDS_IMPORT)
    integration.goal = ExternalIntegration.LICENSE_GOAL
    if collection not in library.collections:
        library.collections.append(collection)
    return library
def _license(self, pool, identifier=None, checkout_url=None, status_url=None,
             expires=None, remaining_checkouts=None,
             concurrent_checkouts=None):
    """Find or create a License attached to `pool`."""
    license, _ = get_one_or_create(
        self._db, License,
        identifier=(identifier or self._str),
        license_pool=pool,
        checkout_url=(checkout_url or self._str),
        status_url=(status_url or self._str),
        expires=expires,
        remaining_checkouts=remaining_checkouts,
        concurrent_checkouts=concurrent_checkouts,
    )
    return license
def make_default_library(cls, _db):
    """Ensure that the default library exists in the given database.

    This can be called by code intended for use in testing but not
    actually within a DatabaseTest subclass.

    :return: the default Library, with a "Default Collection" attached.
    """
    library, ignore = get_one_or_create(
        _db, Library,
        create_method_kwargs=dict(
            uuid=unicode(uuid.uuid4()),
            name="default",
        ),
        short_name="default"
    )
    collection, ignore = get_one_or_create(
        _db, Collection, name="Default Collection"
    )
    # The default collection imports via OPDS.
    integration = collection.create_external_integration(
        ExternalIntegration.OPDS_IMPORT
    )
    integration.goal = ExternalIntegration.LICENSE_GOAL
    if collection not in library.collections:
        library.collections.append(collection)
    return library
def process_file(_db, filename, class_): a = 0 for i in open(filename): v = i.strip().split("\t") if class_ == Subject and len(v) == 3: type, identifier, name = v elif len(v) == 2: type, identifier = v name = None else: print "Bad data: %r" % i args = {} if class_ == Subject and name: args['name'] = name get_one_or_create( _db, class_, type=type, identifier=identifier, create_method_kwargs=args ) a += 1 if not a % 1000: _db.commit() print a, class_.__name__ _db.commit()
def _collection(self, name=None, protocol=ExternalIntegration.OPDS_IMPORT,
                external_account_id=None, url=None, username=None,
                password=None, data_source_name=None):
    """Find or create a Collection with a configured external integration."""
    collection, _ = get_one_or_create(
        self._db, Collection, name=(name or self._str)
    )
    collection.external_account_id = external_account_id

    integration = collection.create_external_integration(protocol)
    integration.goal = ExternalIntegration.LICENSE_GOAL
    integration.url = url
    integration.username = username
    integration.password = password

    if data_source_name:
        collection.data_source = data_source_name
    return collection
def imp(db, data_source, identifier, cache, library): i = identifier.identifier fn = i + ".json" if not cache.exists(fn): return fn = cache._filename(fn) modified = datetime.datetime.fromtimestamp(os.stat(fn).st_mtime) data = cache.open(fn).read() a = dict(collection_token=library['collectionToken'], item_id=i) url = OverdriveAPI.METADATA_ENDPOINT % a representation, ignore = get_one_or_create(db, Representation, url=url, data_source=data_source, identifier=identifier) representation.status_code = 200 representation.content = data representation.media_type = 'application/json' representation.fetched_at = modified print identifier
def _coverage_record(self, edition, coverage_source, operation=None,
                     status=CoverageRecord.SUCCESS, collection=None,
                     exception=None,
                     ):
    """Find or create a CoverageRecord.

    :param edition: An Edition, or an Identifier used directly.
    :param coverage_source: The DataSource providing coverage.
    """
    if isinstance(edition, Identifier):
        identifier = edition
    else:
        identifier = edition.primary_identifier
    record, ignore = get_one_or_create(
        self._db, CoverageRecord,
        identifier=identifier,
        data_source=coverage_source,
        operation=operation,
        collection=collection,
        create_method_kwargs = dict(
            timestamp=datetime.utcnow(),
            status=status,
            exception=exception,
        )
    )
    return record
def imp(db, data_source, identifier, cache):
    """Import a cached OCLC Linked Data document as a Representation.

    :return: True if a new Representation was populated from the
        cache; False if one already existed or nothing was cached.
    """
    i = identifier.identifier
    type = identifier.type
    location = None
    status_code = 200
    media_type = "application/ld+json"
    # Reconstruct the URL this document would have been fetched from.
    if type == Identifier.OCLC_WORK:
        url = OCLCLinkedData.WORK_BASE_URL % dict(id=i, type="work")
    elif type == Identifier.OCLC_NUMBER:
        url = OCLCLinkedData.BASE_URL % dict(id=i, type="oclc")
    elif type == Identifier.ISBN:
        url = OCLCLinkedData.ISBN_BASE_URL % dict(id=i)
        media_type = None
        # ISBN lookups are recorded as redirects, not documents.
        status_code = 301
    representation, new = get_one_or_create(
        db, Representation, url=url, data_source=data_source,
        identifier=identifier,
    )
    if not new:
        print "Already did", identifier
        return False
    if not cache.exists(i):
        # print "Not cached", identifier
        return False
    fn = cache._filename(i)
    modified = datetime.datetime.fromtimestamp(os.stat(fn).st_mtime)
    data = open(fn).read()
    if type == Identifier.ISBN:
        # For a redirect, the cached payload is the target URL.
        location = data
        data = None
    representation.status_code = status_code
    representation.content = data
    representation.location = location
    representation.media_type = media_type
    representation.fetched_at = modified
    return True
def timestamp(self):
    """Find or create a Timestamp for this Monitor.

    This does not use TimestampData because it relies on checking
    whether a Timestamp already exists in the database.

    A new timestamp will have .finish set to None, since the first
    run is presumably in progress.
    """
    defaults = dict(
        start=self.initial_start_time,
        finish=None,
        counter=self.default_counter,
    )
    stamp, _ = get_one_or_create(
        self._db,
        Timestamp,
        service=self.service_name,
        service_type=Timestamp.MONITOR_TYPE,
        collection=self.collection,
        create_method_kwargs=defaults,
    )
    return stamp
def _external_integration(self, protocol, goal=None, settings=None,
                          libraries=None, **kwargs
                          ):
    """Find or create an ExternalIntegration.

    :param libraries: If given, look for an integration already
        associated with any of these libraries; otherwise create one
        for all of them.
    :param settings: Extra settings stored via set_setting().
    :param kwargs: Extra attributes set directly on the integration.
    """
    integration = None
    if not libraries:
        integration, ignore = get_one_or_create(
            self._db, ExternalIntegration, protocol=protocol, goal=goal
        )
    else:
        if not isinstance(libraries, list):
            libraries = [libraries]

        # Try to find an existing integration for one of the given
        # libraries.
        for library in libraries:
            # BUG FIX: the original looked up `libraries[0]` on every
            # pass instead of the loop variable, so only the first
            # library was ever consulted.
            integration = ExternalIntegration.lookup(
                self._db, protocol, goal, library=library
            )
            if integration:
                break

        if not integration:
            # Otherwise, create a brand new integration specifically
            # for the library.
            integration = ExternalIntegration(
                protocol=protocol, goal=goal,
            )
            integration.libraries.extend(libraries)
            self._db.add(integration)

    for attr, value in kwargs.items():
        setattr(integration, attr, value)

    settings = settings or dict()
    for key, value in settings.items():
        integration.set_setting(key, value)

    return integration
def records(self, lane, annotator, start_time=None, force_refresh=False,
            mirror=None, query_batch_size=500, upload_batch_size=7500):
    """
    Create and export a MARC file for the books in a lane.

    :param lane: The Lane to export books from.
    :param annotator: The Annotator to use when creating MARC records.
    :param start_time: Only include records that were created or modified after this time.
    :param force_refresh: Create new records even when cached records are available.
    :param mirror: Optional mirror to use instead of loading one from configuration.
    :param query_batch_size: Number of works to retrieve with a single database query.
    :param upload_batch_size: Number of records to mirror at a time. This is different
        from query_batch_size because S3 enforces a minimum size of 5MB for all parts
        of a multipart upload except the last, but 5MB of records would be too many
        works for a single query.
    """
    # We mirror the content, if it's not empty. If it's empty, we create a CachedMARCFile
    # and Representation, but don't actually mirror it.
    if not mirror:
        storage_protocol = self.integration.setting(self.STORAGE_PROTOCOL).value
        mirror = MirrorUploader.sitewide(self._db)
        if mirror.NAME != storage_protocol:
            raise Exception("Sitewide mirror integration does not match configured storage protocol")

    if not mirror:
        raise Exception("No mirror integration is configured")

    # End time is before we start the query, because if any records are changed
    # during the processing we may not catch them, and they should be handled
    # again on the next run.
    end_time = datetime.datetime.utcnow()

    works_q = lane.works(self._db)
    if start_time:
        works_q = works_q.filter(MaterializedWorkWithGenre.last_update_time>=start_time)
    total = works_q.count()
    offset = 0

    url = mirror.marc_file_url(self.library, lane, end_time, start_time)
    representation, ignore = get_one_or_create(
        self._db, Representation, url=url, media_type=Representation.MARC_MEDIA_TYPE)

    with mirror.multipart_upload(representation, url) as upload:
        output = StringIO()
        current_count = 0
        while offset < total:
            # Page through the lane's works in primary-key order.
            batch_q = works_q.order_by(
                MaterializedWorkWithGenre.works_id).offset(
                offset).limit(query_batch_size)
            for work in batch_q:
                record = self.create_record(
                    work, annotator, force_refresh, self.integration)
                if record:
                    output.write(record.as_marc())
                    current_count += 1
                    # Flush a part to the multipart upload once enough
                    # records have accumulated.
                    if current_count == upload_batch_size:
                        content = output.getvalue()
                        if content:
                            upload.upload_part(content)
                            output.close()
                            output = StringIO()
                        current_count = 0
            offset += query_batch_size

        # Upload anything left over.
        content = output.getvalue()
        if content:
            upload.upload_part(content)
        output.close()

    representation.fetched_at = end_time
    if not representation.mirror_exception:
        cached, is_new = get_one_or_create(
            self._db, CachedMARCFile,
            library=self.library,
            lane=(lane if isinstance(lane, Lane) else None),
            start_time=start_time,
            create_method_kwargs=dict(representation=representation))
        if not is_new:
            cached.representation = representation
        cached.end_time = end_time
def _catalog(self, name=u"Faketown Public Library"):
    # Find or create a DataSource with the given name.
    #
    # NOTE(review): unlike the similar _collection() helper, this
    # function has no return statement -- callers always get None.
    # It looks truncated; confirm whether a catalog/collection object
    # was meant to be created and returned here.
    source, ignore = get_one_or_create(self._db, DataSource, name=name)
def _subject(self, type, identifier):
    """Find or create a Subject of the given type and identifier."""
    subject, _ = get_one_or_create(
        self._db, Subject, type=type, identifier=identifier
    )
    return subject
def _classification(self, identifier, subject, data_source, weight=1):
    """Find or create a Classification for identifier/subject."""
    classification, _ = get_one_or_create(
        self._db,
        Classification,
        identifier=identifier,
        subject=subject,
        data_source=data_source,
        weight=weight,
    )
    return classification
def _contributor(self, sort_name=None, name=None, **kw_args):
    """Find or create a Contributor by sort name.

    :return: 2-tuple (Contributor, was_newly_created).
    """
    chosen_name = sort_name or name or self._str
    return get_one_or_create(
        self._db, Contributor, sort_name=unicode(chosen_name), **kw_args
    )
def _work(self, title=None, authors=None, genre=None, language=None,
          audience=None, fiction=True, with_license_pool=False,
          with_open_access_download=False, quality=0.5, series=None,
          presentation_edition=None, collection=None, data_source_name=None):
    """Create a Work.

    For performance reasons, this method does not generate OPDS
    entries or calculate a presentation edition for the new
    Work. Tests that rely on this information being present
    should call _slow_work() instead, which takes more care to present
    the sort of Work that would be created in a real environment.
    """
    pools = []
    if with_open_access_download:
        with_license_pool = True
    language = language or "eng"
    title = unicode(title or self._str)
    audience = audience or Classifier.AUDIENCE_ADULT
    if audience == Classifier.AUDIENCE_CHILDREN and not data_source_name:
        # TODO: This is necessary because Gutenberg's childrens books
        # get filtered out at the moment.
        data_source_name = DataSource.OVERDRIVE
    elif not data_source_name:
        data_source_name = DataSource.GUTENBERG
    if fiction is None:
        fiction = True
    new_edition = False
    if not presentation_edition:
        # No edition was provided; build one (and possibly a pool).
        new_edition = True
        presentation_edition = self._edition(
            title=title,
            language=language,
            authors=authors,
            with_license_pool=with_license_pool,
            with_open_access_download=with_open_access_download,
            data_source_name=data_source_name,
            series=series,
            collection=collection,
        )
        if with_license_pool:
            # _edition() returned a 2-tuple in this case.
            presentation_edition, pool = presentation_edition
            if with_open_access_download:
                pool.open_access = True
            pools = [pool]
    else:
        pools = presentation_edition.license_pools
    work, ignore = get_one_or_create(
        self._db, Work,
        create_method_kwargs=dict(
            audience=audience,
            fiction=fiction,
            quality=quality),
        id=self._id)
    if genre:
        if not isinstance(genre, Genre):
            genre, ignore = Genre.lookup(self._db, genre, autocreate=True)
        work.genres = [genre]
    work.random = 0.5

    work.set_presentation_edition(presentation_edition)

    if pools:
        # make sure the pool's presentation_edition is set,
        # bc loan tests assume that.
        if not work.license_pools:
            for pool in pools:
                work.license_pools.append(pool)
        for pool in pools:
            pool.set_presentation_edition()
        # This is probably going to be used in an OPDS feed, so
        # fake that the work is presentation ready.
        work.presentation_ready = True
        work.calculate_opds_entries(verbose=False)
    return work