def test_fulfill_sends_analytics_event(self):
    """Fulfilling a loan through the circulation manager should emit a
    CM_FULFILL analytics event to the configured provider.
    """
    # Arrange: an active loan and a queued fulfillment with inline content.
    self.pool.loan_to(self.patron)
    fulfillment = self.pool.delivery_mechanisms[0]
    fulfillment.content = "Fulfilled."
    fulfillment.content_link = None
    self.remote.queue_fulfill(fulfillment)

    # Configure a mock analytics provider so we can observe events.
    config = {
        Configuration.POLICIES: {
            Configuration.ANALYTICS_POLICY: ["core.mock_analytics_provider"]
        }
    }
    with temp_config(config) as config:
        provider = MockAnalyticsProvider()
        analytics = Analytics.initialize(
            ['core.mock_analytics_provider'], config
        )
        result = self.circulation.fulfill(
            self.patron, '1234', self.pool,
            self.pool.delivery_mechanisms[0]
        )

        # The fulfillment looks good.
        eq_(fulfillment, result)

        # An analytics event was created.
        mock = Analytics.instance().providers[0]
        eq_(1, mock.count)
        eq_(CirculationEvent.CM_FULFILL, mock.event_type)
def revoke_loan(self, patron, pin, licensepool):
    """Revoke a patron's loan for a book.

    Deletes the local Loan record (if one exists), records a CM_CHECKIN
    analytics event, and, for non-open-access books, tells the remote
    API to check the book back in.

    :param patron: The Patron whose loan is being revoked.
    :param pin: The patron's PIN, passed through to the remote API.
    :param licensepool: The LicensePool the loan is for.
    """
    loan = get_one(
        self._db, Loan, patron=patron, license_pool=licensepool,
        on_multiple='interchangeable'
    )
    if loan:
        __transaction = self._db.begin_nested()
        logging.info("In revoke_loan(), deleting loan #%d" % loan.id)
        self._db.delete(loan)
        __transaction.commit()

        # Send out an analytics event to record the fact that
        # a loan was revoked through the circulation
        # manager.
        Analytics.collect_event(
            self._db,
            licensepool,
            CirculationEvent.CM_CHECKIN,
        )
    if not licensepool.open_access:
        api = self.api_for_license_pool(licensepool)
        try:
            api.checkin(patron, pin, licensepool)
        # Fix: `except NotCheckedOut, e:` is Python-2-only syntax (a
        # SyntaxError under Python 3); the binding was also unused.
        except NotCheckedOut:
            # The book wasn't checked out in the first
            # place. Everything's fine.
            pass
def __init__(self, _db, collection, api_class=OverdriveAPI):
    """Set up the monitor with an Overdrive API client and analytics.

    :param api_class: Class to instantiate for talking to Overdrive
        (injectable for testing).
    """
    super(OverdriveCirculationMonitor, self).__init__(_db, collection)
    self.analytics = Analytics(_db)
    self.api = api_class(_db, collection)
    # Copy the class-level stopping threshold onto the instance so it
    # can be tuned per-instance.
    self.maximum_consecutive_unchanged_books = self.MAXIMUM_CONSECUTIVE_UNCHANGED_BOOKS
class OneClickCirculationMonitor(CollectionMonitor):
    """Maintain LicensePools for OneClick titles.

    Bibliographic data isn't inserted into new LicensePools until
    we hear from the metadata wrangler.
    """
    SERVICE_NAME = "OneClick CirculationMonitor"
    DEFAULT_START_TIME = datetime.datetime(1970, 1, 1)
    INTERVAL_SECONDS = 1200
    DEFAULT_BATCH_SIZE = 50

    PROTOCOL = ExternalIntegration.RB_DIGITAL

    def __init__(self, _db, collection, batch_size=None,
                 api_class=OneClickAPI, api_class_kwargs=None):
        """Constructor.

        :param batch_size: Commit to the database after this many titles
            (defaults to DEFAULT_BATCH_SIZE).
        :param api_class: Class used to build the OneClick API client.
        :param api_class_kwargs: Extra keyword arguments for api_class.
            Fix: this was previously a mutable default dict (``{}``)
            shared across every instance; ``None`` is used instead.
        """
        super(OneClickCirculationMonitor, self).__init__(_db, collection)
        self.batch_size = batch_size or self.DEFAULT_BATCH_SIZE

        self.api = api_class(
            _db, self.collection, **(api_class_kwargs or {}))
        self.bibliographic_coverage_provider = (
            OneClickBibliographicCoverageProvider(
                collection=self.collection, api_class=self.api,
            )
        )
        self.analytics = Analytics(self._db)

    def process_availability(self, media_type='ebook'):
        """Update LicensePools for every title of the given media type.

        :param media_type: 'ebook' or 'eaudio'.
        :return: The number of titles processed.
        """
        # get list of all titles, with availability info
        availability_list = self.api.get_ebook_availability_info(
            media_type=media_type)
        item_count = 0
        for availability in availability_list:
            isbn = availability['isbn']
            # boolean True/False value, not number of licenses
            available = availability['availability']
            medium = availability.get('mediaType')
            license_pool, is_new, is_changed = (
                self.api.update_licensepool_for_identifier(
                    isbn, available, medium))
            # Log a circulation event for this work.
            if is_new:
                for library in self.collection.libraries:
                    self.analytics.collect_event(
                        library, license_pool,
                        CirculationEvent.DISTRIBUTOR_TITLE_ADD,
                        license_pool.last_checked)

            item_count += 1
            # Commit periodically so a long run doesn't hold one huge
            # transaction open.
            if item_count % self.batch_size == 0:
                self._db.commit()

        return item_count

    def run(self):
        super(OneClickCirculationMonitor, self).run()

    def run_once(self, start, cutoff):
        """Process ebooks and audiobooks in a single monitor run."""
        ebook_count = self.process_availability(media_type='ebook')
        eaudio_count = self.process_availability(media_type='eaudio')

        self.log.info(
            "Processed %d ebooks and %d audiobooks.",
            ebook_count, eaudio_count)
def test_borrow_sends_analytics_event(self):
    """Borrowing (and renewing) a book should emit CM_CHECKOUT analytics
    events; re-borrowing an already-checked-out book should not.
    """
    # Queue a successful checkout on the mock remote API.
    now = datetime.utcnow()
    loaninfo = LoanInfo(
        self.pool.identifier.type,
        self.pool.identifier.identifier,
        now, now + timedelta(seconds=3600),
    )
    self.remote.queue_checkout(loaninfo)
    now = datetime.utcnow()

    # Configure a mock analytics provider so events can be observed.
    config = {
        Configuration.POLICIES: {
            Configuration.ANALYTICS_POLICY: ["core.mock_analytics_provider"]
        }
    }
    with temp_config(config) as config:
        provider = MockAnalyticsProvider()
        analytics = Analytics.initialize(
            ['core.mock_analytics_provider'], config
        )
        loan, hold, is_new = self.borrow()

        # The Loan looks good.
        eq_(loaninfo.identifier, loan.license_pool.identifier.identifier)
        eq_(self.patron, loan.patron)
        eq_(None, hold)
        eq_(True, is_new)

        # An analytics event was created.
        mock = Analytics.instance().providers[0]
        eq_(1, mock.count)
        eq_(CirculationEvent.CM_CHECKOUT, mock.event_type)

        # Try to 'borrow' the same book again.
        self.remote.queue_checkout(AlreadyCheckedOut())
        loan, hold, is_new = self.borrow()
        eq_(False, is_new)

        # Since the loan already existed, no new analytics event was
        # sent.
        eq_(1, mock.count)

        # Now try to renew the book.
        self.remote.queue_checkout(loaninfo)
        loan, hold, is_new = self.borrow()
        eq_(False, is_new)

        # Renewals are counted as loans, since from an accounting
        # perspective they _are_ loans.
        eq_(2, mock.count)

        # Loans of open-access books go through a different code
        # path, but they count as loans nonetheless.
        self.pool.open_access = True
        self.remote.queue_checkout(loaninfo)
        loan, hold, is_new = self.borrow()
        eq_(3, mock.count)
def __init__(self, _db, collection, api_class=EnkiAPI):
    """Set up the import monitor with an Enki API client, analytics,
    and a bibliographic coverage provider.
    """
    super(EnkiImport, self).__init__(_db, collection)
    self._db = _db
    self.collection_id = collection.id
    self.analytics = Analytics(_db)
    self.api = api_class(_db, collection)
    self.bibliographic_coverage_provider = EnkiBibliographicCoverageProvider(
        collection, api_class=self.api
    )
def __init__(self, _db, collection, api_class=BibliothecaAPI, **kwargs):
    """Set up the sweep monitor.

    :param api_class: Either an already-built BibliothecaAPI instance
        to reuse, or a class to instantiate against this collection.
    """
    # Always use the session the collection itself is attached to.
    _db = Session.object_session(collection)
    super(BibliothecaCirculationSweep, self).__init__(
        _db, collection, **kwargs)
    self.api = (
        api_class if isinstance(api_class, BibliothecaAPI)
        else api_class(_db, collection)
    )
    self.analytics = Analytics(_db)
def _collect_checkout_event(self, licensepool):
    """Record an analytics event noting that `licensepool` was checked
    out via the circulation manager.
    """
    Analytics.collect_event(
        self._db, licensepool, CirculationEvent.CM_CHECKOUT)
def process_batch(self, identifiers):
    """Update circulation information for a batch of 3M identifiers.

    Fetches current circulation data from the 3M API, creates any
    missing LicensePools, and zeroes out pools for books 3M no longer
    mentions — a reliable sign we no longer own licenses for them.

    :param identifiers: A list of Identifier objects to check.
    """
    identifiers_by_threem_id = dict()
    threem_ids = set()
    for identifier in identifiers:
        threem_ids.add(identifier.identifier)
        identifiers_by_threem_id[identifier.identifier] = identifier

    # Track which of our identifiers 3M never mentions in its response.
    identifiers_not_mentioned_by_threem = set(identifiers)
    now = datetime.datetime.utcnow()
    for circ in self.api.get_circulation_for(threem_ids):
        if not circ:
            continue
        threem_id = circ[Identifier][Identifier.THREEM_ID]
        identifier = identifiers_by_threem_id[threem_id]
        identifiers_not_mentioned_by_threem.remove(identifier)
        pool = identifier.licensed_through
        if not pool:
            # We don't have a license pool for this work. That
            # shouldn't happen--how did we know about the
            # identifier?--but it shouldn't be a big deal to
            # create one.
            pool, ignore = LicensePool.for_foreign_id(
                self._db, self.data_source, identifier.type,
                identifier.identifier)

            # 3M books are never open-access.
            pool.open_access = False
            Analytics.collect_event(
                self._db, pool, CirculationEvent.DISTRIBUTOR_TITLE_ADD,
                now)
        self.api.apply_circulation_information_to_licensepool(circ, pool)

    # At this point there may be some license pools left over
    # that 3M doesn't know about. This is a pretty reliable
    # indication that we no longer own any licenses to the
    # book.
    for identifier in identifiers_not_mentioned_by_threem:
        pool = identifier.licensed_through
        if not pool:
            continue
        if pool.licenses_owned > 0:
            if pool.presentation_edition:
                self.log.warn("Removing %s (%s) from circulation",
                              pool.presentation_edition.title,
                              pool.presentation_edition.author)
            else:
                self.log.warn(
                    "Removing unknown work %s from circulation.",
                    identifier.identifier)
            pool.licenses_owned = 0
            pool.licenses_available = 0
            pool.licenses_reserved = 0
            pool.patrons_in_hold_queue = 0
            pool.last_checked = now
def __init__(self, _db, collection, batch_size=None,
             api_class=OneClickAPI, api_class_kwargs=None):
    """Constructor.

    :param batch_size: Commit to the database after this many titles
        (defaults to DEFAULT_BATCH_SIZE).
    :param api_class: Class used to build the OneClick API client.
    :param api_class_kwargs: Extra keyword arguments for api_class.
        Fix: this was previously a mutable default dict (``{}``)
        shared across every instance; ``None`` is used instead.
    """
    super(OneClickCirculationMonitor, self).__init__(_db, collection)
    self.batch_size = batch_size or self.DEFAULT_BATCH_SIZE

    self.api = api_class(_db, self.collection, **(api_class_kwargs or {}))
    self.bibliographic_coverage_provider = (
        OneClickBibliographicCoverageProvider(
            collection=self.collection, api_class=self.api,
        )
    )
    self.analytics = Analytics(self._db)
def process_book(self, bibliographic, availability):
    """Create or update the Edition and LicensePool for a single book.

    :param bibliographic: A Metadata-style object providing
        bibliographic information (has .edition() and .apply()).
    :param availability: A CirculationData-style object providing
        circulation information (has .license_pool() and .apply()).
    :return: A 2-tuple (edition, license_pool).
    """
    analytics = Analytics(self._db)
    license_pool, new_license_pool = availability.license_pool(
        self._db, self.collection, analytics)
    edition, new_edition = bibliographic.edition(self._db)
    license_pool.edition = edition
    policy = ReplacementPolicy(
        identifiers=False,
        subjects=True,
        contributions=True,
        formats=True,
        analytics=analytics,
    )
    availability.apply(self._db, self.collection, replace=policy)
    if new_edition:
        bibliographic.apply(edition, self.collection, replace=policy)

    if new_license_pool or new_edition:
        # At this point we have done work equivalent to that done by
        # the Axis360BibliographicCoverageProvider. Register that the
        # work has been done so we don't have to do it again.
        identifier = edition.primary_identifier
        self.bibliographic_coverage_provider.handle_success(identifier)
        self.bibliographic_coverage_provider.add_coverage_record_for(
            identifier)

    return edition, license_pool
def __init__(self, collection, api_class=OdiloAPI, **kwargs):
    """Constructor.

    :param collection: Provide bibliographic coverage to all
        Odilo books in the given Collection.
    :param api_class: Instantiate this class with the given Collection,
        rather than instantiating OdiloAPI.
    """
    super(OdiloBibliographicCoverageProvider, self).__init__(
        collection, **kwargs)
    if isinstance(api_class, OdiloAPI):
        # Use a previously instantiated OdiloAPI instance
        # rather than creating a new one.
        self.api = api_class
    else:
        # A web application should not use this option because it
        # will put a non-scoped session in the mix.
        _db = Session.object_session(collection)
        self.api = api_class(_db, collection)
    # Policy governing how incoming metadata replaces existing data.
    self.replacement_policy = ReplacementPolicy(
        identifiers=True,
        subjects=True,
        contributions=True,
        links=True,
        formats=True,
        rights=True,
        link_content=True,
        # even_if_not_apparently_updated=False,
        analytics=Analytics(self._db))
def __init__(self, _db, collection):
    """Constructor.

    :param _db: Database session.
    :param collection: A Collection configured with the ODILO protocol.
    :raise ValueError: If the collection's protocol is not ODILO.
    :raise CannotLoadConfiguration: If credentials or the base URL are
        missing, or if the credentials fail to produce a token.
    """
    self.odilo_bibliographic_coverage_provider = (
        OdiloBibliographicCoverageProvider(collection, api_class=self)
    )
    if collection.protocol != ExternalIntegration.ODILO:
        raise ValueError(
            "Collection protocol is %s, but passed into OdiloAPI!" %
            collection.protocol)

    self._db = _db
    self.analytics = Analytics(self._db)

    self.collection_id = collection.id
    self.token = None
    self.client_key = collection.external_integration.username
    self.client_secret = collection.external_integration.password
    self.library_api_base_url = collection.external_integration.setting(
        self.LIBRARY_API_BASE_URL).value

    if not self.client_key or not self.client_secret or not self.library_api_base_url:
        raise CannotLoadConfiguration("Odilo configuration is incomplete.")

    # Use utf8 instead of unicode encoding
    settings = [
        self.client_key, self.client_secret, self.library_api_base_url
    ]
    self.client_key, self.client_secret, self.library_api_base_url = (
        setting.encode('utf8') for setting in settings)

    # Get set up with up-to-date credentials from the API.
    self.check_creds()
    if not self.token:
        # Fix: corrected "intialize" typo in the error message.
        raise CannotLoadConfiguration(
            "Invalid credentials for %s, cannot initialize API %s" %
            (self.client_key, self.library_api_base_url))
def load(cls):
    """Load site configuration and fill in analytics policy defaults.

    Delegates to CoreConfiguration.load(), then ensures the POLICIES
    section exists and that an analytics provider is configured,
    defaulting to the local analytics provider.
    """
    CoreConfiguration.load()
    config = CoreConfiguration.instance
    if not config.get(cls.POLICIES):
        config[cls.POLICIES] = {}
    if not config[cls.POLICIES].get(cls.ANALYTICS_POLICY):
        # Fix: the default provider module path was misspelled
        # ("local_analytics_provuder"), so the fallback local
        # analytics provider could never actually be loaded.
        config[cls.POLICIES][cls.ANALYTICS_POLICY] = Analytics.initialize(
            ["api.local_analytics_provider"], config)
    cls.instance = config
def __init__(self, _db, collection=None, api=None, **kwargs):
    """Constructor.

    :param collection: The ODL collection to monitor.
        NOTE(review): although this parameter defaults to None, it is
        dereferenced unconditionally below, so passing None raises
        AttributeError — confirm whether the default is ever exercised.
    :param api: An ODLWithConsolidatedCopiesAPI to reuse; a new one is
        created if not provided.
    """
    super(ODLConsolidatedCopiesMonitor, self).__init__(
        _db, collection, **kwargs)

    self.api = api or ODLWithConsolidatedCopiesAPI(_db, collection)
    # Entry-point URL of the consolidated copies feed, from the
    # collection's external integration settings.
    self.start_url = collection.external_integration.setting(
        ODLWithConsolidatedCopiesAPI.CONSOLIDATED_COPIES_URL_KEY).value
    self.analytics = Analytics(_db)
def default_circulation_replacement_policy(self):
    """Build the ReplacementPolicy used for routine circulation
    updates: replace circulation-related fields but not identifiers,
    and route events through a fresh Analytics object.
    """
    policy_kwargs = dict(
        identifiers=False,
        subjects=True,
        contributions=True,
        formats=True,
        analytics=Analytics(self._db),
    )
    return ReplacementPolicy(**policy_kwargs)
def __init__(self, _db, collection, api_class=OverdriveAPI):
    """Initialize the monitor: build the Overdrive API client, record
    the unchanged-books stopping threshold, and set up analytics.
    """
    super(OverdriveCirculationMonitor, self).__init__(_db, collection)
    api = api_class(_db, collection)
    self.api = api
    # Instance-level copy of the class constant so it can be adjusted
    # per-instance.
    threshold = self.MAXIMUM_CONSECUTIVE_UNCHANGED_BOOKS
    self.maximum_consecutive_unchanged_books = threshold
    self.analytics = Analytics(_db)
def test_collect_event(self):
    """Events are routed to sitewide providers always, and to a
    library's providers only when the event is for that library.
    """
    # This will be a site-wide integration because it will have no
    # associated libraries when the Analytics singleton is instantiated.
    # the first time.
    sitewide_integration, ignore = create(
        self._db,
        ExternalIntegration,
        goal=ExternalIntegration.ANALYTICS_GOAL,
        protocol=MOCK_PROTOCOL,
    )

    # This will be a per-library integration because it will have at least
    # one associated library when the Analytics singleton is instantiated.
    library_integration, ignore = create(
        self._db,
        ExternalIntegration,
        goal=ExternalIntegration.ANALYTICS_GOAL,
        protocol=MOCK_PROTOCOL,
    )
    library, ignore = create(self._db, Library, short_name="library")
    library_integration.libraries += [library]

    work = self._work(title="title", with_license_pool=True)
    [lp] = work.license_pools
    analytics = Analytics(self._db)
    sitewide_provider = analytics.sitewide_providers[0]
    library_provider = analytics.library_providers[library.id][0]

    # Collect an event for a *different* library than the one
    # associated with library_integration.
    analytics.collect_event(
        self._default_library, lp,
        CirculationEvent.DISTRIBUTOR_CHECKIN, None)

    # The sitewide provider was called.
    assert 1 == sitewide_provider.count
    assert CirculationEvent.DISTRIBUTOR_CHECKIN == sitewide_provider.event_type

    # The library provider wasn't called, since the event was for a different library.
    assert 0 == library_provider.count

    analytics.collect_event(
        library, lp, CirculationEvent.DISTRIBUTOR_CHECKIN, None)

    # Now both providers were called, since the event was for the library provider's library.
    assert 2 == sitewide_provider.count
    assert 1 == library_provider.count
    assert CirculationEvent.DISTRIBUTOR_CHECKIN == library_provider.event_type

    # Here's an event that we couldn't associate with any
    # particular library.
    analytics.collect_event(
        None, lp, CirculationEvent.DISTRIBUTOR_CHECKOUT, None)

    # It's counted as a sitewide event, but not as a library event.
    assert 3 == sitewide_provider.count
    assert 1 == library_provider.count
def __init__(self, _db, collection, api_class=EnkiAPI, analytics=None):
    """Set up the import monitor.

    :param api_class: A class/factory to call with (_db, collection),
        or an already-built API object to use as-is.
    :param analytics: An Analytics object to reuse; a new one is
        created when not supplied.
    """
    super(EnkiImport, self).__init__(_db, collection)
    self._db = _db
    # Accept either a factory or a ready-made instance.
    self.api = api_class(_db, collection) if callable(api_class) else api_class
    self.collection_id = collection.id
    self.analytics = analytics or Analytics(_db)
def update_licensepool_for_identifier(self, isbn, availability): """Update availability information for a single book. If the book has never been seen before, a new LicensePool will be created for the book. The book's LicensePool will be updated with current approximate circulation information (we can tell if it's available, but not how many copies). Bibliographic coverage will be ensured for the OneClick Identifier. Work will be created for the LicensePool and set as presentation-ready. :param isbn the identifier OneClick uses :param availability boolean denoting if book can be lent to patrons """ # find a license pool to match the isbn, and see if it'll need a metadata update later license_pool, is_new_pool = LicensePool.for_foreign_id( self._db, DataSource.ONECLICK, Identifier.ONECLICK_ID, isbn, collection=self.collection ) if is_new_pool: # This is the first time we've seen this book. Make sure its # identifier has bibliographic coverage. self.bibliographic_coverage_provider.ensure_coverage( license_pool.identifier ) # now tell the licensepool if it's lendable policy = ReplacementPolicy( identifiers=False, subjects=True, contributions=True, formats=True, analytics=Analytics(self._db), ) # licenses_available can be 0 or 999, depending on whether the book is # lendable or not. licenses_available = 999 if not availability: licenses_available = 0 circulation_data = CirculationData(data_source=DataSource.ONECLICK, primary_identifier=license_pool.identifier, licenses_available=licenses_available) license_pool, circulation_changed = circulation_data.apply( self._db, self.collection, replace=policy, ) return license_pool, is_new_pool, circulation_changed
def test_is_configured(self):
    """is_configured() lazily populates the class-level flags, then
    answers based on GLOBAL_ENABLED and per-library LIBRARY_ENABLED.
    """
    # If the Analytics constructor has not been called, then
    # is_configured() calls it so that the values are populated.
    # (The sentinels below prove the call happened: they get replaced.)
    Analytics.GLOBAL_ENABLED = None
    Analytics.LIBRARY_ENABLED = object()
    library = self._default_library
    assert False == Analytics.is_configured(library)
    assert False == Analytics.GLOBAL_ENABLED
    assert set() == Analytics.LIBRARY_ENABLED

    # If analytics are enabled globally, they are enabled for any
    # library.
    Analytics.GLOBAL_ENABLED = True
    assert True == Analytics.is_configured(object())

    # If not, they are enabled only for libraries whose IDs are
    # in LIBRARY_ENABLED.
    Analytics.GLOBAL_ENABLED = False
    assert False == Analytics.is_configured(library)
    Analytics.LIBRARY_ENABLED.add(library.id)
    assert True == Analytics.is_configured(library)
def run_once(self, start, cutoff):
    """Process Overdrive books that changed between `start` and `cutoff`.

    Updates a LicensePool for each changed book, logs a
    DISTRIBUTOR_TITLE_ADD event for new books, and stops early after a
    long run of unchanged books (when a threshold is configured).
    """
    _db = self._db
    added_books = 0
    overdrive_data_source = DataSource.lookup(_db, DataSource.OVERDRIVE)

    total_books = 0
    consecutive_unchanged_books = 0
    for i, book in enumerate(self.recently_changed_ids(start, cutoff)):
        total_books += 1
        # Periodic progress logging every 100 books.
        if not total_books % 100:
            self.log.info("%s books processed", total_books)
        if not book:
            continue
        license_pool, is_new, is_changed = self.api.update_licensepool(
            book)
        # Log a circulation event for this work.
        if is_new:
            Analytics.collect_event(
                _db, license_pool,
                CirculationEvent.DISTRIBUTOR_TITLE_ADD,
                license_pool.last_checked)
        _db.commit()

        if is_changed:
            consecutive_unchanged_books = 0
        else:
            consecutive_unchanged_books += 1
            if (self.maximum_consecutive_unchanged_books
                and consecutive_unchanged_books >=
                self.maximum_consecutive_unchanged_books):
                # We're supposed to stop this run after finding a
                # run of books that have not changed, and we have
                # in fact seen that many consecutive unchanged
                # books.
                self.log.info("Stopping at %d unchanged books.",
                              consecutive_unchanged_books)
                break

    if total_books:
        self.log.info("Processed %d books total.", total_books)
def test_release_hold_sends_analytics_event(self):
    """Releasing a hold through the circulation manager should emit a
    CM_HOLD_RELEASE analytics event.
    """
    # Arrange: an existing hold and a queued successful release.
    self.pool.on_hold_to(self.patron)
    self.remote.queue_release_hold(True)

    # Configure a mock analytics provider so events can be observed.
    config = {
        Configuration.POLICIES: {
            Configuration.ANALYTICS_POLICY: ["core.mock_analytics_provider"]
        }
    }
    with temp_config(config) as config:
        provider = MockAnalyticsProvider()
        analytics = Analytics.initialize(
            ['core.mock_analytics_provider'], config
        )
        result = self.circulation.release_hold(
            self.patron, '1234', self.pool)

        eq_(True, result)

        # An analytics event was created.
        mock = Analytics.instance().providers[0]
        eq_(1, mock.count)
        eq_(CirculationEvent.CM_HOLD_RELEASE, mock.event_type)
def test_hold_sends_analytics_event(self):
    """Placing a hold (after a failed checkout) should emit a
    CM_HOLD_PLACE analytics event; re-placing it should not.
    """
    # Arrange: checkout fails with no copies, so a hold is placed.
    self.remote.queue_checkout(NoAvailableCopies())
    holdinfo = HoldInfo(
        self.identifier.type, self.identifier.identifier,
        None, None, 10)
    self.remote.queue_hold(holdinfo)

    # Configure a mock analytics provider so events can be observed.
    config = {
        Configuration.POLICIES: {
            Configuration.ANALYTICS_POLICY: ["core.mock_analytics_provider"]
        }
    }
    with temp_config(config) as config:
        provider = MockAnalyticsProvider()
        analytics = Analytics.initialize(
            ['core.mock_analytics_provider'], config
        )
        loan, hold, is_new = self.borrow()

        # The Hold looks good.
        eq_(holdinfo.identifier, hold.license_pool.identifier.identifier)
        eq_(self.patron, hold.patron)
        eq_(None, loan)
        eq_(True, is_new)

        # An analytics event was created.
        mock = Analytics.instance().providers[0]
        eq_(1, mock.count)
        eq_(CirculationEvent.CM_HOLD_PLACE, mock.event_type)

        # Try to 'borrow' the same book again.
        self.remote.queue_checkout(AlreadyOnHold())
        loan, hold, is_new = self.borrow()
        eq_(False, is_new)

        # Since the hold already existed, no new analytics event was
        # sent.
        eq_(1, mock.count)
def process_availability(self, media_type='ebook'):
    """Update LicensePools for every title of the given media type.

    Logs a DISTRIBUTOR_AVAILABILITY_NOTIFY event for new titles and
    commits to the database every `batch_size` items.

    :param media_type: Media type string passed to the API.
    :return: The number of titles processed.
    """
    # get list of all titles, with availability info
    availability_list = self.api.get_ebook_availability_info(
        media_type=media_type)
    item_count = 0
    for availability in availability_list:
        isbn = availability['isbn']
        # boolean True/False value, not number of licenses
        available = availability['availability']
        license_pool, is_new, is_changed = (
            self.api.update_licensepool_for_identifier(
                isbn, available))
        # Log a circulation event for this work.
        if is_new:
            Analytics.collect_event(
                self._db, license_pool,
                CirculationEvent.DISTRIBUTOR_AVAILABILITY_NOTIFY,
                license_pool.last_checked)

        item_count += 1
        if item_count % self.batch_size == 0:
            self._db.commit()

    return item_count
def __init__(self, _db, collection, api_class=BibliothecaAPI,
             cli_date=None, analytics=None):
    """Set up the event monitor.

    :param api_class: Either an actual BibliothecaAPI object to reuse,
        or a class to instantiate against this collection.
    :param cli_date: Optional date (e.g. from the command line) used to
        override the default start time.
    :param analytics: An Analytics object to reuse; a new one is
        created when not supplied.
    """
    self.analytics = analytics or Analytics(_db)
    super(BibliothecaEventMonitor, self).__init__(_db, collection)
    self.api = (
        api_class if isinstance(api_class, BibliothecaAPI)
        else api_class(_db, collection)
    )
    self.bibliographic_coverage_provider = (
        BibliothecaBibliographicCoverageProvider(collection, self.api)
    )
    if cli_date:
        self.default_start_time = self.create_default_start_time(
            _db, cli_date)
def fulfill(self, patron, pin, licensepool, delivery_mechanism,
            sync_on_failure=True):
    """Fulfil a book that a patron has previously checked out.

    :param delivery_mechanism: A LicensePoolDeliveryMechanism
        explaining how the patron wants the book to be delivered. If
        the book has previously been delivered through some other
        mechanism, this parameter is ignored and the previously used
        mechanism takes precedence.
    :param sync_on_failure: If no local loan is found, sync the
        patron's bookshelf once and retry before giving up.

    :return: A FulfillmentInfo object.
    :raise NoActiveLoan: If no loan exists even after syncing.
    :raise DeliveryMechanismConflict: If the loan was already fulfilled
        via a different, non-streaming mechanism.
    :raise NoAcceptableFormat: If the remote API returns no usable
        content or content link.
    """
    fulfillment = None
    loan = get_one(
        self._db, Loan, patron=patron, license_pool=licensepool,
        on_multiple='interchangeable'
    )
    if not loan:
        if sync_on_failure:
            # Sync and try again (recursing at most once, since the
            # retry passes sync_on_failure=False).
            self.sync_bookshelf(patron, pin)
            return self.fulfill(
                patron, pin, licensepool=licensepool,
                delivery_mechanism=delivery_mechanism,
                sync_on_failure=False
            )
        else:
            raise NoActiveLoan(
                _("Cannot find your active loan for this work."))
    # A non-streaming mechanism conflicts with a previously-used,
    # different mechanism; streaming fulfillments are always allowed.
    if loan.fulfillment is not None and loan.fulfillment != delivery_mechanism and not delivery_mechanism.delivery_mechanism.is_streaming:
        raise DeliveryMechanismConflict(
            _("You already fulfilled this loan as %(loan_delivery_mechanism)s, you can't also do it as %(requested_delivery_mechanism)s",
              loan_delivery_mechanism=loan.fulfillment.delivery_mechanism.name,
              requested_delivery_mechanism=delivery_mechanism.delivery_mechanism.name))

    if licensepool.open_access:
        fulfillment = self.fulfill_open_access(
            licensepool, delivery_mechanism.delivery_mechanism
        )
    else:
        api = self.api_for_license_pool(licensepool)
        internal_format = api.internal_format(delivery_mechanism)
        fulfillment = api.fulfill(
            patron, pin, licensepool, internal_format)
        if not fulfillment or not (
                fulfillment.content_link or fulfillment.content):
            raise NoAcceptableFormat()

    # Send out an analytics event to record the fact that
    # a fulfillment was initiated through the circulation
    # manager.
    Analytics.collect_event(
        self._db,
        licensepool,
        CirculationEvent.CM_FULFILL,
    )

    # Make sure the delivery mechanism we just used is associated
    # with the loan.
    if loan.fulfillment is None and not delivery_mechanism.delivery_mechanism.is_streaming:
        __transaction = self._db.begin_nested()
        loan.fulfillment = delivery_mechanism
        __transaction.commit()
    return fulfillment
class OverdriveCirculationMonitor(CollectionMonitor, TimelineMonitor):
    """Maintain LicensePools for recently changed Overdrive titles.

    Create basic Editions for any new LicensePools that show up.
    """
    SERVICE_NAME = "Overdrive Circulation Monitor"
    PROTOCOL = ExternalIntegration.OVERDRIVE
    OVERLAP = datetime.timedelta(minutes=1)

    # Report successful completion upon finding this number of
    # consecutive books in the Overdrive results whose LicensePools
    # haven't changed since last time. Overdrive results are not in
    # strict chronological order, but if you see 100 consecutive books
    # that haven't changed, you're probably done.
    MAXIMUM_CONSECUTIVE_UNCHANGED_BOOKS = None

    def __init__(self, _db, collection, api_class=OverdriveAPI):
        """Constructor.

        :param api_class: Class to instantiate for talking to
            Overdrive (injectable for testing).
        """
        super(OverdriveCirculationMonitor, self).__init__(_db, collection)
        self.api = api_class(_db, collection)
        self.maximum_consecutive_unchanged_books = (
            self.MAXIMUM_CONSECUTIVE_UNCHANGED_BOOKS
        )
        self.analytics = Analytics(_db)

    def recently_changed_ids(self, start, cutoff):
        # Thin pass-through to the Overdrive API client.
        return self.api.recently_changed_ids(start, cutoff)

    def catch_up_from(self, start, cutoff, progress):
        """Find Overdrive books that changed recently.

        :progress: A TimestampData representing the time previously
            covered by this Monitor.
        """
        _db = self._db
        added_books = 0
        overdrive_data_source = DataSource.lookup(
            _db, DataSource.OVERDRIVE)

        total_books = 0
        consecutive_unchanged_books = 0
        # Ask for changes between the last time covered by the Monitor
        # and the current time.
        for i, book in enumerate(
                self.recently_changed_ids(start, cutoff)):
            total_books += 1
            # Periodic progress logging every 100 books.
            if not total_books % 100:
                self.log.info("%s books processed", total_books)
            if not book:
                continue
            license_pool, is_new, is_changed = (
                self.api.update_licensepool(book))
            # Log a circulation event for this work.
            if is_new:
                for library in self.collection.libraries:
                    self.analytics.collect_event(
                        library, license_pool,
                        CirculationEvent.DISTRIBUTOR_TITLE_ADD,
                        license_pool.last_checked)

            _db.commit()

            if is_changed:
                consecutive_unchanged_books = 0
            else:
                consecutive_unchanged_books += 1
                if (self.maximum_consecutive_unchanged_books
                    and consecutive_unchanged_books >=
                    self.maximum_consecutive_unchanged_books):
                    # We're supposed to stop this run after finding a
                    # run of books that have not changed, and we have
                    # in fact seen that many consecutive unchanged
                    # books.
                    self.log.info("Stopping at %d unchanged books.",
                                  consecutive_unchanged_books)
                    break

        progress.achievements = "Books processed: %d." % total_books
class BibliothecaCirculationSweep(IdentifierSweepMonitor):
    """Check on the current circulation status of each Bibliotheca book
    in our collection.

    In some cases this will lead to duplicate events being logged,
    because this monitor and the main Bibliotheca circulation monitor
    will count the same event.  However it will greatly improve our
    current view of our Bibliotheca circulation, which is more
    important.
    """
    SERVICE_NAME = "Bibliotheca Circulation Sweep"
    DEFAULT_BATCH_SIZE = 25
    PROTOCOL = ExternalIntegration.BIBLIOTHECA

    def __init__(self, _db, collection, api_class=BibliothecaAPI,
                 **kwargs):
        """Constructor.

        :param api_class: Either an already-built BibliothecaAPI
            instance to reuse, or a class to instantiate.
        """
        _db = Session.object_session(collection)
        super(BibliothecaCirculationSweep, self).__init__(
            _db, collection, **kwargs
        )
        if isinstance(api_class, BibliothecaAPI):
            self.api = api_class
        else:
            self.api = api_class(_db, collection)
        self.analytics = Analytics(_db)

    def process_items(self, identifiers):
        """Update circulation for a batch of Bibliotheca identifiers,
        zeroing out availability for books Bibliotheca no longer
        mentions.
        """
        identifiers_by_bibliotheca_id = dict()
        bibliotheca_ids = set()
        for identifier in identifiers:
            bibliotheca_ids.add(identifier.identifier)
            identifiers_by_bibliotheca_id[identifier.identifier] = identifier

        # Track which of our identifiers Bibliotheca never mentions.
        identifiers_not_mentioned_by_bibliotheca = set(identifiers)
        now = datetime.datetime.utcnow()
        for circ in self.api.get_circulation_for(bibliotheca_ids):
            if not circ:
                continue
            self._process_circulation_data(
                circ, identifiers_by_bibliotheca_id,
                identifiers_not_mentioned_by_bibliotheca,
            )

        # At this point there may be some license pools left over
        # that Bibliotheca doesn't know about. This is a pretty reliable
        # indication that we no longer own any licenses to the
        # book.
        for identifier in identifiers_not_mentioned_by_bibliotheca:
            pools = [
                lp for lp in identifier.licensed_through
                if lp.data_source.name == DataSource.BIBLIOTHECA
                and lp.collection == self.collection
            ]
            if not pools:
                continue
            for pool in pools:
                if pool.licenses_owned > 0:
                    if pool.presentation_edition:
                        self.log.warn("Removing %s (%s) from circulation",
                                      pool.presentation_edition.title,
                                      pool.presentation_edition.author)
                    else:
                        self.log.warn(
                            "Removing unknown work %s from circulation.",
                            identifier.identifier)
                # Zero out all availability counts for this pool.
                pool.update_availability(0, 0, 0, 0, self.analytics)
                pool.last_checked = now

    def _process_circulation_data(self, circ,
                                  identifiers_by_bibliotheca_id,
                                  identifiers_not_mentioned_by_bibliotheca):
        """Process a single CirculationData object retrieved from
        Bibliotheca.
        """
        bibliotheca_id = circ[Identifier][Identifier.BIBLIOTHECA_ID]
        identifier = identifiers_by_bibliotheca_id[bibliotheca_id]
        identifiers_not_mentioned_by_bibliotheca.remove(identifier)
        pools = [
            lp for lp in identifier.licensed_through
            if lp.data_source.name == DataSource.BIBLIOTHECA
            and lp.collection == self.collection
        ]
        if not pools:
            # We don't have a license pool for this work. That
            # shouldn't happen--how did we know about the
            # identifier?--but it shouldn't be a big deal to
            # create one.
            pool, ignore = LicensePool.for_foreign_id(
                self._db, self.collection.data_source, identifier.type,
                identifier.identifier, collection=self.collection)

            # Bibliotheca books are never open-access.
            pool.open_access = False
            for library in self.collection.libraries:
                self.analytics.collect_event(
                    library, pool, CirculationEvent.DISTRIBUTOR_TITLE_ADD,
                    datetime.datetime.utcnow())
        else:
            [pool] = pools

        self.api.apply_circulation_information_to_licensepool(
            circ, pool, self.analytics)
import core.storage
from core.analytics import Analytics
import matplotlib
# Use a non-interactive backend so plotting works without a display.
matplotlib.use('Agg')

# Route all DEBUG-and-above log output to stdout.
root = logging.getLogger()
root.setLevel(logging.DEBUG)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    '%(asctime)s - %(levelname).5s - %(name)s - %(message)s')
handler.setFormatter(formatter)
root.addHandler(handler)

# NOTE(review): this Analytics is constructed with a file path and an
# integer — presumably a data-file-backed analytics class; confirm the
# core.analytics API matches this usage.
analytics = Analytics('temp/data.h5', 0)


def get_frames():
    """Yield each stored frame 100 times, printing the frame index as a
    progress indicator.
    """
    fs = core.storage.read_arrays_from_file('temp/frames.h5')
    for index, frame in enumerate(fs):
        print(index)
        for i in range(100):
            yield frame

# frames = core.storage.read_arrays_from_file('temp/frames.h5')
frames = get_frames()
movement_frames = analytics.particle_movement_frames(frames)
frames_total = analytics.frames_in_trajectory()
    def test_initialize(self):
        """Analytics supports multiple providers at once, configured
        either site-wide or per-library via ExternalIntegrations.
        """
        # Two site-wide integrations: one mock provider with a URL,
        # and the built-in local analytics provider.
        site_wide_integration1, ignore = create(
            self._db, ExternalIntegration,
            goal=ExternalIntegration.ANALYTICS_GOAL,
            protocol=MOCK_PROTOCOL,
        )
        site_wide_integration1.url = self._str
        site_wide_integration2, ignore = create(
            self._db, ExternalIntegration,
            goal=ExternalIntegration.ANALYTICS_GOAL,
            protocol="..local_analytics_provider",
        )

        # A broken integration: its protocol doesn't resolve to any
        # importable provider module.
        missing_integration, ignore = create(
            self._db, ExternalIntegration,
            goal=ExternalIntegration.ANALYTICS_GOAL,
            protocol="missing_provider",
        )

        # Two library-specific integrations: one shared by both
        # libraries, one for L2 only.
        l1, ignore = create(self._db, Library, short_name="L1")
        l2, ignore = create(self._db, Library, short_name="L2")
        library_integration1, ignore = create(
            self._db, ExternalIntegration,
            goal=ExternalIntegration.ANALYTICS_GOAL,
            protocol=MOCK_PROTOCOL,
        )
        library_integration1.libraries += [l1, l2]
        library_integration2, ignore = create(
            self._db, ExternalIntegration,
            goal=ExternalIntegration.ANALYTICS_GOAL,
            protocol=MOCK_PROTOCOL,
        )
        library_integration2.libraries += [l2]

        analytics = Analytics(self._db)

        # Both working site-wide integrations became providers, in
        # creation order, with their settings applied.
        assert 2 == len(analytics.sitewide_providers)
        assert isinstance(analytics.sitewide_providers[0], MockAnalyticsProvider)
        assert site_wide_integration1.url == analytics.sitewide_providers[0].url
        assert isinstance(analytics.sitewide_providers[1], LocalAnalyticsProvider)

        # The broken integration was recorded as a failure rather than
        # crashing initialization.
        assert missing_integration.id in analytics.initialization_exceptions

        # L1 sees only the shared integration; L2 sees both.
        assert 1 == len(analytics.library_providers[l1.id])
        assert isinstance(analytics.library_providers[l1.id][0], MockAnalyticsProvider)

        assert 2 == len(analytics.library_providers[l2.id])
        for provider in analytics.library_providers[l2.id]:
            assert isinstance(provider, MockAnalyticsProvider)

        # Instantiating an Analytics object initializes class
        # variables with the current state of site analytics.

        # We have global analytics enabled.
        assert Analytics.GLOBAL_ENABLED is True

        # We also have analytics enabled for two of the three libraries.
        assert {l1.id, l2.id} == Analytics.LIBRARY_ENABLED

        # Now we'll change the analytics configuration.
        self._db.delete(site_wide_integration1)
        self._db.delete(site_wide_integration2)
        self._db.delete(library_integration1)

        # But Analytics is a singleton, so if we instantiate a new
        # Analytics object in the same app instance, it will be the
        # same as the previous one.
        analytics2 = Analytics(self._db)
        assert analytics2 == analytics
        assert 2 == len(analytics.sitewide_providers)
        assert 1 == len(analytics.library_providers[l1.id])
        assert 2 == len(analytics.library_providers[l2.id])

        # If, however, we simulate a configuration refresh ...
        analytics3 = Analytics(self._db, refresh=True)
        # ... we will see the updated configuration.
        assert analytics3 == analytics
        assert Analytics.GLOBAL_ENABLED is False
        assert {l2.id} == Analytics.LIBRARY_ENABLED
class OverdriveCirculationMonitor(CollectionMonitor):
    """Maintain LicensePools for recently changed Overdrive titles.

    Create basic Editions for any new LicensePools that show up.
    """
    SERVICE_NAME = "Overdrive Circulation Monitor"
    INTERVAL_SECONDS = 500
    PROTOCOL = ExternalIntegration.OVERDRIVE

    # Report successful completion upon finding this number of
    # consecutive books in the Overdrive results whose LicensePools
    # haven't changed since last time. Overdrive results are not in
    # strict chronological order, but if you see 100 consecutive books
    # that haven't changed, you're probably done.
    MAXIMUM_CONSECUTIVE_UNCHANGED_BOOKS = None

    def __init__(self, _db, collection, api_class=OverdriveAPI):
        """Constructor.

        :param _db: A database session.
        :param collection: The Overdrive Collection to monitor.
        :param api_class: Injectable API class, for testing.
        """
        super(OverdriveCirculationMonitor, self).__init__(_db, collection)
        self.api = api_class(_db, collection)
        self.maximum_consecutive_unchanged_books = (
            self.MAXIMUM_CONSECUTIVE_UNCHANGED_BOOKS)
        self.analytics = Analytics(_db)

    def recently_changed_ids(self, start, cutoff):
        """Ask the Overdrive API which titles changed in the window."""
        return self.api.recently_changed_ids(start, cutoff)

    def run_once(self, start, cutoff):
        """Update the LicensePool for every title Overdrive reports as
        changed between `start` and `cutoff`.
        """
        _db = self._db
        # NOTE(review): the result of this lookup was previously bound
        # to a local that was never read (as was a dead `added_books`
        # counter). The call is kept in case lookup() has side effects
        # such as creating the data source -- TODO confirm and drop.
        DataSource.lookup(_db, DataSource.OVERDRIVE)

        total_books = 0
        consecutive_unchanged_books = 0
        for book in self.recently_changed_ids(start, cutoff):
            total_books += 1
            # Periodic progress logging for long runs.
            if not total_books % 100:
                self.log.info("%s books processed", total_books)
            if not book:
                continue
            license_pool, is_new, is_changed = self.api.update_licensepool(
                book)

            # Log a circulation event for this work.
            if is_new:
                for library in self.collection.libraries:
                    self.analytics.collect_event(
                        library, license_pool,
                        CirculationEvent.DISTRIBUTOR_TITLE_ADD,
                        license_pool.last_checked)

            # NOTE(review): commit placement reconstructed from
            # whitespace-mangled source; assumed to run once per book,
            # not only for new books -- confirm against history.
            _db.commit()

            if is_changed:
                consecutive_unchanged_books = 0
            else:
                consecutive_unchanged_books += 1
                if (self.maximum_consecutive_unchanged_books
                    and consecutive_unchanged_books >=
                        self.maximum_consecutive_unchanged_books):
                    # We're supposed to stop this run after finding a
                    # run of books that have not changed, and we have
                    # in fact seen that many consecutive unchanged
                    # books.
                    self.log.info("Stopping at %d unchanged books.",
                                  consecutive_unchanged_books)
                    break

        if total_books:
            self.log.info("Processed %d books total.", total_books)
class EnkiImport(CollectionMonitor):
    """Import content from Enki that we don't yet have in our collection
    """
    SERVICE_NAME = "Enki Circulation Monitor"
    INTERVAL_SECONDS = 500
    PROTOCOL = EnkiAPI.ENKI_EXTERNAL
    # Number of titles requested from Enki per availability call.
    DEFAULT_BATCH_SIZE = 100
    FIVE_MINUTES = datetime.timedelta(minutes=5)

    def __init__(self, _db, collection, api_class=EnkiAPI):
        """Constructor.

        :param _db: A database session.
        :param collection: The Enki Collection to import into.
        :param api_class: Injectable API class, for testing.
        """
        super(EnkiImport, self).__init__(_db, collection)
        self._db = _db
        self.api = api_class(_db, collection)
        # Store the ID, not the object, so `collection` can be
        # re-fetched fresh from the current session (see property).
        self.collection_id = collection.id
        self.analytics = Analytics(_db)
        self.bibliographic_coverage_provider = (
            EnkiBibliographicCoverageProvider(collection, api_class=self.api)
        )

    @property
    def collection(self):
        # Look the Collection up by ID on every access.
        return Collection.by_id(self._db, id=self.collection_id)

    def recently_changed_ids(self, start, cutoff):
        """Delegate to the Enki API's changed-IDs endpoint."""
        return self.api.recently_changed_ids(start, cutoff)

    def run_once(self, start, cutoff):
        """Page through Enki availability data since `start` and
        process every book found, committing after each batch.
        """
        # Give us five minutes of overlap because it's very important
        # we don't miss anything.
        since = start - self.FIVE_MINUTES
        id_start = 0
        while True:
            availability = self.api.availability(
                since=since, strt=id_start, qty=self.DEFAULT_BATCH_SIZE)
            if availability.status_code != 200:
                # NOTE(review): a non-200 response is logged but the
                # loop still parses availability.content below --
                # confirm this fall-through is intentional.
                self.log.error(
                    "Could not contact Enki server for content "
                    "availability. Status: %d",
                    availability.status_code)
            content = availability.content
            count = 0
            for bibliographic, circulation in (
                    BibliographicParser().process_all(content)):
                self.process_book(bibliographic, circulation)
                count += 1
            # An empty page means we've consumed everything; note the
            # final (empty) batch is not committed, matching prior runs.
            if count == 0:
                break
            self._db.commit()
            id_start += self.DEFAULT_BATCH_SIZE

    def process_book(self, bibliographic, availability):
        """Apply one book's bibliographic and availability data.

        :return: A 2-tuple (edition, license_pool).
        """
        license_pool, new_license_pool = availability.license_pool(
            self._db, self.collection)
        now = datetime.datetime.utcnow()
        edition, new_edition = bibliographic.edition(self._db)
        license_pool.edition = edition
        policy = ReplacementPolicy(
            identifiers=False,
            subjects=True,
            contributions=True,
            formats=True,
        )
        availability.apply(
            self._db,
            license_pool.collection,
            replace=policy,
        )
        if new_edition:
            bibliographic.apply(edition, self.collection, replace=policy)

        if new_license_pool or new_edition:
            # At this point we have done work equivalent to that done by
            # the EnkiBibliographicCoverageProvider. Register that the
            # work has been done so we don't have to do it again.
            identifier = edition.primary_identifier
            self.bibliographic_coverage_provider.handle_success(identifier)
            self.bibliographic_coverage_provider.add_coverage_record_for(
                identifier)
            # Record a "title added" analytics event for every library
            # that uses this collection.
            for library in self.collection.libraries:
                self.analytics.collect_event(
                    library, license_pool,
                    CirculationEvent.DISTRIBUTOR_TITLE_ADD, now)

        return edition, license_pool