Ejemplo n.º 1
0
    def test_create_item(self):
        """A DisplayItem and a Collection can each be created and then
        linked together via item.collections."""
        beet_major = '65370'
        beet_minor = '49339'

        item, is_new = get_one_or_create(
            self._db, DisplayItem,
            create_method_kwargs=dict(
                beacon_major_id=beet_major,
                beacon_minor_id=beet_minor,
            ),
            id=self._id,
        )
        eq_(True, is_new)

        collection, is_new = get_one_or_create(
            self._db, Collection,
            create_method_kwargs=dict(
                name="Cool Books",
                curator="Jane Curator",
            ),
            id=self._id,
        )
        eq_(True, is_new)

        # Associating the collection with the item makes it visible
        # through the item's collections list.
        item.collections.append(collection)
        eq_(collection.id, item.collections[0].id)
Ejemplo n.º 2
0
def package_setup():
    """Make sure the database schema is initialized and initial
    data is in place.

    Intended as one-time test-package setup: empties existing tables,
    creates any missing schema objects, seeds application data, and
    creates the patron used by the dummy authentication mechanism.
    """
    engine, connection = DatabaseTest.get_database_connection()

    # First, recreate the schema.
    #
    # Base.metadata.drop_all(connection) doesn't work here, so we
    # approximate by dropping everything except the materialized
    # views.
    #
    # NOTE(review): table.delete() issues a DELETE (empties the rows),
    # it does not DROP the table -- confirm that's the intent.
    for table in reversed(Base.metadata.sorted_tables):
        if not table.name.startswith('mv_'):
            engine.execute(table.delete())

    Base.metadata.create_all(connection)

    # Initialize basic database data needed by the application.
    _db = Session(connection)
    SessionManager.initialize_data(_db)

    # Create the patron used by the dummy authentication mechanism.
    # TODO: This can be probably be moved to circulation.
    get_one_or_create(
        _db, Patron, authorization_identifier="200",
        create_method_kwargs=dict(external_identifier="200200200")
    )
    _db.commit()
    connection.close()
    engine.dispose()
Ejemplo n.º 3
0
 def _collection(self, name=u"Faketown Public Library"):
     """Find or create a Collection (and its DataSource) by name."""
     source, ignore = get_one_or_create(self._db, DataSource, name=name)
     collection, ignore = get_one_or_create(
         self._db, Collection,
         name=name,
         data_source=source,
         client_id=u"abc",
         client_secret=u"def",
     )
     return collection
Ejemplo n.º 4
0
    def to_collection(self, _db):
        """Find or create a Collection object for this Overdrive Advantage
        account.

        :return: a 2-tuple of Collections (primary Overdrive
        collection, Overdrive Advantage collection)
        """
        # The Advantage account is useless without its parent's
        # credentials, so look up the parent Collection first.
        parent = get_one(
            _db, Collection,
            external_account_id=self.parent_library_id,
            protocol=Collection.OVERDRIVE,
        )
        if not parent:
            # Without the parent's credentials we can't access the child.
            raise ValueError(
                "Cannot create a Collection whose parent does not already exist."
            )
        name = parent.name + " / " + self.name
        child, ignore = get_one_or_create(
            _db, Collection,
            parent_id=parent.id,
            protocol=Collection.OVERDRIVE,
            external_account_id=self.library_id,
            create_method_kwargs=dict(name=name),
        )
        # Keep the child's name in sync with the library name, which
        # may have changed since the Collection was created.
        child.name = name
        return parent, child
Ejemplo n.º 5
0
    def __init__(self, _db, collection=None, *args, **kwargs):
        # Records of the mock HTTP traffic this object sends/receives.
        self.access_token_requests = []
        self.requests = []
        self.responses = []

        if not collection:
            # OverdriveAPI needs a Collection, but none was provided.
            # Just create a basic one.
            library = Library.instance(_db)
            collection, ignore = get_one_or_create(
                _db,
                Collection,
                name="Test Overdrive Collection",
                protocol=Collection.OVERDRIVE,
                create_method_kwargs=dict(external_account_id=u'c'))
            collection.external_integration.username = u'a'
            collection.external_integration.password = u'b'
            collection.external_integration.set_setting('website_id', 'd')
            library.collections.append(collection)

        # The constructor will always make a request for the collection token.
        # Queue the canned response *before* calling the superclass
        # constructor below, which is what issues that request.
        self.queue_response(
            200, content=self.mock_collection_token("collection token"))
        self.access_token_response = self.mock_access_token_response(
            "bearer token")

        super(MockOverdriveAPI, self).__init__(_db, collection, *args,
                                               **kwargs)
Ejemplo n.º 6
0
    def _customlist(self, foreign_identifier=None,
                    name=None,
                    data_source_name=DataSource.NYT, num_entries=1,
                    entries_exist_as_works=True
    ):
        """Create a CustomList with `num_entries` entries for testing.

        :return: a 2-tuple (CustomList, list of Editions used as entries).
        """
        data_source = DataSource.lookup(self._db, data_source_name)
        foreign_identifier = foreign_identifier or self._str
        now = datetime.utcnow()
        customlist, ignore = get_one_or_create(
            self._db, CustomList,
            data_source=data_source,
            foreign_identifier=foreign_identifier,
            create_method_kwargs=dict(
                created=now,
                updated=now,
                name=name or self._str,
                description=self._str,
            ),
        )

        editions = []
        for index in range(num_entries):
            if entries_exist_as_works:
                # Back the entry with a full Work.
                work = self._work(with_open_access_download=True)
                edition = work.presentation_edition
            else:
                edition = self._edition(
                    data_source_name, title="Item %s" % index)
                edition.permanent_work_id = "Permanent work ID %s" % self._str
            customlist.add_entry(
                edition, "Annotation %s" % index, first_appearance=now)
            editions.append(edition)
        return customlist, editions
Ejemplo n.º 7
0
    def do_run(self, _db=None, cmd_args=None, output=sys.stdout):
        """Configure the Adobe Vendor ID integration from command-line
        arguments.

        :raise ValueError: if the node value is not a hexadecimal
            number, or a delegate URL does not end with "/AdobeAuth/".
        """
        _db = _db or self._db
        parsed = self.parse_command_line(_db, cmd_args=cmd_args)

        integration, is_new = get_one_or_create(
            _db, ExternalIntegration, goal=ExternalIntegration.DRM_GOAL,
            protocol=ExternalIntegration.ADOBE_VENDOR_ID
        )
        c = Configuration

        # All node values are string representations of hexidecimal
        # numbers. Parse the value purely as validation -- a
        # non-hexadecimal value raises ValueError before any settings
        # are changed. (The previous `hex_node` binding was never used.)
        int(parsed.node_value, 16)

        integration.setting(c.ADOBE_VENDOR_ID).value = parsed.vendor_id
        integration.setting(c.ADOBE_VENDOR_ID_NODE_VALUE).value = parsed.node_value
        delegates = parsed.delegate
        for delegate in delegates:
            if not delegate.endswith("/AdobeAuth/"):
                raise ValueError(
                    'Invalid delegate: %s. Expected something ending with "/AdobeAuth/"' % delegate
                )
        integration.setting(Configuration.ADOBE_VENDOR_ID_DELEGATE_URL).value = (
            json.dumps(delegates)
        )
        _db.commit()
Ejemplo n.º 8
0
 def _classification(self, identifier, subject, data_source, weight=1):
     """Find or create a Classification linking `identifier` to `subject`."""
     classification, ignore = get_one_or_create(
         self._db, Classification,
         identifier=identifier,
         subject=subject,
         data_source=data_source,
         weight=weight,
     )
     return classification
Ejemplo n.º 9
0
 def _integration_client(self, url=None, shared_secret=None):
     """Find or create an IntegrationClient for testing."""
     client, ignore = get_one_or_create(
         self._db, IntegrationClient,
         shared_secret=shared_secret or u"secret",
         create_method_kwargs=dict(url=url or self._url),
     )
     return client
Ejemplo n.º 10
0
 def _integration(self, _db, id, name, protocol, goal):
     """Find or create the ExternalIntegration referred to.

     :raise ValueError: if not enough information is given to identify
         an integration, or if the requested integration doesn't exist.
     """
     if not id and not name and not (protocol and goal):
         raise ValueError(
             "An integration must be identified by either ID, name, or the combination of protocol and goal."
         )
     integration = None
     if id:
         # Bug fix: the ID filter must be a keyword argument. Passing
         # the SQLAlchemy expression `ExternalIntegration.id==id`
         # positionally bound it to get_one's next positional
         # parameter instead of acting as a filter.
         integration = get_one(_db, ExternalIntegration, id=id)
         if not integration:
             raise ValueError("No integration with ID %s." % id)
     if name:
         integration = get_one(_db, ExternalIntegration, name=name)
         if not integration and not (protocol and goal):
             raise ValueError(
                 'No integration with name "%s". To create it, you must also provide protocol and goal.' % name
             )
     if not integration and (protocol and goal):
         integration, is_new = get_one_or_create(
             _db, ExternalIntegration, protocol=protocol, goal=goal
         )
     if name:
         # Set (or update) the integration's name.
         integration.name = name
     return integration
Ejemplo n.º 11
0
    def to_customlist(self, _db, dictreader):
        """Turn the CSV file in `dictreader` into a CustomList.

        TODO: Keep track of the list's current members. If any item
        was on the list but is no longer on the list, set its
        last_appeared date to its most recent appearance.
        """
        data_source = DataSource.lookup(_db, self.data_source_name)
        now = datetime.datetime.utcnow()

        # Find or create the CustomList object itself.
        custom_list, was_new = get_one_or_create(
            _db,
            CustomList,
            data_source=data_source,
            foreign_identifier=self.foreign_identifier,
            create_method_kwargs=dict(created=now),
        )
        custom_list.updated = now

        # Convert each CSV row into a Metadata object, then attach it
        # to the list as a CustomListEntry.
        for metadata in self.to_metadata(dictreader):
            self.metadata_to_list_entry(
                custom_list, data_source, now, metadata)
Ejemplo n.º 12
0
    def validate_email(self):
        """Manually validate an email address, without the admin having
        to click on a confirmation link.

        :return: a ProblemDetail on failure, otherwise the library
            details response.
        """
        uuid = flask.request.form.get("uuid")
        email = flask.request.form.get("email")
        library = self.library_for_request(uuid)
        if isinstance(library, ProblemDetail):
            return library
        # Map the form's email-type keys to hyperlink relations.
        email_types = {
            "contact_email": Hyperlink.INTEGRATION_CONTACT_REL,
            "help_email": Hyperlink.HELP_REL,
            "copyright_email": Hyperlink.COPYRIGHT_DESIGNATED_AGENT_REL
        }
        hyperlink = None
        rel = email_types.get(email)
        if rel:
            hyperlink = Library.get_hyperlink(library, rel)
        # Bug fix: check isinstance(hyperlink, ProblemDetail) *before*
        # touching hyperlink.resource -- a ProblemDetail has no
        # .resource attribute, so the old ordering could raise
        # AttributeError instead of returning INVALID_CONTACT_URI.
        if (not hyperlink or isinstance(hyperlink, ProblemDetail)
                or not hyperlink.resource):
            return INVALID_CONTACT_URI.detailed(
                "The contact URI for this library is missing or invalid")
        validation, is_new = get_one_or_create(self._db,
                                               Validation,
                                               resource=hyperlink.resource)
        validation.restart()
        validation.mark_as_successful()

        return self.library_details(uuid)
Ejemplo n.º 13
0
    def run(self):
        """Run this sweep until run_once() reports a completed sweep.

        The current offset is persisted in a Timestamp's counter column
        so an interrupted sweep can resume from the last commit.
        """
        self.timestamp, new = get_one_or_create(
            self._db,
            Timestamp,
            service=self.service_name,
            create_method_kwargs=dict(counter=self.default_counter))
        offset = self.timestamp.counter or self.default_counter

        started_at = datetime.datetime.utcnow()
        while not self.stop_running:
            a = time.time()
            old_offset = offset
            try:
                new_offset = self.run_once(offset)
            except Exception, e:
                # Abort on any error; the committed counter lets a
                # later run pick up where this one left off.
                self.log.error("Error during run: %s", e, exc_info=e)
                break
            to_sleep = 0
            if new_offset == 0:
                # We completed a sweep. We're done.
                self.stop_running = True
                self.cleanup()
            self.counter = new_offset
            self.timestamp.counter = self.counter
            self._db.commit()
            if old_offset != new_offset:
                self.log.debug("Old offset: %s" % offset)
                self.log.debug("New offset: %s", new_offset)
                b = time.time()
                self.log.debug("Elapsed: %.2f sec" % (b - a))
            if to_sleep > 0:
                # NOTE(review): to_sleep is always 0 at this point, so
                # this branch never runs -- looks like vestigial code.
                if old_offset != new_offset:
                    self.log.debug("Sleeping for %.1f", to_sleep)
                time.sleep(to_sleep)
            offset = new_offset
Ejemplo n.º 14
0
    def run(self):        
        """Run this monitor over the interval since its last recorded
        timestamp.

        If keep_timestamp is set, progress is persisted in a Timestamp
        row so later runs start where this one ended.
        """
        if self.keep_timestamp:
            self.timestamp, new = get_one_or_create(
                self._db, Timestamp,
                service=self.service_name,
                create_method_kwargs=dict(
                    timestamp=self.default_start_time
                )
            )
            start = self.timestamp.timestamp or self.default_start_time
        else:
            start = self.default_start_time
            self.timestamp = None

        while not self.stop_running:
            cutoff = datetime.datetime.utcnow()           
            # run_once may return a new high-water mark; fall back to
            # the cutoff if it returns nothing.
            new_timestamp = self.run_once(start, cutoff) or cutoff
            duration = datetime.datetime.utcnow() - cutoff
            to_sleep = self.interval_seconds-duration.seconds-1
            self.cleanup()
            if self.keep_timestamp:
                self.timestamp.timestamp = new_timestamp
            self._db.commit()

            # TODO: This could be a little nicer, but basically we now
            # want monitors to run through once and then stop.
            if True:
                self.stop_running = True
            elif to_sleep > 0:
                # Unreachable while the `if True` above remains.
                self.log.debug("Sleeping for %.1f", to_sleep)
                time.sleep(to_sleep)
            start = new_timestamp
Ejemplo n.º 15
0
 def _integration_client(self, url=None, shared_secret=None):
     """Return an IntegrationClient, creating one if necessary."""
     target_url = url or self._url
     secret = shared_secret or u"secret"
     result = get_one_or_create(
         self._db, IntegrationClient, shared_secret=secret,
         create_method_kwargs=dict(url=target_url)
     )
     return result[0]
Ejemplo n.º 16
0
 def _integration_client(self, url=None):
     """Find or create an IntegrationClient with canned credentials."""
     client, ignore = get_one_or_create(
         self._db, IntegrationClient,
         url=url or self._url,
         key=u"abc",
         secret=u"def",
     )
     return client
Ejemplo n.º 17
0
    def test_fetch_ignores_feeds_without_content(self):
        """CachedFeed.fetch skips a cached feed that was never filled in."""
        facets = Facets.default(self._default_library)
        pagination = Pagination.default()
        lane = self._lane(u"My Lane", languages=['eng', 'chi'])

        # Create a feed without content (i.e. don't update it)
        contentless_feed, ignore = get_one_or_create(
            self._db, CachedFeed,
            lane_id=lane.id,
            type=CachedFeed.PAGE_TYPE,
            facets=unicode(facets.query_string),
            pagination=unicode(pagination.query_string))

        # It's not returned because it hasn't been updated.
        args = (self._db, lane, CachedFeed.PAGE_TYPE, facets, pagination, None)
        feed, fresh = CachedFeed.fetch(*args)
        eq_(True, feed != contentless_feed)
        eq_(False, fresh)

        # But if the feed is updated, we get it back.
        feed.update(self._db, u"Just feedy things")
        result, fresh = CachedFeed.fetch(*args)
        eq_(True, fresh)
        eq_(feed, result)
Ejemplo n.º 18
0
 def _lane(self,
           display_name=None,
           library=None,
           parent=None,
           genres=None,
           languages=None,
           fiction=None):
     """Find or create a Lane for testing purposes."""
     display_name = display_name or self._str
     library = library or self._default_library
     lane, is_new = get_one_or_create(
         self._db, Lane,
         library=library,
         parent=parent,
         display_name=display_name,
         create_method_kwargs=dict(fiction=fiction))
     # A brand-new sublane goes at the end of its parent's lane list.
     if is_new and parent:
         lane.priority = len(parent.sublanes) - 1
     if genres:
         genre_list = genres if isinstance(genres, list) else [genres]
         for genre in genre_list:
             # Genre names are looked up; Genre objects are used as-is.
             if isinstance(genre, basestring):
                 genre, ignore = Genre.lookup(self._db, genre)
             lane.genres.append(genre)
     if languages:
         lane.languages = (
             languages if isinstance(languages, list) else [languages]
         )
     return lane
Ejemplo n.º 19
0
 def _patron(self, external_identifier=None, library=None):
     """Find or create a Patron in `library`."""
     patron, ignore = get_one_or_create(
         self._db, Patron,
         external_identifier=external_identifier or self._str,
         library=library or self._default_library,
     )
     return patron
Ejemplo n.º 20
0
    def to_custom_list_entry(self, custom_list, metadata_client,
                             overwrite_old_data=False):
        """Turn this object into a CustomListEntry with associated Edition.

        :return: a 2-tuple (CustomListEntry, is_new).
        """
        _db = Session.object_session(custom_list)
        edition = self.to_edition(_db, metadata_client, overwrite_old_data)

        list_entry, is_new = get_one_or_create(
            _db, CustomListEntry, edition=edition, customlist=custom_list
        )

        # Push first_appearance back if this sighting is earlier than
        # the one on record.
        if (not list_entry.first_appearance
            or list_entry.first_appearance > self.first_appearance):
            if list_entry.first_appearance:
                self.log.info(
                    "I thought %s first showed up at %s, but then I saw it earlier, at %s!",
                    self.metadata.title, list_entry.first_appearance,
                    self.first_appearance
                )
            list_entry.first_appearance = self.first_appearance

        # Likewise, push most_recent_appearance forward if this
        # sighting is later than the one on record.
        if (not list_entry.most_recent_appearance
            or list_entry.most_recent_appearance < self.most_recent_appearance):
            if list_entry.most_recent_appearance:
                self.log.info(
                    "I thought %s most recently showed up at %s, but then I saw it later, at %s!",
                    self.metadata.title, list_entry.most_recent_appearance,
                    self.most_recent_appearance
                )
            list_entry.most_recent_appearance = self.most_recent_appearance

        # The annotation is always replaced wholesale.
        list_entry.annotation = self.annotation

        list_entry.set_work(self.metadata, metadata_client)
        return list_entry, is_new
Ejemplo n.º 21
0
 def _patron(self, external_identifier=None, library=None):
     """Return a Patron, creating one if it doesn't exist yet."""
     identifier = external_identifier or self._str
     target_library = library or self._default_library
     result = get_one_or_create(
         self._db, Patron, external_identifier=identifier,
         library=target_library
     )
     return result[0]
Ejemplo n.º 22
0
 def _coverage_record(
     self,
     edition,
     coverage_source,
     operation=None,
     status=CoverageRecord.SUCCESS,
     collection=None,
     exception=None,
 ):
     """Find or create a CoverageRecord for an Edition or Identifier."""
     # Accept either an Identifier directly or anything that carries a
     # primary_identifier (such as an Edition).
     identifier = (
         edition if isinstance(edition, Identifier)
         else edition.primary_identifier
     )
     record, ignore = get_one_or_create(
         self._db, CoverageRecord,
         identifier=identifier,
         data_source=coverage_source,
         operation=operation,
         collection=collection,
         create_method_kwargs=dict(
             timestamp=datetime.utcnow(),
             status=status,
             exception=exception,
         ),
     )
     return record
Ejemplo n.º 23
0
    def _customlist(self,
                    foreign_identifier=None,
                    name=None,
                    data_source_name=DataSource.NYT,
                    num_entries=1,
                    entries_exist_as_works=True):
        """Build a CustomList plus the Editions used as its entries."""
        source = DataSource.lookup(self._db, data_source_name)
        foreign_identifier = foreign_identifier or self._str
        timestamp = datetime.utcnow()
        the_list, ignore = get_one_or_create(
            self._db,
            CustomList,
            create_method_kwargs=dict(
                created=timestamp,
                updated=timestamp,
                name=name or self._str,
                description=self._str,
            ),
            data_source=source,
            foreign_identifier=foreign_identifier)

        editions = []
        for n in range(num_entries):
            if entries_exist_as_works:
                # Back the entry with a full Work.
                edition = self._work(
                    with_open_access_download=True).presentation_edition
            else:
                edition = self._edition(data_source_name, title="Item %s" % n)
                edition.permanent_work_id = "Permanent work ID %s" % self._str
            the_list.add_entry(edition,
                               "Annotation %s" % n,
                               first_appearance=timestamp)
            editions.append(edition)
        return the_list, editions
Ejemplo n.º 24
0
 def _library(self, name=None, short_name=None):
     """Find or create a Library with a freshly generated UUID."""
     library, ignore = get_one_or_create(
         self._db, Library,
         name=name or self._str,
         short_name=short_name or self._str,
         create_method_kwargs=dict(uuid=str(uuid.uuid4())),
     )
     return library
Ejemplo n.º 25
0
    def load(self, metadata, geometry):
        """Create or update a Place (plus its aliases) from a JSON
        metadata string and a GeoJSON geometry.

        :return: a 2-tuple (Place, is_new) describing the Place itself.
        """
        metadata = json.loads(metadata)
        external_id = metadata['id']
        type = metadata['type']
        parent_external_id = metadata['parent_id']
        name = metadata['name']
        aliases = metadata.get('aliases', [])
        abbreviated_name = metadata.get('abbreviated_name', None)

        if parent_external_id:
            parent = self.places_by_external_id[parent_external_id]
        else:
            parent = None

        # This gives us a Geometry object. Set its SRID so the database
        # knows it's using real-world latitude and longitude.
        geometry = GeometryUtility.from_geojson(geometry)
        place, is_new = get_one_or_create(
            self._db,
            Place,
            external_id=external_id,
            type=type,
            parent=parent,
            create_method_kwargs=dict(geometry=geometry))

        # Set these values, even the ones that were set in
        # create_method_kwargs, so that we can update any that have
        # changed.
        place.external_name = name
        place.abbreviated_name = abbreviated_name
        place.geometry = geometry

        # We only ever add aliases. If the database contains an alias
        # for this place that doesn't show up in the metadata, it
        # may have been created manually.
        for alias in aliases:
            # Bug fix: use distinct local names here. The old code
            # rebound `name`, `alias` and `is_new` inside this loop, so
            # the `is_new` returned below reflected the last PlaceAlias
            # rather than the Place.
            alias_obj, alias_is_new = get_one_or_create(
                self._db,
                PlaceAlias,
                place=place,
                name=alias['name'],
                language=alias['language'])
        self.places_by_external_id[external_id] = place
        return place, is_new
Ejemplo n.º 26
0
 def make_publication(self, publisher, post):
     """Create a Publication for this Publisher and this Post.

     This is your chance to modify the content of the Post for
     different publishers.

     :return: a 2-tuple (Publication, is_new).
     """
     result = get_one_or_create(
         self._db, Publication,
         service=publisher.service,
         post=post,
     )
     return result
def imp(db, data_source, path, url):
    """Import the file at `path` into the database as a cached
    Representation of `url`."""
    # Use the file's mtime as the (simulated) fetch time.
    modified = datetime.datetime.fromtimestamp(os.stat(path).st_mtime)
    data = open(path).read()
    representation, ignore = get_one_or_create(db, Representation,
        url=url, data_source=data_source)
    representation.status_code = 200
    representation.content = data
    representation.media_type = 'application/xml'
    representation.fetched_at = modified
    print url
Ejemplo n.º 28
0
    def test_neglected_source_cannot_be_normalized(self):
        """Measurements from a source with no normalization data have no
        normalized value."""
        neglected_source, new = get_one_or_create(
            self._db, DataSource, name="Neglected source"
        )

        popularity = self._popularity(100, neglected_source)
        eq_(None, popularity.normalized_value)

        rating = self._rating(100, neglected_source)
        eq_(None, rating.normalized_value)
Ejemplo n.º 29
0
    def _licensepool(self, edition, open_access=True,
                     data_source_name=DataSource.GUTENBERG,
                     with_open_access_download=False,
                     set_edition_as_presentation=False,
                     collection=None):
        """Find or create a LicensePool for `edition`, optionally with
        an open-access download link and delivery mechanism.

        :return: the LicensePool.
        """
        source = DataSource.lookup(self._db, data_source_name)
        if not edition:
            edition = self._edition(data_source_name)
        collection = collection or self._default_collection
        pool, ignore = get_one_or_create(
            self._db, LicensePool,
            create_method_kwargs=dict(
                open_access=open_access),
            identifier=edition.primary_identifier,
            data_source=source,
            collection=collection,
            availability_time=datetime.utcnow()
        )

        if set_edition_as_presentation:
            pool.presentation_edition = edition

        if with_open_access_download:
            # Attach a fake EPUB download with a no-DRM delivery
            # mechanism and a mirrored Representation.
            pool.open_access = True
            url = "http://foo.com/" + self._str
            media_type = MediaTypes.EPUB_MEDIA_TYPE
            link, new = pool.identifier.add_link(
                Hyperlink.OPEN_ACCESS_DOWNLOAD, url,
                source, media_type
            )

            # Add a DeliveryMechanism for this download
            pool.set_delivery_mechanism(
                media_type,
                DeliveryMechanism.NO_DRM,
                RightsStatus.GENERIC_OPEN_ACCESS,
                link.resource,
            )

            representation, is_new = self._representation(
                url, media_type, "Dummy content", mirrored=True)
            link.resource.representation = representation
        else:

            # Add a DeliveryMechanism for this licensepool
            pool.set_delivery_mechanism(
                MediaTypes.EPUB_MEDIA_TYPE,
                DeliveryMechanism.ADOBE_DRM,
                RightsStatus.UNKNOWN,
                None
            )
            # Without a download, pretend there is one license.
            pool.licenses_owned = pool.licenses_available = 1

        return pool
Ejemplo n.º 30
0
 def _work_coverage_record(self, work, operation=None,
                           status=CoverageRecord.SUCCESS):
     """Find or create a WorkCoverageRecord for `work`."""
     record, ignore = get_one_or_create(
         self._db, WorkCoverageRecord,
         work=work,
         operation=operation,
         create_method_kwargs=dict(
             timestamp=datetime.utcnow(),
             status=status,
         ),
     )
     return record
Ejemplo n.º 31
0
 def _collection(self, name=None, protocol=Collection.OPDS_IMPORT,
                 external_account_id=None, url=None, username=None,
                 password=None):
     """Find or create a Collection and configure its integration."""
     collection, ignore = get_one_or_create(
         self._db, Collection, name=name or self._str, protocol=protocol
     )
     collection.external_account_id = external_account_id
     # Configure the associated external integration's credentials.
     integration = collection.external_integration
     integration.url = url
     integration.username = username
     integration.password = password
     return collection
Ejemplo n.º 32
0
 def _work_coverage_record(self, work, operation=None,
                           status=CoverageRecord.SUCCESS):
     """Return a (possibly new) WorkCoverageRecord for `work`."""
     creation_kwargs = dict(
         timestamp=datetime.utcnow(),
         status=status,
     )
     result = get_one_or_create(
         self._db, WorkCoverageRecord,
         work=work,
         operation=operation,
         create_method_kwargs=creation_kwargs,
     )
     return result[0]
Ejemplo n.º 33
0
def imp(db, data_source, path, url):
    """Import the file at `path` as a cached Representation of `url`."""
    # The file's mtime stands in for the fetch time.
    modified = datetime.datetime.fromtimestamp(os.stat(path).st_mtime)
    data = open(path).read()
    representation, ignore = get_one_or_create(db,
                                               Representation,
                                               url=url,
                                               data_source=data_source)
    representation.status_code = 200
    representation.content = data
    representation.media_type = 'application/xml'
    representation.fetched_at = modified
    print url
Ejemplo n.º 34
0
 def _representation(self, url=None, media_type=None, content=None,
                     mirrored=False):
     """Find or create a Representation for `url`, optionally filling
     in content and mirror information."""
     url = url or "http://foo.com/" + self._str
     representation, is_new = get_one_or_create(
         self._db, Representation, url=url)
     representation.media_type = media_type
     if media_type and content:
         representation.content = content
         representation.fetched_at = datetime.utcnow()
         if mirrored:
             representation.mirror_url = "http://foo.com/" + self._str
             representation.mirrored_at = datetime.utcnow()
     return representation, is_new
Ejemplo n.º 35
0
 def to_customlist(self, _db):
     """Turn this NYTBestSeller list into a CustomList object."""
     nyt = DataSource.lookup(_db, DataSource.NYT)
     custom_list, was_new = get_one_or_create(
         _db,
         CustomList,
         data_source=nyt,
         foreign_identifier=self.foreign_identifier,
         create_method_kwargs=dict(created=self.created, ))
     # Refresh mutable attributes on every call, not just on creation.
     custom_list.name = self.name
     custom_list.updated = self.updated
     self.update_custom_list(custom_list)
     return custom_list
Ejemplo n.º 36
0
    def make_default_library(cls, _db):
        """Ensure that the default library exists in the given database.

        This can be called by code intended for use in testing but not actually
        within a DatabaseTest subclass.
        """
        library, ignore = get_one_or_create(
            _db, Library,
            short_name="default",
            create_method_kwargs=dict(
                uuid=unicode(uuid.uuid4()),
                name="default",
            ),
        )
        collection, ignore = get_one_or_create(
            _db, Collection, name="Default Collection"
        )
        # Give the collection an OPDS-import integration with a
        # license goal, and attach it to the library exactly once.
        integration = collection.create_external_integration(
            ExternalIntegration.OPDS_IMPORT)
        integration.goal = ExternalIntegration.LICENSE_GOAL
        if collection not in library.collections:
            library.collections.append(collection)
        return library
Ejemplo n.º 37
0
 def _license(self, pool, identifier=None, checkout_url=None, status_url=None,
              expires=None, remaining_checkouts=None, concurrent_checkouts=None):
     """Find or create a License attached to LicensePool `pool`."""
     result, ignore = get_one_or_create(
         self._db, License,
         identifier=identifier or self._str,
         license_pool=pool,
         checkout_url=checkout_url or self._str,
         status_url=status_url or self._str,
         expires=expires,
         remaining_checkouts=remaining_checkouts,
         concurrent_checkouts=concurrent_checkouts,
     )
     return result
Ejemplo n.º 38
0
    def make_default_library(cls, _db):
        """Ensure that the default library exists in the given database.

        This can be called by code intended for use in testing but not actually
        within a DatabaseTest subclass.
        """
        creation_kwargs = dict(uuid=unicode(uuid.uuid4()), name="default")
        library = get_one_or_create(
            _db, Library,
            create_method_kwargs=creation_kwargs,
            short_name="default"
        )[0]
        collection = get_one_or_create(
            _db, Collection, name="Default Collection"
        )[0]
        # The default collection gets an OPDS-import integration with a
        # license goal, and is attached to the library exactly once.
        integration = collection.create_external_integration(
            ExternalIntegration.OPDS_IMPORT
        )
        integration.goal = ExternalIntegration.LICENSE_GOAL
        if collection not in library.collections:
            library.collections.append(collection)
        return library
Ejemplo n.º 39
0
 def _representation(self, url=None, media_type=None, content=None,
                     mirrored=False):
     """Find or create a Representation, optionally marking it as
     fetched and mirrored."""
     target_url = url or "http://foo.com/" + self._str
     rep, created = get_one_or_create(
         self._db, Representation, url=target_url)
     rep.media_type = media_type
     if media_type and content:
         rep.content = content
         rep.fetched_at = datetime.utcnow()
         if mirrored:
             rep.mirror_url = "http://foo.com/" + self._str
             rep.mirrored_at = datetime.utcnow()
     return rep, created
def process_file(_db, filename, class_):
    a = 0
    for i in open(filename):
        v = i.strip().split("\t")
        if class_ == Subject and len(v) == 3:
            type, identifier, name = v
        elif len(v) == 2:
            type, identifier = v
            name = None
        else:
            print "Bad data: %r" % i
        args = {}
        if class_ == Subject and name:
            args['name'] = name
        get_one_or_create(
            _db, class_, type=type, identifier=identifier,
            create_method_kwargs=args
        )
        a += 1
        if not a % 1000:
            _db.commit()
            print a, class_.__name__
    _db.commit()
Ejemplo n.º 41
0
    def _collection(self, name=None, protocol=ExternalIntegration.OPDS_IMPORT,
                    external_account_id=None, url=None, username=None,
                    password=None, data_source_name=None):
        """Find or create a Collection wired to an external integration.

        :return: The Collection, with its external integration configured
            for the given protocol and credentials.
        """
        if not name:
            name = self._str
        collection, _ = get_one_or_create(self._db, Collection, name=name)
        collection.external_account_id = external_account_id

        # Attach and configure the integration that licenses this collection.
        integration = collection.create_external_integration(protocol)
        integration.goal = ExternalIntegration.LICENSE_GOAL
        integration.url = url
        integration.username = username
        integration.password = password

        if data_source_name:
            collection.data_source = data_source_name
        return collection
def imp(db, data_source, identifier, cache, library):
    i = identifier.identifier
    fn = i + ".json"
    if not cache.exists(fn):
        return
    fn = cache._filename(fn)
    modified = datetime.datetime.fromtimestamp(os.stat(fn).st_mtime)
    data = cache.open(fn).read()
    a = dict(collection_token=library['collectionToken'],
             item_id=i)
    url = OverdriveAPI.METADATA_ENDPOINT % a
    representation, ignore = get_one_or_create(db, Representation,
        url=url, data_source=data_source, identifier=identifier)
    representation.status_code = 200
    representation.content = data
    representation.media_type = 'application/json'
    representation.fetched_at = modified
    print identifier
Ejemplo n.º 43
0
 def _coverage_record(self, edition, coverage_source, operation=None,
     status=CoverageRecord.SUCCESS, collection=None, exception=None,
 ):
     """Find or create a CoverageRecord for an edition or identifier.

     :param edition: Either an Identifier or an object with a
         .primary_identifier (e.g. an Edition).
     """
     # Accept either an Identifier directly or something that has one.
     identifier = (
         edition if isinstance(edition, Identifier)
         else edition.primary_identifier
     )
     record, _ = get_one_or_create(
         self._db, CoverageRecord,
         identifier=identifier,
         data_source=coverage_source,
         operation=operation,
         collection=collection,
         create_method_kwargs=dict(
             timestamp=datetime.utcnow(),
             status=status,
             exception=exception,
         )
     )
     return record
def imp(db, data_source, identifier, cache):
    i = identifier.identifier
    type = identifier.type

    location = None
    status_code = 200
    media_type = "application/ld+json"
    if type == Identifier.OCLC_WORK:
        url = OCLCLinkedData.WORK_BASE_URL % dict(id=i, type="work")
    elif type == Identifier.OCLC_NUMBER:
        url = OCLCLinkedData.BASE_URL % dict(id=i, type="oclc")
    elif type == Identifier.ISBN:
        url = OCLCLinkedData.ISBN_BASE_URL % dict(id=i)
        media_type = None
        status_code = 301
    representation, new = get_one_or_create(
        db, Representation,
        url=url, data_source=data_source, identifier=identifier,
        )
    if not new:
        print "Already did", identifier
        return False

    if not cache.exists(i):
        # print "Not cached", identifier
        return False
    fn = cache._filename(i)
    modified = datetime.datetime.fromtimestamp(os.stat(fn).st_mtime)
    data = open(fn).read()

    if type == Identifier.ISBN:
        location = data
        data = None

    representation.status_code = status_code
    representation.content = data
    representation.location = location
    representation.media_type = media_type
    representation.fetched_at = modified
    return True
Ejemplo n.º 45
0
    def timestamp(self):
        """Find or create a Timestamp for this Monitor.

        This does not use TimestampData because it relies on checking
        whether a Timestamp already exists in the database.

        A new timestamp will have .finish set to None, since the first
        run is presumably in progress.
        """
        defaults = dict(
            start=self.initial_start_time,
            finish=None,
            counter=self.default_counter,
        )
        stamp, _ = get_one_or_create(
            self._db,
            Timestamp,
            service=self.service_name,
            service_type=Timestamp.MONITOR_TYPE,
            collection=self.collection,
            create_method_kwargs=defaults,
        )
        return stamp
Ejemplo n.º 46
0
    def _external_integration(self, protocol, goal=None, settings=None,
                              libraries=None, **kwargs
    ):
        """Find or create an ExternalIntegration.

        :param protocol: The integration's protocol.
        :param goal: The integration's goal.
        :param settings: A dict of settings to store on the integration.
        :param libraries: A Library or list of Libraries to associate with
            the integration.
        :param kwargs: Extra attributes to set directly on the integration.
        """
        integration = None
        if not libraries:
            integration, ignore = get_one_or_create(
                self._db, ExternalIntegration, protocol=protocol, goal=goal
            )
        else:
            if not isinstance(libraries, list):
                libraries = [libraries]

            # Try to find an existing integration for one of the given
            # libraries. BUG FIX: previously this always looked up
            # libraries[0], so integrations attached to the other
            # libraries were never found.
            for library in libraries:
                integration = ExternalIntegration.lookup(
                    self._db, protocol, goal, library=library
                )
                if integration:
                    break

            if not integration:
                # Otherwise, create a brand new integration specifically
                # for the library.
                integration = ExternalIntegration(
                    protocol=protocol, goal=goal,
                )
                integration.libraries.extend(libraries)
                self._db.add(integration)

        for attr, value in kwargs.items():
            setattr(integration, attr, value)

        settings = settings or dict()
        for key, value in settings.items():
            integration.set_setting(key, value)

        return integration
Ejemplo n.º 47
0
    def records(self, lane, annotator, start_time=None, force_refresh=False, mirror=None, query_batch_size=500, upload_batch_size=7500):
        """
        Create and export a MARC file for the books in a lane.

        :param lane: The Lane to export books from.
        :param annotator: The Annotator to use when creating MARC records.
        :param start_time: Only include records that were created or modified after this time.
        :param force_refresh: Create new records even when cached records are available.
        :param mirror: Optional mirror to use instead of loading one from configuration.
        :param query_batch_size: Number of works to retrieve with a single database query.
        :param upload_batch_size: Number of records to mirror at a time. This is different
          from query_batch_size because S3 enforces a minimum size of 5MB for all parts
          of a multipart upload except the last, but 5MB of records would be too many
          works for a single query.
        """

        # We mirror the content, if it's not empty. If it's empty, we create a CachedMARCFile
        # and Representation, but don't actually mirror it.
        if not mirror:
            storage_protocol = self.integration.setting(self.STORAGE_PROTOCOL).value
            mirror = MirrorUploader.sitewide(self._db)
            if mirror.NAME != storage_protocol:
                raise Exception("Sitewide mirror integration does not match configured storage protocol")

        if not mirror:
            raise Exception("No mirror integration is configured")

        # End time is before we start the query, because if any records are changed
        # during the processing we may not catch them, and they should be handled
        # again on the next run.
        end_time = datetime.datetime.utcnow()

        works_q = lane.works(self._db)
        if start_time:
            works_q = works_q.filter(MaterializedWorkWithGenre.last_update_time>=start_time)

        total = works_q.count()
        offset = 0

        url = mirror.marc_file_url(self.library, lane, end_time, start_time)
        representation, ignore = get_one_or_create(
            self._db, Representation, url=url, media_type=Representation.MARC_MEDIA_TYPE)

        with mirror.multipart_upload(representation, url) as upload:
            output = StringIO()
            current_count = 0
            while offset < total:
                batch_q = works_q.order_by(
                    MaterializedWorkWithGenre.works_id).offset(
                    offset).limit(query_batch_size)

                for work in batch_q:
                    record = self.create_record(
                        work, annotator, force_refresh, self.integration)
                    if record:
                        output.write(record.as_marc())
                        current_count += 1

                # BUG FIX: use >= rather than ==. Since some works may not
                # produce a record, current_count does not necessarily land
                # exactly on upload_batch_size at this check point; with ==
                # the buffer could grow without bound.
                if current_count >= upload_batch_size:
                    content = output.getvalue()
                    if content:
                        upload.upload_part(content)
                    output.close()
                    output = StringIO()
                    current_count = 0
                offset += query_batch_size

            # Upload anything left over.
            content = output.getvalue()
            if content:
                upload.upload_part(content)
            output.close()

        representation.fetched_at = end_time
        if not representation.mirror_exception:
            cached, is_new = get_one_or_create(
                self._db, CachedMARCFile, library=self.library,
                lane=(lane if isinstance(lane, Lane) else None),
                start_time=start_time,
                create_method_kwargs=dict(representation=representation))
            if not is_new:
                cached.representation = representation
            cached.end_time = end_time
Ejemplo n.º 48
0
 def _catalog(self, name=u"Faketown Public Library"):
     """Find or create a DataSource with the given name.

     BUG FIX: previously the created DataSource was discarded; now it
     is returned so callers can use it (backward-compatible, since
     callers ignoring the return value are unaffected).
     """
     source, ignore = get_one_or_create(self._db, DataSource, name=name)
     return source
Ejemplo n.º 49
0
 def _subject(self, type, identifier):
     """Find or create a Subject with the given type and identifier."""
     subject, _ = get_one_or_create(
         self._db, Subject, type=type, identifier=identifier
     )
     return subject
Ejemplo n.º 50
0
 def _classification(self, identifier, subject, data_source, weight=1):
     """Find or create a Classification linking identifier and subject."""
     classification, _ = get_one_or_create(
         self._db,
         Classification,
         identifier=identifier,
         subject=subject,
         data_source=data_source,
         weight=weight,
     )
     return classification
Ejemplo n.º 51
0
 def _contributor(self, sort_name=None, name=None, **kw_args):
     """Find or create a Contributor.

     :return: A 2-tuple (contributor, is_new), matching get_one_or_create.
     """
     # Prefer sort_name, then name, then a random string.
     chosen = sort_name or name or self._str
     return get_one_or_create(
         self._db, Contributor, sort_name=unicode(chosen), **kw_args
     )
Ejemplo n.º 52
0
    def _work(self, title=None, authors=None, genre=None, language=None,
              audience=None, fiction=True, with_license_pool=False,
              with_open_access_download=False, quality=0.5, series=None,
              presentation_edition=None, collection=None, data_source_name=None):
        """Create a Work.

        For performance reasons, this method does not generate OPDS
        entries or calculate a presentation edition for the new
        Work. Tests that rely on this information being present
        should call _slow_work() instead, which takes more care to present
        the sort of Work that would be created in a real environment.

        :param title: Title for the work (random string if omitted).
        :param authors: Author(s), passed through to _edition.
        :param genre: A Genre or a genre name to look up (autocreated).
        :param language: Language code; defaults to "eng".
        :param audience: A Classifier audience; defaults to AUDIENCE_ADULT.
        :param fiction: Fiction flag; None is treated as True.
        :param with_license_pool: Also create a LicensePool.
        :param with_open_access_download: Implies with_license_pool and
            marks the pool open-access.
        :param quality: Quality score for the new Work.
        :param series: Series name, passed through to _edition.
        :param presentation_edition: An existing Edition to use instead of
            creating one.
        :param collection: Collection for the edition/pool.
        :param data_source_name: Data source name; defaults depend on
            audience (see below).
        :return: The new (or found) Work.
        """
        pools = []
        # An open-access download requires a license pool to hang it on.
        if with_open_access_download:
            with_license_pool = True
        language = language or "eng"
        title = unicode(title or self._str)
        audience = audience or Classifier.AUDIENCE_ADULT
        if audience == Classifier.AUDIENCE_CHILDREN and not data_source_name:
            # TODO: This is necessary because Gutenberg's childrens books
            # get filtered out at the moment.
            data_source_name = DataSource.OVERDRIVE
        elif not data_source_name:
            data_source_name = DataSource.GUTENBERG
        if fiction is None:
            fiction = True
        new_edition = False
        if not presentation_edition:
            # No edition supplied: build one (and possibly a pool) to match
            # the requested parameters.
            new_edition = True
            presentation_edition = self._edition(
                title=title, language=language,
                authors=authors,
                with_license_pool=with_license_pool,
                with_open_access_download=with_open_access_download,
                data_source_name=data_source_name,
                series=series,
                collection=collection,
            )
            if with_license_pool:
                # _edition returns (edition, pool) in this case.
                presentation_edition, pool = presentation_edition
                if with_open_access_download:
                    pool.open_access = True
                pools = [pool]
        else:
            # Reuse whatever pools the supplied edition already has.
            pools = presentation_edition.license_pools
        work, ignore = get_one_or_create(
            self._db, Work, create_method_kwargs=dict(
                audience=audience,
                fiction=fiction,
                quality=quality), id=self._id)
        if genre:
            if not isinstance(genre, Genre):
                genre, ignore = Genre.lookup(self._db, genre, autocreate=True)
            work.genres = [genre]
        # Fixed "random" value keeps test ordering deterministic.
        work.random = 0.5
        work.set_presentation_edition(presentation_edition)

        if pools:
            # make sure the pool's presentation_edition is set,
            # bc loan tests assume that.
            if not work.license_pools:
                for pool in pools:
                    work.license_pools.append(pool)

            for pool in pools:
                pool.set_presentation_edition()

            # This is probably going to be used in an OPDS feed, so
            # fake that the work is presentation ready.
            work.presentation_ready = True
            work.calculate_opds_entries(verbose=False)

        return work