def store_results(record):
    """
    Store the OAG record object in all the appropriate locations:
    - in the archive
    - in the cache

    In order to achieve this, this method will also ensure that the object
    has at least one licence (adding a "failed-to-obtain-license" entry if
    none is present), and that the record identifier has been passed to
    _add_identifier_to_bibjson together with the bibjson.

    The "queued" marker, if present, is removed from the record before
    anything is stored.

    arguments:
    record -- an OAG record object, see the module documentation for details.
        It must contain an 'identifier' key; a KeyError is raised otherwise.

    returns:
    passed in record object, with the queued status removed and any other internal changes
        necessary to prepare it for storage

    """
    # Step 1: ensure that a licence was applied, and if not apply one
    if "bibjson" not in record:
        # no bibjson record, so add a blank one
        log.debug("record does not have a bibjson record.")
        record['bibjson'] = {}

    # a missing licence list and an empty one are treated alike: the default
    # of [] makes both cases report a length of zero
    if len(record['bibjson'].get("license", [])) == 0:
        log.debug("Licence could not be detected, therefore adding 'unknown' licence to " + str(record['bibjson']))
        recordmanager.add_license(
            record,
            url=config.unknown_url,
            type="failed-to-obtain-license",
            open_access=False,
            error_message="unable to detect licence",
            category="failure",
            provenance_description="no plugin was found that would try to detect a licence.  This entry records that the license is therefore unknown",
        )

    # Step 2: unqueue the record
    # (membership test instead of dict.has_key, which no longer exists in Python 3)
    if "queued" in record:
        log.debug(str(record['identifier']) + ": removing this item from the queue")
        del record["queued"]

    # Step 3: update the archive
    _add_identifier_to_bibjson(record['identifier'], record['bibjson'])
    log.debug(str(record['identifier']) + ": storing this item in the archive")
    models.Record.store(record['bibjson'])

    # Step 4: update the cache
    log.debug(str(record['identifier']) + ": storing this item in the cache")
    _update_cache(record)

    # we have to return the record so that the next step in the chain can
    # deal with it (if such a step exists)
    log.debug("yielded result " + str(record))
    return record
def provider_licence(record):
    """
    Attempt to determine the licence of the record based on the provider information
    contained in record['provider'].

    If the record has no 'provider' key, or no plugin is available for that
    provider, the record is returned unchanged and no licence is added.

    Otherwise the plugin is run against the record.  If, after the plugin has
    run, record['bibjson']['license'] is missing or empty, a
    "failed-to-obtain-license" entry is added which names the plugin that
    was tried.

    arguments:
    record -- an OAG record object, see the module documentation for details

    returns:
    the passed in record object; when a plugin ran, record['bibjson']['license']
        will have been added or appended to

    """
    # Step 1: check that we have a provider indicator to work from
    # (membership test instead of dict.has_key, which no longer exists in Python 3)
    if "provider" not in record:
        log.debug("record has no provider, so unable to look for licence: " + str(record))
        return record

    # Step 2: get the plugin that will run for the given provider
    p = plugin.PluginFactory.license_detect(record["provider"])
    if p is None:
        log.debug("No plugin to handle provider: " + str(record['provider']))
        return record
    log.debug("Plugin " + str(p) + " to handle provider " + str(record['provider']))

    # Step 3: run the plugin on the record
    if "bibjson" not in record:
        # if the record doesn't have a bibjson element, add a blank one
        record['bibjson'] = {}
    p.license_detect(record)

    # was the plugin able to detect a licence?
    # if not, we need to add an unknown licence for this provider
    # (a missing list and an empty list both give a length of zero here)
    if len(record['bibjson'].get("license", [])) == 0:
        log.debug("No licence detected by plugin " + str(p._short_name) + " so adding unknown licence")
        recordmanager.add_license(
            record,
            url=config.unknown_url,
            type="failed-to-obtain-license",
            open_access=False,
            error_message="unable to detect licence",
            category="failure",
            provenance_description="a plugin ran and failed to detect a license for this record.  This entry records that the license is therefore unknown",
            handler=p._short_name,
            handler_version=p.__version__
        )

    # we have to return the record so that the next step in the chain can
    # deal with it
    log.debug("plugin " + str(p) + " yielded result " + str(record))
    return record
Example #3
0
 def describe_license_fail(self, record, source_url, why, suggested_solution='', licence_url=""):
     """
     Record on the given record that a licence could not be obtained.

     Delegates to recordmanager.add_license, always using the type
     "failed-to-obtain-license", open_access False and the category
     "page_scrape".  The provenance text comes from
     self.gen_provenance_description_fail(source_url), and the handler
     details are this object's _short_name and __version__.

     arguments:
     record -- the record object handed on to recordmanager.add_license
     source_url -- passed on as the licence "source"
     why -- passed on as the licence "error_message"
     suggested_solution -- passed on as "suggested_solution" (default '')
     licence_url -- passed on as the licence "url" (default "")
     """
     provenance = self.gen_provenance_description_fail(source_url)
     failure_details = dict(
         source=source_url,
         error_message=why,
         suggested_solution=suggested_solution,
         url=licence_url,
         type="failed-to-obtain-license",
         open_access=False,
         category="page_scrape",
         provenance_description=provenance,
         handler=self._short_name,
         handler_version=self.__version__,
     )
     recordmanager.add_license(record, **failure_details)
def provider_licence(record):
    """
    Attempt to determine the licence of the record from record['provider'].

    If the record has no 'provider' key, or no plugin is available for that
    provider, the record is returned unchanged and no licence is added.

    Otherwise the plugin is run against the record.  If, after the plugin has
    run, record['bibjson']['license'] is missing or empty, a
    "failed-to-obtain-license" entry is added which names the plugin that
    was tried.

    arguments:
    record -- an OAG record object

    returns:
    the passed in record object; when a plugin ran, record['bibjson']['license']
        will have been added or appended to

    """
    # Step 1: check that we have a provider indicator to work from
    # (membership test instead of dict.has_key, which no longer exists in Python 3)
    if "provider" not in record:
        log.debug("record has no provider, so unable to look for licence: " + str(record))
        return record

    # Step 2: get the plugin that will run for the given provider
    p = plugin.PluginFactory.license_detect(record["provider"])
    if p is None:
        log.debug("No plugin to handle provider: " + str(record['provider']))
        return record
    log.debug("Plugin " + str(p) + " to handle provider " + str(record['provider']))

    # Step 3: run the plugin on the record
    if "bibjson" not in record:
        # if the record doesn't have a bibjson element, add a blank one
        record['bibjson'] = {}
    p.license_detect(record)

    # was the plugin able to detect a licence?
    # if not, we need to add an unknown licence for this provider
    # (a missing list and an empty list both give a length of zero here)
    if len(record['bibjson'].get("license", [])) == 0:
        log.debug("No licence detected by plugin " + str(p._short_name) + " so adding unknown licence")
        recordmanager.add_license(
            record,
            url=config.unknown_url,
            type="failed-to-obtain-license",
            open_access=False,
            error_message="unable to detect licence",
            category="failure",
            provenance_description="a plugin ran and failed to detect a license for this record.  This entry records that the license is therefore unknown",
            handler=p._short_name,
            handler_version=p.__version__
        )

    # we have to return the record so that the next step in the chain can
    # deal with it
    log.debug("plugin " + str(p) + " yielded result " + str(record))
    return record
def store_results(record):
    """
    Store the OAG record object in the archive and in the cache.

    Before storing, this method makes sure the record has a bibjson element
    with at least one licence (adding a "failed-to-obtain-license" entry if
    none is present), removes the "queued" marker if there is one, and
    passes the record identifier and bibjson to _add_identifier_to_bibjson.

    arguments:
    record -- an OAG record object.  It must contain an 'identifier' key;
        a KeyError is raised otherwise.

    returns:
    the passed in record object, with the queued status removed and any
        other changes made in preparing it for storage

    """
    # Step 1: ensure that a licence was applied, and if not apply one
    if "bibjson" not in record:
        # no bibjson record, so add a blank one
        log.debug("record does not have a bibjson record.")
        record['bibjson'] = {}

    # a missing licence list and an empty one are treated alike: the default
    # of [] makes both cases report a length of zero
    if len(record['bibjson'].get("license", [])) == 0:
        log.debug("Licence could not be detected, therefore adding 'unknown' licence to " + str(record['bibjson']))
        recordmanager.add_license(
            record,
            url=config.unknown_url,
            type="failed-to-obtain-license",
            open_access=False,
            error_message="unable to detect licence",
            category="failure",
            provenance_description="no plugin was found that would try to detect a licence.  This entry records that the license is therefore unknown",
        )

    # Step 2: unqueue the record
    # (membership test instead of dict.has_key, which no longer exists in Python 3)
    if "queued" in record:
        log.debug(str(record['identifier']) + ": removing this item from the queue")
        del record["queued"]

    # Step 3: update the archive
    _add_identifier_to_bibjson(record['identifier'], record['bibjson'])
    log.debug(str(record['identifier']) + ": storing this item in the archive")
    models.Record.store(record['bibjson'])

    # Step 4: update the cache
    log.debug(str(record['identifier']) + ": storing this item in the cache")
    _update_cache(record)

    # we have to return the record so that the next step in the chain can
    # deal with it (if such a step exists)
    log.debug("yielded result " + str(record))
    return record