def store_results(record):
    """
    Store the OAG record object in all the appropriate locations:
    - in the archive (only the record's bibjson is stored there)
    - in the cache (the whole record)

    In order to achieve this, this method will also ensure that the object
    has at least one licence (adding a "failed-to-obtain-license" entry if
    none is present), and that the record identifier has been added to the
    bibjson. The "queued" marker, if present, is removed from the record
    before anything is stored.

    arguments:
    record -- an OAG record object, see the module documentation for details.
        It must contain an 'identifier' key; it is modified in place.

    returns:
    the passed in record object, with the queued status removed and any
    other internal changes necessary to prepare it for storage
    """
    # Step 1: ensure that a licence was applied, and if not apply one
    if "bibjson" not in record:
        # no bibjson record, so add a blank one
        log.debug("record does not have a bibjson record.")
        record['bibjson'] = {}

    # a missing, None or zero-length license list all mean "no licence"
    if not record['bibjson'].get("license"):
        log.debug("Licence could not be detected, therefore adding 'unknown' licence to " + str(record['bibjson']))
        recordmanager.add_license(
            record,
            url=config.unknown_url,
            type="failed-to-obtain-license",
            open_access=False,
            error_message="unable to detect licence",
            category="failure",
            provenance_description=(
                "no plugin was found that would try to detect a licence. "
                "This entry records that the license is therefore unknown"
            ),
        )

    # Step 2: unqueue the record
    # ("in" rather than dict.has_key(), which does not exist on Python 3)
    if "queued" in record:
        log.debug(str(record['identifier']) + ": removing this item from the queue")
        del record["queued"]

    # Step 3: update the archive
    _add_identifier_to_bibjson(record['identifier'], record['bibjson'])
    log.debug(str(record['identifier']) + ": storing this item in the archive")
    models.Record.store(record['bibjson'])

    # Step 4: update the cache
    log.debug(str(record['identifier']) + ": storing this item in the cache")
    _update_cache(record)

    # we have to return the record so that the next step in the chain can
    # deal with it (if such a step exists)
    log.debug("yielded result " + str(record))
    return record
def provider_licence(record):
    """
    Attempt to determine the licence of the record based on the provider
    information contained in record['provider'].

    If the record has no provider, or no plugin is available for that
    provider, the record is returned untouched (no licence is added here).

    Otherwise the plugin is run against the record. If, after the plugin has
    run, record['bibjson']['license'] is still missing or empty, a
    "failed-to-obtain-license" licence is added which names the plugin (and
    its version) that failed to detect a licence.

    arguments:
    record -- an OAG record object, see the module documentation for details.
        It is modified in place.

    returns:
    the passed in record object; when a plugin ran, its
    record['bibjson']['license'] field will contain at least one licence
    """
    # Step 1: check that we have a provider indicator to work from
    # ("in" rather than dict.has_key(), which does not exist on Python 3)
    if "provider" not in record:
        log.debug("record has no provider, so unable to look for licence: " + str(record))
        return record

    # Step 2: get the plugin that will run for the given provider
    p = plugin.PluginFactory.license_detect(record["provider"])
    if p is None:
        log.debug("No plugin to handle provider: " + str(record['provider']))
        return record
    log.debug("Plugin " + str(p) + " to handle provider " + str(record['provider']))

    # Step 3: run the plugin on the record
    if "bibjson" not in record:
        # if the record doesn't have a bibjson element, add a blank one
        record['bibjson'] = {}
    p.license_detect(record)

    # was the plugin able to detect a licence?
    # if not, we need to add an unknown licence for this provider
    # (a missing, None or zero-length license list all mean "no licence")
    if not record['bibjson'].get("license"):
        log.debug("No licence detected by plugin " + p._short_name + " so adding unknown licence")
        recordmanager.add_license(
            record,
            url=config.unknown_url,
            type="failed-to-obtain-license",
            open_access=False,
            error_message="unable to detect licence",
            category="failure",
            provenance_description=(
                "a plugin ran and failed to detect a license for this record. "
                "This entry records that the license is therefore unknown"
            ),
            handler=p._short_name,
            handler_version=p.__version__,
        )

    # we have to return the record so that the next step in the chain can
    # deal with it
    log.debug("plugin " + str(p) + " yielded result " + str(record))
    return record
def describe_license_fail(self, record, source_url, why, suggested_solution='', licence_url=""):
    """
    Add a "failed-to-obtain-license" licence to the record on behalf of this
    handler, via recordmanager.add_license.

    arguments:
    record -- the record object the failure licence is added to
    source_url -- passed on as the licence "source", and used to generate
        the provenance description
    why -- passed on as the error message
    suggested_solution -- passed on unchanged (defaults to empty string)
    licence_url -- passed on as the licence url (defaults to empty string)
    """
    # the licence is always flagged as not open access, in the "page_scrape"
    # category, and attributed to this handler and its version
    failure_details = {
        "source": source_url,
        "error_message": why,
        "suggested_solution": suggested_solution,
        "url": licence_url,
        "type": "failed-to-obtain-license",
        "open_access": False,
        "category": "page_scrape",
        "provenance_description": self.gen_provenance_description_fail(source_url),
        "handler": self._short_name,
        "handler_version": self.__version__,
    }
    recordmanager.add_license(record, **failure_details)
def provider_licence(record):
    """
    Run the licence detection plugin for the record's provider, if any.

    If the record has no 'provider' key, or the plugin factory returns no
    plugin for that provider, the record is returned untouched. Otherwise
    the plugin is run against the record (a blank record['bibjson'] is
    created first if needed) and, if record['bibjson']['license'] is still
    missing or empty afterwards, a "failed-to-obtain-license" licence naming
    the plugin and its version is added.

    arguments:
    record -- the record object to process; it is modified in place

    returns:
    the passed in record object
    """
    # Step 1: check that we have a provider indicator to work from
    # ("in" rather than dict.has_key(), which does not exist on Python 3)
    if "provider" not in record:
        log.debug("record has no provider, so unable to look for licence: " + str(record))
        return record

    # Step 2: get the plugin that will run for the given provider
    p = plugin.PluginFactory.license_detect(record["provider"])
    if p is None:
        log.debug("No plugin to handle provider: " + str(record['provider']))
        return record
    log.debug("Plugin " + str(p) + " to handle provider " + str(record['provider']))

    # Step 3: run the plugin on the record
    if "bibjson" not in record:
        # if the record doesn't have a bibjson element, add a blank one
        record['bibjson'] = {}
    p.license_detect(record)

    # was the plugin able to detect a licence?
    # if not, we need to add an unknown licence for this provider
    # (a missing, None or zero-length license list all mean "no licence")
    if not record['bibjson'].get("license"):
        log.debug("No licence detected by plugin " + p._short_name + " so adding unknown licence")
        recordmanager.add_license(
            record,
            url=config.unknown_url,
            type="failed-to-obtain-license",
            open_access=False,
            error_message="unable to detect licence",
            category="failure",
            provenance_description=(
                "a plugin ran and failed to detect a license for this record. "
                "This entry records that the license is therefore unknown"
            ),
            handler=p._short_name,
            handler_version=p.__version__,
        )

    # we have to return the record so that the next step in the chain can
    # deal with it
    log.debug("plugin " + str(p) + " yielded result " + str(record))
    return record
def store_results(record):
    """
    Prepare the record for storage, then store it in the archive and cache.

    Preparation consists of: creating a blank record['bibjson'] if there is
    none; adding a "failed-to-obtain-license" licence if the bibjson has no
    (or an empty) license list; removing the "queued" marker if present; and
    adding the record identifier to the bibjson. The bibjson is then stored
    in the archive and the whole record is passed to the cache update.

    arguments:
    record -- the record object to store; it must contain an 'identifier'
        key and is modified in place

    returns:
    the passed in record object, after the changes described above
    """
    # Step 1: ensure that a licence was applied, and if not apply one
    if "bibjson" not in record:
        # no bibjson record, so add a blank one
        log.debug("record does not have a bibjson record.")
        record['bibjson'] = {}

    # a missing, None or zero-length license list all mean "no licence"
    if not record['bibjson'].get("license"):
        log.debug("Licence could not be detected, therefore adding 'unknown' licence to " + str(record['bibjson']))
        recordmanager.add_license(
            record,
            url=config.unknown_url,
            type="failed-to-obtain-license",
            open_access=False,
            error_message="unable to detect licence",
            category="failure",
            provenance_description=(
                "no plugin was found that would try to detect a licence. "
                "This entry records that the license is therefore unknown"
            ),
        )

    # Step 2: unqueue the record
    # ("in" rather than dict.has_key(), which does not exist on Python 3)
    if "queued" in record:
        log.debug(str(record['identifier']) + ": removing this item from the queue")
        del record["queued"]

    # Step 3: update the archive
    _add_identifier_to_bibjson(record['identifier'], record['bibjson'])
    log.debug(str(record['identifier']) + ": storing this item in the archive")
    models.Record.store(record['bibjson'])

    # Step 4: update the cache
    log.debug(str(record['identifier']) + ": storing this item in the cache")
    _update_cache(record)

    # we have to return the record so that the next step in the chain can
    # deal with it (if such a step exists)
    log.debug("yielded result " + str(record))
    return record