def addResource(self, uri, sourceNamespace, setNamespace, batchTag=None):
    """Post a resource URI to ``self.resourceURI``.

    The URI is first passed to ``Utils.checkURI``; the namespaces, the URI
    and (only when truthy) ``batchTag`` are then posted with
    ``Utils.postRSData``.

    Returns:
        A ``(record, message)`` tuple, where ``record`` is the JSON decoded
        from the response and ``message`` is whatever
        ``self.getMessage(record)`` returned. A truthy message is also
        logged as a warning.

    Raises:
        Exception: if ``Utils.checkURI`` raises ``URIException``.
        BadResourceURL: if ``Utils.postRSData`` raises.
    """
    self.logger.info("Adding resource with uri: " + str(uri))

    try:
        Utils.checkURI(uri)
    except URIException:
        raise Exception("Resource uri did not validate. uri: " + str(uri))

    payload = {
        'sourceNamespace': sourceNamespace,
        'setNamespace': setNamespace,
        'uri': uri,
    }
    # batchTag is optional and only sent when the caller supplied one.
    if batchTag:
        payload['batchTag'] = batchTag

    try:
        reply = Utils.postRSData(self.resourceURI, payload)
    except Exception as err:
        raise BadResourceURL(
            "Could not add resource. resourceURI: " + str(self.resourceURI),
            err)

    record = Utils.getJSONFromResponse(reply)
    message = self.getMessage(record)
    if message:
        self.logger.warning(message)
    return record, message
def addCapability(self, capURL, sourceNamespace, setNamespace, capType):
    """Post a capability URL to ``self.capabilityURI``.

    The URL is first passed to ``Utils.checkURI``; the namespaces, the URL
    and ``capType`` are then posted with ``Utils.postRSData``.

    Returns:
        A ``(record, message)`` tuple, where ``record`` is the JSON decoded
        from the response and ``message`` is whatever
        ``self.getMessage(record)`` returned. A truthy message is also
        logged as a warning.

    Raises:
        Exception: if ``Utils.checkURI`` raises (logged as a warning first).
        BadResourceURL: if ``Utils.postRSData`` raises (logged as critical
            first).
    """
    self.logger.info("Adding capability with url:" + str(capURL))

    try:
        Utils.checkURI(capURL)
    except Exception as err:
        # The same text goes to the log and into the raised exception.
        problem = ("Capability URL did not validate. url: " + str(capURL) +
                   " ERROR: " + str(err))
        self.logger.warning(problem)
        raise Exception(problem)

    payload = {
        'sourceNamespace': sourceNamespace,
        'setNamespace': setNamespace,
        'uri': capURL,
        'capabilityType': capType,
    }

    try:
        reply = Utils.postRSData(self.capabilityURI, payload)
    except Exception as err:
        self.logger.critical("Could not add capability. capabiltyURI: " +
                             str(self.capabilityURI) + " ERROR: " + str(err))
        raise BadResourceURL(str(err))

    record = Utils.getJSONFromResponse(reply)
    message = self.getMessage(record)
    if message:
        self.logger.warning(message)
    return record, message
def __init__(self, sourceNamespace, setNamespace, opts):
    """Initialise the OAI loader and validate its options.

    Delegates to the parent initialiser, then requires the ``OAISource`` and
    ``OAIMetaDataPrefix`` options and checks that the ``Identify`` URL built
    from ``self.OAISource`` passes ``Utils.checkURI``.

    Raises:
        RSLoaderError: if ``Utils.validateRequired`` raises ``ValueError``,
            or if building/checking the ``Identify`` URL raises.
    """
    super().__init__(sourceNamespace, setNamespace, opts)
    self.logger.info("initializing OAILoader")

    requiredOpts = ['OAISource', 'OAIMetaDataPrefix']
    try:
        Utils.validateRequired(opts, requiredOpts)
    except ValueError as err:
        raise RSLoaderError("Missing required parameter.", err, self.logger)

    try:
        # The URL is built inside the try so that a failure while reading
        # self.OAISource is wrapped in RSLoaderError as well.
        Utils.checkURI(str(self.OAISource) + "?verb=Identify")
    except Exception as err:
        raise RSLoaderError("OAISource url did not validate. ", err,
                            self.logger)
def __init__(self, sourceNamespace, setNamespace, opts):
    """Initialise the list loader and validate its options.

    Delegates to the parent initialiser, then requires the ``ListSource``
    option and checks that ``self.ListSource`` passes ``Utils.checkURI``.

    Raises:
        RSLoaderError: if ``Utils.validateRequired`` raises ``ValueError``,
            or if building/checking the ``ListSource`` URL raises.
    """
    super().__init__(sourceNamespace, setNamespace, opts)
    self.logger.info("initializing ListLoader")

    requiredOpts = ['ListSource']
    try:
        Utils.validateRequired(opts, requiredOpts)
    except ValueError as err:
        raise RSLoaderError("Missing required parameter.", err, self.logger)

    try:
        # Attribute access stays inside the try so that a failure while
        # reading self.ListSource is wrapped in RSLoaderError as well.
        Utils.checkURI(str(self.ListSource))
    except Exception as err:
        raise RSLoaderError("ListSource url did not validate. ", err,
                            self.logger)
def __init__(self, sourceNamespace, setNamespace, opts):
    """Initialise the OAI loader and validate its options.

    Delegates to the parent initialiser, requires the ``OAISource`` and
    ``OAIMetaDataPrefix`` options (errors from ``Utils.validateRequired``
    propagate unchanged), and checks that the ``Identify`` URL built from
    ``self.OAISource`` passes ``Utils.checkURI``.

    Raises:
        Exception: if the parent initialiser raises.
        ValueError: if building/checking the ``Identify`` URL raises
            (logged as critical first).
    """
    try:
        super().__init__(sourceNamespace, setNamespace, opts)
    except Exception as err:
        raise Exception("Could not start RSLoader. " + str(err))

    self.logger.info("initializing OAILoader")
    Utils.validateRequired(opts, ['OAISource', 'OAIMetaDataPrefix'])

    try:
        Utils.checkURI(str(self.OAISource) + "?verb=Identify")
    except Exception as err:
        # The same text goes to the log and into the raised exception.
        problem = "OAISource url did not validate. " + str(err)
        self.logger.critical(self.msg(problem))
        raise ValueError(problem)
def __init__(self, endpointURI):
    """Initialise the REST client for the given endpoint.

    Stores ``endpointURI`` (with a trailing slash appended when missing) and
    derives ``self.resourceURI`` and ``self.capabilityURI`` from it. The
    resource URI is passed to ``Utils.checkURI`` before the capability URI
    is set.

    Args:
        endpointURI: Base URI of the endpoint, as a string.

    Raises:
        TargetURIException: if ``Utils.checkURI`` raises for the resource
            URI (logged as critical first).
    """
    logger.info("Initializing RSRestClient")

    # Derived URIs are built by plain concatenation, so the endpoint must end
    # with a slash. endswith() is used instead of indexing [-1] so that an
    # empty string does not raise IndexError here.
    if not endpointURI.endswith("/"):
        endpointURI = str(endpointURI) + "/"
    self.endpointURI = endpointURI

    self.resourceURI = str(self.endpointURI) + "resource"
    logger.info("Checking resourceURI: " + str(self.resourceURI))
    try:
        Utils.checkURI(self.resourceURI)
    except Exception as e:
        logger.critical("ResourceURI did not validate: " +
                        str(self.resourceURI) + " ERROR:" + str(e))
        raise TargetURIException(
            "ResourceURI did not validate: " + str(self.resourceURI), e)

    self.capabilityURI = str(self.endpointURI) + "capability"
def makeDump(self):
    """Create a dump on the target endpoint and register it as a capability.

    Does nothing and returns ``False`` when ``self.createDump`` is falsy.
    Otherwise asks ``self.targetEndpoint.addDump`` for the dump, uses its
    return value as the zip URI, polls that URI with ``Utils.checkURI``
    until it returns a truthy result, and finally calls
    ``self.addCapability(zipURI, 'dump')``.

    Returns:
        The result of ``self.addCapability`` when a dump was created,
        otherwise ``False``.

    Raises:
        AddDumpException: if ``addDump`` raises, or if the zip URI is still
            not available after the retry budget is exhausted.
    """
    if not self.createDump:
        return False

    try:
        zipURI = self.targetEndpoint.addDump(
            self.batchTag, self.sourceNamespace, self.setNamespace)
    except Exception as e:
        logger.critical(self.msg("Could not add dump."))
        raise AddDumpException("Could not add dump.", e)

    # Allow up to 1 hour for zip creation: 60 retries, 60 seconds apart.
    maxRetries = 60
    retryDelay = 60

    # The counter must live outside the loop; resetting it on every pass
    # would make the retry limit unreachable and the wait unbounded.
    retries = 0
    while True:
        try:
            uriResponse = Utils.checkURI(zipURI)
        except Exception:
            uriResponse = None

        if uriResponse:
            logger.info("Found zipURI.")
            break

        # A falsy result is treated like a failed check so that it is also
        # counted and throttled instead of spinning without a pause.
        retries = retries + 1
        if retries > maxRetries:
            logger.critical(
                self.msg("Too many retries waiting for " + str(zipURI)))
            raise AddDumpException(
                "Too many retries waiting for " + str(zipURI))
        time.sleep(retryDelay)

    return self.addCapability(zipURI, 'dump')
def getManifest(self, batchTag, sourceNamespace, setNamespace):
    """Fetch the manifest for a batch.

    The manifest URL is
    ``<endpointURI>/static/<sourceNamespace>/<setNamespace>/<batchTag>/manifest``.

    Returns:
        ``False`` when ``Utils.checkURI`` returns a falsy value for the URL,
        otherwise whatever ``Utils.getContent`` returns for it.
    """
    manifestURL = (self.endpointURI + "/static/" + str(sourceNamespace) +
                   "/" + str(setNamespace) + "/" + str(batchTag) +
                   "/manifest")
    if Utils.checkURI(manifestURL):
        return Utils.getContent(manifestURL)
    return False
def getResources(self, offset=0, count=20):
    """Fetch one page of the resource listing.

    The request URL is ``<endpointURI>resource?offset=<offset>&count=<count>``.

    Args:
        offset: Value sent as the ``offset`` query parameter.
        count: Value sent as the ``count`` query parameter.

    Returns:
        ``False`` when ``Utils.checkURI`` returns a falsy value for the URL,
        otherwise whatever ``Utils.getContent`` returns for it.
    """
    url = self.endpointURI + str("resource")
    url = str(url) + "?offset=" + str(offset) + "&count=" + str(count)
    if not Utils.checkURI(url):
        return False
    # The content is fetched exactly once, through Utils.getContent. An
    # additional urlopen() whose response is never read or closed would only
    # add a redundant request and leak the connection.
    return Utils.getContent(url)
def loadCapabilityList(self, sourceNamespace, setNamespace):
    """Load the ``url`` entries of a set's capability list.

    The document is read from
    ``<endpointURI>/RS/<sourceNamespace>/<setNamespace>/capabilitylist.json``.

    Returns:
        ``False`` when ``Utils.checkURI`` returns a falsy value for the URL;
        the value stored under ``urlset -> url`` when both keys are present;
        otherwise an empty list.
    """
    listURL = (self.endpointURI + "/RS/" + str(sourceNamespace) + "/" +
               str(setNamespace) + "/capabilitylist.json")
    if not Utils.checkURI(listURL):
        return False

    payload = Utils.getJSONFromResponse(Utils.getResponse(listURL))
    if 'urlset' not in payload:
        return []
    if 'url' not in payload['urlset']:
        return []
    return payload['urlset']['url']
def loadManifestIDs(self, sourceNamespace, setNamespace, batchTag):
    """Extract the resource IDs listed in a batch manifest.

    The manifest is read from
    ``<endpointURI>/static/<sourceNamespace>/<setNamespace>/<batchTag>/manifest``.
    For each non-blank line, the text after the last ``><`` is taken and
    every ``/resource/`` and ``>`` occurrence is removed from it.

    Returns:
        ``False`` when ``Utils.checkURI`` returns a falsy value for the URL,
        otherwise the list of extracted IDs in manifest order.
    """
    url = self.endpointURI + "/static/" + str(sourceNamespace) + "/" + str(
        setNamespace) + "/" + str(batchTag) + "/manifest"
    if not Utils.checkURI(url):
        return False

    contents = Utils.getContent(url)
    ids = []
    for line in contents.split("\n"):
        # Blank lines (e.g. the one produced by a trailing newline) carry no
        # resource and would otherwise yield an empty-string ID.
        if not line.strip():
            continue
        resourceID = line.split('><')[-1]
        resourceID = resourceID.replace('/resource/', '')
        resourceID = resourceID.replace('>', '')
        ids.append(resourceID)
    return ids
def loadResourceListIndex(self, sourceNamespace, setNamespace):
    """Collect the JSON resource-list links from a set's index.

    The index is read from
    ``<endpointURI>/RS/<sourceNamespace>/<setNamespace>/resourcelistindex.json``.
    Every entry under ``sitemapindex -> sitemap`` whose ``rs:ln`` has an
    ``@type`` equal (case-insensitively) to ``application/json``
    contributes its ``@href``.

    Returns:
        ``False`` when ``Utils.checkURI`` returns a falsy value for the URL,
        otherwise the list of collected ``@href`` values (possibly empty).
    """
    indexURL = (self.endpointURI + "/RS/" + str(sourceNamespace) + "/" +
                str(setNamespace) + "/resourcelistindex.json")
    if not Utils.checkURI(indexURL):
        return False

    payload = Utils.getJSONFromResponse(Utils.getResponse(indexURL))
    jsonLinks = []
    if 'sitemapindex' not in payload:
        return jsonLinks
    if 'sitemap' not in payload['sitemapindex']:
        return jsonLinks

    for entry in payload['sitemapindex']['sitemap']:
        if 'rs:ln' not in entry:
            continue
        if '@type' not in entry['rs:ln']:
            continue
        linkType = str(entry['rs:ln']['@type']).lower()
        if linkType == 'application/json':
            jsonLinks.append(entry['rs:ln']['@href'])
    return jsonLinks
def loadResourceListIDs(self, url):
    """Collect the resource IDs referenced by a resource list.

    ``url`` is first passed through ``self.convertToRSDomain``. Every entry
    under ``urlset -> url`` whose ``rs:ln`` has a ``rel`` equal
    (case-insensitively) to ``describedby`` contributes its ``href`` with
    every ``/resource/`` occurrence removed.

    Returns:
        ``False`` when ``Utils.checkURI`` returns a falsy value for the
        converted URL, otherwise the list of collected IDs (possibly empty).
    """
    url = self.convertToRSDomain(url)
    if not Utils.checkURI(url):
        return False

    payload = Utils.getJSONFromResponse(Utils.getResponse(url))
    found = []
    if 'urlset' not in payload:
        return found
    if 'url' not in payload['urlset']:
        return found

    for entry in payload['urlset']['url']:
        if 'rs:ln' not in entry:
            continue
        if 'rel' not in entry['rs:ln']:
            continue
        relation = str(entry['rs:ln']['rel']).lower()
        if relation == 'describedby':
            href = entry['rs:ln']['href']
            found.append(href.replace('/resource/', ''))
    return found