def predefined(self, queryUrl=None, **kwargs):
    assert queryUrl is not None, 'queryUrl parameter required'
    try:
        # Every keyword argument becomes a URI binding for the prepared query.
        bindings = {key: URIRef(kwargs[key]) for key in kwargs}
        response = Graph(store=self._store).query(
            prepareQuery(self._resolver.resolve(queryUrl)),
            initBindings=bindings)
        register_query(queryUrl, bindings, self._resolver)
        if response.type == 'CONSTRUCT':
            # CONSTRUCT results cannot be JSON-serialized directly, so we copy
            # the triples into a graph and extract the data with a SELECT.
            g = Graph()
            g += response
            response = g.query(
                prepareQuery("SELECT ?s ?p ?o WHERE {?s ?p ?o}"))
        return response
    finally:
        self._store.flush_log(logging.DEBUG)
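# A minimal, self-contained sketch of the CONSTRUCT-to-SELECT trick used in
# predefined() above, using plain rdflib; the data and queries here are
# illustrative only, not taken from this repository.
from rdflib import Graph, URIRef, Literal
from rdflib.plugins.sparql import prepareQuery

g = Graph()
g.add((URIRef('http://example.org/doc/1'),
       URIRef('http://purl.org/dc/terms/title'),
       Literal('An example title')))

construct_result = g.query(
    prepareQuery('CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o}'))
assert construct_result.type == 'CONSTRUCT'

# A CONSTRUCT result iterates triples, so it can be copied into a fresh graph
# and re-queried with a SELECT, whose rows serialize cleanly.
extracted = Graph()
extracted += construct_result
for row in extracted.query(prepareQuery('SELECT ?s ?p ?o WHERE {?s ?p ?o}')):
    print(row)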
def _next(self):
    xml_input = urllib2.urlopen(self._url, timeout=20)
    page = XMLGraph(xml_input)
    # Inspect the OAI-PMH resumption token to decide whether more pages follow.
    self._handle_resumption_token(page)
    puredomain = URIRef('http://{}/'.format(self._location))
    prepd = prepareQuery(self._query, initNs={'puredomain': puredomain})
    return page.query(prepd)
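# Self-contained sketch of the initNs pattern used in _next(): the query text
# can stay a fixed constant while the namespace behind its prefix is chosen at
# run time. The data, namespace, and query here are illustrative only.
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.plugins.sparql import prepareQuery

EX = Namespace('http://pure.example.org/vocab/')
g = Graph()
g.add((URIRef('http://pure.example.org/paper/1'), EX.title, Literal('A paper')))

q = prepareQuery('SELECT ?s WHERE { ?s ex:title ?t }', initNs={'ex': EX})
for row in g.query(q):
    print(row.s)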
def build(self, graph):
    try:
        result = graph.query(prepareQuery(self._query))
        # Take the first result row; StopIteration means there was none.
        task = next(iter(result))
        logging.debug(task['pureurl'])
        return self._url_to_iterator_fun(task['pureurl'])
    except StopIteration:
        return None
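# Self-contained sketch of pulling a single row out of a SPARQL result the way
# build() does; the vocabulary and data are illustrative only.
from rdflib import Graph, Literal, URIRef
from rdflib.plugins.sparql import prepareQuery

g = Graph()
g.add((URIRef('http://example.org/task/1'),
       URIRef('http://example.org/vocab/pureurl'),
       Literal('http://pure.example.org/ws/rest')))

result = g.query(prepareQuery(
    'SELECT ?pureurl WHERE '
    '{ ?task <http://example.org/vocab/pureurl> ?pureurl }'))
try:
    task = next(iter(result))   # first row, raises StopIteration when empty
    print(task['pureurl'])      # rows support lookup by variable name
except StopIteration:
    print('no tasks found')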
def dynamic(self, name=None, query=None):
    assert name is not None, 'name parameter required'
    assert query is not None, 'query parameter required'
    self._store.log(name)
    try:
        response = Graph(store=self._store).query(prepareQuery(query))
        return response
    finally:
        self._store.flush_log(logging.DEBUG)
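# Hedged usage sketch of dynamic(): the caller names the query for logging and
# supplies a raw SPARQL string. The endpoint object and query below are
# illustrative assumptions, not part of this repository:
#
#   response = endpoint.dynamic(
#       name='all-triples',
#       query='SELECT ?s ?p ?o WHERE {?s ?p ?o} LIMIT 10')
#   for row in response:
#       print(row)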
def __init__(self, xml_input=None, url=None):
    '''Specify either xml_input or url.

    @param xml_input An XML string
    @param url A URL string referring to an XML document
    '''
    self._timestamp_triple = None
    if xml_input is None:
        # Load XML from the URL into xml_input.
        assert url is not None, 'Need xml_input or url'
        self._timestamp_triple = (URIRef(url),
                                  SEPAKE.wasDetailedAtTime,
                                  Literal(datetime.utcnow()))
        xml_input = urllib2.urlopen(url, timeout=30)
    if url is None:
        # Dummy URL for internal data.
        url = 'file:///'
    # Transform the XML into an RDF graph, removing unnecessary parts
    # in the process.
    self._xml_as_rdf = _slimmed_xml_as_rdf(xml_input)
    # Parse the two queries used to create triples from self._xml_as_rdf.
    self._queries = [prepareQuery(_CONSTRUCT_PROJECT,
                                  initNs={'rest_url': URIRef(url)}),
                     prepareQuery(_CONSTRUCT_PEOPLE,
                                  initNs={'rest_url': URIRef(url)})]
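# Hedged usage sketch of the two construction modes described in the docstring
# above (the class name XMLGraph is assumed from its use elsewhere in these
# snippets; the URL and XML string are illustrative only):
#
#   from_url = XMLGraph(url='http://pure.example.org/ws/rest/projects')
#   from_string = XMLGraph(xml_input='<result></result>')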
def __init__(self, url):
    self._url = url
    self._original_url = url
    logging.debug(url)
    url_parsed = urlparse(url)
    self._location = url_parsed.netloc
    self._more = True
    self._query = prepareQuery(
        _CONSTRUCT_PAPERS,
        initNs={'puredomain': URIRef('http://{}/'.format(self._location)),
                'oai_url': URIRef(url)})
def __init__(self, graph, tasks, verbose=False):
    self._graph = graph
    self._tasks = tasks
    self._queries = [prepareQuery(q) for q in _CONSTRUCTS]
    self._verbose = verbose
def details_iterator_generator(graph):
    tasks = list(graph.query(prepareQuery(_TASKS)))
    if tasks:
        return PureRESTPublicationHarvester(graph, tasks)
    return None
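# Hedged usage sketch (names taken from the snippets above; the graph is
# assumed to be pre-populated with harvesting tasks, and the harvester's
# iteration protocol is assumed rather than confirmed):
#
#   harvester = details_iterator_generator(graph)
#   if harvester is not None:
#       for page in harvester:
#           ...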