def time_store_select(self, store, query_template, basefile, context=None, label="things"):
    """Run ``store_select`` for ``basefile`` and log a timing summary.

    The canonical URI of ``basefile`` is passed to ``self.store_select``
    together with ``store``, ``query_template`` and ``context``. The call
    is wrapped in ``util.logtime`` so that a debug message naming the
    basefile, the number of selected items and ``label`` is emitted.

    :param store: Passed through unchanged to ``self.store_select``.
    :param query_template: Passed through unchanged to ``self.store_select``.
    :param basefile: Identifier used for the URI lookup and the log message.
    :param context: Passed through unchanged to ``self.store_select``.
    :param label: Word used in the log message to describe what was selected.
    :returns: Whatever ``self.store_select`` returned.
    """
    log_template = "%(basefile)s: selected %(count)s %(label)s (%(elapsed).3f sec)"
    log_values = {'basefile': basefile, 'label': label, 'count': None}
    target_uri = self.canonical_uri(basefile)
    # NOTE(review): util.logtime is presumably what fills in 'elapsed' --
    # this block never sets it itself.
    with util.logtime(self.log.debug, log_template, log_values):
        selected = self.store_select(store, query_template, target_uri, context)
        # Must be set before the context exits so the message can report it.
        log_values['count'] = len(selected)
    return selected
def select(self, template, uri, format="json"):
    """Run the query stored in ``template`` against the configured store and print the result.

    The file ``template`` is read and ``%(uri)s`` in it is replaced with
    ``uri``. Connection details, the query itself (as ``#``-prefixed
    lines), the decoded result and a timing line are all written with
    ``print``.

    :param template: Path of the query template file.
    :param uri: Value substituted for ``%(uri)s`` in the template.
    :param format: Passed as ``format=`` to the store's ``select``. The
                   result is decoded as UTF-8, so it is assumed to be
                   bytes -- TODO confirm for non-default formats.
    """
    query = util.readfile(template) % {'uri': uri}
    config = self.config
    store = TripleStore.connect(config.storetype,
                                config.storelocation,
                                config.storerepository)
    banner = "# Constructing the following from %s, repository %s, type %s"
    print(banner % (config.storelocation,
                    config.storerepository,
                    config.storetype))
    # Echo the query as comment lines so the output stays copy-pasteable.
    commented_query = "".join("# %s\n" % line for line in query.split("\n"))
    print(commented_query)
    timing = {}
    with util.logtime(print, "# Selected in %(elapsed).3fs", timing):
        raw = store.select(query, format=format)
        print(raw.decode('utf-8'))
def parse(self, basefile):
    """Try each subrepo in turn until one of them parses ``basefile``.

    For every entry in ``self.subrepos`` an instance is obtained through
    ``self.get_instance`` and its ``parse`` is called. The first instance
    returning a truthy value wins: its output is handed to
    ``self.copy_parsed`` and no further subrepos are tried. A
    ``ParseError`` from a subrepo counts as a failed attempt and the next
    subrepo is tried.

    :param basefile: Identifier of the document to parse.
    :returns: The truthy result of the successful subrepo, else ``False``.
    """
    log_values = {'basefile': basefile}
    with util.logtime(self.log.info,
                      "%(basefile)s OK (%(elapsed).3f sec)",
                      log_values):
        outcome = False
        for repo_cls in self.subrepos:
            inst = self.get_instance(repo_cls, self.myoptions)
            try:
                # Each subrepo's parse is expected to decide for itself
                # whether a re-parse is needed (i.e. use the
                # @managedparsing decorator).
                outcome = inst.parse(basefile)
            except errors.ParseError:  # or others
                outcome = False
            if outcome:
                self.copy_parsed(basefile, inst)
                break
        return outcome
def time_store_select(
        self, store, query_template, basefile, context=None, label="things"):
    """Call ``self.store_select`` for ``basefile`` inside a timed, logged block.

    ``basefile`` is converted with ``self.canonical_uri`` and the
    resulting URI is what gets passed to ``self.store_select``. A debug
    message stating the basefile, the size of the result and ``label`` is
    produced via ``util.logtime``.

    :param store: Forwarded to ``self.store_select``.
    :param query_template: Forwarded to ``self.store_select``.
    :param basefile: Document identifier; also appears in the log message.
    :param context: Forwarded to ``self.store_select``.
    :param label: Noun used in the log message for the selected items.
    :returns: The result of ``self.store_select``.
    """
    stats = dict(basefile=basefile, label=label, count=None)
    message = ("%(basefile)s: selected %(count)s %(label)s"
               " (%(elapsed).3f sec)")
    uri = self.canonical_uri(basefile)
    with util.logtime(self.log.debug, message, stats):
        hits = self.store_select(store, query_template, uri, context)
        # Record the size while still inside the timed block so the log
        # line written on exit shows a real count instead of None.
        stats['count'] = len(hits)
        return hits
def get_treenotice_graph(self, cellarurl, celexid):
    """Return the tree notice for ``celexid`` as a parsed graph.

    If an intermediate ``.ttl`` file already exists for ``celexid`` it is
    read and parsed as Turtle, and no HTTP request is made. Otherwise the
    notice is requested from ``cellarurl`` with an RDF/XML ``Accept``
    header and the response body is parsed.

    :param cellarurl: URL the tree notice is fetched from.
    :param celexid: Identifier used to locate the intermediate file and
                    to label log messages.
    :returns: The parsed graph, or ``None`` when the fetch produced a
              falsy response.
    """
    # avoid HTTP call if we already have the data
    if os.path.exists(self.store.intermediate_path(celexid, suffix=".ttl")):
        self.log.info("%s: Opening existing TTL file" % celexid)
        with self.store.open_intermediate(celexid, suffix=".ttl") as fp:
            return Graph().parse(data=fp.read(), format="ttl")
    # FIXME: read the rdf-xml data line by line and construct a
    # graph by regex-parsing interesting lines with a very simple
    # state machine, rather than doing a full parse, to speed
    # things up
    resp = util.robust_fetch(self.session.get, cellarurl, self.log,
                             headers={"Accept": "application/rdf+xml;notice=tree"},
                             timeout=10)
    if not resp:
        return None
    with util.logtime(self.log.info,
                      "%(basefile)s: parsing the tree notice took %(elapsed).3f s",
                      {'basefile': celexid}):
        # The request asks for RDF/XML, so name the format explicitly
        # instead of relying on the parser's default. Assuming Graph is
        # rdflib.Graph: newer releases default to Turtle when parsing
        # from ``data=``, which would reject this payload.
        graph = Graph().parse(data=resp.content, format="xml")
    return graph