def inspect(self, context: Context) -> PropertyValues:
    """Derive values for ``context['predicate']`` by querying the handler.

    If ``self.handler`` is None it is first created with
    ``handlers.factory(**context['handler_args'])``.

    Predicates handled:

    * ``dct:temporal`` -- builds a ``Temporal`` node, adds it to the graph
      and returns its uri.
    * ``dcat:byteSize`` -- ``handler.size`` as an xsd:integer literal.
    * ``logset:recordCount`` -- ``handler.num_records`` when it is known.
    * ``logset:estRecordCount`` -- ``handler.num_records`` when known,
      otherwise ``handler.size`` divided by the average length of up to 10
      sampled entries.
    * ``logset:estRecordsPerDay`` -- record count divided by the number of
      days between ``handler.t_earliest`` and ``handler.t_latest``.

    Returns:
        A set of the values found. It is empty when the predicate is not
        handled here or when a value cannot be computed (unknown record
        count, empty sample, zero-length timespan).
    """
    xsd = getns('xsd')
    if self.handler is None:
        handler_args = context['handler_args']  # dict of kwargs
        logging.debug("in inspect, cntext has: {0}".format(context))
        logging.debug(
            "in inspect, handler_args has: {0}".format(handler_args))
        self.handler = handlers.factory(**handler_args)

    retval = set()
    predicate = context['predicate']
    if predicate == 'dct:temporal':
        context.push(handler=self.handler, predicate=predicate)
        try:
            t = Temporal()
            t.add_to_graph(context)
        finally:
            # always undo the push, even if describing the Temporal failed
            context.pop(('handler', 'predicate'))
        retval.add(t.uri)
    elif predicate == 'dcat:byteSize':
        retval.add(Literal(self.handler.size, datatype=xsd.integer))
    elif predicate == 'logset:recordCount':
        if self.handler.num_records is not None:
            retval.add(
                Literal(self.handler.num_records, datatype=xsd.integer))
    elif predicate == 'logset:estRecordCount':
        if self.handler.num_records is not None:
            retval.add(
                Literal(self.handler.num_records, datatype=xsd.integer))
        else:
            sample = list(self.handler.get_slice(limit=10))
            sample_length = sum(len(entry) for entry in sample)
            # an empty sample (or only zero-length entries) gives nothing
            # to extrapolate from, so no estimate is returned
            if sample_length > 0:
                avg = sample_length / len(sample)
                # truncate: the value is declared as an xsd:integer
                guess = int(self.handler.size / avg)
                retval.add(Literal(guess, datatype=xsd.integer))
    elif predicate == 'logset:estRecordsPerDay':
        # the handler stores the timespan just as text, so convert it to
        # datetimes before working out the number of days covered
        t_earliest = dateutil.parser.parse(self.handler.t_earliest)
        t_latest = dateutil.parser.parse(self.handler.t_latest)
        days = (t_latest - t_earliest).total_seconds() / 86400
        nrecs = self.get_one_value('logset:recordCount') or \
            self.get_one_value('logset:estRecordCount')
        # NOTE(review): assumes get_one_value returns None when there is no
        # value -- confirm against its definition
        if nrecs is not None and days > 0:
            # go via str/float so a literal with a non-integer lexical
            # form still converts
            nrecs = int(float(str(nrecs)))
            nrecs_per_day = int(nrecs / days)
            retval.add(Literal(nrecs_per_day, datatype=xsd.integer))
    return retval
def truefalse(self, context: Context) -> PropertyValues:
    """Obtain a boolean answer for the predicate in ``context``.

    Looks up the prompt registered in ``self.prompts`` for
    ``context['predicate']``, passes it to ``UI.truefalse`` and wraps the
    outcome ('true' or 'false') in an xsd:boolean literal.

    Returns:
        A set containing that single literal.
    """
    question = self.prompts.get(context['predicate'])
    if UI.truefalse(question):
        answer = 'true'
    else:
        answer = 'false'
    boolean_type = graph.getns('xsd').boolean
    message = "truefalse returning " + answer
    logging.debug(message)
    return {Literal(answer, datatype=boolean_type)}
def inspect(self, context: Context) -> PropertyValues:
    """Read a start or end date from the handler found in ``context``.

    ``logset:startDate`` maps to ``handler.t_earliest`` and
    ``logset:endDate`` maps to ``handler.t_latest``; the value is wrapped
    in an xsd:dateTime literal.

    Returns:
        A set holding the literal, or an empty set for any other predicate.
    """
    found = set()
    wanted = context['predicate']
    source = context['handler']
    xsd_ns = getns('xsd')

    if wanted == 'logset:startDate':
        attribute = 't_earliest'
    elif wanted == 'logset:endDate':
        # note: for a live source the end date should be None
        attribute = 't_latest'
    else:
        return found

    timestamp = getattr(source, attribute)
    found.add(Literal(timestamp, datatype=xsd_ns.dateTime))
    return found
def add_to_graph(self, context: Context = None):
    """Write this node's property triples and its rdf:type into the graph.

    Does nothing (beyond a debug log) when ``self._in_graph`` is already
    set. A fresh ``Context`` is created when none is supplied.

    Values for every predicate in ``self.properties`` are gathered first
    and only then added to ``self.graph``: the properties are described
    before the node itself so subclasses can use them to build a helpful
    uri, and gathering (which may involve asking the user) is kept ahead
    of any graph mutation to avoid loops.

    Raises:
        Exception: if ``get_values`` yields something that is neither an
            ``Identifier`` nor a ``Node``.
    """
    if self._in_graph:
        logging.debug("already in graph, skipping")
        return
    if context is None:
        context = Context()

    # phase 1: gather every (subject, predicate, object) without touching
    # the graph
    pending = []
    context.push(node=self)
    for prop in self.properties:
        context.push(predicate=prop)
        # the property is a string such as foaf:name; the graph needs the
        # actual uri
        logging.debug("calling Graph.geturi on {0}".format(prop))
        prop_uri = graph.geturi(prop)
        logging.debug("calling get_values with {0}, {1}".format(
            str(prop), str(context)))
        for value in self.get_values(prop, context):
            if not isinstance(value, (Identifier, Node)):
                # this is not expected to ever happen
                raise Exception(
                    "oh oh! " + str(value) + " ... " + str(type(value)))
            pending.append((
                self.uri,
                prop_uri,
                value if isinstance(value, Identifier) else value.uri,
            ))
        context.pop(('predicate', ))
    context.pop(('node', ))

    # phase 2: commit the gathered triples
    for entry in pending:
        logging.debug("adding triple {0}".format(entry))
        self.graph.add(entry)

    # phase 3: finally, state what kind of thing this node is
    rdf_ns = graph.getns('rdf')
    class_uri = graph.geturi(self.rdf_class)
    logging.info("adding me to graph: {0}, {1}, {2}".format(
        self.uri, str(rdf_ns.type), str(class_uri)))
    self.graph.add((self.uri, rdf_ns.type, class_uri))
    self._in_graph = True
def add_to_graph(self, context: Context = None):
    """Add this node to the graph and attach it to its logset.

    Delegates to the parent ``add_to_graph`` and then adds a
    ``dcat:distribution`` triple from ``context['logset'].uri`` to
    ``self.uri``.

    Args:
        context: must be supplied and must provide a ``'logset'`` entry.
            The ``None`` default exists only to match the parent
            signature.

    Raises:
        TypeError: if ``context`` is None. Subscripting None would raise
            this anyway, but only after the parent had already modified
            the graph; checking first leaves the graph untouched.
    """
    if context is None:
        raise TypeError(
            "add_to_graph requires a context that provides 'logset'")
    super().add_to_graph(context)
    dcat = getns('dcat')
    self.graph.add((context['logset'].uri, dcat.distribution, self.uri))