def fmtInfo(self) -> MultiDict:
    if self._fmtInfo is None:
        self._fmtInfo = MultiDict()
        for prop in self.get_values('logset:logFormatInfo'):
            key, sep, val = prop.partition('=')
            self._fmtInfo.add(key, val)
    return self._fmtInfo
class LuceneQuery(object):
    MANDATORY = 'mandatory'
    PROHIBITED = 'prohibited'
    OPTIONAL = 'optional'
    required_modifiers = {MANDATORY: '+', PROHIBITED: '-', OPTIONAL: ''}

    def __init__(self, *args, **kw):
        first_arg = args[0]
        # defaults; the branches below override them as needed (the original left
        # use_colon/components unset on some paths, which broke __unicode__)
        self.field = None
        self.use_colon = True
        if hasattr(first_arg, 'solr_field_name'):
            self.field, args = first_arg, args[1:]
            self.components = list(args)
        elif isinstance(first_arg, LuceneQuery):
            # Only pass LuceneQuery if all the other args are LuceneQueries
            self.components = list(
                itertools.chain(*[query.components for query in args]))
        else:
            self.components = list(args)
        self.local_params = MultiDict()
        self.required = LuceneQuery.OPTIONAL
        self.boost_factor = None

    def __pow__(self, power):
        if not self.components:
            self.use_colon = False
        self.boost_factor = power
        return self

    def tag(self, tag):
        self.local_params.add('tag', tag)
        return self

    def require(self):
        self.required = LuceneQuery.MANDATORY
        return self

    def negate(self):
        self.required = LuceneQuery.PROHIBITED
        return self

    def fuzzy(self, factor=NotGiven):
        self.components.append('~')
        if factor != NotGiven:
            self.components.append(factor)
        return self

    def __unicode__(self):
        modifier = self.required_modifiers[self.required]
        field_clause = '' if not self.field else unicode(self.field)
        separator = ':' if self.use_colon else ''
        local_params = ''
        if self.local_params:
            local_params = '{!' + ' '.join(['%s=%s' % (key, unicode(val))
                                            for key, val in self.local_params.iteritems()]) + '}'
        component_clause = ''.join([unicode(component) for component in self.components])
        boost_factor = '^%s' % self.boost_factor if self.boost_factor else ''
        return u''.join([local_params, modifier, field_clause, separator,
                         component_clause, boost_factor])
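# --- usage sketch (not part of the original source) ---
# A brief illustration of the fluent LuceneQuery interface above. It assumes
# NotGiven and MultiDict come from the surrounding library; the search term and
# boost value are made up for illustration, and the exact rendered string depends
# on the field/use_colon plumbing.
q = LuceneQuery('solr')    # bare term: no field, components == ['solr']
q.require()                # rendered with a leading '+'
q.fuzzy(0.8)               # appends '~' and the fuzziness factor
q = q ** 2                 # boost: rendered with a trailing '^2'
print unicode(q)           # something like u'+solr~0.8^2'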
def __init__(self, environ):
    self.environ = environ
    self.path = environ.get('PATH_INFO', '').decode('utf8')
    self.method = environ.get('REQUEST_METHOD', '')
    self.cookies = Cookie.SimpleCookie(str(environ.get('HTTP_COOKIE', '')))
    self.GET = MultiDict(cgi.parse_qsl(
        urllib2.unquote(environ.get('QUERY_STRING', '')).decode('utf8'),
        keep_blank_values=1))
    self.POST = {}
    self.FILES = {}
    if self.method == 'POST':
        post = cgi.FieldStorage(fp=environ.get('wsgi.input', ''),
                                environ=environ, keep_blank_values=1)
        self.POST = MultiDict([(item.name, item.value)
                               for item in post.list if not item.filename])
        self.FILES = MultiDict([(item.name, item)
                                for item in post.list if item.filename])
    self.REQUEST = MultiDict(self.GET.items() + self.POST.items())
    # + self.FILES.items())
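# --- usage sketch (not part of the original source) ---
# A minimal sketch showing how the Request constructor above turns a WSGI environ
# dict into GET/REQUEST MultiDicts. The environ keys are standard WSGI/CGI
# variables; the values are made up, and the exact return type of GET['q']
# depends on the MultiDict implementation in use.
environ = {
    'PATH_INFO': '/search',
    'REQUEST_METHOD': 'GET',
    'QUERY_STRING': 'q=logs&page=2&page=3',   # repeated key exercises the MultiDict
    'HTTP_COOKIE': 'session=abc123',
}
request = Request(environ)
print request.path            # u'/search'
print request.GET['q']        # 'logs' (or a list, depending on MultiDict semantics)
print request.REQUEST.items()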
def __init__(self, body='', content_type='text/html', charset='utf8',
             redirect=None, status_code=None, status_msg=None):
    self.headers = MultiDict()
    self.content_type = content_type
    self.charset = charset
    self.body = body
    self.redirect = redirect
    self.status_code = status_code
    self.status_msg = status_msg
    self.cookies = Cookie.SimpleCookie()
def finalize(self):
    self.headers['Content-Type'] = '%s; charset=%s' % (self.content_type, self.charset)
    self.headers['Content-Length'] = len(self.body)
    if self.redirect:
        self.headers['Location'] = self.redirect
        self.status_code = self.status_code or 302
    for key in self.cookies:
        self.cookies[key]['path'] = self.cookies[key]['path'] or '/'
        self.headers['Set-Cookie'] = self.cookies[key].OutputString()
    self.status_code = self.status_code or 200
    self.headers = MultiDict([(safestr(k, self.charset), safestr(v, self.charset))
                              for (k, v) in self.headers.items()])
def __init__(self, properties: MultiDict = None, **kwargs) -> None:
    # lazy getting of uri is going to be common enough to just build it into the base class:
    self._uri: Optional[str] = None
    self._namespace: Optional[str] = None
    self._label: Optional[str] = None
    # eg "dct:title": set(Literal("my title"))
    self.properties = MultiDict(properties)
    for key, val in kwargs.items():
        self.properties.add(key, val)
    if 'uri' in self.properties:
        self._uri = self.properties.one('uri')
        self.properties.remove('uri')
    if 'namespace' in self.properties:
        self._namespace = self.properties.one('namespace')
        self.properties.remove('namespace')
    #if properties is not None and 'uri' in properties:
    #    logging.debug("setting uri from properties: {0}".format(properties['uri']))
    #    #self._uri = properties.pop('uri')[0]
    #    self._uri = properties.one('uri')
    #    properties.remove('uri')
    #
    #if properties is not None and 'namespace' in properties:
    #    logging.debug("setting namespace from properties: {0}".format(properties['namespace']))
    #    #self._uri = properties.pop('uri')[0]
    #    self._namespace = properties.one('namespace')
    #    properties.remove('namespace')

    # all attributes we have a getter for should have an
    # entry in properties, even if it is empty:
    for predicate in self.getters:
        if predicate not in self.properties:
            self.properties.add(predicate)

    # when adding a node to the graph and recursing into its properties,
    # we want a mechanism to bypass adding nodes that are already in the
    # graph:
    self._in_graph = False
def __init__(self, *args, **kw):
    first_arg = args[0]
    # defaults; the branches below override them as needed (the original left
    # use_colon/components unset on some paths, which broke __unicode__)
    self.field = None
    self.use_colon = True
    if hasattr(first_arg, 'solr_field_name'):
        self.field, args = first_arg, args[1:]
        self.components = list(args)
    elif isinstance(first_arg, LuceneQuery):
        # Only pass LuceneQuery if all the other args are LuceneQueries
        self.components = list(
            itertools.chain(*[query.components for query in args]))
    else:
        self.components = list(args)
    self.local_params = MultiDict()
    self.required = LuceneQuery.OPTIONAL
    self.boost_factor = None
def __init__(self, target_url: str = None, fmtinfo: MultiDict = None):
    # properties:MultiDict=None) -> None:
    """ constructor should take properties as a keyword argument (to pass to
        the Node superclass constructor). target_url (eg the file path to be
        opened) should be supported but not required (because Node factories
        might not provide it), same for fmtinfo
    """
    #super().__init__(properties=properties)
    self._size = None
    self._t_earliest = None
    self._t_latest = None
    self._actual_file = TextFile.factory(target_url)
    if fmtinfo is None:
        fmtinfo = MultiDict()  # guard: the 'in' test below fails on None
    for attr, parserule in self.fmtinfo_parsers.items():
        if attr in fmtinfo:
            value = fmtinfo.one(attr)
            m = parserule.regex.search(value)
            setattr(self, attr, parserule.parser(m))
    self.filters = {}
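# --- illustration (not part of the original source) ---
# A minimal sketch of the fmtinfo_parsers contract assumed by the constructor
# above: each entry maps an attribute name to an object exposing a compiled
# `regex` and a `parser` callable that turns the regex match into the attribute
# value. The ParseRule namedtuple and the 'ts_words' rule are hypothetical
# stand-ins, not part of the original source.
import re
from collections import namedtuple

ParseRule = namedtuple('ParseRule', ['regex', 'parser'])

fmtinfo_parsers = {
    # e.g. logset:logFormatInfo "ts_words=0"  ->  self.ts_words == 0
    'ts_words': ParseRule(regex=re.compile(r'(\d+)'),
                          parser=lambda m: int(m.group(1)) if m else None),
}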
def __init__(self, mods: dict, ties: set, name: str = ""):
    super().__init__(name)

    # build operations
    _ops = dict()
    for mod_name, opcode in mods.items():
        _ops[mod_name] = Operation(mod_name, opcode)

    # gather values
    _ties = MultiDict()  # src -> (dst, dst_port)
    for src_name, dst_name, dst_port in ties:
        _ties[_ops[src_name]] = (_ops[dst_name], dst_port)

    # build actual val objects
    _values = set()
    for src in _ties:
        _values.add(Value(src, _ties[src]))

    self._operations = frozenset(_ops.values())
    self._values = frozenset(_values)
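# --- example inputs (not part of the original source) ---
# Shape of the arguments the constructor above expects: `mods` maps module names
# to opcodes, and each tie is a (src, dst, dst_port) triple. The names, opcodes
# and port numbers here are made up for illustration; Operation and Value come
# from the surrounding module.
mods = {'add0': 0x01, 'mul0': 0x02}
ties = {('add0', 'mul0', 0)}   # add0's output feeds port 0 of mul0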
def instance(cls, uri):
    """ given the uri of a node, instantiate an object of this class
        based on the properties of that node in the graph
    """
    if not isinstance(uri, URIRef):
        uri = graph.geturi(uri)
    mdict = MultiDict(uri=[uri])
    preds = [k for k in cls.getters.keys()]  # impose an order
    preds_by_uri = {graph.geturi(p): p for p in preds}
    # this is a classmethod, so there is no self; query the shared graph instead:
    for pred_uri, obj in graph.Graph.the_graph.predicate_objects(uri):
        if pred_uri in preds_by_uri:
            mdict.add(preds_by_uri[pred_uri], obj)
        else:
            # it's a property we're not prepared for, just include it as an Identifier
            mdict.add(pred_uri, obj)
    return cls(properties=mdict)
def catalog(self, candidates: Set[FileInfo], context: Context) -> Set[FileInfo]:
    filepatterns = context['filepatterns']
    matching = set()
    for regex in filepatterns:
        logging.info("filepattern: {0}: {1}".format(regex, regex.pattern))
        logging.info("{0:d} candidates".format(len(candidates)))
        logging.info("candidates: {0}".format(candidates))
        logging.info("after filtering: " +
                     str(filter(lambda x: regex.match(x.filename), candidates)))
        matching |= set(filter(lambda x: regex.match(x.filename), candidates))

    # filter by MIMEtype:
    mediatypes = self.properties['dcat:mediaType']
    mimetest = lambda x: self.right_mime_type(x)
    matching -= set(filter(mimetest, matching))

    # args for the LogFormatType that will handle the actual file/source:
    handler_args = {
        'rdf_class': context['logFormatType'],
        'fmtinfo': context['formatinfo']
    }
    # 'properties': self.properties
    #context.push(handler_factory=handlers.factory)
    context.push(handler_args=handler_args)

    # hmm, everything the ConcreteLog infers, we can just pass as properties:
    common_properties = MultiDict()
    common_properties.add('logset:isInstanceOf', context['logseries_uri'])
    common_properties.add('dcat:accessURL', context['dcat:accessURL'])
    #common_properties.add_values('logset:subject', context['subjects'])
    common_properties.add('logset:subject', *context['subjects'])
    common_properties.add('namespace', context['namespace'])
    logging.info("logset for concretelog has: {0}".format(context['logset']))

    # in most cases we are looking file-by-file .. FilePerTimepoint
    # can override this default behavior
    for f in matching:
        #context.push(label=f.filename)
        #context.push({'dcat:downloadURL': f.relpath + os.sep + f.filename})
        properties = MultiDict(common_properties)
        properties.add('rdfs:label', Literal(f.filename))
        relpath = (f.relpath + os.sep + f.filename).lstrip(os.sep)
        properties.add('dcat:downloadURL', Literal(relpath))
        logging.info("properties for concretelog has: {0}".format(properties))
        handler_args['target_url'] = os.sep.join(f)  # full local path
        log = ConcreteLog(properties=properties)
        logging.info("adding log to graph: {0}".format(log))
        try:
            log.add_to_graph(context)
        except UnsupportedLogFormatHandler as err:
            logging.warn("logformat {0} not implemented".format(err))
        properties.remove('rdfs:label')
        properties.remove('dcat:downloadURL')

    context.pop(('handler_args', ))
    #context.pop('handler_factory')
    return candidates - matching
class Response(object):
    def __init__(self, body='', content_type='text/html', charset='utf8',
                 redirect=None, status_code=None, status_msg=None):
        self.headers = MultiDict()
        self.content_type = content_type
        self.charset = charset
        self.body = body
        self.redirect = redirect
        self.status_code = status_code
        self.status_msg = status_msg
        self.cookies = Cookie.SimpleCookie()

    @property
    def status(self):
        status_map = {200: 'OK', 301: 'Moved Permanently', 302: 'Found',
                      404: 'Not Found', 500: 'Internal Server Error'}
        if not self.status_msg:
            self.status_msg = status_map.get(self.status_code, '')
        return "%s %s" % (self.status_code, self.status_msg)

    def finalize(self):
        self.headers['Content-Type'] = '%s; charset=%s' % (self.content_type, self.charset)
        self.headers['Content-Length'] = len(self.body)
        if self.redirect:
            self.headers['Location'] = self.redirect
            self.status_code = self.status_code or 302
        for key in self.cookies:
            self.cookies[key]['path'] = self.cookies[key]['path'] or '/'
            self.headers['Set-Cookie'] = self.cookies[key].OutputString()
        self.status_code = self.status_code or 200
        self.headers = MultiDict([(safestr(k, self.charset), safestr(v, self.charset))
                                  for (k, v) in self.headers.items()])
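# --- usage sketch (not part of the original source) ---
# Shows how finalize() above fills in Content-Type/Content-Length, cookie headers
# and the default 200 status. It assumes Response, MultiDict and safestr from the
# snippet are importable; the body and cookie values are made up for illustration.
response = Response(body='<h1>hello</h1>', content_type='text/html')
response.cookies['session'] = 'abc123'
response.finalize()
print response.status            # "200 OK"
print response.headers.items()   # includes Content-Type, Content-Length, Set-Cookie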
def factory(node: str):
    cls = classes[node]
    properties = MultiDict(uri=[cls.rdf_node])
    return cls(properties=properties)
class Node:
    """ A node in the global RDF graph """

    # what RDF class is this? (corresponds to "thing a class" in RDF). Note
    # that this is a string like "logset:ConcreteLog" - we won't know the
    # URI until after the graph has been constructed.
    # Should be concrete, ie vcard:Organization not vcard:Kind
    rdf_class: ClassVar[str] = ''  # eg "foaf:Organization"

    # certain node types are created as a specific class (eg foaf:Organization
    # for Agent), but when querying the graph we need to find any subclass of
    # some superclass (eg foaf:Agent for Agent). If that is the case, the
    # Node class should specify the superclass with:
    rdf_superclass: ClassVar[str] = ''  # eg "foaf:Agent"

    # when adding a Node to the graph, triples for a certain set of properties
    # (based on the type of Node) are expected. These might be obtained by
    # querying a file/source or asking the user or inferring from context, etc.
    # Each class specifies the expected properties and how to obtain them via a
    # class-variable 'getters', which is a dict mapping a predicate (str) to
    # the method (of self) that obtains it.
    # getter methods are called like:
    #     values:PropertyValues = self.getter(context:Context)
    getters: ClassVar[PropertyGetterDict] = {}

    # some predicates *must* be present, indicate these with:
    required_properties: ClassVar[Set[str]] = set()

    # to support finding known nodes of a given type, Node classes should
    # define a sparql query that returns rows from which a Node can be
    # instantiated. This is done by mapping each row to the finder_fields
    # and passing the result as a PropertyDict to __init__ (see method known())
    finder_query: str = ''
    finder_fields: List[str] = []

    # for 'select' and 'multi_select' getters, what target class is being selected?
    targets: Dict[str, NodeType] = {}  # eg 'dct:publisher': Agent

    # prompts for simple questions system can "ask" to get some properties:
    prompts: Dict[str, str] = {}  # eg 'dct:title': "Give the LogSet a title"

    # which property to use as a label? (this is useful because when defining
    # a new Thing and asking the user questions about it, it is helpful to
    # create the instance with a label that is included in the prompts. The
    # label generally corresponds with one of the properties of the Node, so
    # we require each subclass to indicate which property it will use)
    label_property: str = None
    label_alternate = 'this'

    @property
    def graph(self):
        #logging.debug("getting graph: {0}".format(graph.Graph.the_graph))
        return graph.Graph.the_graph

    #def __init__(self, properties:PropertyDict = None) -> None:
    def __init__(self, properties: MultiDict = None, **kwargs) -> None:
        # lazy getting of uri is going to be common enough to just build it
        # into the base class:
        self._uri: Optional[str] = None
        self._namespace: Optional[str] = None
        self._label: Optional[str] = None
        # eg "dct:title": set(Literal("my title"))
        self.properties = MultiDict(properties)
        for key, val in kwargs.items():
            self.properties.add(key, val)
        if 'uri' in self.properties:
            self._uri = self.properties.one('uri')
            self.properties.remove('uri')
        if 'namespace' in self.properties:
            self._namespace = self.properties.one('namespace')
            self.properties.remove('namespace')
        #if properties is not None and 'uri' in properties:
        #    logging.debug("setting uri from properties: {0}".format(properties['uri']))
        #    #self._uri = properties.pop('uri')[0]
        #    self._uri = properties.one('uri')
        #    properties.remove('uri')
        #
        #if properties is not None and 'namespace' in properties:
        #    logging.debug("setting namespace from properties: {0}".format(properties['namespace']))
        #    #self._uri = properties.pop('uri')[0]
        #    self._namespace = properties.one('namespace')
        #    properties.remove('namespace')

        # all attributes we have a getter for should have an
        # entry in properties, even if it is empty:
        for predicate in self.getters:
            if predicate not in self.properties:
                self.properties.add(predicate)

        # when adding a node to the graph and recursing into its properties,
        # we want a mechanism to bypass adding nodes that are already in the
        # graph:
        self._in_graph = False

    @classmethod
    def instance(cls, uri):
        """ given the uri of a node, instantiate an object of this class
            based on the properties of that node in the graph
        """
        if not isinstance(uri, URIRef):
            uri = graph.geturi(uri)
        mdict = MultiDict(uri=[uri])
        preds = [k for k in cls.getters.keys()]  # impose an order
        preds_by_uri = {graph.geturi(p): p for p in preds}
        # this is a classmethod, so there is no self; query the shared graph instead:
        for pred_uri, obj in graph.Graph.the_graph.predicate_objects(uri):
            if pred_uri in preds_by_uri:
                mdict.add(preds_by_uri[pred_uri], obj)
            else:
                # it's a property we're not prepared for, just include it as an Identifier
                mdict.add(pred_uri, obj)
        return cls(properties=mdict)

    @classmethod
    def known(cls, filters: Dict[str, str] = dict()):
        """ generator-constructor over known nodes of a given type: """
        # TODO instead of sampling property values, use "order by" and a double
        # loop to actually get the full set of properties for a uri
        # (the general finder query is like:
        #    select ?uri <other fields> <optional fields> where {
        #        ?uri a <self.rdf_class> .
        #        ?uri <other predicate> <other variable> .
        #        optional {
        #            ?uri <other predicate> <other variable> .
        #        }
        #    } order by ?uri
        # then make a multi-dict of the properties
        # so:
        #  - first add a class var: required: list of required properties
        #
        #    # list of variables:
        #    nrequired = len(self.required)
        #    optionals = [ key for key in self.getters if key not in self.required ]
        #    ntotal = nrequired + len(optionals)
        #    required = [ '?v{:d}'.format(i) for i in range(nrequired) ]
        #    optional = [ '?v{:d}'.format(i) for i in range(nrequired,ntotal) ]
        #    query = "SELECT ?uri " + ' '.join(self.required) + ' '.join(optionals)
        #    query += " WHERE { "
        #    if self.rdf_superclass is None:
        #        query += " ?uri a {0} .".format(self.rdf_class)
        #    else:
        #        query += " ?uri a ?type ."
        #        query += " ?type rdfs:subClassOf* {0} .".format(self.rdf_superclass)
        #    for clause in zip(self.required, required):
        #        query += " ?uri {0} {1} . ".format(clause[0], clause[1])
        #    if noptional > 0:
        #        query += " OPTIONAL { "
        #        for clause in zip(optionals, optional):
        #            query += " ?uri {0} {1} . ".format(clause[0], clause[1])
        #        query += " } "
        #    query += " } ORDER BY ?uri "
        #    curr = None
        #    next = None
        #    for row in Graph.graph.query(query):
        #        next = row[0] # the uri
        #        if next != curr:
        #            mdict = Multidict(next)
        #            if curr is not None:
        #                yield cls(properties=mdict)
        #            curr = next
        #        # add each var to mdict
        #        for key,val in zip(required+optional,row):
        #            mdict.add(key, [val])
        #    yield cls(properties=mdict) # the last one
        #
        # or easier still: since we don't actually enforce that certain properties are
        # required, make everything optional:

        # if filters has uris/rdflib identifiers, then to convert to str they need < >
        # around them, but if they are a string like 'ddict:someThing' then they should
        # stay as they are:
        as_str = lambda x: "<{0}>".format(str(x)) if isinstance(x, Identifier) else x
        logging.debug("filters has: {0}".format(filters))
        preds = [k for k in cls.getters.keys()]  # impose an order
        qvars = ['?v{:d}'.format(i) for i in range(len(preds))]
        query = "SELECT ?uri {0} WHERE {{ ".format(' '.join(qvars))
        if not cls.rdf_superclass:  # rdf_superclass defaults to '', so test truthiness
            query += "?uri a {0} . ".format(cls.rdf_class)
        else:
            query += "?uri a ?type . "
            query += "?type rdfs:subClassOf* {0} . ".format(cls.rdf_superclass)
        for pred, var in zip(preds, qvars):
            if pred in filters:
                #query += "?uri {0} {1} . ".format(pred,str(filters[pred]))
                query += "?uri {0} {1} . ".format(pred, as_str(filters[pred]))
            else:
                query += "OPTIONAL {{ ?uri {0} {1} . }} ".format(pred, var)
        query += "} ORDER BY ?uri "
        logging.debug("query is: {0}".format(query))
        curr_uri = None
        next_uri = None
        mdict: MultiDict = None
        #for row in graph.Graph.the_graph.query(query):
        for row in graph.query(query):
            logging.debug("found {0}".format(str(row)))
            next_uri = row[0]  # the uri
            if next_uri != curr_uri:
                if curr_uri is not None:
                    logging.debug("making a {0} with props {1}".format(cls.__name__, str(mdict)))
                    yield cls(properties=mdict)
                mdict = MultiDict(uri=[next_uri])
                curr_uri = next_uri
            # add each var to mdict
            for key, val in zip(preds, row[1:]):
                mdict.add(key, val)
        if mdict is not None:
            logging.debug("making a {0} with props {1}".format(cls.__name__, str(mdict)))
            yield cls(properties=mdict)  # the last one
        else:
            logging.debug("no nodes of type {0} found".format(cls.__name__))
        #
        #if cls.finder_query is None:
        #    return
        #for row in Graph.graph.query(cls.finder_query):
        #    logging.info("found row:" + str(row))
        #    # each thing in the row needs to be a list for MultiDict:
        #    lists = [ [r] for r in row ]
        #    props = dict(zip(cls.finder_fields, lists))
        #    logging.debug("making a {0} with props {1}".format(cls.__name__, str(props)))
        #    yield cls(properties=props)

    def get_values(self, predicate: str,
                   context: Optional[Context] = None) -> PropertyValues:
        """ return a set of values for a property """
        if context is None:
            context = Context(predicate=predicate)
        logging.debug("looking for {0} in {1}".format(predicate, str(self.properties)))
        props = self.properties.get(predicate)
        if len(props) == 0:
            logging.debug("calling a getter for {0}".format(predicate))
            getter = getattr(self, self.getters[predicate])
            logging.debug("got getter {0}, context {1}".format(getter, context))
            generator = (v for v in getter(context))
            self.properties.add(predicate, *generator)
            logging.debug("now {0} has: {1}".format(predicate, str(self.properties[predicate])))
        return self.properties[predicate]

    def get_one_value(self, predicate: str, context: Context = None) -> PropertyValue:
        values = self.get_values(predicate, context)
        if values is None or len(values) == 0:
            return None
        else:
            return values.pop()

    #def label(self, context:Context=None) -> str:
    @property
    def label(self) -> str:
        """ when asking questions of the user, hinting at who is asking is
            helpful. If the label should have more to it than the content of
            the label property, then the subclass should override this (eg,
            the subjecttype label is from 'skos:prefLabel', but the logset
            label is "this logset {dct:title}")
        """
        #return self.properties.one(self.label_property, 'this')
        #return self.get_one_value(self.label_property, context) or 'this'
        # Note: this should not trigger getters, so we use self.properties
        # not self.get_one_value:
        #self._label = self.get_one_value(self.label_property) or self.label_alternate
        if self._label is None:
            candidates = self.properties[self.label_property]
            if len(candidates) > 0:
                self._label = candidates.pop()
            else:
                self._label = self.label_alternate
        return self._label

    def __str__(self):
        # note that this should not trigger getters, so we use _uri not uri
        return "{0}: {1}".format(self.label, str(self._uri))

    @property
    def uri(self):
        # many subclasses will override this to lazily set a uri based on
        # the value of a property
        return self._uri

    @uri.setter
    def uri(self, value):
        self._uri = value

    def add_to_graph(self, context: Context = None):
        if self._in_graph:
            logging.debug("already in graph, skipping")
            return
        if context is None:
            context = Context()
        # describe my properties first, so subclasses can use them to generate
        # a helpful uri if necessary.
        # I think that to avoid loops we need to do all of the asking (the user)
        # before doing any of the adding to graph:
        triples = []
        context.push(node=self)
        for predicate in self.properties:
            context.push(predicate=predicate)
            # need to convert string eg foaf:name to an actual uri for adding
            # to graph:
            logging.debug("calling Graph.geturi on {0}".format(predicate))
            pred_uri = graph.geturi(predicate)
            logging.debug("calling get_values with {0}, {1}".format(str(predicate), str(context)))
            for v in self.get_values(predicate, context):
                if isinstance(v, Identifier):
                    triples.append((self.uri, pred_uri, v))
                elif isinstance(v, Node):
                    triples.append((self.uri, pred_uri, v.uri))
                else:
                    # I'm pretty sure this should never happen
                    raise Exception("oh oh! " + str(v) + " ... " + str(type(v)))
            context.pop(('predicate', ))
        context.pop(('node', ))
        for triple in triples:
            logging.debug("adding triple {0}".format(triple))
            self.graph.add(triple)
        # finally, describe me:
        rdf = graph.getns('rdf')
        myclass = graph.geturi(self.rdf_class)
        logging.info("adding me to graph: {0}, {1}, {2}".format(
            self.uri, str(rdf.type), str(myclass)))
        self.graph.add((self.uri, rdf.type, myclass))
        self._in_graph = True

    # some common getters:
    def skip(self, context: Context) -> PropertyValues:
        """ if it's not there, don't include it """
        logging.debug("calling skip with {0}".format(str(context)))
        return set()

    def abort(self, context: Context) -> None:
        """ if it's not there, something is badly wrong """
        logging.debug("calling abort with {0}".format(str(context)))
        predicate = str(context['predicate'])
        msg = "{0} {1} missing predicate {2}".format(self.rdf_class, str(self.uri), predicate)
        raise Exception(msg)

    def ask(self, context: Context) -> PropertyValues:
        """ ask user for simple text descriptions (subclasses should override
            if needing something more complicated)
        """
        logging.debug("calling ask with {0}".format(str(context)))
        retval = set()
        predicate = context['predicate']
        logging.debug("asking about {0} (label property is {1})".format(
            str(predicate), str(self.label_property)))
        label = self.label_alternate if predicate == self.label_property else self.label
        prompt = self.prompts.get(predicate)
        if prompt is not None:
            insist = predicate in self.required_properties
            value = UI.ask(prompt.format(label), insist=insist)
            if value != '':
                #self._properties.add(pred, Literal(value))
                retval.add(Literal(value))
        return retval

    def truefalse(self, context: Context) -> PropertyValues:
        prompt = self.prompts.get(context['predicate'])
        choice = UI.truefalse(prompt)
        response = 'true' if choice else 'false'
        xsd = graph.getns('xsd')
        logging.debug("truefalse returning " + response)
        retval = set([Literal(response, datatype=xsd.boolean)])
        return retval

    def select(self, context: Context) -> PropertyValues:
        """ get a list of existing nodes of appropriate type (via the target
            class), and ask the user to select one, or create a new one
        """
        logging.debug("calling select with {0}".format(str(context)))
        predicate = context['predicate']
        label = self.label_alternate if predicate == self.label_property else self.label
        return self.select_from_known(context, label, multi=False)

    #@classmethod
    def multi_select(self, context: Context) -> PropertyValues:
        logging.debug("calling multi_select with {0}".format(str(context)))
        predicate = context['predicate']
        label = self.label_alternate if predicate == self.label_property else self.label
        return self.select_from_known(context, label)

    @classmethod
    def select_from_known(cls, context: Context = Context(), label=None,
                          multi=True, allow_create=True,
                          filters: Dict[str, str] = dict()) -> PropertyValues:
        """ classmethod select getter ..."""
        retval = set()
        predicate = context.get('predicate')
        target_cls = cls.targets.get(predicate, cls)
        known = list(target_cls.known(filters))
        if label is None:
            label = target_cls.__name__
        if multi:
            default_prompt = "Please select one or more {0} "
            default_prompt += "(space-separated{1} or empty when done) "
            #ui_method = UI.multi_select
        else:
            default_prompt = "Please select a {0}{1}"
            #ui_method = UI.select
        prompt_new = ", or (n)ew " if allow_create else ""
        additional = ['n'] if allow_create else []
        prompt = (context.get('prompt') or cls.prompts.get(predicate)
                  or default_prompt).format(label, prompt_new)
        if multi:
            selection = UI.multi_select(prompt, known, *additional)
        else:
            selection = [UI.select(prompt, known, *additional)]
        while True:
            # loop so user can create multiple new entries
            if len(selection) == 0:
                break
            # for multi, but with single it is same logic:
            for choice in selection:
                if allow_create and str(choice).lower() == 'n':
                    # now it gets giddyingly recursive, make a new target_cls Node
                    # and add that:
                    classname = target_cls.__name__
                    label = UI.ask("please give the new {0} a label: ".format(classname))
                    props = {target_cls.label_property: [label]}
                    new = target_cls(properties=props)
                    new.add_to_graph(context)
                    uri = new.uri
                else:
                    logging.info(f"got choice {choice} from selection {selection}")
                    #obj = known[choice]
                    obj = choice
                    uri = obj.uri
                retval.add(uri)
            selection = UI.multi_select("more? ", known, 'n') if multi else []
            #selection = ui_method("more? ", known, 'n') if multi else []
        return retval

    def make_uri(self, prop, namespace):
        """ make a candidate uri that is human-readable if possible based on
            the value of a selected property. If that property is not set, or
            the uri would clash with one that already exists, add some random
            characters
        """
        # make an rdf-friendly name:
        try:
            name = re.sub(r'^[^A-Za-z]+|\W', '', self.properties.one(prop))
        except KeyError:
            name = ran_str(8)
        # does it exist already? keep appending random characters while the
        # candidate uri already has triples in the graph:
        while next(self.graph.predicate_objects(namespace[name]), None) is not None:
            name = '{0}_{1}'.format(name, ran_str(4))
        #if len(name)==0:
        #    name = ran_str(8)
        #else:
        #    # does it exist already?
        #    logging.info("making a uri in namespace {0}".format(namespace))
        #    for existing in self.graph.predicate_objects(namespace[name]):
        #        # yep, better choose a new name
        #        name = '{0}_{1}'.format(name, ran_str(4))
        #        break
        #logging.info("makeuri returning {0}".format(name))
        return namespace[name]
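# --- subclass sketch (not part of the original source) ---
# A minimal illustration of how a Node subclass wires up the class variables the
# base class above expects (rdf_class, getters, prompts, label_property). The
# class name, predicate strings and prompt text are hypothetical examples; the
# getter names 'ask' and 'skip' are the common getters defined on Node above.
class Contact(Node):
    rdf_class = 'vcard:Individual'
    getters = {
        'vcard:fn': 'ask',         # ask the user for a full name
        'vcard:hasEmail': 'skip',  # omit the triple if nothing provides it
    }
    required_properties = set(['vcard:fn'])
    prompts = {'vcard:fn': 'Give {0} a full name: '}
    label_property = 'vcard:fn'
    label_alternate = 'this contact'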
def known(cls, filters: Dict[str, str] = dict()):
    """ generator-constructor over known nodes of a given type: """
    # TODO instead of sampling property values, use "order by" and a double
    # loop to actually get the full set of properties for a uri
    # (the general finder query is like:
    #    select ?uri <other fields> <optional fields> where {
    #        ?uri a <self.rdf_class> .
    #        ?uri <other predicate> <other variable> .
    #        optional {
    #            ?uri <other predicate> <other variable> .
    #        }
    #    } order by ?uri
    # then make a multi-dict of the properties
    # so:
    #  - first add a class var: required: list of required properties
    #
    #    # list of variables:
    #    nrequired = len(self.required)
    #    optionals = [ key for key in self.getters if key not in self.required ]
    #    ntotal = nrequired + len(optionals)
    #    required = [ '?v{:d}'.format(i) for i in range(nrequired) ]
    #    optional = [ '?v{:d}'.format(i) for i in range(nrequired,ntotal) ]
    #    query = "SELECT ?uri " + ' '.join(self.required) + ' '.join(optionals)
    #    query += " WHERE { "
    #    if self.rdf_superclass is None:
    #        query += " ?uri a {0} .".format(self.rdf_class)
    #    else:
    #        query += " ?uri a ?type ."
    #        query += " ?type rdfs:subClassOf* {0} .".format(self.rdf_superclass)
    #    for clause in zip(self.required, required):
    #        query += " ?uri {0} {1} . ".format(clause[0], clause[1])
    #    if noptional > 0:
    #        query += " OPTIONAL { "
    #        for clause in zip(optionals, optional):
    #            query += " ?uri {0} {1} . ".format(clause[0], clause[1])
    #        query += " } "
    #    query += " } ORDER BY ?uri "
    #    curr = None
    #    next = None
    #    for row in Graph.graph.query(query):
    #        next = row[0] # the uri
    #        if next != curr:
    #            mdict = Multidict(next)
    #            if curr is not None:
    #                yield cls(properties=mdict)
    #            curr = next
    #        # add each var to mdict
    #        for key,val in zip(required+optional,row):
    #            mdict.add(key, [val])
    #    yield cls(properties=mdict) # the last one
    #
    # or easier still: since we don't actually enforce that certain properties are
    # required, make everything optional:

    # if filters has uris/rdflib identifiers, then to convert to str they need < >
    # around them, but if they are a string like 'ddict:someThing' then they should
    # stay as they are:
    as_str = lambda x: "<{0}>".format(str(x)) if isinstance(x, Identifier) else x
    logging.debug("filters has: {0}".format(filters))
    preds = [k for k in cls.getters.keys()]  # impose an order
    qvars = ['?v{:d}'.format(i) for i in range(len(preds))]
    query = "SELECT ?uri {0} WHERE {{ ".format(' '.join(qvars))
    if not cls.rdf_superclass:  # rdf_superclass defaults to '', so test truthiness
        query += "?uri a {0} . ".format(cls.rdf_class)
    else:
        query += "?uri a ?type . "
        query += "?type rdfs:subClassOf* {0} . ".format(cls.rdf_superclass)
    for pred, var in zip(preds, qvars):
        if pred in filters:
            #query += "?uri {0} {1} . ".format(pred,str(filters[pred]))
            query += "?uri {0} {1} . ".format(pred, as_str(filters[pred]))
        else:
            query += "OPTIONAL {{ ?uri {0} {1} . }} ".format(pred, var)
    query += "} ORDER BY ?uri "
    logging.debug("query is: {0}".format(query))
    curr_uri = None
    next_uri = None
    mdict: MultiDict = None
    #for row in graph.Graph.the_graph.query(query):
    for row in graph.query(query):
        logging.debug("found {0}".format(str(row)))
        next_uri = row[0]  # the uri
        if next_uri != curr_uri:
            if curr_uri is not None:
                logging.debug("making a {0} with props {1}".format(cls.__name__, str(mdict)))
                yield cls(properties=mdict)
            mdict = MultiDict(uri=[next_uri])
            curr_uri = next_uri
        # add each var to mdict
        for key, val in zip(preds, row[1:]):
            mdict.add(key, val)
    if mdict is not None:
        logging.debug("making a {0} with props {1}".format(cls.__name__, str(mdict)))
        yield cls(properties=mdict)  # the last one
    else:
        logging.debug("no nodes of type {0} found".format(cls.__name__))
class SolrQuery(object):
    separators = {}

    def __init__(self, connections=None, index=None):
        self.params = MultiDict()
        self.connections = connections
        self.conn = self.connections.get('main', None)
        self.index = index
        self.params_joined = False

    def use_index(self, index):
        self.index = index

    def handle_row(self, row):
        return self.index(row)

    def join_params(self):
        return MultiDict(
            (k, self.separators[k].join([unicode(v) for v in val])
                if k in self.separators else unicode(val))
            for k, val in self.params.iteritems())

    def execute(self, handler=handle_row, conn='main'):
        if not self.params_joined:
            self.params = self.join_params()
            self.params_joined = True
        return SolrResult(self._execute_search(), handler=partial(handler, self))

    def query(self, query):
        self.params.add('q', query)
        return self

    def filter(self, *filters):
        [self.params.add('fq', filter) for filter in filters]
        return self

    def facet(self, field, method='enum', missing_facet=NotGiven, sort=NotGiven, min_count=1):
        self.params['facet'] = 'true'
        field_name = SolrQuery._name_or_val(field)
        self.params.add('facet.field', field_name)
        self.params['f.%s.facet.method' % field_name] = method
        if sort != NotGiven:
            self.params['f.%s.facet.sort' % field_name] = sort
        if missing_facet != NotGiven:
            self.params['f.%s.facet.missing' % field_name] = missing_facet
        self.params['f.%s.facet.mincount' % field_name] = min_count
        return self

    def facet_date(self, date_field, start_date=NotGiven, end_date=NotGiven,
                   gap=NotGiven, hard_end=NotGiven, other=NotGiven):
        self.params['facet'] = 'true'
        field_name = SolrQuery._name_or_val(date_field)
        self.params.add('facet.date', field_name)
        if start_date != NotGiven:
            self.params['f.%s.facet.date.start' % field_name] = start_date
        if end_date != NotGiven:
            self.params['f.%s.facet.date.end' % field_name] = end_date
        if gap != NotGiven:
            self.params['f.%s.facet.date.gap' % field_name] = gap
        if hard_end != NotGiven:
            self.params['f.%s.facet.date.hardend' % field_name] = hard_end
        if other != NotGiven:
            self.params['f.%s.facet.date.other' % field_name] = other
        return self

    def facet_query(self, *queries):
        self.params['facet'] = 'true'
        [self.params.add('facet.query', query) for query in queries]
        return self

    def facet_prefix(self, field, prefix):
        self.facet(field)
        self.params['facet.prefix'] = prefix
        return self

    def default_operator(self, op):
        self.params['q.op'] = op
        return self

    def default_field(self, field):
        self.params['df'] = field
        return self

    def def_type(self, type):
        self.params['defType'] = type
        return self

    def tie(self, tie):
        self.params['tie'] = tie
        return self

    def default_query(self, query):
        self.params['q.alt'] = query
        return self

    @join_with_separator(separators, 'mm', ' ')
    def min_should_match(self, *args):
        self._add_items_to_key('mm', *args)
        return self

    def query_type(self, query_type):
        self.params['qt'] = query_type
        return self

    def paginate(self, limit, offset):
        # 'rows' is the page size and 'start' the offset (see limit()/offset() below);
        # the original assignments were reversed
        self.params['start'] = offset
        self.params['rows'] = limit
        return self

    def limit(self, limit):
        self.params['rows'] = limit
        return self

    def offset(self, offset):
        self.params['start'] = offset
        return self

    @join_with_separator(separators, 'bf', ' ')
    def boost(self, *fields):
        self._add_items_to_key('bf', *fields)
        return self

    @join_with_separator(separators, 'pf', ' ')
    def phrase_boost(self, *fields):
        self._add_items_to_key('pf', *fields)
        return self

    def query_slop(self, slop):
        self.params['qs'] = slop
        return self

    def phrase_slop(self, slop):
        self.params['ps'] = slop
        return self

    def boost_query(self, query):
        self.params.add('bq', query)
        return self  # return self for chaining, consistent with the other builder methods

    @staticmethod
    def _name_or_val(arg):
        return arg.solr_field_name if hasattr(arg, 'solr_field_name') else arg

    # Emulating defaultdict but on MultiDict
    def _add_items_to_key(self, param, *args):
        list_args = self.params.get(param, [])
        list_args.extend([SolrQuery._name_or_val(arg) for arg in args])
        self.params[param] = list_args

    @join_with_separator(separators, 'qf', ' ')
    def queried_fields(self, *fields):
        self._add_items_to_key('qf', *fields)
        return self

    def dismax_of(self, *fields):
        # Wrapper for queries using the dismax query parser
        self.def_type('dismax')
        self.queried_fields(*fields)
        self.tie(0.1)
        return self

    @join_with_separator(separators, 'fl', ',')
    def fields(self, *args):
        self._add_items_to_key('fl', *args)
        return self

    @join_with_separator(separators, 'sort', ',')
    def sort_by(self, *args):
        self._add_items_to_key('sort', *args)
        return self

    def indent(self, indent):
        self.params['indent'] = indent
        return self

    def debug(self, debug):
        self.params['debugQuery'] = debug
        return self

    def echo_handler(self, echo):
        self.params['echoHandler'] = echo
        return self

    def echo_params(self, echo):
        self.params['echoParams'] = echo
        return self

    def _execute_search(self):
        return self.conn.search(self)
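# --- usage sketch (not part of the original source) ---
# A short illustration of the fluent interface above. It assumes a `connections`
# mapping with a 'main' Solr connection and an `index` callable exist in the real
# application; the field names and query strings are made up for illustration.
sq = (SolrQuery(connections=connections, index=index)
      .query('title:logs')
      .filter('author:smith')
      .facet('author')
      .limit(10)
      .offset(20))
result = sq.execute()   # joins params, runs the search, wraps rows in SolrResult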
def __init__(self, connections=None, index=None):
    self.params = MultiDict()
    self.connections = connections
    self.conn = self.connections.get('main', None)
    self.index = index
    self.params_joined = False
def __init__(self, properties: MultiDict = MultiDict()) -> None:
    super().__init__(properties)
    self._fmtInfo = None  # MultiDict()
    self._filePatterns = None
    self._logFormatType = None
class LogSeries(Node):
    rdf_class = "logset:LogSeries"

    getters = {
        'rdfs:label': 'ask',
        'logset:logFormatType': 'select',
        'logset:infoType': 'select',
        'logset:subjectType': 'select',
        'logset:logFormatInfo': 'skip'  # need to work this out
    }

    required_properties = set([
        'rdfs:label', 'logset:logFormatType', 'logset:infoType',
        'logset:subjectType'
    ])

    prompts = {
        'rdfs:label': 'Give {0} a short identifying label: ',
        'logset:logFormatType': 'what logformattype is {0}? (TODO get a better question!) ',
        'logset:infoType': 'what type of information does {0} hold? ',
        'logset:subjectType': 'what type of system/component is {0} about? '
    }

    targets = {
        'logset:infoType': InfoType,
        'logset:subjectType': SubjectType,
        'logset:logFormatType': LogFormatType
    }

    #finder_query = ''' SELECT ?uri (SAMPLE(?label) as ?label)
    #                               (SAMPLE(?fmttype) as ?fmttype)
    #                               (SAMPLE(?infotype) as ?infotype)
    #                               (SAMPLE(?subjtype) as ?subjtype) WHERE {
    #        ?uri a logset:LogSeries .
    #        ?uri rdfs:label ?label .
    #        ?uri logset:logFormatType ?fmttype .
    #        ?uri logset:infoType ?infotype .
    #        ?uri logset:subjectType ?subjtype .
    #    } GROUP BY ?uri
    #    '''
    #finder_fields = [ 'uri', 'rdfs:label', 'logset:logFormatType',
    #                  'logset:infoType', 'logset:subjectType' ]

    label_property: str = 'rdfs:label'
    label_alternate: str = 'log series'

    def __init__(self, properties: MultiDict = MultiDict()) -> None:
        super().__init__(properties)
        self._fmtInfo = None  # MultiDict()
        self._filePatterns = None
        self._logFormatType = None

    @property
    def fmtInfo(self) -> MultiDict:
        if self._fmtInfo is None:
            self._fmtInfo = MultiDict()
            for prop in self.get_values('logset:logFormatInfo'):
                key, sep, val = prop.partition('=')
                self._fmtInfo.add(key, val)
        return self._fmtInfo
        #logging.info("prop is {0}".format(prop))
        #logging.info("values is {0}".format())
        #for value in prop:
        #    key,sep,val = prop.partition('=')
        #    logging.info("found key {0} and val {1}".format(key,val))
        #    self._fmtInfo.add(key, val)

    @property
    def filePatterns(self) -> List:
        """ return a list, sorted by pattern length (as a proxy for "most
            specific to least specific") of filename patterns that generally
            match this logseries
        """
        if self._filePatterns is None:
            pats = set()
            tags = re.compile(r'(<[\w:]+>)')
            for pattern in self.fmtInfo['filepattern']:
                # convert from a tag-based-pattern to a regex:
                parts = tags.split(pattern)
                # every 2nd part will be a tag, replace it with corresponding pattern
                tagpatterns = [tagPattern(t) for t in parts[1::2]]
                regex_p = ''.join(sum(zip(parts[:-1:2], tagpatterns), ())) + parts[-1]
                pats.add(regex_p)
            self._filePatterns = [
                re.compile(p) for p in sorted(pats, key=len, reverse=True)
            ]
        return self._filePatterns

    @property
    def logFormatType(self):
        if self._logFormatType is None:
            handlerName = self.get_one_value('logset:logFormatType')
            #self._logFormatType = LogFormatType.handlers[handlerName]
        return self._logFormatType

    # logset:logFormatInfo "filepattern=console-<date:YYYYMMDD>$" ;
    # logset:logFormatInfo "filepattern=console$" ;
    # logset:logFormatInfo "ts_words=0" ;
    # logset:logFormatInfo "part_word=1" ;

    @property
    def uri(self):
        # lazily find uri so there is a chance of deriving a readable one
        if self._uri is None:
            ns = self._namespace or localdict_ns()
            self._uri = self.make_uri(self.label_property, ns)
        return self._uri

    def identify_subjects(self, subject_list: List[URIRef]) -> List[URIRef]:
        """ given a list of high-level subjects, look for specific subjects
            that are a partOf one of these and an isSpecific of the
            subjecttypes relevant to this logseries. Returns a list of uris
        """
        as_str = lambda x: "<{0}>".format(str(x)) if isinstance(x, Identifier) else x
        subjtypes = ', '.join(
            [as_str(uri) for uri in self.get_values('logset:subjectType')])
        #logging.debug("my subtypes are: {0}".format(subjtypes))
        subjects = []
        for subj in subject_list:
            # find subjects whose subjecttype corresponds with subjtype
            # and that are partOf something in the subjectlist
            q = ''' SELECT ?uri WHERE {{
                        ?uri a logset:Subject .
                        ?uri logset:isSpecific ?type .
                        ?uri logset:partOf* {0} .
                        FILTER ( ?type in ({1}) ) .
                    }}
                '''.format(as_str(subj), subjtypes)
            subjects += [row[0] for row in self.graph.query(q)]
            #logging.debug("query is: {0}".format(q))
        return subjects
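# --- illustration (not part of the original source) ---
# A small, self-contained sketch of the tag-to-regex conversion performed by
# filePatterns above. tagPattern is referenced but not defined in this snippet,
# so the stand-in below (mapping a "<date:YYYYMMDD>" tag to an eight-digit
# pattern) is a hypothetical example of what it might return.
import re

def tagPattern(tag):
    # hypothetical stand-in: a YYYYMMDD date tag becomes eight digits
    return r'\d{8}' if tag == '<date:YYYYMMDD>' else r'.*'

tags = re.compile(r'(<[\w:]+>)')
pattern = 'console-<date:YYYYMMDD>$'
parts = tags.split(pattern)                         # ['console-', '<date:YYYYMMDD>', '$']
tagpatterns = [tagPattern(t) for t in parts[1::2]]  # ['\\d{8}']
regex_p = ''.join(sum(zip(parts[:-1:2], tagpatterns), ())) + parts[-1]
print(regex_p)                                      # console-\d{8}$
assert re.compile(regex_p).match('console-20180617')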