def WMapToHtml(theMap, propData):
    """Write one HTML table row per subject URL of ``theMap``.

    ``theMap`` maps a subject URL to an iterable of ``(property, object)``
    pairs. Each subject gives a ``<tr>`` made of a link to the subject and of
    a nested table with one row per pair: literal objects are written as plain
    text, the other ones as links. The markup is emitted with ``WrtAsUtf`` and
    nothing is returned.

    ``propData`` is accepted but not read by this function.
    """
    sys.stderr.write("WMapToHtml len=%d\n" % len(theMap))

    def _display_text(url_node):
        # The parsed URL is a triple: only its first item is displayed.
        text, _graph_class, _entity_id = lib_naming.ParseEntityUri(
            lib_util.urllib_unquote(url_node))
        return text

    for subject_url in theMap:
        subject_text = _display_text(subject_url)
        subject_cell = "<td valign='top'><a href='%s'>%s</a></td>" % (
            str(subject_url), subject_text)

        WrtAsUtf("<tr>")
        WrtAsUtf(subject_cell)
        WrtAsUtf("<td>")
        WrtAsUtf("<table>")

        for one_property, object_node in theMap[subject_url]:
            WrtAsUtf("<tr>")
            short_name = lib_exports.PropToShortPropNam(one_property)
            WrtAsUtf("<td><i>%s</i></td>" % short_name)

            if lib_kbase.IsLiteral(object_node):
                object_cell = "<td>%s</td>" % (str(object_node))
            else:
                object_text = _display_text(object_node)
                object_cell = "<td><a href='%s'>%s</a></td>" % (
                    str(object_node), object_text)
            WrtAsUtf(object_cell)
            WrtAsUtf("</tr>")

        WrtAsUtf("</table>")
        WrtAsUtf("</td>")
        WrtAsUtf("</tr>")
def FieldsToHtmlVertical(grph, the_fields):
    """Return the HTML cells of ``the_fields``, indexed by their sorted rank.

    ``the_fields`` is an iterable of ``(key, value)`` pairs. The pairs are
    sorted, and the result is a dict whose keys are 0, 1, 2... and whose
    values are the cell texts, in that order.
    """
    # TODO: The sort must put at first, some specific keys.
    # For example, sources_top/nmap_run.py, the port number as an int (Not a string)
    # Also, filenames, case-sensitive or not.

    def _cell_of(field_key, field_val):
        # This should come first, but it does not so we prefix with "----". Hack !
        if field_key == pc.property_information:
            # Completely left-aligned. Col span is 2, approximate ratio.
            return "<td align='left' balign='left' colspan='2'>%s</td>" % (
                lib_exports.StrWithBr(field_val, 2))

        if IsFlatProperty(field_key):
            link_text = lib_exports.StrWithBr(
                lib_naming.ParseEntityUri(field_val)[0], 2)
            # The text of the link must be underlined.
            return '<td href="%s" align="left" colspan="2">%s</td>' % (
                field_val, lib_exports.DotUL(link_text))

        qualified_key = qname(field_key, grph)
        # A literal is displayed with its plain value, anything else as it is.
        if lib_kbase.IsLiteral(field_val):
            return FormatPair(qualified_key, field_val.value)
        return FormatPair(qualified_key, field_val)

    ordered_cells = (_cell_of(key, val) for (key, val) in sorted(the_fields))
    return dict(enumerate(ordered_cells))
def fields_to_html_vertical(grph, the_fields):
    """Build the vertical list of HTML cells displaying ``the_fields``.

    The ``(key, value)`` pairs of ``the_fields`` are processed in sorted
    order. The returned dict maps the rank of each pair (starting at 0) to
    the text of its cell.
    """
    # TODO: The sort must put at first, some specific keys.
    # For example, sources_top/nmap_run.py, the port number as an int (Not a string)
    # Also, filenames, case-sensitive or not.
    cells_by_rank = {}
    for rank, (prop_key, prop_val) in enumerate(sorted(the_fields)):
        # This should come first, but it does not so we prefix with "----". Hack !
        if prop_key == pc.property_information:
            # Completely left-aligned. Col span is 2, approximate ratio.
            wrapped_info = _str_with_br(prop_val, 2)
            one_cell = "<td align='left' balign='left' colspan='2'>%s</td>" % wrapped_info
        elif is_flat_property(prop_key):
            entity_text = lib_naming.ParseEntityUri(prop_val)[0]
            wrapped_text = _str_with_br(entity_text, 2)
            # The text of the link must be underlined.
            one_cell = '<td href="%s" align="left" colspan="2">%s</td>' % (
                prop_val, _dot_ul(wrapped_text))
        else:
            qualified_key = lib_kbase.qname(prop_key, grph)
            # A literal is displayed with its plain value, anything else as it is.
            displayed = prop_val.value if lib_kbase.IsLiteral(prop_val) else prop_val
            one_cell = _format_pair(qualified_key, displayed)
        cells_by_rank[rank] = one_cell
    return cells_by_rank
def callback_grph_add(trpl, depth_call):
    """Append to ``list_scripts`` the object of a script triple, if it is not a literal.

    ``trpl`` is a ``(subject, predicate, object)`` triple. ``depth_call`` is
    accepted but not read by this function.
    """
    _subject, predicate, target = trpl
    is_script_link = predicate == pc.property_script
    # Directories of scripts are also labelled with the same predicate
    # although they are literals and not urls.
    if is_script_link and not lib_kbase.IsLiteral(target):
        list_scripts.append(target)
def WMapToHtml(theMap):
    """Generate the HTML table rows displaying the content of ``theMap``.

    ``theMap`` maps a subject URL to an iterable of ``(property, object)``
    pairs. For each subject, this yields the pieces of a ``<tr>`` made of a
    link to the subject and of a nested table with one row per pair: literal
    objects are displayed as plain text, the other ones as links.
    """
    DEBUG("WMapToHtml len=%d", len(theMap))

    def _display_text(url_node):
        # The parsed URL is a triple: only its first item is displayed.
        text, _graph_class, _entity_id = lib_naming.ParseEntityUri(
            lib_util.urllib_unquote(url_node))
        return text

    def _property_rows(pairs):
        for one_property, object_node in pairs:
            yield "<tr>"
            short_name = lib_exports.PropToShortPropNam(one_property)
            yield "<td><i>%s</i></td>" % short_name
            if lib_kbase.IsLiteral(object_node):
                yield "<td>%s</td>" % (str(object_node))
            else:
                object_text = _display_text(object_node)
                yield "<td><a href='%s'>%s</a></td>" % (str(object_node), object_text)
            yield "</tr>"

    for subject_url in theMap:
        subject_text = _display_text(subject_url)
        yield "<tr>"
        yield "<td valign='top'><a href='%s'>%s</a></td>" % (str(subject_url), subject_text)
        yield "<td>"
        yield "<table border=0>"
        for html_piece in _property_rows(theMap[subject_url]):
            yield html_piece
        yield "</table>"
        yield "</td>"
        yield "</tr>"
def w_map_to_html(the_map):
    """Generate the HTML table rows displaying the links stored in ``the_map``.

    ``the_map`` maps a subject URL to an iterable of ``(property, object)``
    pairs. For each subject, this yields the pieces of a ``<tr>`` made of a
    link to the subject and of a nested table with one row per pair: literal
    objects are displayed as plain text, the other ones as links.
    """
    logging.debug("w_map_to_html len=%d", len(the_map))

    def _display_text(url_node):
        # The parsed URL is a triple: only its first item is displayed.
        text, _graph_class, _entity_id = lib_naming.ParseEntityUri(
            lib_util.urllib_unquote(url_node))
        return text

    for subject_url, linked_pairs in ((key, the_map[key]) for key in the_map):
        subject_text = _display_text(subject_url)
        for opening_piece in (
                "<tr>",
                "<td valign='top'><a href='%s'>%s</a></td>" % (str(subject_url), subject_text),
                "<td>",
                "<table border=0>"):
            yield opening_piece

        for one_property, object_node in linked_pairs:
            yield "<tr>"
            short_name = lib_exports.PropToShortPropNam(one_property)
            yield "<td><i>%s</i></td>" % short_name
            if lib_kbase.IsLiteral(object_node):
                object_cell = "<td>%s</td>" % str(object_node)
            else:
                object_text = _display_text(object_node)
                object_cell = "<td><a href='%s'>%s</a></td>" % (str(object_node), object_text)
            yield object_cell
            yield "</tr>"

        for closing_piece in ("</table>", "</td>", "</tr>"):
            yield closing_piece
def HasUnivAlias(anObject):
    """Tell whether ``anObject`` is a non-literal node whose text contains an entity script name.

    Returns False for a literal. Otherwise returns True if the text contains
    "entity.py", "entity_wbem.py" or "entity_wmi.py", and False if not.
    """
    if lib_kbase.IsLiteral(anObject):
        return False

    entity_scripts = ("entity.py", "entity_wbem.py", "entity_wmi.py")
    return any(anObject.find(script_name) >= 0 for script_name in entity_scripts)
def CallbackGrphAdd( trpl, depthCall ):
    """Store in ``listScripts`` the object of a script triple, unless it is a literal.

    ``trpl`` is a ``(subject, predicate, object)`` triple. ``depthCall`` is
    accepted but not read by this function.
    """
    _subject, predicate, target = trpl

    # Only the triples which link to a script are of interest.
    if not (predicate == pc.property_script):
        return

    # Directories of scripts are also labelled with the same predicate
    # although they are literals and not urls.
    if lib_kbase.IsLiteral(target):
        return

    listScripts.append(target)
def _has_univ_alias(an_object):
    """Return True if ``an_object`` is not a literal and its text names an entity script.

    The entity scripts are "entity.py", "entity_wbem.py" and "entity_wmi.py".
    """
    if lib_kbase.IsLiteral(an_object):
        return False

    for script_name in ("entity.py", "entity_wbem.py", "entity_wmi.py"):
        if an_object.find(script_name) >= 0:
            return True
    return False
def _url_cleanup(url_node):
    """Return a copy of ``url_node`` whose text has some characters percent-encoded.

    Spaces, backslashes, square brackets and curly braces are replaced.
    A literal gives a literal node, anything else gives a URL node.
    """
    # Applied in this order. No replacement text contains a character
    # which is replaced by a later pair.
    encoded_chars = (
        (" ", "%20"),
        ("\\", "%5C"),
        ("[", "%5B"),
        ("]", "%5D"),
        ("{", "%7B"),
        ("}", "%7D"),
    )
    cleaned_text = str(url_node)
    for raw_char, encoded_char in encoded_chars:
        cleaned_text = cleaned_text.replace(raw_char, encoded_char)

    if lib_kbase.IsLiteral(url_node):
        return lib_kbase.MakeNodeLiteral(cleaned_text)
    return lib_kbase.MakeNodeUrl(cleaned_text)
def _url_cleanup(url_node):
    """Percent-encode the characters of a node which GraphDB rejects in URLs.

    GraphDB does not accept spaces and backslashes in URLs: with this
    cleanup, a graph can be uploaded in OntoText. Square brackets and curly
    braces are encoded too. A literal gives a ``rdflib.Literal``, anything
    else gives a ``rdflib.term.URIRef``.
    """
    # Applied in this order. No replacement text contains a character
    # which is replaced by a later pair.
    encoded_chars = (
        (" ", "%20"),
        ("\\", "%5C"),
        ("[", "%5B"),
        ("]", "%5D"),
        ("{", "%7B"),
        ("}", "%7D"),
    )
    cleaned_text = str(url_node)
    for raw_char, encoded_char in encoded_chars:
        cleaned_text = cleaned_text.replace(raw_char, encoded_char)

    node_factory = rdflib.Literal if lib_kbase.IsLiteral(url_node) else rdflib.term.URIRef
    return node_factory(cleaned_text)
def AddOntology(old_grph):
    """Return a new graph with the triples of ``old_grph`` plus ontology information.

    Each triple of ``old_grph`` is copied into a new graph, after a cleanup of
    the subject and of the object:

    * A ``pc.property_script`` triple becomes a ``lib_kbase.PredicateSeeAlso``
      triple. If the object is not a literal, "&mode=rdf" is appended to it.
      Nothing is added if the subject is a literal.
    * A ``pc.property_information`` triple becomes a
      ``lib_kbase.PredicateComment`` triple.
    * Any other triple is copied, and the classes and attributes of its nodes
      are collected in two maps which are finally passed to
      ``lib_kbase.CreateRdfsOntology`` with the new graph.

    ``old_grph`` must be iterable as triples and must support ``len()``,
    which is used by the final debug message.
    """
    DEBUG("AddOntology")
    map_classes = {}
    map_attributes = {}

    new_grph = lib_kbase.MakeGraph()

    def _define_class_in_ontology(url_node):
        """This takes the class from an Url and defines it in the RDF ontology.
        This returns the class name as a string, or None if there is no class."""
        (entity_label, class_name, entity_id) = lib_naming.ParseEntityUri(url_node)

        # This could be: ("http://the_host", "http://primhillcomputers.com/survol/____Information", "HTTP url")
        if not class_name:
            return None

        # TODO: Define base classes with rdfs:subClassOf / RDFS.subClassOf
        # "base_class" and "class_description' ???

        # A class name with the WMI namespace might be produced with this kind of URL:
        # "http://www.primhillcomputers.com/survol#root\CIMV2:CIM_Process"
        class_name = class_name.replace("\\", "%5C")

        # The class name cannot be empty here, because of the test above.
        if class_name not in map_classes:
            # Maybe this CIM class is not defined as an RDFS class.
            # This function might also filter duplicate and redundant insertions.
            lib_util.AppendClassSurvolOntology(class_name, map_classes, map_attributes)

        # The entity_id is a concatenation of CIM properties and define an unique object.
        # They are different of the triples, but might overlap.
        entity_id_dict = lib_util.SplitMoniker(entity_id)
        for predicate_key in entity_id_dict:
            if predicate_key not in map_attributes:
                # This function might also filter a duplicate and redundant insertion.
                lib_util.AppendPropertySurvolOntology(
                    predicate_key, "CIM key predicate %s" % predicate_key, class_name, None, map_attributes)

            # This value is explicitly added to the node.
            predicate_value = entity_id_dict[predicate_key]
            new_grph.add((url_node, lib_properties.MakeProp(predicate_key), lib_kbase.MakeNodeLiteral(predicate_value)))

        # This adds a triple specifying that this node belongs to this RDFS class.
        lib_kbase.AddNodeToRdfsClass(new_grph, url_node, class_name, entity_label)

        return class_name

    # This is needed for GraphDB which does not accept spaces and backslashes in URL.
    def _url_cleanup(url_node):
        url_as_str = str(url_node)
        url_as_str = url_as_str.replace(" ", "%20")
        url_as_str = url_as_str.replace("\\", "%5C")
        url_as_str = url_as_str.replace("[", "%5B")
        url_as_str = url_as_str.replace("]", "%5D")
        url_as_str = url_as_str.replace("{", "%7B")
        url_as_str = url_as_str.replace("}", "%7D")
        if lib_kbase.IsLiteral(url_node):
            url_node = lib_kbase.MakeNodeLiteral(url_as_str)
        else:
            url_node = lib_kbase.MakeNodeUrl(url_as_str)
        return url_node

    for node_subject, node_predicate, node_object in old_grph:
        node_subject = _url_cleanup(node_subject)
        node_object = _url_cleanup(node_object)
        if node_predicate == pc.property_script:
            # The subject might be a literal directory containing provider script files.
            if not lib_kbase.IsLiteral(node_subject):
                if lib_kbase.IsLiteral(node_object):
                    new_grph.add((node_subject, lib_kbase.PredicateSeeAlso, node_object))
                else:
                    str_object = str(node_object)
                    str_object_rdf = str_object + "&mode=rdf"
                    node_object_rdf = lib_kbase.MakeNodeUrl(str_object_rdf)
                    new_grph.add((node_subject, lib_kbase.PredicateSeeAlso, node_object_rdf))
        elif node_predicate == pc.property_information:
            new_grph.add((node_subject, lib_kbase.PredicateComment, node_object))
        else:
            class_subject = _define_class_in_ontology(node_subject)
            if not lib_kbase.IsLiteral(node_object):
                class_object = _define_class_in_ontology(node_object)
            else:
                class_object = None

            name_predicate, dict_predicate = lib_exports.PropToShortPropNamAndDict(node_predicate)
            # The description is optional: the key might be missing,
            # or there might be no dictionary at all.
            try:
                description_predicate = dict_predicate["property_description"]
            except (KeyError, TypeError):
                description_predicate = ""

            if class_subject and (name_predicate not in map_attributes):
                # This function might also filter a duplicate and redundant insertion.
                lib_util.AppendPropertySurvolOntology(
                    name_predicate, description_predicate, class_subject, class_object, map_attributes)

            # TODO: Add the property type. Experimental because we know the class of the object, or if this is a literal.
            new_grph.add((node_subject, node_predicate, node_object))

    lib_kbase.CreateRdfsOntology(map_classes, map_attributes, new_grph)
    # The first number is the size of the input graph, the last one is the size of the result.
    DEBUG(
        "AddOntology len(grph)=%d map_classes=%d map_attributes=%d len(new_grph)=%d",
        len(old_grph), len(map_classes), len(map_attributes), len(new_grph))

    return new_grph
def _add_ontology(old_grph):
    """
    This receives a triplestore containing only the information from scripts.
    This adds the classes and the properties information,
    in order to send it to an external database or system. This returns a new graph.

    Each triple of ``old_grph`` is processed after a cleanup of its subject and object:

    * A ``pc.property_script`` triple becomes a ``lib_kbase.PredicateSeeAlso``
      triple. A literal object is stored as it is, with a warning. Otherwise,
      "&mode=rdf" is appended to the object. If the subject is a literal,
      only a warning is logged.
    * A ``pc.property_information`` triple becomes a
      ``lib_kbase.PredicateComment`` triple.
    * Any other triple is copied, and the classes and attributes of its nodes
      are collected, then converted by
      ``lib_ontology_tools._convert_ontology_to_rdf`` into the new graph.

    ``old_grph`` must support ``len()``, which is used by the final debug message.
    """

    map_classes = {}
    map_attributes = {}

    new_grph = rdflib.Graph()

    def _define_class_in_ontology(url_node):
        """This takes the class from an Url and defines it in the RDF ontology.
        This returns the class name as a string, or None if there is no class."""
        entity_label, class_name, entity_id = lib_naming.ParseEntityUri(url_node)

        # This could be: ("http://the_host", "http://primhillcomputers.com/survol/____Information", "HTTP url")
        if not class_name:
            return None

        # TODO: Define base classes with rdfs:subClassOf / RDFS.subClassOf
        # "base_class" and "class_description' ???

        # A class name with the WMI namespace might be produced with this kind of URL:
        # "http://www.primhillcomputers.com/survol#root\CIMV2:CIM_Process"
        class_name = class_name.replace("\\", "%5C")

        assert isinstance(class_name, str)
        # The class name cannot be empty here, because of the test above.
        if class_name not in map_classes:
            # Maybe this CIM class is not defined as an RDFS class.
            # This function might also filter duplicate and redundant insertions.
            lib_util.AppendClassSurvolOntology(class_name, map_classes, map_attributes)

        # The entity_id is a concatenation of CIM property-value pairs, and define an unique object.
        # They are different of the triples, but might overlap.
        entity_id_dict = lib_util.SplitMoniker(entity_id)
        for predicate_key in entity_id_dict:
            if predicate_key not in map_attributes:
                # This function might also filter a duplicate and redundant insertion.
                lib_util.append_property_survol_ontology(
                    predicate_key, "CIM key predicate %s" % predicate_key, class_name, None, map_attributes)

            # This value is explicitly added to the node.
            predicate_value = entity_id_dict[predicate_key]
            new_grph.add((url_node, lib_properties.MakeProp(predicate_key), rdflib.Literal(predicate_value)))

        # This adds a triple specifying that this node belongs to this RDFS class.
        lib_kbase.add_node_to_rdfs_class(new_grph, url_node, class_name, entity_label)

        return class_name

    def _url_cleanup(url_node):
        """This is needed for GraphDB which does not accept spaces and backslashes in URL.
        With this, a graph can be uploaded in OntoText."""
        url_as_str = str(url_node)
        url_as_str = url_as_str.replace(" ", "%20")
        url_as_str = url_as_str.replace("\\", "%5C")
        url_as_str = url_as_str.replace("[", "%5B")
        url_as_str = url_as_str.replace("]", "%5D")
        url_as_str = url_as_str.replace("{", "%7B")
        url_as_str = url_as_str.replace("}", "%7D")
        if lib_kbase.IsLiteral(url_node):
            url_node = rdflib.Literal(url_as_str)
        else:
            url_node = rdflib.term.URIRef(url_as_str)
        return url_node

    for node_subject, node_predicate, node_object in old_grph:
        node_subject = _url_cleanup(node_subject)
        node_object = _url_cleanup(node_object)
        if node_predicate == pc.property_script:
            # The subject might be a literal directory containing provider script files.
            if not lib_kbase.IsLiteral(node_subject):
                # The object might be a literal title of a sublist of scripts.
                if lib_kbase.IsLiteral(node_object):
                    # Things like: subject='Languages', object='CDB%20Windows%20debugger'
                    logging.warning("RDF object %s is a literal. Subject=%s", node_object, node_subject)
                    new_grph.add((node_subject, lib_kbase.PredicateSeeAlso, node_object))
                else:
                    # Only a URL can receive the CGI argument selecting the RDF output.
                    # FIXME: Maybe it already contains a mode, maybe it has no CGI args yet.
                    str_object = str(node_object)
                    str_object_rdf = str_object + "&mode=rdf"
                    node_object_rdf = rdflib.term.URIRef(str_object_rdf)
                    new_grph.add((node_subject, lib_kbase.PredicateSeeAlso, node_object_rdf))
            else:
                # Things like: subject='Languages', object='Python%20processes'
                logging.warning("RDF subject %s is a literal. Object=%s", node_subject, node_object)
        elif node_predicate == pc.property_information:
            new_grph.add((node_subject, lib_kbase.PredicateComment, node_object))
        else:
            class_subject = _define_class_in_ontology(node_subject)
            if not lib_kbase.IsLiteral(node_object):
                class_object = _define_class_in_ontology(node_object)
            else:
                class_object = None

            name_predicate, dict_predicate = lib_exports.PropToShortPropNamAndDict(node_predicate)
            # The description is optional: the key might be missing,
            # or there might be no dictionary at all.
            try:
                description_predicate = dict_predicate["property_description"]
            except (KeyError, TypeError):
                description_predicate = ""

            if class_subject and (name_predicate not in map_attributes):
                # This function might also filter a duplicate and redundant insertion.
                assert isinstance(class_subject, str)
                lib_util.append_property_survol_ontology(
                    name_predicate, description_predicate, class_subject, class_object, map_attributes)

            # TODO: Add the property type. Experimental because we know the class of the object, or if it is a literal.
            new_grph.add((node_subject, node_predicate, node_object))

    lib_ontology_tools._convert_ontology_to_rdf(map_classes, map_attributes, new_grph)
    # The first number is the size of the input graph, the last one is the size of the result.
    logging.debug(
        "len(grph)=%d map_classes=%d map_attributes=%d len(new_grph)=%d",
        len(old_grph), len(map_classes), len(map_attributes), len(new_grph))

    return new_grph
def _process_collapsed_properties(prop_nam):
    """Write one table per subject whose objects are collapsed under ``prop_nam``.

    For each ``(subject, objects)`` entry stored for ``prop_nam`` in
    ``dict_props_collapsed_subjects_to_object_lists``, this writes to
    ``stream`` an edge from the subject to the table, then the table itself
    with ``lib_patterns.WritePatterned``. The table has one line per object.
    Its first column identifies the object, and the other columns are the
    property keys found in ``fields_set`` for these objects.

    Ampersands of the URLs used as ``href`` values are written as ``&amp;``.
    """
    dict_collapsed_subjects_to_object_lists = dict_props_collapsed_subjects_to_object_lists[prop_nam]
    logfil.write(lib_util.TimeStamp()+" Rdf2Dot: dict_collapsed_subjects_to_object_lists=%d.\n"
                 % len(dict_collapsed_subjects_to_object_lists))

    for subj_url, nod_lst in six.iteritems(dict_collapsed_subjects_to_object_lists):
        subj_nam = _rdf_node_to_dot_label(subj_url)

        subj_nam_tab = collapsed_label(prop_nam, subj_nam)
        try:
            # TODO: This logic adds an extra level of node: Try to flatten the tree.
            subj_nam = subj_nam_from_collapsed(prop_nam, subj_nam)
        except KeyError:
            pass

        # This points from the subject to the table containing the objects.
        # TODO: This color should be a parameter.
        stream.write(_pattern_edge_oriented % (subj_nam, subj_nam_tab, "GREEN", prop_nam))

        lab_text, subj_entity_graphic_class, entity_id = lib_naming.ParseEntityUri(subj_url)

        # At the moment, two passes are necessary:
        # * A first pass to create the total list of fields, because they might be a bit different
        #   from one record to the other. The column names of these fields get an unique index number
        #   and can therefore be sorted.
        # * A second pass uses these result, to display the lines.
        #
        # This could be faster by assuming that the first ten or twenty columns have all the fields.
        # We could then start the second pass, and if an undetected column is found,
        # then restart from the beginning, as it is done now.

        # Unique columns of the descendant of this subject.
        raw_fields_keys = set()
        for obj in nod_lst:
            # One table per node.
            raw_fields_keys.update(fld[0] for fld in fields_set[obj])

        # Mandatory properties must come at the beginning of the columns of the header, with first indices.
        fields_keys_ordered = []
        for fld_priority in _flat_properties_list:
            try:
                # Must always be appended. BUT IF THERE IS NO html_data, IS IT WORTH ?
                # If the property is never used, exception then next property.
                raw_fields_keys.remove(fld_priority)
                fields_keys_ordered.append(fld_priority)
            except KeyError:
                pass

        # This one is always removed because its content is concatenated at the first column.
        for fld_to_remove in [pc.property_information]:
            try:
                raw_fields_keys.remove(fld_to_remove)
            except KeyError:
                pass

        # Appends rest of properties which are the column names, alphabetically sorted.
        fields_keys = fields_keys_ordered + sorted(raw_fields_keys)

        # This assumes that the header columns are sorted by alphabetical order.
        # The indices start at 1 because the column 0 is for the key.
        key_indices = {name_key: index_key for (index_key, name_key) in enumerate(fields_keys, 1)}

        number_keys = len(key_indices)+1

        # Apparently, no embedded tables.
        dict_html_lines = dict()
        for obj_uri in nod_lst:
            # One table per node.
            sub_obj_id = _rdf_node_to_dot_label(obj_uri)

            # The ampersands of a URL must be escaped when it is used as a href value.
            # Beware "\L" which should not be replaced by "<TABLE>" but this is not the right place.
            sub_nod_uri = obj_uri.replace('&', '&amp;')

            try:
                sub_obj_nam, sub_entity_graphic_class, sub_entity_id = lib_naming.parse_entity_uri_short(obj_uri)
            except UnicodeEncodeError:
                logging.warning("UnicodeEncodeError error:%s", obj_uri)
                sub_obj_nam, sub_entity_graphic_class, sub_entity_id = ("Utf err 1", "Utf err 2", "Utf err 3")

            # If this is a script, always displayed on white, even if related to a specific entity.
            # THIS IS REALLY A SHAME BECAUSE WE JUST NEED THE ORIGINAL PROPERTY.
            if obj_uri.find("entity.py") < 0:
                obj_color_plain = "#FFFFFF"
            else:
                obj_color_plain = lib_patterns.EntityClassToColor(sub_entity_graphic_class)
            # This lighter color for the first column.
            obj_color_light = lib_patterns.color_lighter(obj_color_plain)

            # Some colors a bit clearer ? Or take the original color of the class ?
            td_bgcolor_plain = '<td BGCOLOR="%s" ' % obj_color_plain
            td_bgcolor_light = '<td BGCOLOR="%s" ' % obj_color_light

            # Some columns might not have a value. The first column is for the key.
            html_columns = [td_bgcolor_plain + " ></td>"] * number_keys

            # Just used for the vertical order of lines, one line per object.
            concatenated_info_values = ""

            for key, val in fields_set[obj_uri]:
                # TODO: This property is by default the sorting key:
                # TODO: This can be a parameter for lists of classes <MY_Class>
                # TODO: ... by adding triplets of the form: (<MY_Class>, sorting_key, pc.property_information)
                if key == pc.property_information:
                    # This can be a short string only.
                    # Instead of concatenation, consider a list, or use an unique delimiter.
                    concatenated_info_values += val
                    # If there is a key, it overrides
                    sub_entity_id = "NOT_" + "PLAINTEXTONLY"  # val
                    # This property has no column: it is displayed in the first one.
                    continue

                # TODO: This is hard-coded.
                if is_flat_property(key):
                    # In fact, it might also be an internal URL with "entity.py"
                    if lib_kbase.IsLiteral(val):
                        if isinstance(val.value, (list, tuple)):
                            str_html = _format_element_aux(val.value)
                            tmp_cell = 'align="left">%s</td>' % str_html
                        else:
                            tmp_cell = 'align="left">%s</td>' % val.value
                    else:
                        # This displays objects in a table: The top-level object must be
                        # in the same host, so there is no need to display a long label.
                        val_title = lib_naming.parse_entity_uri_short(val)[0]

                        assert isinstance(val_title, (six.text_type, six.binary_type))

                        # This could probably be replaced by "str"
                        # There might be non-ascii characters such as accents etc...
                        try:
                            val_title.encode('ascii')
                        except UnicodeEncodeError:
                            val_title = "Not ascii"

                        val_title_ul = _dot_ul(val_title)
                        tmp_cell = 'href="%s" align="left" >%s</td>' % (val, val_title_ul)
                else:
                    # Numbers are right-aligned, any other value is left-aligned.
                    try:
                        float(val)
                        tmp_cell = 'align="right">%s</td>' % val
                    except (TypeError, ValueError):
                        # Wraps the string if too long. Can happen only with a literal.
                        tmp_cell = 'align="left">%s</td>' % _str_with_br(val)

                idx_key = key_indices[key]
                html_columns[idx_key] = td_bgcolor_plain + tmp_cell

            if concatenated_info_values:
                title_key = concatenated_info_values
            else:
                title_key = sub_obj_nam

            # Maybe the first column is a literal, and not an object ?
            if sub_entity_id != "PLAINTEXTONLY":
                # TODO: WE SHOULD PROBABLY ESCAPE HERE TOO.
                # For example, this displays the column labelled with pc.property_information
                tmp_col_0 = 'port="%s" href="%s" align="LEFT" >%s</td>' % (sub_obj_id, sub_nod_uri, title_key)
            else:
                # The raw value is escaped here, so that its ampersands are not escaped twice.
                sub_nod_uri = lib_util.html_escape(obj_uri)
                # For example, this displays the title of another table: Typically sub-scripts.
                # The title itself is not an URL.
                tmp_col_0 = 'port="%s" align="LEFT" >%s</td>' % (sub_obj_id, sub_nod_uri)
            html_columns[0] = td_bgcolor_light + tmp_col_0

            # concatenated_info_values
            # Several scripts might have the same help text, so add a number.
            # "Title" => "Title"
            # "Title" => "Title/2"
            # "Title" => "Title/3" etc...
            # Beware that it is quadratic with the number of scripts with identical info.
            title_idx = 2
            title_key_uniq = title_key
            while title_key_uniq in dict_html_lines:
                title_key_uniq = "%s/%d" % (title_key, title_idx)
                title_idx += 1

            # TODO: The sorting order is based on these strings but should rather be based on the content.
            # TODO: For example, "(TUT_UnixProcess) Handle=10" comes before "(TUT_UnixProcess) Handle=2".
            # TODO: This is later sorted by the function lib_util.natural_sort_list.
            # TODO: Or: title_key_uniq should rather be replaced by the list of properties, for example.
            # TODO: By clicking on the column names, we could change the order.
            # TODO: Another possibility is to have a "key" metadata which would replace title_key_uniq.
            dict_html_lines[title_key_uniq] = "".join(html_columns)

        # Replace the first column by more useful information.
        num_nod_lst = len(nod_lst)

        # WBEM and WMI classes have the syntax: "ns1/ns2/ns3:class" and the class it self can have base classes.
        # Survol classes have the syntax: "dir/dir/dir/class": This considers that namespaces are not really
        # necessary and can be replaced by classes. Also, there is a one-to-one match between the class inheritance
        # tree and its directory.
        # If Survol had to be started from scratch, there would be one Python class per survol class,
        # and they would be stored in the top dir "root/cimv2" ... it is not too late !
        #
        # NOTE(review): sub_entity_graphic_class comes from the last object of the loop above,
        # so it is not defined if nod_lst is empty.
        # This strips the upper directories: "mysql/instance" or "oracle/table", if this is a Survol class
        elt_nam = sub_entity_graphic_class.split("/")[-1]
        # This strips the namespace: "root/cimv2:CIM_LogicalElement", if this is a WBEM or WMI class.
        elt_nam = elt_nam.split(":")[-1]
        if not elt_nam:
            # TODO: This is not the right criteria. Must select if we are listing scripts.
            elt_nam = "script"

        elt_nam_plural = lib_grammar.to_plural(elt_nam, num_nod_lst)
        txt_elements = "%d %s" % (num_nod_lst, elt_nam_plural)
        header = '<td border="1">%s</td>' % _dot_bold(txt_elements)

        # TODO: Replace each column name with a link which sorts the line based on this column.
        # The order of columns could be specified with an extra cgi argument with the columns names.
        for key in fields_keys:
            column_title = lib_kbase.qname(key, grph)
            column_title = column_title.replace("_", " ").capitalize()
            header += "<td border='1'>%s</td>" % _dot_bold(column_title)
        # With an empty key, it comes first when sorting.
        dict_html_lines[""] = header

        # MAYBE SHOULD BE DONE TWICE !!!!! SEE ALSO ELSEWHERE !!!!
        subj_url_clean = subj_url.replace('&', '&amp;')

        # BEWARE: The shape and the color of this HTML table is from the subjects,
        # because the elements can be of different classes, even if the share the same predicate.
        # TODO: Each row should have its own color according to its class.
        num_fields = len(fields_keys)+1

        # The rows of this HTML table could belong to different classes:
        # What the shared is the predicate. Hence, the predicate, property name is used as a title.
        prop_nam_plural = lib_grammar.to_plural(prop_nam, None)
        help_text = "List of " + prop_nam_plural + " in " + lab_text

        # TODO: The title and the content are not necessarily of the same class.
        # lab_text_with_br is the first line of the table containing nodes linked with the
        # same property. Unfortunately we have lost this property.
        lab_text = _truncate_in_space(lab_text, 30)
        lab_text_with_br = _str_with_br(lab_text)
        lab_text_with_br += ": " + prop_nam

        # No object with this script.
        if entity_id == "PLAINTEXTONLY":
            subj_url_clean = ""

        # This color is the table's contour.
        lib_patterns.WritePatterned(
            stream,
            subj_entity_graphic_class,
            subj_nam_tab,
            help_text,
            '"#000000"',
            subj_url_clean,
            num_fields,
            lab_text_with_br,
            dict_html_lines)
def Filter(self, subjRdf, objRdf):
    """Select the triples whose subject is ``self.m_node`` and whose object is a literal.

    The object is tested only when the subject matches.
    """
    same_subject = (subjRdf == self.m_node)
    if not same_subject:
        return same_subject
    return lib_kbase.IsLiteral(objRdf)
def Grph2Menu(page_title, error_msg, isSubServer, parameters, grph):
    """Write the scripts of ``grph`` as the JSON tree of a contextual menu.

    Only two kinds of triples are used:

    * ``pc.property_script`` links a node to one of its sub-items.
    * ``pc.property_information`` gives the name of a node.

    The menu is built from the nodes which are never the object of a
    ``pc.property_script`` triple. The items of one of these top-level nodes
    are written with ``WriteJsonHeader``, or an empty object if there is none.

    ``page_title``, ``error_msg``, ``isSubServer`` and ``parameters`` are
    accepted but not read by this function.

    Raises:
        TypeError: if the object of a ``pc.property_information`` triple is
            not a literal.
    """
    # For each node, the subscripts. Therefore it can only be a directory.
    nodes_to_items = {}

    # Nodes of scripts which have a parent.
    nodes_with_parent = set()

    # Later used to calculate the list of scripts which do not have a parent
    # directory: They will be displayed at the top of the contextual menu.
    subject_nodes = set()

    # The name of each node.
    nodes_to_names = {}

    for subj, pred, obj in grph:
        if pred == pc.property_script:
            nodes_to_items.setdefault(subj, []).append(obj)

            if lib_kbase.IsLiteral(obj):
                # This is the name of a subdirectory containing scripts.
                nodes_to_names[obj] = obj

            nodes_with_parent.add(obj)
            subject_nodes.add(subj)
        elif pred == pc.property_information:
            if not lib_kbase.IsLiteral(obj):
                # A string cannot be raised: this used to fail with an unrelated TypeError.
                raise TypeError(
                    "Cannot happen here also: information of %s is not a literal: %s" % (subj, obj))
            nodes_to_names[subj] = obj.value

    top_level_nodes = subject_nodes - nodes_with_parent

    # The output result must be sorted.
    def _build_items(node_list):
        """Return the menu entries of ``node_list``, with their sub-items, indexed by node."""
        json_items = {}
        for one_node in node_list:
            # The name should be the sort key.
            one_entry = {
                "name": nodes_to_names.get(one_node, "No name"),
                "url": one_node,
            }
            # Maybe it does not have subitems.
            if one_node in nodes_to_items:
                one_entry["items"] = _build_items(nodes_to_items[one_node])
            json_items[one_node] = one_entry
        return json_items

    menu_json = _build_items(top_level_nodes)

    # There is only one top-level element.
    one_menu_val = {}
    for one_menu_key in menu_json:
        one_menu_val = menu_json[one_menu_key]["items"]
        break

    WriteJsonHeader(json.dumps(one_menu_val, sort_keys=True, indent=2))
def _filter_subject_object(self, subj_rdf, obj_rdf): return (subj_rdf == self.m_node ) and lib_kbase.IsLiteral(obj_rdf)
def ProcessCollapsedProperties(propNam):
    """Write the tables of the objects collapsed under the property `propNam`.

    For each subject found in
    ``dict_props_collapsed_subjects_to_object_lists[propNam]``, this writes to
    ``stream`` an oriented edge from the subject to its table, then builds one
    HTML-like row per object (one column per property key found in
    ``fieldsSet``) and hands the rows to ``lib_patterns.WritePatterned``.

    This block reads names it does not define (``stream``, ``logfil``,
    ``fieldsSet``, ``grph``, ``dict_props_collapsed_subjects_to_object_lists``
    ...): presumably they come from an enclosing scope - TODO confirm.

    :param propNam: Key of the collapsed property; also used in the labels.
    """
    dictCollapsedSubjectsToObjectLists = dict_props_collapsed_subjects_to_object_lists[
        propNam]
    logfil.write(TimeStamp() +
                 " Rdf2Dot: dictCollapsedSubjectsToObjectLists=%d.\n" %
                 (len(dictCollapsedSubjectsToObjectLists)))

    for subjUrl, nodLst in lib_util.six_iteritems(
            dictCollapsedSubjectsToObjectLists):
        subjNam = _rdf_node_to_dot_label(subjUrl)

        subjNamTab = CollapsedLabel(propNam, subjNam)
        try:
            # TODO: This logic adds an extra level of node: Try to flatten the tree.
            subjNam = SubjNamFromCollapsed(propNam, subjNam)
        except KeyError:
            pass

        # This points from the subject to the table containing the objects.
        # TODO: This color should be a parameter.
        stream.write(_pattern_edge_oriented %
                     (subjNam, subjNamTab, "GREEN", propNam))

        (labText, subjEntityGraphicClass,
         entity_id) = lib_naming.ParseEntityUri(subjUrl)

        # At the moment, two passes are necessary:
        # * A first pass to create the complete list of fields, because they might be a bit different
        #   from one record to the other. The column names of these fields get a unique index number
        #   and can therefore be sorted.
        # * A second pass uses these results, to display the lines.
        #
        # This could be faster by assuming that the first ten columns have all the fields.
        # We could then start the second pass, and if an undetected column is found,
        # then restart from scratch.

        # Unique columns of the descendant of this subject.
        rawFieldsKeys = set()
        for obj in nodLst:
            # One table per node.
            rawFieldsKeys.update(fld[0] for fld in fieldsSet[obj])

        # Mandatory properties must come at the beginning of the columns of the header, with first indices.
        # BUG: If html is removed from this list while there are values, columns are missing.
        # If there is HTML or RDF, it should come first.
        fieldsKeysOrdered = []
        for fldPriority in _flat_properties_list:
            try:
                # Must always be appended. BUT IF THERE IS NO html_data, IS IT WORTH ?
                # TODO: Remove if not HTML and no sub-rdf. CGIPROP

                # If the property is never used, exception then next property.
                rawFieldsKeys.remove(fldPriority)
                fieldsKeysOrdered.append(fldPriority)
            except KeyError:
                pass

        # This one is always removed because its content is concatenated at the first column.
        for fldToRemove in [pc.property_information]:
            try:
                rawFieldsKeys.remove(fldToRemove)
            except KeyError:
                pass

        # Appends rest of properties, sorted.
        fieldsKeys = fieldsKeysOrdered + sorted(rawFieldsKeys)

        # This assumes that the header columns are sorted.
        # Index zero is reserved for the first column, which holds the title.
        keyIndices = {
            nameKey: indexKey
            for (indexKey, nameKey) in enumerate(fieldsKeys, 1)
        }

        numberKeys = len(keyIndices) + 1

        # Apparently, no embedded tables.
        dictHtmlLines = dict()
        for objUri in nodLst:
            # One table per node.
            subObjId = _rdf_node_to_dot_label(objUri)

            # Beware "\L" which should not be replaced by "<TABLE>" but this is not the right place.
            # NOTE(review): as written this replace() is a no-op; an HTML entity
            # was presumably intended as the replacement - confirm upstream.
            subNodUri = objUri.replace('&', '&')

            try:
                (subObjNam, subEntityGraphicClass,
                 subEntityId) = lib_naming.ParseEntityUriShort(objUri)
            except UnicodeEncodeError:
                WARNING("UnicodeEncodeError error:%s", objUri)
                (subObjNam, subEntityGraphicClass,
                 subEntityId) = ("Utf problem1", "Utf problem2", "Utf problem3")

            # If this is a script, always displayed on white, even if related to a specific entity.
            # THIS IS REALLY A SHAME BECAUSE WE JUST NEED THE ORIGINAL PROPERTY.
            if objUri.find("entity.py") < 0:
                objColor = "#FFFFFF"
            else:
                objColor = lib_patterns.EntityClassToColor(
                    subEntityGraphicClass)
            # This lighter color is for the first column.
            objColorLight = lib_patterns.ColorLighter(objColor)

            # Some colors a bit clearer ? Or take the original color of the class ?
            td_bgcolor_plain = '<td BGCOLOR="%s" ' % objColor
            td_bgcolor_light = '<td BGCOLOR="%s" ' % objColorLight
            td_bgcolor = td_bgcolor_plain

            # Some columns might not have a value. The first column is for the key.
            columns = [td_bgcolor + " ></td>"] * numberKeys

            # Just used for the vertical order of lines, one line per object.
            title = ""

            # TODO: CGIPROP. This is not a dict, the same key can appear several times ?
            for (key, val) in fieldsSet[objUri]:
                if key == pc.property_information:
                    # This can be a short string only.
                    title += val
                    continue

                # TODO: This is hard-coded.
                if is_flat_property(key):
                    # In fact, it might also be an internal URL with "entity.py"
                    if lib_kbase.IsLiteral(val):
                        if isinstance(val.value, (list, tuple)):
                            strHtml = _format_element_aux(val.value)
                            DEBUG("val.value=%s", strHtml)
                            tmpCell = td_bgcolor + 'align="left">%s</td>' % strHtml
                        else:
                            tmpCell = td_bgcolor + 'align="left">%s</td>' % val.value
                    else:
                        # This displays objects in a table: The top-level object must be
                        # in the same host, so there is no need to display a long label.
                        valTitle = lib_naming.ParseEntityUriShort(val)[0]

                        assert isinstance(valTitle, lib_util.six_text_type)

                        # There might be non-ascii characters such as accents etc...
                        try:
                            valTitle.encode('ascii')
                        except UnicodeEncodeError:
                            valTitle = "Not ascii"

                        valTitleUL = lib_exports.DotUL(valTitle)
                        tmpCell = td_bgcolor + 'href="%s" align="left" >%s</td>' % (
                            val, valTitleUL)
                else:
                    # Numbers are right-aligned, anything else is wrapped text.
                    # Only conversion errors are caught, so that KeyboardInterrupt
                    # or SystemExit are not silently swallowed.
                    try:
                        float(val)
                    except (TypeError, ValueError):
                        # Wraps the string if too long. Can happen only with a literal.
                        tmpCell = td_bgcolor + 'align="left">%s</td>' % lib_exports.StrWithBr(
                            val)
                    else:
                        tmpCell = td_bgcolor + 'align="right">%s</td>' % val

                idxKey = keyIndices[key]
                columns[idxKey] = tmpCell

            if title:
                title_key = title
            else:
                title_key = subObjNam

            # Maybe the first column is a literal ?
            if subEntityId != "PLAINTEXTONLY":
                # WE SHOULD PROBABLY ESCAPE HERE TOO.
                columns[
                    0] = td_bgcolor_light + 'port="%s" href="%s" align="LEFT" >%s</td>' % (
                        subObjId, subNodUri, title_key)
            else:
                subNodUri = lib_util.html_escape(subNodUri)
                columns[
                    0] = td_bgcolor_light + 'port="%s" align="LEFT" >%s</td>' % (
                        subObjId, subNodUri)

            # Several scripts might have the same help text, so add a number.
            # "Title" => "Title"
            # "Title" => "Title/2"
            # "Title" => "Title/3" etc...
            # Beware that it is quadratic with the number of scripts with identical info.
            title_idx = 2
            title_uniq = title_key
            while title_uniq in dictHtmlLines:
                title_uniq = "%s/%d" % (title_key, title_idx)
                title_idx += 1

            # TODO: The order is based on strings but should be based on the content. Example:
            # TODO: "(TUT_UnixProcess) Handle=10" comes before "(TUT_UnixProcess) Handle=2"
            # TODO: title_uniq should rather be the list of properties.
            # TODO: By clicking on the column names, we could change the order.
            dictHtmlLines[title_uniq] = "".join(columns)

        # Replace the first column by more useful information.
        numNodLst = len(nodLst)

        # WBEM and WMI classes have the syntax: "ns1/ns2/ns3:class" and the class itself can have base classes.
        # Survol classes have the syntax: "dir/dir/dir/class": This considers that namespaces are not really
        # necessary and can be replaced by classes. Also, there is a one-to-one match between the class inheritance
        # tree and its directory.
        # If Survol had to be started from scratch, there would be one Python class per survol class,
        # and they would be stored in the top dir "root/cimv2" ... it is not too late !
        #
        # This strips the upper directories: "mysql/instance" or "oracle/table", if this is a Survol class.
        # The class used here is the one of the last object of the loop above.
        eltNam = subEntityGraphicClass.split("/")[-1]
        # This strips the namespace: "root/cimv2:CIM_LogicalElement", if this is a WBEM or WMI class.
        eltNam = eltNam.split(":")[-1]
        if not eltNam:
            # TODO: This is not the right criteria. Must select if we are listing scripts.
            eltNam = "script"

        eltNamPlural = lib_grammar.ToPlural(eltNam, numNodLst)
        txtElements = "%d %s" % (numNodLst, eltNamPlural)
        header = '<td border="1">' + lib_exports.DotBold(
            txtElements) + "</td>"

        # TODO: Replace each column name with a link which sorts the line based on this column.
        # The order of columns could be specified with an extra cgi argument with the columns names.
        for key in fieldsKeys:
            columnTitle = lib_kbase.qname(key, grph)
            columnTitle = columnTitle.replace("_", " ").capitalize()
            header += "<td border='1'>" + lib_exports.DotBold(
                columnTitle) + "</td>"
        # With an empty key, it comes first when sorting.
        dictHtmlLines[""] = header

        # MAYBE SHOULD BE DONE TWICE !!!!! SEE ALSO ELSEWHERE !!!!
        # NOTE(review): as written this replace() is a no-op - confirm upstream.
        subjUrlClean = subjUrl.replace('&', '&')

        # BEWARE: The shape and the color of this HTML table is from the subjects,
        # because the elements can be of different classes, even if they share the same predicate.
        # TODO: Each row should have its own color according to its class.
        numFields = len(fieldsKeys) + 1

        # The rows of this HTML table could belong to different classes:
        # What they share is the predicate. Hence, the predicate, property name is used as a title.
        propNamPlural = lib_grammar.ToPlural(propNam, None)
        helpText = "List of " + propNamPlural + " in " + labText

        # TODO: The title and the content are not necessarily of the same class.
        # labTextWithBr is the first line of the table containing nodes linked with the
        # same property. Unfortunately we have lost this property.
        labText = lib_exports.TruncateInSpace(labText, 30)
        labTextWithBr = lib_exports.StrWithBr(labText)
        labTextWithBr += ": " + propNam

        if entity_id == "PLAINTEXTONLY":
            subjUrlClean = ""

        # This color is the table's contour.
        lib_patterns.WritePatterned(stream, subjEntityGraphicClass,
                                    subjNamTab, helpText, '"#000000"',
                                    subjUrlClean, numFields, labTextWithBr,
                                    dictHtmlLines)
def output_rdf_graph_as_json_d3(page_title, error_msg, parameters, grph):
    """
    Serializes a RDF graph as a JSON document of nodes and links.

    The document is made of the page title, a list of nodes and a list of links,
    in the layout used by the D3-based Javascript interface to Survol.
    It is written with _write_json_header.
    """

    # The numbering of nodes restarts at each execution, which matters when run by WSGI.
    global _node_json_number
    _node_json_number = 0

    # The same RDF node may appear in several triples: Its JSON object is created once only.
    json_objs_by_node = dict()

    def node_to_json_obj(the_nod):
        if the_nod not in json_objs_by_node:
            json_objs_by_node[the_nod] = NodeJson(the_nod)
        return json_objs_by_node[the_nod]

    links = []
    for subj, pred, obj in grph:
        # This applies only to entity.py : With a Json rendering, scripts are displayed
        # in hierarchical menus and not as nodes, so they must not appear at all.
        # TODO: Should probably also eliminate pc.property_rdf_data_nolist2 etc ... See lib_client.
        if pred == pc.property_script:
            logging.debug("continue subj=%s obj=%s", subj, obj)
            continue

        # Normal data scripts are rejected. This should apply only to file_directory.py and file_to_mime.py
        if not (_script_for_json(subj) and _script_for_json(obj)):
            continue

        subj_obj = node_to_json_obj(subj)
        subj_id = subj_obj.m_survol_url

        prop_nam = lib_exports.PropToShortPropNam(pred)

        # TODO: BUG: If several nodes for the same properties, only the last one is kept.
        if lib_kbase.IsLink(obj):
            obj_id = node_to_json_obj(obj).m_survol_url
            one_link = {
                'source': subj_id,
                'target': obj_id,
                'survol_link_prop': prop_nam
            }
            links.append(one_link)

            # TODO: Add the name corresponding to the URL, in m_info_dict so that some elements
            # of the tooltip would be clickable. On the other hand, one just need to merge
            # the nodes relative to the object, by right-clicking.
        elif lib_kbase.IsLiteral(obj):
            if pred == pc.property_information:
                try:
                    info_text = str(obj.value)
                except UnicodeEncodeError:
                    # 'ascii' codec can't encode character u'\xf3' in position 17: ordinal not in range(128)
                    # https://stackoverflow.com/questions/9942594/unicodeencodeerror-ascii-codec-cant-encode-character-u-xa0-in-position-20
                    info_text = obj.value.encode('utf-8')
                subj_obj.m_info_list.append(info_text)
            else:
                is_json_scalar = isinstance(
                    obj.value, six.integer_types) or isinstance(
                        obj.value, six.string_types)
                if is_json_scalar:
                    subj_obj.m_info_dict[prop_nam] = obj.value
                else:
                    # The value might not be serializable to JSON: Only its type name is kept.
                    subj_obj.m_info_dict[prop_nam] = type(obj.value).__name__
        else:
            raise Exception(__file__ + " Cannot happen here")

    # Each node is stored at the index it received when it was created.
    nodes = [None] * len(json_objs_by_node)
    for nod_obj in json_objs_by_node.values():
        # The URL must not contain any HTML entities when in a XML or SVG document,
        # and therefore must be escaped. Therefore they have to be unescaped when transmitted in JSON.
        # This is especially needed for RabbitMQ because the parameter defining its connection name
        # has the form: "Url=LOCALHOST:12345,Connection=127.0.0.1:51748 -> 127.0.0.1:5672"

        # HTTP_MIME_URL
        # The HTML entities of the label MUST be unescaped.
        the_survol_nam = lib_util.survol_unescape(nod_obj.m_label)

        # TODO: Use the same object for lookup and Json.
        one_node = {
            'id': nod_obj.m_survol_url,  # Required by D3
            'name': the_survol_nam,
            # Theoretically, this URL should be HTML unescaped then CGI escaped.
            'survol_url': nod_obj.m_survol_url,  # Duplicate of 'id'
            'survol_universal_alias': nod_obj.m_survol_universal_alias,
            'survol_fill': nod_obj.m_color,
            'entity_class': nod_obj.m_class,  # TODO: Maybe not needed because also in the URL ?
            'survol_info_list': nod_obj.m_info_list,
            'survol_info_dict': nod_obj.m_info_dict
        }
        nodes[nod_obj.m_index] = one_node

    # This is the graph displayed by D3.
    graph = {"page_title": page_title, "nodes": nodes, "links": links}

    _write_json_header(json.dumps(graph, indent=2))
def output_rdf_graph_as_json_menu(page_title, error_msg, parameters, grph):
    """
    Writes, as JSON, the tree of scripts used as the contextual menu of a node
    in the D3 Javascript interface to Survol.

    The RDF content is already created: Only the triples related to scripts
    and to the names of nodes are kept.

    TODO: It would be faster to keep only the tree of scripts.
    The script "entity.py" should have a different output when mode=json:
    It does not return a network but a tree to be displayed in a contextual menu.
    Its layout is completely different from a normal RDF graph transformed into JSON,
    so probably the URL should be different as well.
    Input example: "http://127.0.0.1:8000/survol/entity.py?xid=CIM_Process.Handle=3812&mode=json"
    """

    # TODO: Should add WBEM and WMI ?

    # The children of each node: A node with children can only be a directory.
    nodes_to_items = {}

    # Scripts and directories which have a parent.
    nodes_with_parent = set()

    # All the parents. The ones which do not have a parent themselves
    # are displayed at the top of the contextual menu.
    subject_nodes = set()

    # The displayed name of each node.
    nodes_to_names = dict()

    for subj, pred, obj in grph:
        if pred == pc.property_script:
            nodes_to_items.setdefault(subj, []).append(obj)

            # A literal is the name of a subdirectory containing scripts.
            if lib_kbase.IsLiteral(obj):
                nodes_to_names[obj] = obj

            nodes_with_parent.add(obj)
            subject_nodes.add(subj)
        elif pred == pc.property_information:
            if not lib_kbase.IsLiteral(obj):
                raise Exception("Cannot happen here also")
            nodes_to_names[subj] = obj.value

    top_level_nodes = subject_nodes - nodes_with_parent

    # The output result must be sorted.
    def build_menu_level(rdf_nodes):
        menu_level = {}
        for rdf_node in rdf_nodes:
            # The name should be the sort key.
            menu_entry = {
                "name": nodes_to_names.get(rdf_node, "No name"),
                "url": rdf_node
            }

            # Only directories have sub-items.
            if rdf_node in nodes_to_items:
                menu_entry["items"] = build_menu_level(nodes_to_items[rdf_node])

            menu_level[rdf_node] = menu_entry
        return menu_level

    menu_json = build_menu_level(top_level_nodes)

    # There is only one top-level element: Only its sub-items are returned.
    one_menu_val = {}
    if menu_json:
        first_menu_key = next(iter(menu_json))
        one_menu_val = menu_json[first_menu_key]["items"]

    # Writes the content to the HTTP client.
    _write_json_header(json.dumps(one_menu_val, sort_keys=True, indent=2))
def ProcessCollapsedProperties(propNam):
    """Write the tables of the objects collapsed under the property `propNam`.

    For each subject found in ``dictPropsCollapsedSubjectsToObjectLists[propNam]``,
    this writes to ``stream`` an oriented edge from the subject to its table,
    then builds one HTML-like row per object (one column per property key
    found in ``fieldsSet``) and hands the rows to
    ``lib_patterns.WritePatterned``.

    This block reads names it does not define (``stream``, ``logfil``,
    ``fieldsSet``, ``grph``, ``dictPropsCollapsedSubjectsToObjectLists`` ...):
    presumably they come from an enclosing scope - TODO confirm.

    :param propNam: Key of the collapsed property; also used in the labels.
    """
    dictCollapsedSubjectsToObjectLists = dictPropsCollapsedSubjectsToObjectLists[
        propNam]
    logfil.write(TimeStamp() +
                 " Rdf2Dot: dictCollapsedSubjectsToObjectLists=%d.\n" %
                 (len(dictCollapsedSubjectsToObjectLists)))

    for subjUrl, nodLst in lib_util.six_iteritems(
            dictCollapsedSubjectsToObjectLists):
        subjNam = RdfNodeToDotLabel(subjUrl)

        subjNamTab = CollapsedLabel(propNam, subjNam)
        try:
            # TODO: This logic sometimes adds a useless level of node. More precisely, it duplicates a node:
            # The node is represented by two graphical objects:
            # * One which has the scripts.
            # * Another one which has the HTML list built here.
            # => Could the two be merged ?
            # In WritePatterns: Add the name of the node to the label.
            subjNam = SubjNamFromCollapsed(propNam, subjNam)
        except KeyError:
            pass

        # Point from the subject to the table containing the objects.
        stream.write(pattEdgeOrien % (subjNam, subjNamTab, "GREEN", propNam))

        (labText, subjEntityGraphicClass,
         entity_id) = lib_naming.ParseEntityUri(subjUrl)

        # Problem with the fields:
        # A first pass finds the fields, detects the column names and gives them an order and an index.
        # A second pass builds the lines.
        # So, all columns are ordered.
        # For each field: Take them in the order of the header and, when there is a gap, an empty column.
        # No need to sort the fields, but the complete list of fields is needed first, in the right order.
        # THIS ASSUMES THAT KEYS ARE UNIQUE IN FIELDSSET.
        # IF THIS IS NOT POSSIBLE, LISTS WILL BE USED. BUT THIS CONSTRAINT SIMPLIFIES THE DISPLAY.
        # IT IS A PITY THAT OBJECTS ARE SCANNED TWICE ONLY TO GET THE NAMES OF THE FIELDS.
        # TODO: HEURISTIC: We could stop at the first ten. Or sort before ?
        # Or take the columns of the first line, and start again if this does not work.

        # Unique columns of the descendant of this subject.
        rawFieldsKeys = set()
        for obj in nodLst:
            # One table per node.
            rawFieldsKeys.update(fld[0] for fld in fieldsSet[obj])

        # Mandatory properties must come at the beginning of the columns of the header, with first indices.
        # BUG: If html is removed from this list while there are values, columns are missing.
        # If there is HTML or RDF, it should come first.
        fieldsKeysOrdered = []
        for fldPriority in FlatPropertertiesList:
            try:
                # Must always be appended. BUT IF THERE IS NO html_data, IS IT WORTH ?
                # TODO: Remove if not HTML and no sub-rdf. CGIPROP

                # If the property is never used, exception then next property.
                rawFieldsKeys.remove(fldPriority)
                fieldsKeysOrdered.append(fldPriority)
            except KeyError:
                pass

        # This one is always removed because its content is concatenated at the first column.
        for fldToRemove in [pc.property_information]:
            try:
                rawFieldsKeys.remove(fldToRemove)
            except KeyError:
                pass

        # Appends rest of properties, sorted.
        fieldsKeys = fieldsKeysOrdered + sorted(rawFieldsKeys)

        # This assumes that the header columns are sorted.
        # Index zero is reserved for the first column, which holds the title.
        keyIndices = {
            nameKey: indexKey
            for (indexKey, nameKey) in enumerate(fieldsKeys, 1)
        }

        numberKeys = len(keyIndices) + 1

        # Apparently, no embedded tables.
        dictHtmlLines = dict()
        for objUri in nodLst:
            # One table per node.
            subObjId = RdfNodeToDotLabel(objUri)

            # Beware "\L" which should not be replaced by "<TABLE>" but this is not the right place.
            # NOTE(review): as written this replace() is a no-op; an HTML entity
            # was presumably intended as the replacement - confirm upstream.
            subNodUri = objUri.replace('&', '&')

            try:
                (subObjNam, subEntityGraphicClass,
                 subEntityId) = lib_naming.ParseEntityUriShort(objUri)
            except UnicodeEncodeError:
                sys.stderr.write("UnicodeEncodeError error:%s\n" % (objUri))
                (subObjNam, subEntityGraphicClass,
                 subEntityId) = ("Utf problem1", "Utf problem2", "Utf problem3")

            # If this is a script, always displayed on white, even if related to a specific entity.
            # THIS IS REALLY A SHAME BECAUSE WE JUST NEED THE ORIGINAL PROPERTY.
            if objUri.find("entity.py") < 0:
                objColor = "#FFFFFF"
            else:
                objColor = lib_patterns.EntityClassToColor(
                    subEntityGraphicClass)
            # This lighter color is for the first column.
            objColorLight = lib_patterns.ColorLighter(objColor)

            # Some colors a bit clearer ? Or take the original color of the class ?
            td_bgcolor_plain = '<td BGCOLOR="%s" ' % objColor
            td_bgcolor_light = '<td BGCOLOR="%s" ' % objColorLight
            td_bgcolor = td_bgcolor_plain

            # Some columns might not have a value. The first column is for the key.
            columns = [td_bgcolor + " ></td>"] * numberKeys

            # Just used for the vertical order of lines, one line per object.
            title = ""

            # TODO: CGIPROP. This is not a dict, the same key can appear several times ?
            for (key, val) in fieldsSet[objUri]:
                if key == pc.property_information:
                    # This can be a short string only.
                    title += val
                    continue

                # TODO: This is hard-coded.
                if IsFlatProperty(key):
                    # In fact, it might also be an internal URL with "entity.py"
                    if lib_kbase.IsLiteral(val):
                        if isinstance(val.value, (list, tuple)):
                            strHtml = FormatElementAux(val.value)
                            sys.stderr.write("val.value=%s\n" % strHtml)
                            tmpCell = td_bgcolor + 'align="left">%s</td>' % strHtml
                        else:
                            tmpCell = td_bgcolor + 'align="left">%s</td>' % val.value
                    else:
                        valTitle = lib_naming.ParseEntityUri(val)[0]

                        valTitleUL = lib_exports.DotUL(valTitle)
                        tmpCell = td_bgcolor + 'href="%s" align="left" >%s</td>' % (
                            val, valTitleUL)
                else:
                    # Numbers are right-aligned, anything else is wrapped text.
                    # Only conversion errors are caught, so that KeyboardInterrupt
                    # or SystemExit are not silently swallowed.
                    try:
                        float(val)
                    except (TypeError, ValueError):
                        # Wraps the string if too long. Can happen only with a literal.
                        tmpCell = td_bgcolor + 'align="left">%s</td>' % lib_exports.StrWithBr(
                            val)
                    else:
                        tmpCell = td_bgcolor + 'align="right">%s</td>' % val

                idxKey = keyIndices[key]
                columns[idxKey] = tmpCell

            if title:
                title_key = title
            else:
                title_key = subObjNam

            # Maybe the first column is a literal ?
            if subEntityId != "PLAINTEXTONLY":
                # WE SHOULD PROBABLY ESCAPE HERE TOO.
                columns[
                    0] = td_bgcolor_light + 'port="%s" href="%s" align="LEFT" >%s</td>' % (
                        subObjId, subNodUri, title_key)
            else:
                # NOTE(review): cgi.escape was removed in Python 3.8 - confirm that
                # this code path only runs on older interpreters.
                subNodUri = cgi.escape(subNodUri)
                columns[
                    0] = td_bgcolor_light + 'port="%s" align="LEFT" >%s</td>' % (
                        subObjId, subNodUri)

            # Several scripts might have the same help text, so add a number.
            # "Title" => "Title"
            # "Title" => "Title/2"
            # "Title" => "Title/3" etc...
            # Beware that it is quadratic with the number of scripts with identical info.
            title_idx = 2
            title_uniq = title_key
            while title_uniq in dictHtmlLines:
                title_uniq = "%s/%d" % (title_key, title_idx)
                title_idx += 1

            # TODO: The order is based on strings but should be based on the content. Example:
            # TODO: "(TUT_UnixProcess) Handle=10" comes before "(TUT_UnixProcess) Handle=2"
            # TODO: title_uniq should rather be the list of properties.
            # TODO: By clicking on the column names, we could change the order.
            dictHtmlLines[title_uniq] = "".join(columns)

        # Replace the first column by more useful information.
        numNodLst = len(nodLst)

        # TODO: Compute this once for all.
        # The class used here is the one of the last object of the loop above.
        eltNam = subEntityGraphicClass.split("/")[-1]
        if not eltNam:
            # TODO: This is not the right criteria. Must select if we are listing scripts.
            eltNam = "script"

        eltNamPlural = lib_grammar.ToPlural(eltNam, numNodLst)
        txtElements = "%d %s" % (numNodLst, eltNamPlural)
        header = '<td border="1">' + lib_exports.DotBold(
            txtElements) + "</td>"

        # TODO: Replace each column name with a link which sorts the line based on this column.
        # The order of columns could be specified with an extra cgi argument with the columns names.
        for key in fieldsKeys:
            columnTitle = qname(key, grph)
            columnTitle = columnTitle.replace("_", " ").capitalize()
            header += "<td border='1'>" + lib_exports.DotBold(
                columnTitle) + "</td>"
        # With an empty key, it comes first when sorting.
        dictHtmlLines[""] = header

        # MAYBE SHOULD BE DONE TWICE !!!!! SEE ALSO ELSEWHERE !!!!
        # NOTE(review): as written this replace() is a no-op - confirm upstream.
        subjUrlClean = subjUrl.replace('&', '&')

        # BEWARE: The shape of the record is the one of the subject.
        # Does it mean that objects of different types get the same color
        # if they are in the same relation with an identical subject ?
        numFields = len(fieldsKeys) + 1

        # The label might be truncated
        if subjEntityGraphicClass:
            helpText = "List of " + subjEntityGraphicClass + " objects in " + labText
        else:
            helpText = "List of scripts in " + labText

        # TODO: The title and the content are not necessarily of the same class.
        # labTextWithBr is the first line of the table containing nodes linked with the
        # same property. Unfortunately we have lost this property.
        labText = lib_exports.TruncateInSpace(labText, 30)
        labTextWithBr = lib_exports.StrWithBr(labText)
        labTextWithBr += ": " + propNam

        if entity_id == "PLAINTEXTONLY":
            subjUrlClean = ""

        # This color is the table's contour.
        lib_patterns.WritePatterned(stream, subjEntityGraphicClass,
                                    subjNamTab, helpText, '"#000000"',
                                    subjUrlClean, numFields, labTextWithBr,
                                    dictHtmlLines)
def Main():
    """CGI entry point: run the SPARQL query of the request and write the result.

    The CGI argument "query" is mandatory and contains the SPARQL query.
    The optional CGI argument "format" selects the serialization of the
    result: "JSON" or "XML" (case-insensitive). Its default value is "HTML*",
    which is not implemented here.

    The CGI interface follows the one documented at
    https://hhs.github.io/meshrdf/sparql-and-uri-requests :

    * format: HTML*, XML, CSV, TSV or JSON for SELECT queries; HTML*, XML,
      JSON-LD, RDF/XML, TURTLE or N3 for CONSTRUCT queries. Default: HTML*.
    * inference: true or false. Default: false. With inference, results are
      returned for all subclasses and subproperties of the classes and
      properties of the query. This may affect performance.
    * limit: Positive integer up to 1000, for SELECT queries only.
      CONSTRUCT queries return up to 10,000 triples.
    * offset: Positive integer. Default: 0. Results start at the nth result.
    * query: A SELECT or CONSTRUCT SPARQL query. Required.
    * year: "current" or a year. Default: current.

    Only "query" and "format" are read by this function.

    :raises KeyError: If the CGI argument "query" is missing.
    :raises Exception: If the result format is not implemented, or if a
        value of a JSON result is neither a literal nor an URI reference.
    """
    lib_util.SetLoggingConfig(logging.ERROR)

    import cgi
    arguments = cgi.FieldStorage()

    # See lib_uris.SmbShareUri and the HTTP server which collapses duplicated slashes "//" into one,
    # in URL, because they are interpreted as file names.
    # SparqlWrapper does not encode slashes with urllib.quote_plus(param.encode('UTF-8'), safe='/')
    # in Wrapper.py.
    # See modules CGIHTTPServer, BaseHTTPServer, CGIHTTPRequestHandler
    # 'HTTP_USER_AGENT': 'sparqlwrapper 1.8.4 (rdflib.github.io/sparqlwrapper)'
    # Example of the beginning of a query string, where "http://" was collapsed into "http:/":
    # QUERY_STRING='query=%0A++++PREFIX+wmi%3A++%3Chttp%3A/www.primhillcomputers.com/ontology/wmi%23%3E ...
    # ... &output=json&results=json&format=json'
    sparql_query = arguments["query"].value

    # 'SERVER_SOFTWARE': 'SimpleHTTP/0.6 Python/2.7.10'
    # 'SERVER_SOFTWARE': 'SimpleHTTP/0.6 Python/3.6.3'
    # The variable might not be set if this is not started by a CGI server.
    server_software = os.environ.get('SERVER_SOFTWARE', "")
    sys.stderr.write("SERVER_SOFTWARE=%s\n" % server_software)
    if server_software.startswith("SimpleHTTP"):
        # Beware, only with Python2. Not needed on Linux and Python 3.
        # Maybe because the Python class processes the CGI arguments like filenames.
        if not lib_util.is_py3 and lib_util.isPlatformWindows:
            sparql_query = re.sub("([^a-z]*)http:/([^a-z]*)", r"\1http://\2", sparql_query)
            sys.stderr.write("Substitution 'http://' and 'http:///'\n")

    sys.stderr.write("sparql_server sparql_query=%s\n" % sparql_query.replace(" ", "="))

    try:
        result_format = arguments["format"].value
    except KeyError:
        result_format = "HTML*"

    sys.stderr.write("\n")

    query_result = __run_sparql_query(sparql_query)

    sys.stderr.write("sparql_server After query len(query_result)=%d\n" % len(query_result))
    sys.stderr.write("sparql_server After query query_result=%s\n" % str(query_result))

    # TODO: This does not work "select *", so maybe should read the first row.
    row_header = __query_header(sparql_query)

    # https://docs.aws.amazon.com/neptune/latest/userguide/sparql-api-reference-mime.html

    if result_format.upper() == "JSON":
        mime_format = "application/sparql-results+json"
        # https://www.w3.org/TR/2013/REC-sparql11-results-json-20130321/
        # This document describes how to serialize SPARQL results (SELECT and ASK query forms) in a JSON format.
        # {
        #     "head": {"vars": ["book", "title"]
        #     },
        #     "results": {
        #         "bindings": [
        #             {
        #                 "book": {"type": "uri", "value": "http://example.org/book/book6"},
        #                 "title": {"type": "literal", "value": "Harry Potter and the Half-Blood Prince"}
        #             },
        bindings_list = []
        for one_row in query_result:
            dict_row = {}
            for ix, one_variable in enumerate(row_header):
                one_element = one_row[ix]

                if lib_kbase.IsLiteral(one_element):
                    json_element = {"type": "literal", "value": str(one_element)}
                elif lib_kbase.IsURIRef(one_element):
                    # The type of an IRI binding is "uri" in the SPARQL JSON results format.
                    json_element = {"type": "uri", "value": str(one_element)}
                else:
                    raise Exception("SparqlServer: Invalid type:%s" % str(one_element))
                dict_row[one_variable] = json_element

            bindings_list.append(dict_row)
        sys.stderr.write("bindings_list=%s\n" % str(bindings_list))

        json_output = {
            "head": {"vars": row_header},
            "results": {"bindings": bindings_list}}
        str_output = json.dumps(json_output)
    elif result_format.upper() == "XML":
        mime_format = "application/sparql-results+xml"
        # https://www.w3.org/TR/rdf-sparql-XMLres/
        # This document describes an XML format for the variable binding and boolean results formats
        # provided by the SPARQL query language for RDF
        # <?xml version="1.0"?>
        # <sparql xmlns="http://www.w3.org/2005/sparql-results#">
        #   <head>
        #     <variable name="x"/>
        #     <variable name="hpage"/>
        #   </head>
        #
        #   <results>
        #     <result>
        #       <binding name="x"> ... </binding>
        #       <binding name="hpage"> ... </binding>
        #     </result>

        root = ET.Element("sparql")
        head = ET.SubElement(root, "head")
        for one_variable in row_header:
            ET.SubElement(head, "variable", name=one_variable)

        results = ET.SubElement(root, "results")
        for one_row in query_result:
            result = ET.SubElement(results, "result")
            for ix, one_variable in enumerate(row_header):
                ET.SubElement(result, "binding", name=one_variable).text = one_row[ix]

        str_output = ET.tostring(root, encoding='utf8', method='xml')
        sys.stderr.write("sparql_server str_output=%s\n" % str_output)

    else:
        raise Exception("Results format %s not implemented yet" % result_format)

    sys.stderr.write("sparql_server result_format=%s str_output=%s\n" % (result_format, str_output))

    lib_util.WrtHeader(mime_format)
    lib_util.WrtAsUtf(str_output)
def Grph2Json(page_title, error_msg, isSubServer, parameters, grph):
    """Serialize the triples of ``grph`` as a nodes/links JSON document.

    The resulting document has three keys: "page_title", "nodes" and "links".
    It is dumped with ``json.dumps`` and handed over to ``WriteJsonHeader``.

    Args:
        page_title: Stored as-is under the "page_title" key of the output.
        error_msg: Not used by this function; kept for interface compatibility.
        isSubServer: Not used by this function; kept for interface compatibility.
        parameters: Not used by this function; kept for interface compatibility.
        grph: Iterable yielding (subject, predicate, object) triples.

    Raises:
        TypeError: If the object of a triple is neither a link nor a literal.
    """
    # Must be reset to zero between several executions, when run by WSGI.
    # NOTE(review): presumably NodeJson() derives m_index from this counter - confirm.
    global NodeJsonNumber
    NodeJsonNumber = 0

    # Cache of the JSON objects, because the same nodes may appear several times.
    node_to_json = {}

    def NodeToJsonObj(theNod):
        """Return the cached JSON object of a node, creating it on first use."""
        try:
            return node_to_json[theNod]
        except KeyError:
            jsonObj = NodeJson(theNod)
            node_to_json[theNod] = jsonObj
            return jsonObj

    links = []
    for subj, pred, obj in grph:
        # This applies only to entity.py : In rendering based on Json, scripts are
        # not displayed as nodes, but in hierarchical menus.
        # The node must not appear at all.
        if pred == pc.property_script:
            sys.stderr.write("continue subj=%s obj=%s\n" % (subj, obj))
            continue

        # Normal data scripts are not accepted.
        # This should apply only to file_directory.py and file_to_mime.py
        if not ScriptForJson(subj):
            continue
        if not ScriptForJson(obj):
            continue

        # The subject node is created before the object node: creation order matters
        # because nodes are later stored at the position given by their m_index.
        subjObj = NodeToJsonObj(subj)
        propNam = PropToShortPropNam(pred)

        if lib_kbase.IsLink(obj):
            objObj = NodeToJsonObj(obj)
            links.append({
                'source': subjObj.m_survol_url,
                'target': objObj.m_survol_url,
                'survol_link_prop': propNam})
            # TODO: Add the name corresponding to the URL, in m_info_dict so that some
            # elements of the tooltip would be clickable. On the other hand, one just
            # needs to merge the nodes relative to the object, by right-clicking.
        elif lib_kbase.IsLiteral(obj):
            if pred == pc.property_information:
                try:
                    subjObj.m_info_list.append(str(obj.value))
                except UnicodeEncodeError:
                    # 'ascii' codec can't encode character u'\xf3' in position 17:
                    # ordinal not in range(128)
                    # https://stackoverflow.com/questions/9942594/unicodeencodeerror-ascii-codec-cant-encode-character-u-xa0-in-position-20
                    subjObj.m_info_list.append(obj.value.encode('utf-8'))
            elif (isinstance(obj.value, lib_util.six_integer_types)
                    or isinstance(obj.value, lib_util.six_string_types)):
                # TODO: BUG: If several nodes for the same properties,
                # only the last one is kept.
                subjObj.m_info_dict[propNam] = obj.value
            else:
                # If the value cannot be serialized to JSON, only its type name is kept.
                subjObj.m_info_dict[propNam] = type(obj.value).__name__
        else:
            # A plain string cannot be raised: it used to end up as an unrelated
            # TypeError hiding the real cause. The exception type is kept.
            raise TypeError(
                "Grph2Json: object is neither a link nor a literal: %r" % (obj,))

    # Now, this creates the nodes sent as json objects,
    # each of them stored at the position given by its m_index.
    nodes = [None] * len(node_to_json)
    for nodObj in node_to_json.values():
        # The URL must not contain any HTML entities when in a XML or SVG document,
        # and therefore must be escaped. Therefore they have to be unescaped when
        # transmitted in JSON. This is especially needed for RabbitMQ because the
        # parameter defining its connection name has the form:
        # "Url=LOCALHOST:12345,Connection=127.0.0.1:51748 -> 127.0.0.1:5672"
        # MUST UNESCAPE HTML ENTITIES !
        the_survol_nam = lib_util.survol_HTMLParser().unescape(nodObj.m_label)

        # TODO: Use the same object for lookup and Json.
        nodes[nodObj.m_index] = {
            'id': nodObj.m_survol_url,  # Required by D3
            'name': the_survol_nam,
            # Theoretically, this URL should be HTML unescaped then CGI escaped.
            'survol_url': nodObj.m_survol_url,  # Duplicate of 'id'
            'survol_universal_alias': nodObj.m_survol_universal_alias,
            'survol_fill': nodObj.m_color,
            'entity_class': nodObj.m_class,  # TODO: Maybe not needed because also in the URL ?
            'survol_info_list': nodObj.m_info_list,
            'survol_info_dict': nodObj.m_info_dict,
        }

    graph = {
        "page_title": page_title,
        "nodes": nodes,
        "links": links,
    }

    WriteJsonHeader(json.dumps(graph, indent=2))