def _format_element(val, depth=0):
    """Return ``val`` rendered as one left-aligned, borderless ``<td>`` cell.

    A link (as decided by ``lib_kbase.IsLink``) becomes a cell carrying an
    ``href`` attribute; its caption is the external title of the URL, prefixed
    with "_format_element " and passed through ``_dot_ul``.
    Any other value is formatted by ``_format_element_aux`` at the given depth.
    """
    if not lib_kbase.IsLink(val):
        cell_content = _format_element_aux(val, depth)
        return "<td align='left' balign='left' border='0'>%s</td>" % cell_content

    link_caption = _dot_ul("_format_element " + external_url_to_title(val))
    return "<td align='left' balign='left' border='0' href='%s'>%s</td>" % (val, link_caption)
def FormatElement(val, depth=0):
    """Return ``val`` rendered as one left-aligned, borderless ``<td>`` cell.

    Links (``lib_kbase.IsLink``) get an ``href`` attribute and a caption built
    from ``ExternalToTitle``, prefixed with "FormatElement " and passed
    through ``lib_exports.DotUL``. Everything else is delegated to
    ``FormatElementAux`` with the same depth.
    """
    is_link = lib_kbase.IsLink(val)
    if is_link:
        caption = "FormatElement " + ExternalToTitle(val)
        decorated_caption = lib_exports.DotUL(caption)
        cell = "<td align='left' balign='left' border='0' href='%s'>%s</td>" % (
            val, decorated_caption)
    else:
        inner_html = FormatElementAux(val, depth)
        cell = "<td align='left' balign='left' border='0'>%s</td>" % inner_html
    return cell
def _format_element(val, depth=0):
    """Build the ``<td>`` cell displaying ``val``.

    When ``lib_kbase.IsLink(val)`` is true, the cell has ``val`` as its
    ``href`` and shows the external title of the URL (prefixed with
    "_format_element ", then passed through ``lib_exports.DotUL``).
    Otherwise the cell contains whatever ``_format_element_aux`` returns
    for ``val`` at this depth.
    """
    if lib_kbase.IsLink(val):
        title_text = lib_exports.DotUL(
            "_format_element " + external_url_to_title(val))
        return "<td align='left' balign='left' border='0' href='%s'>%s</td>" % (
            val, title_text)

    plain_content = _format_element_aux(val, depth)
    return "<td align='left' balign='left' border='0'>%s</td>" % plain_content
def ExtractTitleFromMapProps(mapProps):
    """Return the title held by a single-property map, or None.

    A title is returned only when ``mapProps`` has exactly one key, this key
    is ``pc.property_information``, the associated list has exactly one
    element, and this element is not a link. In every other case the result
    is None.
    """
    if len(mapProps) != 1:
        return None

    for soleProp in mapProps:
        if soleProp == pc.property_information:
            propValues = mapProps[soleProp]
            if len(propValues) == 1:
                candidate = propValues[0]
                if not lib_kbase.IsLink(candidate):
                    return str(candidate)
        # Only the first (and only) key is ever examined.
        return None
    return None
def extract_title_from_map_props(argument_map_props):
    """Return the title stored in a single-property map, or None.

    The map must contain exactly one key, otherwise None is returned.

    * If the key is ``pc.property_error``, the string form of its whole value
      is returned. This prevents an infinite recursion if a title cannot be
      found, which happens when displaying all scripts with the
      "Show all scripts" flag.
    * If the key is ``pc.property_information`` and its list holds exactly one
      element which is not a link, this element is returned as a string.
    * Anything else gives None.
    """
    if len(argument_map_props) != 1:
        return None

    for sole_prop in argument_map_props:
        prop_values = argument_map_props[sole_prop]

        if sole_prop == pc.property_error:
            return str(prop_values)

        if sole_prop == pc.property_information and len(prop_values) == 1:
            candidate = prop_values[0]
            if not lib_kbase.IsLink(candidate):
                return str(candidate)
        return None

    # Reached only if the container reports one element but iterates over none.
    logging.critical("Should not be here. argument_map_props=%s", str(argument_map_props))
def _display_class_objects_no_jinja(dict_subj_prop_obj):
    """Yield, as successive HTML fragments, the table rows of a set of subjects.

    ``dict_subj_prop_obj`` maps each subject to a dictionary which maps each
    predicate to the list of its objects. One ``<tr>`` is generated per
    (subject, predicate, object) triple:

    * subjects are sorted by their title,
    * predicates are naturally sorted,
    * objects are sorted by their title.

    The subject cell and the predicate cell are written only on the first row
    they apply to, with a ``rowspan`` covering the following rows. Predicates
    listed in ``_list_props_td_double_col_span`` have no predicate cell: their
    object cell spans two columns instead.

    This is a generator: it only yields strings and writes nothing itself.
    """
    tuples_subjects_list = []
    for a_subj in dict_subj_prop_obj:
        subj_str = str(a_subj)
        subj_title, entity_graphic_class, entity_id = lib_naming.ParseEntityUri(subj_str)
        if subj_title:
            # The intention is to detect a specific test case with accented characters.
            # A membership test is mandatory: str.find() returns -1 when the substring
            # is absent, which is truthy, so testing its result directly discarded
            # every subject whose title starts with 'Y'.
            if subj_title[0] == 'Y' and "Boulogne" in subj_title:
                logging.debug("_display_class_objects_no_jinja subj_str=%s", subj_str)
                logging.debug("_display_class_objects_no_jinja subj_title=%s", subj_title)
                continue
        else:
            logging.debug("NO TITLE FOR %s", subj_str)
        tuples_subjects_list.append(
            (a_subj, subj_str, subj_title, entity_graphic_class, entity_id))

    # Sorted by the title of the subject, which is the third value of the tuple.
    lib_util.natural_sort_list(tuples_subjects_list, key=lambda tup: tup[2])

    # Apparently, a problem is that "%" gets transformed into an hexadecimal number,
    # preventing decoding.
    def _custom_decode_hex(the_str):
        the_str = lib_util.survol_unescape(the_str)
        return the_str.replace("%25", "%").replace("%2F", "/").replace(
            "%5C", "\\").replace("%3A", ":")

    # Kept as a switch: when False, links are displayed without MIME type detection.
    disp_mime_urls = True

    # Now it iterates on the sorted list.
    # This reuses all the intermediate values.
    for a_subj, subj_str, subj_title, entity_graphic_class, entity_id in tuples_subjects_list:
        # FIXME: This is a specific test to catch a specific condition...
        if (a_subj.find("Boulogne") >= 0
                or subj_str.find("Boulogne") >= 0
                or subj_title.find("Boulogne") >= 0):
            logging.debug("a_subj=%s", a_subj)
            logging.debug("subj_str=%s", subj_str)
            logging.debug("subj_title=%s", subj_title)
            continue

        dict_pred = dict_subj_prop_obj[a_subj]

        # Total number of lines: it is the rowspan of the subject cell.
        cnt_preds = 0
        for a_pred in dict_pred:
            lst_objs = dict_pred[a_pred]
            cnt_preds += len(lst_objs)

        must_write_col_one_subj = True

        subj_str_with_mode = _url_in_html_mode(subj_str)

        # The predicates, i.e. the properties associated a subject with an object,
        # must be alphabetically sorted.
        for a_pred in lib_util.natural_sorted(dict_pred):
            lst_objs = dict_pred[a_pred]

            pred_str = lib_exports.AntiPredicateUri(str(a_pred))
            cnt_objs = len(lst_objs)
            must_write_col_one_pred = True

            # The objects must be sorted by title.
            lst_tuples_objs = []
            for an_obj in lst_objs:
                obj_str = str(an_obj)
                obj_str = _custom_decode_hex(obj_str)
                obj_title = lib_naming.ParseEntityUri(obj_str)[0]
                lst_tuples_objs.append((an_obj, obj_str, obj_title))

            # Sorted by the title of the object, which is the third value of the tuple.
            lib_util.natural_sort_list(lst_tuples_objs, key=lambda tup: tup[2])

            for an_obj, obj_str, obj_title in lst_tuples_objs:
                # FIXME: This is a specific test to catch a specific condition...
                if (an_obj.find("Boulogne") >= 0
                        or obj_str.find("Boulogne") >= 0
                        or obj_title.find("Boulogne") >= 0):
                    logging.debug("an_obj=%s", an_obj)
                    logging.debug("obj_str=%s", obj_str)
                    logging.debug("obj_title=%s", obj_title)
                    continue

                # Nasty encoding errors. Probably the string should have been encoded before.
                if lib_util.isPlatformWindows and not lib_util.is_py3:
                    try:
                        # Only checks that the title is plain ascii: the result is not used.
                        obj_title.decode('ascii')
                    except UnicodeDecodeError:
                        logging.debug("obj_title=%s", obj_title)
                        obj_title = obj_title.decode('cp1252')

                yield '<tr>'

                if must_write_col_one_subj:
                    yield (
                        '<td valign="top" rowspan="%s"><a href="%s">%s</a></td>'
                        % (str(cnt_preds), subj_str_with_mode, subj_title))
                    must_write_col_one_subj = False

                if must_write_col_one_pred:
                    if a_pred not in _list_props_td_double_col_span:
                        yield '<td valign="top" rowspan="%s">%s</td>' % (
                            str(cnt_objs), pred_str)
                    must_write_col_one_pred = False

                if a_pred in _list_props_td_double_col_span:
                    col_span = 2
                else:
                    col_span = 1

                yield '<td colspan="%d">' % col_span
                if disp_mime_urls:
                    if lib_kbase.IsLink(an_obj):
                        obj_str_clean = lib_util.UrlNoAmp(obj_str)
                        mime_type = lib_mime.get_mime_type_from_url(obj_str_clean)
                        if mime_type:
                            if mime_type.startswith("image/"):
                                # Images are displayed as clickable thumbnails.
                                yield (
                                    """<a href="%s"><img src="%s" alt="%s" height="42" width="42"></a>"""
                                    % (obj_str, obj_str, obj_title))
                            else:
                                yield """<a href="%s">%s</a>""" % (obj_str, obj_title)
                        else:
                            url_with_mode = lib_util.url_mode_replace(obj_str, "html")
                            yield """<a href="%s">%s</a>""" % (url_with_mode, obj_title)
                    else:
                        yield '%s' % obj_str
                else:
                    if lib_kbase.IsLink(an_obj):
                        url_with_mode = _url_in_html_mode(obj_str)
                        yield '<a href="%s">%s</a>' % (url_with_mode, obj_title)
                    else:
                        yield '%s' % obj_str

                yield "</td>"
                yield "</tr>"
def _display_level_table(subj, depth_menu=1):
    """Yield the HTML fragments of the nested table of scripts rooted at ``subj``.

    The properties of ``subj`` are read from ``dict_scripts``. If ``subj`` has
    a unique title (see the nested ``extract_title_from_map_props``), only its
    own cell is written. Otherwise each of its properties gets a cell holding
    a sub-table, where every object is displayed by a recursive call.

    ``depth_menu`` is the recursion depth: it is incremented at each level and
    the recursion is abandoned when it exceeds 10.

    Every exit path closes the tags it has opened, so the generated markup is
    balanced even when the subject is unknown or the depth limit is hit.

    Top-level should always be none.
    TODO: Have another version which formats all cells the same way.
    For this, have a first pass which counts, at each node, the number of sub-nodes.
    Then a second pass which uses these counts and the current depth,
    to calculate the rowspan and colspan of each cell.
    Although elegant, it is not guaranteed to work.
    """
    yield '<table class="scripts_tree_class">'
    try:
        map_props = dict_scripts[subj]
    except KeyError:
        # Unknown subject: nothing to display, but the table opened above must be closed.
        yield "</table>"
        return

    # Beware of :
    # <type 'exceptions.RuntimeError'>: maximum recursion depth exceeded while calling a Python object
    #
    # Which happens if None is always returned.
    # http://mymachine:8000/survol/entity.py?edimodargs_Handle=6744&Show+all+scripts=True&edimodtype=CIM_Process&xid=CIM_Process.Handle%3D6744&mode=html
    # An invalid PID does the same so it is easy to reproduce
    def extract_title_from_map_props(argument_map_props):
        if len(argument_map_props) != 1:
            return None
        for one_prop in argument_map_props:
            lst_str = argument_map_props[one_prop]
            # This is to prevent an infinite recursion if a title cannot be found.
            # This happens when displaying all scripts with "Show all scripts" flag.
            if one_prop == pc.property_error:
                return str(lst_str)

            if one_prop != pc.property_information:
                return None

            if len(lst_str) != 1:
                return None
            ret_str = lst_str[0]
            if lib_kbase.IsLink(ret_str):
                return None
            return str(ret_str)
        logging.critical("Should not be here. argument_map_props=%s", str(argument_map_props))

    yield '<tr>'

    depth_menu += 1

    if depth_menu > 10:
        logging.critical(
            " FIXME: Infinite loop when the __doc__ string of a script is the same "
            "as the title string of its Usable() function. ")
        # FIXME: Cannot find the problem.
        # The row and the table opened above are closed before giving up.
        yield '</tr>'
        yield "</table>"
        return

    logging.info("map_props=%s", map_props)
    subj_uniq_title = extract_title_from_map_props(map_props)
    logging.info("subj_uniq_title=%s depth_menu=%d", subj_uniq_title, depth_menu)

    if subj:
        subj_str = str(subj)
        yield '<td valign="top" rowspan="%d" class="scripts_tree_class">' % len(map_props)
        if lib_kbase.IsLink(subj):
            url_with_mode = _url_in_html_mode(subj_str)
            if subj_uniq_title:
                subj_uniq_title_not_none = subj_uniq_title
            else:
                subj_uniq_title_not_none = "No title"
            yield '<a href="' + url_with_mode + '">' + subj_uniq_title_not_none + "</a>"
        else:
            yield subj_str
        yield "</td>"

    if not subj_uniq_title:
        # No unique title: the sub-nodes of each property are displayed in a nested table.
        logging.info("len(map_props)=%d", len(map_props))
        for one_prop in map_props:
            lst_objs = map_props[one_prop]
            logging.info("one_prop=%s len(lst_objs)=%d", one_prop, len(lst_objs))

            yield '<td class="scripts_tree_class">'
            yield '<table>'

            for one_obj in lst_objs:
                if one_obj is None:
                    continue
                yield '<tr>'
                yield '<td class="scripts_tree_class">'
                try:
                    logging.info("depth_menu=%d", depth_menu)
                    # No "yield from" here: the file still has Python 2 code paths.
                    for table_lin_html in _display_level_table(one_obj, depth_menu):
                        yield table_lin_html
                except KeyError:
                    yield "Script error: " + str(one_obj)
                yield '</td>'
                yield '</tr>'
            yield '</table>'
            yield '</td>'

    yield '</tr>'
    yield "</table>"
def output_rdf_graph_as_json_d3(page_title, error_msg, parameters, grph):
    """
    Transforms a RDF graph into a JSON document.

    The triples of ``grph`` are converted to a dictionary with the keys
    "page_title", "nodes" and "links", serialized with ``json.dumps`` and
    handed to ``_write_json_header``. The result is meant for the Javascript
    interface to Survol, which is based on D3.

    ``error_msg`` and ``parameters`` are not used by this function.
    """
    # Must be reset to zero between several executions, when run by WSGI.
    global _node_json_number
    _node_json_number = 0

    # The same node may appear in several triples: its Json object is created once only.
    json_nodes_cache = {}

    def _json_node_for(rdf_node):
        if rdf_node not in json_nodes_cache:
            json_nodes_cache[rdf_node] = NodeJson(rdf_node)
        return json_nodes_cache[rdf_node]

    links = []
    for subj, pred, obj in grph:
        # This applies only to entity.py : In rendering based on Json, scripts are not
        # displayed as nodes, but in hierarchical menus. The node must not appear at all.
        # TODO: Should probably also eliminate pc.property_rdf_data_nolist2 etc ... See lib_client.
        if pred == pc.property_script:
            logging.debug("continue subj=%s obj=%s", subj, obj)
            continue

        # Normal data scripts are not accepted.
        # This should apply only to file_directory.py and file_to_mime.py
        if not (_script_for_json(subj) and _script_for_json(obj)):
            continue

        subject_json = _json_node_for(subj)
        source_id = subject_json.m_survol_url
        short_prop_name = lib_exports.PropToShortPropNam(pred)

        # TODO: BUG: If several nodes for the same properties, only the last one is kept.
        if lib_kbase.IsLink(obj):
            object_json = _json_node_for(obj)
            links.append({
                'source': source_id,
                'target': object_json.m_survol_url,
                'survol_link_prop': short_prop_name
            })
            # TODO: Add the name corresponding to the URL, in m_info_dict so that some elements
            # of the tooltip would be clickable. On the other hand, one just need to merge
            # the nodes relative to the object, by right-clicking.
        elif lib_kbase.IsLiteral(obj):
            literal_value = obj.value
            if pred == pc.property_information:
                try:
                    info_text = str(literal_value)
                except UnicodeEncodeError:
                    # 'ascii' codec can't encode character u'\xf3' in position 17: ordinal not in range(128)
                    # https://stackoverflow.com/questions/9942594/unicodeencodeerror-ascii-codec-cant-encode-character-u-xa0-in-position-20
                    info_text = literal_value.encode('utf-8')
                subject_json.m_info_list.append(info_text)
            elif isinstance(literal_value, (six.integer_types, six.string_types)):
                subject_json.m_info_dict[short_prop_name] = literal_value
            else:
                # If the value cannot be serializable to JSON, only its type name is kept.
                subject_json.m_info_dict[short_prop_name] = type(literal_value).__name__
        else:
            raise Exception(__file__ + " Cannot happen here")

    # Now, this creates the nodes sent as json objects.
    # Each node is stored at the position given by its own index.
    nodes = [None] * len(json_nodes_cache)
    for json_node in json_nodes_cache.values():
        # The URL must not contain any HTML entities when in a XML or SVG document,
        # and therefore must be escaped. Therefore they have to be unescaped when transmitted in JSON.
        # This is especially needed for RabbitMQ because the parameter defining its connection name
        # has the form: "Url=LOCALHOST:12345,Connection=127.0.0.1:51748 -> 127.0.0.1:5672"
        # HTTP_MIME_URL
        unescaped_label = lib_util.survol_unescape(json_node.m_label)  # MUST UNESCAPE HTML ENTITIES !

        # TODO: Use the same object for lookup and Json.
        nodes[json_node.m_index] = {
            'id': json_node.m_survol_url,  # Required by D3
            'name': unescaped_label,
            # Theoretically, this URL should be HTML unescaped then CGI escaped.
            'survol_url': json_node.m_survol_url,  # Duplicate of 'id'
            'survol_universal_alias': json_node.m_survol_universal_alias,
            'survol_fill': json_node.m_color,
            'entity_class': json_node.m_class,  # TODO: Maybe not needed because also in the URL ?
            'survol_info_list': json_node.m_info_list,
            'survol_info_dict': json_node.m_info_dict
        }

    # This is the graph displayed by D3.
    d3_graph = {"page_title": page_title, "nodes": nodes, "links": links}
    _write_json_header(json.dumps(d3_graph, indent=2))
def Rdf2Dot(grph, logfil, stream, CollapsedProperties):
    """Write the nodes and edges of a RDF graph to ``stream``, in DOT syntax.

    Parameters:
        grph: The RDF graph. It is iterated (and sorted) as triples
            ``(subject, property, object)``, tested with ``in``, and its
            ``compute_qname`` method is used to shorten property names.
        logfil: Object with a ``write`` method, receiving timestamped progress messages.
        stream: Object with a ``write`` method, receiving the DOT output.
        CollapsedProperties: The properties whose objects must be displayed
            as the rows of one HTML table per subject, instead of individual nodes.

    The processing is done in several passes:

    1. Triples with a collapsed property are grouped per property and subject.
    2. The other triples are written as edges if their object is a link,
       or stored as fields of their subject otherwise.
    3. One HTML table is written per collapsed property and subject.
    4. The remaining nodes are written with their literal fields.

    The function ends by writing a closing brace: the opening of the graph is
    not written here (presumably done by the caller - to be confirmed).
    """
    fieldsSet = collections.defaultdict(list)

    # This maps RDFLIB nodes to DOT label names.
    dictRdf2Dot = {}

    def _escape_html(text):
        # cgi.escape() was removed in Python 3.8.
        # html.escape(quote=False) escapes exactly the same characters: & < >
        try:
            from html import escape
        except ImportError:
            # Python 2 has no html.escape.
            return cgi.escape(text)
        return escape(text, quote=False)

    # This returns the DOT label of a RDFLIB, and creates a new one if necessary.
    def RdfNodeToDotLabel(x):
        try:
            return dictRdf2Dot[x]
        except KeyError:
            nodelabel = "nd_%d" % len(dictRdf2Dot)
            dictRdf2Dot[x] = nodelabel
            return nodelabel

    # The QName is an abbreviation of URI reference with the namespace function for XML.
    # Edge label.
    # Transforms "http://primhillcomputers.com/ontologies/ppid" into "ppid"
    # TODO: Beware, a CGI parameter might be there. CGIPROP
    def qname(x, grph):
        try:
            q = grph.compute_qname(x)
            # q[0] is the shortened namespace "ns"
            # Could return q[0] + ":" + q[2]
            return q[2]
        except Exception:
            # Best effort: if the name cannot be shortened, it is used as is.
            return x

    # The input is any Python object.
    # This returns a simple object which can be transformed into a string.
    # If the input is a container, it returns a HTML table.
    def FormatElementAux(val, depth=0):
        if val is None:
            return ""

        # Values which can be converted to a number are returned unchanged.
        try:
            int(val)
            return val
        except Exception:
            pass

        try:
            float(val)
            return val
        except Exception:
            pass

        if isinstance(val, dict):
            subTable = ""
            # TODO: Consider using six.iteritems.
            for subKey, subVal in val.items():
                subTd = FormatPair(subKey, subVal, depth + 1)
                if subTd:
                    subTable += "<tr>%s</tr>" % subTd
            return "<table border='0'>%s</table>" % subTable

        # Note: Recursive list are not very visible.
        if isinstance(val, (list, tuple)):
            # If this is an empty list or tuple.
            if not val:
                # Empty set character in UTF8
                return "{" + "∅" + "}"
            if depth % 2 == 0:
                # Even depth: one element per row.
                subTable = ""
                for subElement in val:
                    subTd = FormatElement(subElement, depth + 1)
                    subTable += "<tr>%s</tr>" % subTd
                return "<table border='0'>%s</table>" % subTable
            else:
                # Odd depth: all elements in a single row.
                subTable = ""
                for subElement in val:
                    subTd = FormatElement(subElement, depth + 1)
                    subTable += subTd
                return "<table border='0'><tr>%s</tr></table>" % subTable

        try:
            decodVal = json.loads(val)
            return FormatElementAux(decodVal, depth + 1)
        except ValueError:
            # It is a string which cannot be converted to json.
            val = _escape_html(val)
            return lib_exports.StrWithBr(val)
        except TypeError:
            # "Expected a string or buffer"
            # It is not a string, so it could be a datetime.datetime
            val = _escape_html(str(val))
            return lib_exports.StrWithBr(val)
        return "FormatElement failure"

    def FormatElement(val, depth=0):
        if lib_kbase.IsLink(val):
            valTitle = "FormatElement " + ExternalToTitle(val)
            valTitleUL = lib_exports.DotUL(valTitle)
            return "<td align='left' balign='left' border='0' href='%s'>%s</td>" % (
                val, valTitleUL)

        resStr = FormatElementAux(val, depth)
        return "<td align='left' balign='left' border='0'>%s</td>" % resStr

    # Prints a key-value pair as two TD tags, to go in an HTML table.
    def FormatPair(key, val, depth=0):
        colFirst = "<td align='left' valign='top' border='0'>%s</td>" % lib_exports.DotBold(key)
        colSecond = FormatElement(val, depth + 1)
        return colFirst + colSecond

    # Display in the DOT node the list of its literal properties.
    # It returns a dictionary indexed by the position of each property in the sorted fields.
    def FieldsToHtmlVertical(grph, the_fields):
        props = {}
        idx = 0
        # TODO: The sort must put at first, some specific keys.
        # For example, sources_top/nmap_run.py, the port number as an int (Not a string)
        # Also, filenames, case-sensitive or not.
        for (key, val) in sorted(the_fields):
            # This should come first, but it does not so we prefix with "----". Hack !
            if key == pc.property_information:
                # Completely left-aligned. Col span is 2, approximate ratio.
                val = lib_exports.StrWithBr(val, 2)
                currTd = "<td align='left' balign='left' colspan='2'>%s</td>" % val
            elif IsFlatProperty(key):
                urlTxt = lib_naming.ParseEntityUri(val)[0]
                splitTxt = lib_exports.StrWithBr(urlTxt, 2)
                # The text of the link must be underlined.
                currTd = '<td href="%s" align="left" colspan="2">%s</td>' % (
                    val, lib_exports.DotUL(splitTxt))
            else:
                key_qname = qname(key, grph)
                # This assumes: type(val) == 'rdflib.term.Literal'
                if lib_kbase.IsLiteral(val):
                    currTd = FormatPair(key_qname, val.value)
                else:
                    currTd = FormatPair(key_qname, val)

            props[idx] = currTd
            idx += 1
        return props

    # This lists the labels of the objects which appear in the tables:
    # For each of them, it gives, per property, the label of the subject owning the table.
    dictCollapsedObjectLabelsToSubjectLabels = {}

    # This contains, for each node (subject), the related node (object) linked
    # to it with a property to be displayed in tables instead of individual nodes.
    dictPropsCollapsedSubjectsToObjectLists = {}

    for collapsPropObj in CollapsedProperties:
        collapsPropNam = lib_exports.PropToShortPropNam(collapsPropObj)
        dictPropsCollapsedSubjectsToObjectLists[collapsPropNam] = collections.defaultdict(list)

    # A first pass builds the tree of a given property.
    # If it is not a DAG, too bad, there will be one extra link.
    # The first idea was to build DOT records, but the nodes in a record cannot have
    # a URL, so this does not work: Hence HTML-like labels with PORT and PORTPOS.
    # This also simplifies the display of huge lists: modules, files etc...
    # And they can be sorted because there is an order.
    # So it must be easy to add properties which are displayed this way.

    logfil.write(TimeStamp() + " Rdf2Dot: First pass\n")

    # New intermediary node created.
    def CollapsedLabel(collapsPropNam, subjNam):
        return "R_" + collapsPropNam + "_" + subjNam

    # Called mainly from entity.py.
    # If S points to O, transforms "O" => "R_S:O"
    # Accordingly we create an edge: "S" => "R_S"
    # It raises KeyError if the node is not in a table for this property.
    def SubjNamFromCollapsed(collapsPropNam, subjNam):
        collapsedSubjNam = dictCollapsedObjectLabelsToSubjectLabels[subjNam][collapsPropNam]
        newSubjNam = CollapsedLabel(collapsPropNam, collapsedSubjNam) + ":" + subjNam
        return newSubjNam

    # This is sorted so the result is deterministic. Very small performance impact.
    # Any order will do as long as the result is always the same.
    sortedGrph = sorted(grph)

    # TODO: Loop only on the "collapsed" properties, the ones whose objects must be displayed
    # in tables, instead of links - if only they have a single subject. Otherwise it cannot work.
    for subj, prop, obj in sortedGrph:

        # Objects linked with these properties, are listed in a table,
        # instead of distinct nodes in a graph.
        if prop in CollapsedProperties:
            # TODO: We lose the property, unfortunately. Should make a map: subject => prop => object ?
            subjNam = RdfNodeToDotLabel(subj)

            propNam = lib_exports.PropToShortPropNam(prop)
            dictPropsCollapsedSubjectsToObjectLists[propNam][subj].append(obj)

            # Maybe we already entered it: Not a problem.
            objNam = RdfNodeToDotLabel(obj)

            # CollapsedProperties can contain only properties which define a tree,
            # as visibly the "object" nodes can have one ancestor only.
            # TODO: We should check if a node appears in two tables,
            # associated to two properties and/or two parent node.
            dictCollapsedObjectLabelsToSubjectLabels.setdefault(objNam, dict())[propNam] = subjNam

    # For getting the node of an object, as it might be in a table.
    def RdfNodeToDotLabelExtended(obj, prop):
        objNam = RdfNodeToDotLabel(obj)

        try:
            dictOfProps = dictCollapsedObjectLabelsToSubjectLabels[objNam]
        except KeyError:
            # This node is not in a table.
            return objNam

        # Let's hope there is only one collapsed property for this node. Otherwise, it means
        # that this node would be displayed in two different tables. It happened...
        if prop is not None:
            propNam = lib_exports.PropToShortPropNam(prop)
            try:
                # Maybe this property is not collapsed.
                subjNam = dictOfProps[propNam]
            except KeyError:
                prop = None

        # Maybe the property is not known, if the node is the subject.
        # Or the property is not collapsed.
        if prop is None:
            # In Python3, keys() is an iterable. No need to create a list.
            for propNam in dictOfProps.keys():
                break
            # First property available.
            subjNam = dictOfProps[propNam]

        newObjNam = CollapsedLabel(propNam, subjNam) + ":" + objNam
        return newObjNam

    # Now we know that we have seen all nodes in a collapsed property.
    for subj, prop, obj in sortedGrph:
        if prop in CollapsedProperties:
            continue

        # Maybe the subject node belongs to a table, but the property is not known.
        subjNam = RdfNodeToDotLabelExtended(subj, None)
        if lib_kbase.IsLink(obj):

            prp_col = lib_properties.prop_color(prop)

            # TODO: GENERALIZE THIS TO ALL COMMUTATIVE PROPERTIES.
            # THAT IS: PROPERTIES WHOSE TRIPLES ARE MERGED WHEN
            # WE HAVE AT THE SAME TIME: (Subj,Prop,Obj) and (Obj,Prop,Subj).
            # WHEN THIS HAPPENS, THE ARROW MUST BE BIDIRECTIONAL.
            # TODO: All commutative relation have bidirectional arrows.
            # At the moment, only one property can be bidirectional.
            if prop == pc.property_socket_end:

                # BEWARE, MAYBE THIS IS A PORT INTO A TABLE. SO IT HAS TO BE PREFIXED BY THE RECORD NAME.
                objNam = RdfNodeToDotLabelExtended(obj, prop)
                if (obj, prop, subj) in grph:
                    # The symmetric triple exists too: The bidirectional edge is written once only.
                    if subjNam < objNam:
                        stream.write(
                            pattEdgeBiDir % (subjNam, objNam, prp_col, qname(prop, grph)))
                else:
                    # One connection only: We cannot see the other.
                    stream.write(pattEdgeOrien % (subjNam, objNam, prp_col, qname(prop, grph)))
            elif prop in [
                pc.property_rdf_data_nolist1,
                pc.property_rdf_data_nolist2,
                pc.property_rdf_data_nolist3
            ]:
                # TODO: Is it enough to test if obj is a URL of the form "entity.py" ???
                # HTML and images urls can be "flattened" because the nodes have no descendants.
                # Do not create a node for this.
                # MIME links displayed in the same column as sub-directory.
                # Also, it might be enough to test if the object has the form "entity.py"
                # because it has no descendant.
                # TODO: CGIPROP: Can it have several html or sub-rdf ?? It is necessary !
                fieldsSet[subj].append((prop, obj))
            else:
                objNam = RdfNodeToDotLabelExtended(obj, prop)
                # If subjNam is in a table of a collapsed property, it must be prefixed
                # with the name of the record, that is, ITS subjNam + "_table_rdf_data:".
                try:
                    # Syntax with colon required by DOT.
                    propNam = lib_exports.PropToShortPropNam(prop)
                    subjNam = SubjNamFromCollapsed(propNam, subjNam)
                except KeyError:
                    # The subject is not in a table for this property: Its label is unchanged.
                    pass

                stream.write(pattEdgeOrien % (subjNam, objNam, prp_col, qname(prop, grph)))
        elif obj is None:
            # No element created in nodes[]
            fieldsSet[subj].append((prop, "Null"))
        else:
            # For Literals. No element created in nodes[]
            # Literals can be processed according to their type.
            # Some specific properties cannot have children so they can be stored as literals?
            # For properties like "pid", should the link to the process be displayed in the table ???
            # The URLs of some properties are displayed in columns.
            # Or, for these properties, should an entity.py be recreated ??
            fieldsSet[subj].append((prop, obj))

    logfil.write(TimeStamp() + " Rdf2Dot: Replacing vectors: CollapsedProperties=%d.\n"
                 % (len(CollapsedProperties)))

    # Now, replaces each vector by a single object containg a HTML table.
    # TODO: Unfortunately, the prop is lost, which implies that all children are mixed together.
    def ProcessCollapsedProperties(propNam):
        dictCollapsedSubjectsToObjectLists = dictPropsCollapsedSubjectsToObjectLists[propNam]
        logfil.write(TimeStamp() + " Rdf2Dot: dictCollapsedSubjectsToObjectLists=%d.\n"
                     % (len(dictCollapsedSubjectsToObjectLists)))

        for subjUrl, nodLst in lib_util.six_iteritems(dictCollapsedSubjectsToObjectLists):
            subjNam = RdfNodeToDotLabel(subjUrl)

            subjNamTab = CollapsedLabel(propNam, subjNam)
            try:
                # TODO: This logic sometimes adds a useless level of node. More exactly,
                # the node is represented by two graphic objects:
                # * One which has the scripts.
                # * Another one with the HTML list built here.
                # => Could they be merged ??
                # In WritePatterns: Add the node name to the label.
                subjNam = SubjNamFromCollapsed(propNam, subjNam)
            except KeyError:
                pass

            # Point from the subject to the table containing the objects.
            stream.write(pattEdgeOrien % (subjNam, subjNamTab, "GREEN", propNam))

            (labText, subjEntityGraphicClass, entity_id) = lib_naming.ParseEntityUri(subjUrl)

            # Problem with the fields:
            # A first pass finds the fields, detects the column names, gives them an order and an index.
            # A second pass builds the lines.
            # So all columns are ordered.
            # For each field: They are taken in the order of the header, with an empty column for a hole.
            # No need to sort the fields, but a complete list of fields is needed first, in the right order.
            # THIS ASSUMES THAT THE KEYS ARE UNIQUE IN FIELDSSET.
            # IF THIS IS NOT POSSIBLE, LISTS WILL BE USED. BUT THIS CONSTRAINT SIMPLIFIES THE DISPLAY.

            # It is a pity that the objects are scanned twice only to get the names of the fields.
            # TODO: HEURISTIC: It could stop at the first ten. Or sort before ?
            # Or take the columns of the first line, and start again if this does not work.

            # Unique columns of the descendant of this subject.
            rawFieldsKeys = set()
            for obj in nodLst:
                # One table per node.
                rawFieldsKeys.update(fld[0] for fld in fieldsSet[obj])

            # Mandatory properties must come at the beginning of the columns of the header,
            # with first indices.
            # BUG: If html is removed from this list although there are values, columns are missing.
            # If there is html or RDF, it must come first.
            fieldsKeysOrdered = []
            for fldPriority in FlatPropertertiesList:
                try:
                    # Must always be appended. BUT IF THERE IS NO html_data, IS IT WORTH ?
                    # TODO: Remove if not HTML and no sub-rdf. CGIPROP

                    # If the property is never used, exception then next property.
                    rawFieldsKeys.remove(fldPriority)
                    fieldsKeysOrdered.append(fldPriority)
                except KeyError:
                    pass

            # This one is always removed because its content is concatenated at the first column.
            for fldToRemove in [pc.property_information]:
                try:
                    rawFieldsKeys.remove(fldToRemove)
                except KeyError:
                    pass

            # Appends rest of properties, sorted.
            fieldsKeys = fieldsKeysOrdered + sorted(rawFieldsKeys)

            # This assumes that the header columns are sorted.
            # The indices start at one because the first column is for the key.
            keyIndices = {
                nameKey: indexKey
                for (indexKey, nameKey) in enumerate(fieldsKeys, 1)
            }

            numberKeys = len(keyIndices) + 1

            # Apparently, no embedded tables.
            dictHtmlLines = dict()
            for objUri in nodLst:
                # One table per node.
                subObjId = RdfNodeToDotLabel(objUri)

                # Beware "\L" which should not be replaced by "<TABLE>" but this is not the right place.
                # NOTE(review): As written, this replacement changes nothing. Presumably the
                # second argument was meant to be an HTML entity: To be confirmed before
                # changing it, because the ampersands might be processed again downstream.
                subNodUri = objUri.replace('&', '&')

                try:
                    (subObjNam, subEntityGraphicClass, subEntityId) = lib_naming.ParseEntityUriShort(objUri)
                except UnicodeEncodeError:
                    sys.stderr.write("UnicodeEncodeError error:%s\n" % (objUri))
                    (subObjNam, subEntityGraphicClass, subEntityId) = (
                        "Utf problem1", "Utf problem2", "Utf problem3")

                # If this is a script, always displayed on white, even if related to a specific entity.
                # THIS IS REALLY A SHAME BECAUSE WE JUST NEED THE ORIGINAL PROPERTY.
                if objUri.find("entity.py") < 0:
                    objColor = "#FFFFFF"
                else:
                    objColor = lib_patterns.EntityClassToColor(subEntityGraphicClass)
                # This lighter color for the first column.
                objColorLight = lib_patterns.ColorLighter(objColor)

                # Some colors a bit clearer ? Or take the original color of the class ?
                td_bgcolor_plain = '<td BGCOLOR="%s" ' % objColor
                td_bgcolor_light = '<td BGCOLOR="%s" ' % objColorLight
                td_bgcolor = td_bgcolor_plain

                # Some columns might not have a value. The first column is for the key.
                columns = [td_bgcolor + " ></td>"] * numberKeys

                # Just used for the vertical order of lines, one line per object.
                title = ""

                # TODO: CGIPROP. This is not a dict, the same key can appear several times ?
                for (key, val) in fieldsSet[objUri]:
                    if key == pc.property_information:
                        # This can be a short string only.
                        title += val
                        continue

                    # TODO: This is hard-coded.
                    if IsFlatProperty(key):

                        # In fact, it might also be an internal URL with "entity.py"
                        if lib_kbase.IsLiteral(val):
                            if isinstance(val.value, (list, tuple)):
                                strHtml = FormatElementAux(val.value)
                                sys.stderr.write("val.value=%s\n" % strHtml)
                                tmpCell = td_bgcolor + 'align="left">%s</td>' % strHtml
                            else:
                                tmpCell = td_bgcolor + 'align="left">%s</td>' % val.value
                        else:
                            valTitle = lib_naming.ParseEntityUri(val)[0]

                            valTitleUL = lib_exports.DotUL(valTitle)
                            tmpCell = td_bgcolor + 'href="%s" align="left" >%s</td>' % (
                                val, valTitleUL)
                    else:
                        try:
                            # Numbers are right-aligned.
                            float(val)
                            tmpCell = td_bgcolor + 'align="right">%s</td>' % val
                        except Exception:
                            # Wraps the string if too long. Can happen only with a literal.
                            tmpCell = td_bgcolor + 'align="left">%s</td>' % lib_exports.StrWithBr(val)

                    idxKey = keyIndices[key]
                    columns[idxKey] = tmpCell

                if title:
                    title_key = title
                else:
                    title_key = subObjNam

                # Maybe the first column is a literal ?
                if subEntityId != "PLAINTEXTONLY":
                    # WE SHOULD PROBABLY ESCAPE HERE TOO.
                    columns[0] = td_bgcolor_light + 'port="%s" href="%s" align="LEFT" >%s</td>' % (
                        subObjId, subNodUri, title_key)
                else:
                    subNodUri = _escape_html(subNodUri)
                    columns[0] = td_bgcolor_light + 'port="%s" align="LEFT" >%s</td>' % (
                        subObjId, subNodUri)

                # Several scripts might have the same help text, so add a number.
                # "Title" => "Title"
                # "Title" => "Title/2"
                # "Title" => "Title/3" etc...
                # Beware that it is quadratic with the number of scripts with identical info.
                title_idx = 2
                title_uniq = title_key
                while title_uniq in dictHtmlLines:
                    title_uniq = "%s/%d" % (title_key, title_idx)
                    title_idx += 1

                # TODO: The order is based on the strings but should be based on the content. Example:
                # TODO: "(TUT_UnixProcess) Handle=10" comes before "(TUT_UnixProcess) Handle=2"
                # TODO: title_uniq should rather be the list of properties.
                # TODO: By clicking on the column names, we could change the order.
                dictHtmlLines[title_uniq] = "".join(columns)

            # Replace the first column by more useful information.
            numNodLst = len(nodLst)

            # TODO: Compute this once for all.
            # This uses the class of the last object of the list.
            eltNam = subEntityGraphicClass.split("/")[-1]
            if not eltNam:
                # TODO: This is not the right criteria. Must select if we are listing scripts.
                eltNam = "script"

            eltNamPlural = lib_grammar.ToPlural(eltNam, numNodLst)
            txtElements = "%d %s" % (numNodLst, eltNamPlural)
            header = '<td border="1">' + lib_exports.DotBold(txtElements) + "</td>"

            # TODO: Replace each column name with a link which sorts the line based on this column.
            # The order of columns could be specified with an extra cgi argument with the columns names.
            for key in fieldsKeys:
                columnTitle = qname(key, grph)
                columnTitle = columnTitle.replace("_", " ").capitalize()
                header += "<td border='1'>" + lib_exports.DotBold(columnTitle) + "</td>"
            # With an empty key, it comes first when sorting.
            dictHtmlLines[""] = header

            # MAYBE SHOULD BE DONE TWICE !!!!! SEE ALSO ELSEWHERE !!!!
            # NOTE(review): As written, this replacement changes nothing - see the remark above.
            subjUrlClean = subjUrl.replace('&', '&')

            # BEWARE: The shape of the record is the one of the subject.
            # Does this mean that objects of different types have the same color
            # if they are in the same relation with the same subject ?
            numFields = len(fieldsKeys) + 1

            # The label might be truncated
            if subjEntityGraphicClass:
                helpText = "List of " + subjEntityGraphicClass + " objects in " + labText
            else:
                helpText = "List of scripts in " + labText

            # TODO: The title and the content are not necessarily of the same class.
            # labTextWithBr is the first line of the table containing nodes linked with the
            # same property. Unfortunately we have lost this property.
            labText = lib_exports.TruncateInSpace(labText, 30)
            labTextWithBr = lib_exports.StrWithBr(labText)
            labTextWithBr += ": " + propNam

            if entity_id == "PLAINTEXTONLY":
                subjUrlClean = ""

            # This color is the table's contour.
            lib_patterns.WritePatterned(
                stream,
                subjEntityGraphicClass,
                subjNamTab,
                helpText,
                '"#000000"',
                subjUrlClean,
                numFields,
                labTextWithBr,
                dictHtmlLines)

            # TODO: Avoid repeating the same value in a column, by comparing a line with the next one.
            # TODO: If a cell is identical up to a delimiter, same thing, replace with '"'.

    if CollapsedProperties:
        for collapsedProp in CollapsedProperties:
            collapsedPropNam = lib_exports.PropToShortPropNam(collapsedProp)
            ProcessCollapsedProperties(collapsedPropNam)

    logfil.write(TimeStamp() + " Rdf2Dot: Display remaining nodes. dictRdf2Dot=%d\n" % len(dictRdf2Dot))

    # Now, display the normal nodes, which are not displayed in tables.
    for objRdfNode, objLabel in lib_util.six_iteritems(dictRdf2Dot):
        # TODO: Avoids this lookup.
        if objLabel in dictCollapsedObjectLabelsToSubjectLabels:
            continue

        objPropsAsHtml = FieldsToHtmlVertical(grph, fieldsSet[objRdfNode])

        # NOTE(review): As written, this replacement changes nothing - see the remark
        # in ProcessCollapsedProperties.
        labHRef = objRdfNode.replace('&', '&')

        try:
            # TODO: Problem here: The string is already encoded for HTML, so its parsing
            # TODO: ... is different from the parsing of a decoded URL. It is quoted then unquoted !
            (labText, objEntityGraphClass, entity_id) = lib_naming.ParseEntityUri(
                lib_util.urllib_unquote(objRdfNode))
        except UnicodeEncodeError:
            sys.stderr.write("UnicodeEncodeError error:%s\n" % (objRdfNode))
            # Without these placeholders, the names below are unbound for the first node,
            # or silently keep the values of the previous node.
            # The class is empty, like the class of the nodes which are not entities.
            (labText, objEntityGraphClass, entity_id) = ("Utf problem1", "", "Utf problem3")

        # WritePatterned receives an list of strings similar to "<td>jhh</td><td>jhh</td><td>jhh</td>"
        # This function adds <tr> and </tr> on both sides.
        # This avoids concatenations.

        # Ampersand are intentionnally doubled, because later on they are replaced twice.
        # That is, interpreted twice as HTML entities.
        # This might be temporary until we replace CGI arguments by genuine WMI Monikers.
        # NOTE(review): The entity below is not doubled, contrary to the comment: To be confirmed.
        labTextNoAmp = labText.replace("&amp;", " ")
        labTextNoAmp = labTextNoAmp.strip()
        labTextClean = lib_exports.StrWithBr(labTextNoAmp)

        if objEntityGraphClass:
            helpText = labTextNoAmp + " is a " + objEntityGraphClass
        else:
            if labTextClean.startswith("http"):
                helpText = "External URL " + labTextNoAmp
            else:
                helpText = "Script " + labTextNoAmp

        # This color is the object's contour.
        # Two columns because it encompasses the key and the value.
        lib_patterns.WritePatterned(
            stream,
            objEntityGraphClass,
            objLabel,
            helpText,
            '"#000000"',
            labHRef,
            2,
            labTextClean,
            objPropsAsHtml)

    logfil.write(TimeStamp() + " Rdf2Dot: Leaving\n")
    stream.write("}\n")
def DispClassObjects(dictSubjPropObj, dispMimeUrls=True):
    """Write an HTML table listing subjects, their predicates and their objects.

    One table row is written per object. The subject cell spans all rows of
    the subject, and the predicate cell spans all rows of the predicate.
    Subjects and objects are sorted by their title, predicates are sorted
    with lib_util.natural_sorted.

    :param dictSubjPropObj: Maps each subject to a dict which maps each of
        its predicates to the list of associated objects.
    :param dispMimeUrls: If True (the default, and the only behaviour before
        this flag became a parameter), links are displayed according to the
        MIME type of their URL, images being shown as thumbnails. If False,
        all links are displayed as plain anchors in HTML mode.
    """
    # Objects of these properties are displayed on two columns, without a predicate cell.
    listPropsTdDoubleColSpan = [
        pc.property_information,
        pc.property_rdf_data_nolist2,
        pc.property_rdf_data_nolist1,
    ]

    WrtAsUtf('<table class="class_objects">')

    # The subjects must be sorted by their title.
    lstTuplesSubjects = []
    for aSubj in dictSubjPropObj:
        subj_str = str(aSubj)
        subj_title, entity_graphic_class, entity_id = lib_naming.ParseEntityUri(subj_str)
        lstTuplesSubjects.append((aSubj, subj_str, subj_title, entity_graphic_class, entity_id))

    # Sorted by the title of the subject, which is the third value of the tuple.
    lib_util.natural_sort_list(lstTuplesSubjects, key=lambda tup: tup[2])

    # Now it iterates on the sorted list. This reuses all the intermediate values.
    for aSubj, subj_str, subj_title, entity_graphic_class, entity_id in lstTuplesSubjects:
        dictPred = dictSubjPropObj[aSubj]

        # "Graphic_shape","Graphic_colorfill","Graphic_colorbg","Graphic_border","Graphic_is_rounded"
        arrayGraphParams = lib_patterns.TypeToGraphParams(entity_graphic_class)
        colorClass = arrayGraphParams[1]

        # Total number of lines of this subject: It is the rowspan of the subject cell.
        cntPreds = sum(len(dictPred[aPred]) for aPred in dictPred)

        mustWriteColOneSubj = True
        subj_str_with_mode = UrlInHtmlMode(subj_str)

        # The predicates, i.e. the properties associated a subject with an object,
        # must be alphabetically sorted.
        for aPred in lib_util.natural_sorted(dictPred):
            lstObjs = dictPred[aPred]
            predStr = lib_exports.AntiPredicateUri(str(aPred))
            cntObjs = len(lstObjs)
            mustWriteColOnePred = True

            # This depends on the predicate only, so it is calculated once per predicate.
            isDoubleColSpan = aPred in listPropsTdDoubleColSpan
            colSpan = 2 if isDoubleColSpan else 1

            # The objects must be sorted by title.
            lstTuplesObjs = []
            for anObj in lstObjs:
                obj_str = DesHex(str(anObj))
                obj_title = lib_naming.ParseEntityUri(obj_str)[0]
                lstTuplesObjs.append((anObj, obj_str, obj_title))

            # Sorted by the title of the object, which is the third value of the tuple.
            lib_util.natural_sort_list(lstTuplesObjs, key=lambda tup: tup[2])

            for anObj, obj_str, obj_title in lstTuplesObjs:
                WrtAsUtf('<tr bgcolor="%s">' % colorClass)

                # The subject cell is written once, on the first row of the subject.
                if mustWriteColOneSubj:
                    WrtAsUtf(
                        '<td valign="top" rowspan="%s"><a href="%s">%s</a></td>'
                        % (str(cntPreds), subj_str_with_mode, subj_title))
                    mustWriteColOneSubj = False

                # The predicate cell is written once, on the first row of the predicate,
                # unless the object uses both columns.
                if mustWriteColOnePred:
                    if not isDoubleColSpan:
                        WrtAsUtf('<td valign="top" rowspan="%s">%s</td>' % (str(cntObjs), predStr))
                    mustWriteColOnePred = False

                WrtAsUtf('<td colspan="%d">' % (colSpan))
                if not lib_kbase.IsLink(anObj):
                    # Not a link: Its string is displayed as is.
                    WrtAsUtf('%s' % (obj_str))
                elif dispMimeUrls:
                    objStrClean = lib_util.UrlNoAmp(obj_str)
                    mimeType = lib_mime.GetMimeTypeFromUrl(objStrClean)
                    if mimeType:
                        if mimeType.startswith("image/"):
                            # Images are displayed as thumbnails pointing to themselves.
                            WrtAsUtf(
                                """<a href="%s"><img src="%s" alt="%s" height="42" width="42"></a>"""
                                % (obj_str, obj_str, obj_title))
                        else:
                            WrtAsUtf("""<a href="%s">%s</a>""" % (obj_str, obj_title))
                    else:
                        url_with_mode = lib_util.AnyUriModed(obj_str, "html")
                        WrtAsUtf("""<a href="%s">%s</a>""" % (url_with_mode, obj_title))
                else:
                    url_with_mode = UrlInHtmlMode(obj_str)
                    WrtAsUtf('<a href="%s">%s</a>' % (url_with_mode, obj_title))

                WrtAsUtf("</td>")
                WrtAsUtf("</tr>")

    WrtAsUtf(" </table>")
def DisplayLevelTable(subj, depthMenu=1):
    """Write the scripts of a subject, and recursively of its sub-nodes, as nested HTML tables.

    Top-level should always be none.

    The properties of the subject are read from the module-level dictionary
    dictScripts. If the subject is not a key of this dictionary, nothing is written.

    TODO: Have another version which formats all cells the same way.
    For this, have a first pass which counts, at each node, the number of sub-nodes.
    Then a second pass which uses these counts and the current depth,
    to calculate the rowspan and colspan of each cell.
    Although elegant, it is not guaranteed to work.

    :param subj: The node whose scripts are displayed.
    :param depthMenu: The current depth, incremented and passed to the recursive calls.
    """
    # The lookup is done before writing anything, otherwise an unknown subject
    # would leave an opening <table> tag without its closing tag.
    try:
        mapProps = dictScripts[subj]
    except KeyError:
        return

    def ExtractTitleFromMapProps(mapProps):
        """Return the title if the only property is a single, non-link, information text, else None."""
        if len(mapProps) != 1:
            return None
        for oneProp in mapProps:
            if oneProp != pc.property_information:
                return None

            lstStr = mapProps[oneProp]
            if len(lstStr) != 1:
                return None
            retStr = lstStr[0]
            if lib_kbase.IsLink(retStr):
                return None

            return str(retStr)

    WrtAsUtf('<table class="table_scripts_titles">')
    WrtAsUtf('<tr>')
    depthMenu += 1

    subj_uniq_title = ExtractTitleFromMapProps(mapProps)

    if subj:
        subj_str = str(subj)
        WrtAsUtf("<td valign='top' rowspan='%d'>" % len(mapProps))
        if lib_kbase.IsLink(subj):
            url_with_mode = UrlInHtmlMode(subj_str)
            subj_uniq_title_not_none = subj_uniq_title if subj_uniq_title else "No title"
            WrtAsUtf('<a href="' + url_with_mode + '">' + subj_uniq_title_not_none + "</a>")
        else:
            WrtAsUtf(subj_str)
        WrtAsUtf("</td>")

    # Without a unique title, the objects of each property are displayed in a sub-table.
    if not subj_uniq_title:
        for oneProp in mapProps:
            lstObjs = mapProps[oneProp]

            WrtAsUtf('<td>')
            WrtAsUtf('<table class="table_scripts_links">')

            for oneObj in lstObjs:
                if oneObj is None:
                    continue
                WrtAsUtf('<tr>')
                WrtAsUtf('<td>')
                # Only the lookup is guarded: A KeyError raised deeper in the recursion
                # must not be reported as an error of this object.
                try:
                    dictScripts[oneObj]
                except KeyError:
                    WrtAsUtf("Script error: " + str(oneObj))
                else:
                    DisplayLevelTable(oneObj, depthMenu)
                WrtAsUtf('</td>')
                WrtAsUtf('</tr>')
            WrtAsUtf('</table>')
            WrtAsUtf('</td>')

    WrtAsUtf('</tr>')
    WrtAsUtf("</table>")
def _rdf_graph_to_dot_stream(grph, logfil, stream, collapsed_properties, commutative_properties):
    """Write the edges and nodes of a RDF graph to a stream, in DOT syntax.

    Used for transforming into SVG format.
    If from entity.py, collapsed_properties = pc.property_directory,pc.property_script

    The processing is done in several passes:
    * A first pass on the triples with a collapsed property: Their objects
      are gathered per subject, to be displayed as rows of a table.
    * A pass on the other triples: Links become edges, flat properties,
      literals and null objects become fields of their subject.
    * One table is written per collapsed property and per subject.
    * The remaining nodes, which are not in a table, are written.
    It ends by writing the closing brace of the graph. The opening of the
    graph is not written here - presumably it is done by the caller.

    :param grph: The triples (subject, property, object). It is iterated on,
        sorted, and tested for the membership of triples.
    :param logfil: Receives timestamped progress messages, with its method write().
    :param stream: Receives the DOT output, with its method write().
    :param collapsed_properties: The properties whose objects are displayed
        in tables instead of individual nodes.
    :param commutative_properties: The properties displayed with a bidirectional
        arrow when a triple and its reverse are both in the graph.
    """
    # For each node, the list of pairs (property, value) displayed inside the node.
    fields_set = collections.defaultdict(list)

    # This maps RDFLIB nodes to DOT label names.
    dict_rdf2_dot = {}

    def _rdf_node_to_dot_label(x):
        """This returns the DOT label of a RDFLIB node, and creates a new one if necessary."""
        try:
            return dict_rdf2_dot[x]
        except KeyError:
            nodelabel = "nd_%d" % len(dict_rdf2_dot)
            dict_rdf2_dot[x] = nodelabel
            return nodelabel

    def _format_element_aux(val, depth=0):
        """Return a simple object which can be transformed into a string.

        The input is any Python object.
        If the input is a container, it returns a HTML table.
        """
        if val is None:
            return ""

        # Anything which can be converted to a number is returned unchanged.
        for to_number in (int, float):
            try:
                to_number(val)
            except (TypeError, ValueError, OverflowError):
                continue
            return val

        if isinstance(val, dict):
            sub_table = ""
            # TODO: Consider using six.iteritems.
            for sub_key, sub_val in val.items():
                sub_td = _format_pair(sub_key, sub_val, depth + 1)
                if sub_td:
                    sub_table += "<tr>%s</tr>" % sub_td
            return "<table border='0'>%s</table>" % sub_table

        # Note: Recursive list are not very visible.
        if isinstance(val, (list, tuple)):
            # If this is an empty list or tuple.
            if not val:
                # Empty set character in UTF8
                return "{" + "∅" + "}"
            if depth % 2 == 0:
                # Even depth: One element per row.
                sub_table = "".join(
                    "<tr>%s</tr>" % _format_element(sub_element, depth + 1) for sub_element in val)
                return "<table border='0'>%s</table>" % sub_table
            # Odd depth: All elements on the same row.
            sub_table = "".join(_format_element(sub_element, depth + 1) for sub_element in val)
            return "<table border='0'><tr>%s</tr></table>" % sub_table

        try:
            decod_val = json.loads(val)
            return _format_element_aux(decod_val, depth + 1)
        except ValueError:
            # It is a string which cannot be converted to json.
            val = lib_util.html_escape(val)
            return _str_with_br(val)
        except TypeError:
            # "Expected a string or buffer"
            # It is not a string, so it could be a datetime.datetime
            val = lib_util.html_escape(str(val))
            return _str_with_br(val)

    def _format_element(val, depth=0):
        """Return a TD tag containing the value, with a href if the value is a link."""
        if lib_kbase.IsLink(val):
            val_title = "_format_element " + external_url_to_title(val)
            val_title_ul = _dot_ul(val_title)
            return "<td align='left' balign='left' border='0' href='%s'>%s</td>" % (val, val_title_ul)

        res_str = _format_element_aux(val, depth)
        return "<td align='left' balign='left' border='0'>%s</td>" % res_str

    def _format_pair(key, val, depth=0):
        """Prints a key-value pair as two TD tags, to go in an HTML table."""
        col_first = "<td align='left' valign='top' border='0'>%s</td>" % _dot_bold(key)
        col_second = _format_element(val, depth+1)
        return col_first + col_second

    def fields_to_html_vertical(grph, the_fields):
        """Display in the DOT node the list of its literal properties.

        It returns a dictionary whose keys are consecutive integers
        and whose values are the cells of each line.
        """
        props = {}
        # TODO: The sort must put at first, some specific keys.
        # For example, sources_top/nmap_run.py, the port number as an int (Not a string)
        # Also, filenames, case-sensitive or not.
        for idx, (key, val) in enumerate(sorted(the_fields)):
            if key == pc.property_information:
                # Completely left-aligned. Col span is 2, approximate ratio.
                val = _str_with_br(val, 2)
                curr_td = "<td align='left' balign='left' colspan='2'>%s</td>" % val
            elif is_flat_property(key):
                url_txt = lib_naming.ParseEntityUri(val)[0]
                split_txt = _str_with_br(url_txt, 2)
                # The text of the link must be underlined.
                curr_td = '<td href="%s" align="left" colspan="2">%s</td>' % (val, _dot_ul(split_txt))
            else:
                key_qname = lib_kbase.qname(key, grph)
                # This assumes: type(val) == 'rdflib.term.Literal'
                if lib_kbase.IsLiteral(val):
                    curr_td = _format_pair(key_qname, val.value)
                else:
                    curr_td = _format_pair(key_qname, val)

            props[idx] = curr_td
        return props

    # This lists the labels of objects which appear in the blocks,
    # and point to the name of records.
    dict_collapsed_object_labels_to_subject_labels = {}

    # This contains, for each node (subject), the related node (object) linked
    # to it with a property to be displayed in tables instead of individual nodes.
    dict_props_collapsed_subjects_to_object_lists = {}

    for collaps_prop_obj in collapsed_properties:
        # TODO: The CGI arguments of the URL might be used for sorting.
        # So they must be kept, for sorting once everything is gathered.
        collaps_prop_nam = lib_exports.PropToShortPropNam(collaps_prop_obj)
        dict_props_collapsed_subjects_to_object_lists[collaps_prop_nam] = collections.defaultdict(list)

    logfil.write(lib_util.TimeStamp() + " Rdf2Dot: First pass\n")

    def collapsed_label(collaps_prop_nam, subj_nam):
        """New intermediary node created."""
        assert collaps_prop_nam.find("#") < 0
        return "R_" + collaps_prop_nam + "_" + subj_nam

    def subj_nam_from_collapsed(collaps_prop_nam, subj_nam):
        """Called mainly from entity.py. If S points to O, transforms "O" => "R_S:O"

        Accordingly we create an edge: "S" => "R_S"
        It raises KeyError if the node is not in a table of this property.
        """
        collapsed_subj_nam = dict_collapsed_object_labels_to_subject_labels[subj_nam][collaps_prop_nam]
        new_subj_nam = collapsed_label(collaps_prop_nam, collapsed_subj_nam) + ":" + subj_nam
        return new_subj_nam

    # This is sorted so the result is deterministic. Very small performance impact.
    # Any order will do as long as the result is always the same for the same URL, if the content is identical.
    sorted_grph = sorted(grph)

    # TODO: Loop only on the "collapsed" properties, the ones whose objects must be displayed
    # in tables, instead of links - if only they have a single subject. Otherwise it cannot work.
    for subj, prop, obj in sorted_grph:
        # Objects linked with these properties, are listed in a table, instead of distinct nodes in a graph.
        if prop in collapsed_properties:
            # TODO: We lose the property, unfortunately. Should make a map: subject => prop => object ?
            subj_nam = _rdf_node_to_dot_label(subj)

            prop_nam = lib_exports.PropToShortPropNam(prop)
            dict_props_collapsed_subjects_to_object_lists[prop_nam][subj].append(obj)

            # Maybe we already entered it: Not a problem.
            obj_nam = _rdf_node_to_dot_label(obj)

            # collapsed_properties can contain only properties which define a tree,
            # as visibly the "object" nodes can have one ancestor only.
            try:
                # TODO: We should check if a node appears in two tables,
                # associated to two properties and/or two parent node.
                dict_collapsed_object_labels_to_subject_labels[obj_nam][prop_nam] = subj_nam
            except KeyError:
                dict_collapsed_object_labels_to_subject_labels[obj_nam] = {prop_nam: subj_nam}

    def rdf_node_to_dot_label_extended(obj, prop):
        """For getting the node of an object, as it might be in a table.

        If the node is a row of a table, its label is prefixed with the label of the table.
        """
        obj_nam = _rdf_node_to_dot_label(obj)

        try:
            dict_of_props = dict_collapsed_object_labels_to_subject_labels[obj_nam]
        except KeyError:
            return obj_nam

        # Let's hope there is only one collapsed property for this node. Otherwise, it means
        # that this node would be displayed in two different tables. It happened...
        prop_nam = None
        if prop is not None:
            candidate_prop_nam = lib_exports.PropToShortPropNam(prop)
            # Maybe this property is not collapsed.
            if candidate_prop_nam in dict_of_props:
                prop_nam = candidate_prop_nam

        # Maybe the property is not known, if the node is the subject.
        # Or the property is not collapsed.
        if prop_nam is None:
            # First property available.
            prop_nam = next(iter(dict_of_props))

        # The same property name is used for the lookup and for the label: Formerly,
        # the name used for the label was not defined when the given property was found.
        subj_nam = dict_of_props[prop_nam]
        return collapsed_label(prop_nam, subj_nam) + ":" + obj_nam

    # Now we know that we have seen all nodes in a collapsed property.
    for subj, prop, obj in sorted_grph:
        if prop in collapsed_properties:
            continue

        # Maybe the subject node belongs to a table, but the property is not known.
        subj_nam = rdf_node_to_dot_label_extended(subj, None)
        if lib_kbase.IsLink(obj):
            prp_col = lib_properties.prop_color(prop)

            # TODO: REMOVE THIS HARDCODE, GENERALIZE THIS TO ALL COMMUTATIVE PROPERTIES,
            # TODO: PROPERTIES WHOSE TRIPLES ARE MERGED WHEN SIMULTANEOUSLY: (Subj,Prop,Obj) and (Obj,Prop,Subj).
            # TODO: WHEN THIS HAPPENS, THE ARROW MUST BE BIDIRECTIONAL. Commutative triples have bidirectional arrows.
            # TODO: Look for "commutative_property" and lib_properties.add_property_metadata_to_graph()
            # TODO: At the moment, only one property can be bidirectional: property_socket_end
            if prop in commutative_properties:
                # BEWARE, MAYBE THIS IS A PORT INTO A TABLE. SO IT HAS TO BE PREFIXED BY THE RECORD NAME.
                obj_nam = rdf_node_to_dot_label_extended(obj, prop)
                if (obj, prop, subj) in grph:
                    # Both directions are in the graph: The edge is written once only.
                    if subj_nam < obj_nam:
                        stream.write(
                            _pattern_edge_bidirect
                            % (subj_nam, obj_nam, prp_col, lib_kbase.qname(prop, grph)))
                else:
                    # One connection only: We cannot see the other.
                    stream.write(
                        _pattern_edge_oriented
                        % (subj_nam, obj_nam, prp_col, lib_kbase.qname(prop, grph)))
            elif prop in _flat_properties_list:
                # HTML and images urls can be "flattened" because the nodes have no descendants.
                # Do not create a node for this.
                # MIME links displayed in the same column as sub-directory.
                # Also, it might be enough to test if the object has the form "entity.py" because it has no descendant.
                # TODO: CGIPROP: Can it have several html or sub-rdf ?? It is necessary !
                fields_set[subj].append((prop, obj))
            else:
                obj_nam = rdf_node_to_dot_label_extended(obj, prop)
                # If subj_nam is in a list of dict_collapsed_subjects_to_object_lists,
                # one must add at front, the record name, i.e. its subj_nam + "_table_rdf_data:".
                try:
                    # Syntax with colon required by DOT.
                    prop_nam = lib_exports.PropToShortPropNam(prop)
                    subj_nam = subj_nam_from_collapsed(prop_nam, subj_nam)
                except KeyError:
                    # The subject is not in a table of this property: Its label is kept.
                    pass

                stream.write(_pattern_edge_oriented % (subj_nam, obj_nam, prp_col, lib_kbase.qname(prop, grph)))
        elif obj is None:
            # No element created in nodes[]
            fields_set[subj].append((prop, "Null"))
        else:
            # For Literals. No element created in nodes[]
            # Literals can be processed according to their type.
            # Some specific properties cannot have children so they can be stored as literals?
            fields_set[subj].append((prop, obj))

    logfil.write(lib_util.TimeStamp() + " Rdf2Dot: Replacing vectors.\n")

    logfil.write(lib_util.TimeStamp() + " Rdf2Dot: Number of collapsed properties=%d.\n" % len(collapsed_properties))

    def _process_collapsed_properties(prop_nam):
        """Now, replaces each vector by a single object containing a HTML table.

        TODO: Unfortunately, the prop is lost, which implies that all children are mixed together.
        """
        dict_collapsed_subjects_to_object_lists = dict_props_collapsed_subjects_to_object_lists[prop_nam]
        logfil.write(lib_util.TimeStamp()+" Rdf2Dot: dict_collapsed_subjects_to_object_lists=%d.\n"
                     % len(dict_collapsed_subjects_to_object_lists))

        for subj_url, nod_lst in six.iteritems(dict_collapsed_subjects_to_object_lists):
            subj_nam = _rdf_node_to_dot_label(subj_url)

            subj_nam_tab = collapsed_label(prop_nam, subj_nam)
            try:
                # TODO: This logic adds an extra level of node: Try to flatten the tree.
                subj_nam = subj_nam_from_collapsed(prop_nam, subj_nam)
            except KeyError:
                # The subject is not itself a row of a table of this property.
                pass

            # This points from the subject to the table containing the objects.
            # TODO: This color should be a parameter.
            stream.write(_pattern_edge_oriented % (subj_nam, subj_nam_tab, "GREEN", prop_nam))

            lab_text, subj_entity_graphic_class, entity_id = lib_naming.ParseEntityUri(subj_url)

            # At the moment, two passes are necessary:
            # * A first pass to create the total list of fields, because they might be a bit different
            #   from one record to the other. The column names of these fields get an unique index number
            #   and can therefore be sorted.
            # * A second pass uses these result, to display the lines.
            #
            # This could be faster by assuming that the first ten or twenty columns have all the fields.
            # We could then start the second pass, and if an undetected column is found,
            # then restart from the beginning, as it is done now.

            # Unique columns of the descendant of this subject.
            raw_fields_keys = set()
            for obj in nod_lst:
                # One table per node.
                raw_fields_keys.update(fld[0] for fld in fields_set[obj])

            # Mandatory properties must come at the beginning of the columns of the header, with first indices.
            # A property which is never used does not get a column.
            fields_keys_ordered = []
            for fld_priority in _flat_properties_list:
                if fld_priority in raw_fields_keys:
                    raw_fields_keys.remove(fld_priority)
                    fields_keys_ordered.append(fld_priority)

            # This one is always removed because its content is concatenated at the first column.
            raw_fields_keys.discard(pc.property_information)

            # Appends rest of properties which are the column names, alphabetically sorted.
            fields_keys = fields_keys_ordered + sorted(raw_fields_keys)

            # This assumes that the header columns are sorted by alphabetical order.
            # The indices start at one because the first column is for the key.
            key_indices = {name_key: index_key for (index_key, name_key) in enumerate(fields_keys, 1)}

            number_keys = len(key_indices)+1

            # Apparently, no embedded tables.
            dict_html_lines = dict()
            for obj_uri in nod_lst:
                # One table per node.
                sub_obj_id = _rdf_node_to_dot_label(obj_uri)

                # Beware "\L" which should not be replaced by "<TABLE>" but this is not the right place.
                # NOTE(review): as written, this replacement is a no-op. Presumably the
                # replacement string was the HTML entity of the ampersand - confirm.
                sub_nod_uri = obj_uri.replace('&', '&')

                try:
                    sub_obj_nam, sub_entity_graphic_class, sub_entity_id = lib_naming.parse_entity_uri_short(obj_uri)
                except UnicodeEncodeError:
                    logging.warning("UnicodeEncodeError error:%s", obj_uri)
                    sub_obj_nam, sub_entity_graphic_class, sub_entity_id = ("Utf err 1", "Utf err 2", "Utf err 3")

                # If this is a script, always displayed on white, even if related to a specific entity.
                # THIS IS REALLY A SHAME BECAUSE WE JUST NEED THE ORIGINAL PROPERTY.
                if obj_uri.find("entity.py") < 0:
                    obj_color_plain = "#FFFFFF"
                else:
                    obj_color_plain = lib_patterns.EntityClassToColor(sub_entity_graphic_class)
                # This lighter color for the first column.
                obj_color_light = lib_patterns.color_lighter(obj_color_plain)

                # Some colors a bit clearer ? Or take the original color of the class ?
                td_bgcolor_plain = '<td BGCOLOR="%s" ' % obj_color_plain
                td_bgcolor_light = '<td BGCOLOR="%s" ' % obj_color_light

                # Some columns might not have a value. The first column is for the key.
                html_columns = [td_bgcolor_plain + " ></td>"] * number_keys

                # Just used for the vertical order of lines, one line per object.
                concatenated_info_values = ""

                for key, val in fields_set[obj_uri]:
                    # TODO: This property is by default the sorting key:
                    # TODO: This can be a parameter for lists of classes <MY_Class>
                    # TODO: ... by adding triplets of the form: (<MY_Class>, sorting_key, pc.property_information)
                    if key == pc.property_information:
                        # This can be a short string only.
                        # Instead of concatenation, consider a list, or use an unique delimiter.
                        concatenated_info_values += val
                        # If there is a key, it overrides
                        sub_entity_id = "NOT_" + "PLAINTEXTONLY"
                        continue

                    # TODO: This is hard-coded.
                    if is_flat_property(key):
                        # In fact, it might also be an internal URL with "entity.py"
                        if lib_kbase.IsLiteral(val):
                            if isinstance(val.value, (list, tuple)):
                                str_html = _format_element_aux(val.value)
                                tmp_cell = 'align="left">%s</td>' % str_html
                            else:
                                tmp_cell = 'align="left">%s</td>' % val.value
                        else:
                            # This displays objects in a table: The top-level object must be
                            # in the same host, so there is no need to display a long label.
                            val_title = lib_naming.parse_entity_uri_short(val)[0]

                            assert isinstance(val_title, (six.text_type, six.binary_type))

                            # This could probably be replaced by "str"
                            # There might be non-ascii characters such as accents etc...
                            try:
                                val_title.encode('ascii')
                            except UnicodeEncodeError:
                                val_title = "Not ascii"

                            val_title_ul = _dot_ul(val_title)
                            tmp_cell = 'href="%s" align="left" >%s</td>' % (val, val_title_ul)
                    else:
                        # Numbers are right-aligned.
                        try:
                            float(val)
                            tmp_cell = 'align="right">%s</td>' % val
                        except (TypeError, ValueError):
                            # Wraps the string if too long. Can happen only with a literal.
                            tmp_cell = 'align="left">%s</td>' % _str_with_br(val)

                    idx_key = key_indices[key]
                    html_columns[idx_key] = td_bgcolor_plain + tmp_cell

                if concatenated_info_values:
                    title_key = concatenated_info_values
                else:
                    title_key = sub_obj_nam

                # Maybe the first column is a literal, and not an object ?
                if sub_entity_id != "PLAINTEXTONLY":
                    # TODO: WE SHOULD PROBABLY ESCAPE HERE TOO.
                    # For example, this displays the column labelled with pc.property_information
                    tmp_col_0 = 'port="%s" href="%s" align="LEFT" >%s</td>' % (sub_obj_id, sub_nod_uri, title_key)
                else:
                    sub_nod_uri = lib_util.html_escape(sub_nod_uri)
                    # For example, this displays the title of another table: Typically sub-scripts.
                    # The title itself is not an URL.
                    tmp_col_0 = 'port="%s" align="LEFT" >%s</td>' % (sub_obj_id, sub_nod_uri)
                html_columns[0] = td_bgcolor_light + tmp_col_0

                # Several scripts might have the same help text, so add a number.
                # "Title" => "Title"
                # "Title" => "Title/2"
                # "Title" => "Title/3" etc...
                # Beware that it is quadratic with the number of scripts with identical info.
                title_idx = 2
                title_key_uniq = title_key
                while title_key_uniq in dict_html_lines:
                    title_key_uniq = "%s/%d" % (title_key, title_idx)
                    title_idx += 1

                # TODO: The sorting order is based on these strings but should rather be based on the content.
                # TODO: For example, "(TUT_UnixProcess) Handle=10" comes before "(TUT_UnixProcess) Handle=2".
                # TODO: This is later sorted by the function lib_util.natural_sort_list.
                # TODO: Or: title_key_uniq should rather be replaced by the list of properties, for example.
                # TODO: By clicking on the column names, we could change the order.
                # TODO: Another possibility is to have a "key" metadata which would replace title_key_uniq.
                dict_html_lines[title_key_uniq] = "".join(html_columns)

            # Replace the first column by more useful information.
            num_nod_lst = len(nod_lst)

            # WBEM and WMI classes have the syntax: "ns1/ns2/ns3:class" and the class it self can have base classes.
            # Survol classes have the syntax: "dir/dir/dir/class": This considers that namespaces are not really
            # necessary and can be replaced by classes. Also, there is a one-to-one match between the class inheritance
            # tree and its directory.
            # If Survol had to be started from scratch, there would be one Python class per survol class,
            # and they would be stored in the top dir "root/cimv2" ... it is not too late !
            #
            # The class used here is the class of the last object of the list.
            # This strips the upper directories: "mysql/instance" or "oracle/table", if this is a Survol class
            elt_nam = sub_entity_graphic_class.split("/")[-1]
            # This strips the namespace: "root/cimv2:CIM_LogicalElement", if this is a WBEM or WMI class.
            elt_nam = elt_nam.split(":")[-1]
            if not elt_nam:
                # TODO: This is not the right criteria. Must select if we are listing scripts.
                elt_nam = "script"

            elt_nam_plural = lib_grammar.to_plural(elt_nam, num_nod_lst)
            txt_elements = "%d %s" % (num_nod_lst, elt_nam_plural)
            header = '<td border="1">%s</td>' % _dot_bold(txt_elements)

            # TODO: Replace each column name with a link which sorts the line based on this column.
            # The order of columns could be specified with an extra cgi argument with the columns names.
            for key in fields_keys:
                column_title = lib_kbase.qname(key, grph)
                column_title = column_title.replace("_"," ").capitalize()
                header += "<td border='1'>%s</td>" % _dot_bold(column_title)
            # With an empty key, it comes first when sorting.
            dict_html_lines[""] = header

            # MAYBE SHOULD BE DONE TWICE !!!!! SEE ALSO ELSEWHERE !!!!
            # NOTE(review): as written, this replacement is a no-op. Presumably the
            # replacement string was the HTML entity of the ampersand - confirm.
            subj_url_clean = subj_url.replace('&', '&')

            # BEWARE: The shape and the color of this HTML table is from the subjects,
            # because the elements can be of different classes, even if the share the same predicate.
            # TODO: Each row should have its own color according to its class.
            num_fields = len(fields_keys)+1

            # The rows of this HTML table could belong to different classes:
            # What the shared is the predicate. Hence, the predicate, property name is used as a title.
            prop_nam_plural = lib_grammar.to_plural(prop_nam, None)
            help_text = "List of " + prop_nam_plural + " in " + lab_text

            # TODO: The title and the content are not necessarily of the same class.
            # lab_text_with_br is the first line of the table containing nodes linked with the
            # same property. Unfortunately we have lost this property.
            lab_text = _truncate_in_space(lab_text, 30)
            lab_text_with_br = _str_with_br(lab_text)
            lab_text_with_br += ": " + prop_nam

            # No object with this script.
            if entity_id == "PLAINTEXTONLY":
                subj_url_clean = ""

            # This color is the table's contour.
            lib_patterns.WritePatterned(
                stream,
                subj_entity_graphic_class,
                subj_nam_tab,
                help_text,
                '"#000000"',
                subj_url_clean,
                num_fields,
                lab_text_with_br,
                dict_html_lines)

            # TODO: Sometimes, the same value is repeated in contiguous cells of the same columns.
            # TODO: This could be avoided with the character '"': One just need to compare the values
            # TODO: ... consecutive cells of the same column.
            # TODO: One can even do that if the first N words of a following cell are identical.

    for collapsed_prop in collapsed_properties:
        collapsed_prop_nam = lib_exports.PropToShortPropNam(collapsed_prop)
        _process_collapsed_properties(collapsed_prop_nam)

    logfil.write(lib_util.TimeStamp() + " Rdf2Dot: Display remaining nodes. dict_rdf2_dot=%d\n" % len(dict_rdf2_dot))

    # Now, display the normal nodes, which are not displayed in tables.
    for obj_rdf_node, obj_label in six.iteritems(dict_rdf2_dot):
        # TODO: Avoids this lookup.
        if obj_label in dict_collapsed_object_labels_to_subject_labels:
            continue

        obj_props_as_html = fields_to_html_vertical(grph, fields_set[obj_rdf_node])

        # NOTE(review): as written, this replacement is a no-op. Presumably the
        # replacement string was the HTML entity of the ampersand - confirm.
        lab_href = obj_rdf_node.replace('&', '&')

        try:
            # TODO: The chain is already encoded for HTML, so the parsing is different
            # TODO: ... of an URL already encoded. They are quoted then unquoted.
            lab_text, obj_entity_graph_class, entity_id = lib_naming.ParseEntityUri(
                lib_util.urllib_unquote(obj_rdf_node))
        except UnicodeEncodeError:
            # NOTE(review): after this error, the label and the class are not assigned,
            # so the values of the previous node are used, if any - confirm what is expected.
            logging.warning("UnicodeEncodeError error:%s", obj_rdf_node)

        # WritePatterned receives an list of strings similar to "<td>jhh</td><td>jhh</td><td>jhh</td>"
        # This function adds <tr> and </tr> on both sides.
        # This avoids concatenations.

        # Ampersand are intentionally doubled, because later on they are replaced twice.
        # That is, interpreted twice as HTML entities.
        # This might be temporary until we replace CGI arguments by genuine WMI Monikers.
        lab_text_no_amp = lab_text.replace("&amp;", " ")
        lab_text_no_amp = lab_text_no_amp.strip()
        lab_text_clean = _str_with_br(lab_text_no_amp)
        # Two columns because it encompasses the key and the value.

        if obj_entity_graph_class:
            help_text = lab_text_no_amp

            if not help_text:
                help_text = "Top-level script"
            # This condition is for WMI and WBEM where the name of the node is also a class or a namespace.
            # This is a bit convoluted, and just for nicer display.
            # "root/cimv2 (WBEM subclasses) at http://vps516494.ovh.net:5988 is a root/cimv2:"
            # "wmi_namespace is a wmi_namespace"
            elif not lab_text_no_amp.startswith(obj_entity_graph_class.replace(":", " ")):
                # "is a" or "is an"
                the_article = lib_grammar.indefinite_article(obj_entity_graph_class)
                help_text += " is %s %s" % (the_article, obj_entity_graph_class)
        else:
            if lab_text_clean.startswith("http"):
                help_text = "External URL " + lab_text_no_amp
            else:
                help_text = "Script " + lab_text_no_amp

        # This color is the object's contour.
        lib_patterns.WritePatterned(
            stream,
            obj_entity_graph_class,
            obj_label,
            help_text,
            '"#000000"',
            lab_href,
            2,
            lab_text_clean,
            obj_props_as_html)

    logfil.write(lib_util.TimeStamp() + " Rdf2Dot: Leaving\n")
    stream.write("}\n")
def Grph2Json(page_title, error_msg, isSubServer, parameters, grph):
    """Transform a graph of triples into a JSON document of nodes and links, and write it.

    The JSON document has three keys: "page_title", "nodes" and "links".
    It is serialized with json.dumps and passed to WriteJsonHeader.

    :param page_title: Stored as is in the JSON document.
    :param error_msg: Not used by this function.
    :param isSubServer: Not used by this function.
    :param parameters: Not used by this function.
    :param grph: Iterable of triples (subject, predicate, object).
    :raises TypeError: If an object is neither a link nor a literal.
    """
    # Must be reset to zero between several executions, when run by WSGI.
    global NodeJsonNumber
    NodeJsonNumber = 0

    # It contains a cache because the same nodes may appear several times.
    dictNod2Json = {}

    def NodeToJsonObj(theNod):
        """Return the JSON object of a node, and create it if necessary."""
        try:
            return dictNod2Json[theNod]
        except KeyError:
            jsonObj = NodeJson(theNod)
            dictNod2Json[theNod] = jsonObj
            return jsonObj

    links = []
    for subj, pred, obj in grph:
        # This applies only to entity.py : In rendering based on Json, scripts are not displayed as nodes,
        # but in hierarchical menus. The node must not appear at all.
        if pred == pc.property_script:
            sys.stderr.write("continue subj=%s obj=%s\n" % (subj, obj))
            continue

        # Normal data scripts are not accepted. This should apply only to file_directory.py and file_to_mime.py
        if not ScriptForJson(subj):
            continue

        if not ScriptForJson(obj):
            continue

        subjObj = NodeToJsonObj(subj)
        subj_id = subjObj.m_survol_url

        propNam = PropToShortPropNam(pred)

        # TODO: BUG: If several nodes for the same properties, only the last one is kept.
        if lib_kbase.IsLink(obj):
            objObj = NodeToJsonObj(obj)
            obj_id = objObj.m_survol_url
            links.append({
                'source': subj_id,
                'target': obj_id,
                'survol_link_prop': propNam})

        # TODO: Add the name corresponding to the URL, in m_info_dict so that some elements
        # of the tooltip would be clickable. On the other hand, one just need to merge
        # the nodes relative to the object, by right-clicking.
        elif lib_kbase.IsLiteral(obj):
            if pred == pc.property_information:
                try:
                    subjObj.m_info_list.append(str(obj.value))
                except UnicodeEncodeError:
                    # 'ascii' codec can't encode character u'\xf3' in position 17: ordinal not in range(128)
                    # https://stackoverflow.com/questions/9942594/unicodeencodeerror-ascii-codec-cant-encode-character-u-xa0-in-position-20
                    subjObj.m_info_list.append(obj.value.encode('utf-8'))
            else:
                if isinstance(obj.value, lib_util.six_integer_types) or isinstance(
                        obj.value, lib_util.six_string_types):
                    subjObj.m_info_dict[propNam] = obj.value
                else:
                    # If the value cannot be serializable to JSON.
                    subjObj.m_info_dict[propNam] = type(obj.value).__name__
        else:
            # Raising a plain string is itself a TypeError: This raises a genuine
            # exception of the same type, with an explicit message.
            raise TypeError("Cannot happen here: object is neither a link nor a literal: %s" % str(obj))

    # Now, this creates the nodes sent as json objects.
    # Each node is stored at the position given by its index.
    numNodes = len(dictNod2Json)
    nodes = [None] * numNodes
    for nod in dictNod2Json:
        nodObj = dictNod2Json[nod]
        nod_titl = nodObj.m_label
        nod_id = nodObj.m_index

        # The URL must not contain any HTML entities when in a XML or SVG document,
        # and therefore must be escaped. Therefore they have to be unescaped when transmitted in JSON.
        # This is especially needed for RabbitMQ because the parameter defining its connection name
        # has the form: "Url=LOCALHOST:12345,Connection=127.0.0.1:51748 -> 127.0.0.1:5672"

        # HTTP_MIME_URL
        # MUST UNESCAPE HTML ENTITIES !
        the_survol_nam = lib_util.survol_HTMLParser().unescape(nod_titl)

        # TODO: Use the same object for lookup and Json.
        nodes[nod_id] = {
            'id': nodObj.m_survol_url,  # Required by D3
            'name': the_survol_nam,
            # Theoretically, this URL should be HTML unescaped then CGI escaped.
            'survol_url': nodObj.m_survol_url,  # Duplicate of 'id'
            'survol_universal_alias': nodObj.m_survol_universal_alias,
            'survol_fill': nodObj.m_color,
            'entity_class': nodObj.m_class,  # TODO: Maybe not needed because also in the URL ?
            'survol_info_list': nodObj.m_info_list,
            'survol_info_dict': nodObj.m_info_dict
        }

    graph = {}
    graph["page_title"] = page_title
    graph["nodes"] = nodes
    graph["links"] = links

    WriteJsonHeader(json.dumps(graph, indent=2))