def ParseEntitySurvolUri(uprs, longDisplay, force_entity_ip_addr):
    """Rebuild a plain script URL from a CGI-wrapped one, then parse it.

    When everything goes through a single CGI entry point, the script which
    is really targeted is given by the "script" CGI argument, for example:
        scheme='http', netloc='127.0.0.1:8000', path='/survol/survolcgi.py',
        query='script=/entity.py&xid=Win32_UserAccount.Domain=host,Name=user'
    The value of "script" becomes the path of the rebuilt URL, and all the
    other arguments are kept, in the same order, as its query string.

    Only the scheme, netloc and query attributes of uprs are read.
    longDisplay and force_entity_ip_addr are forwarded to ParseEntityUri.

    Returns the result of ParseEntityUri for the rebuilt URL, or a tuple of
    three placeholder strings if there is no non-empty "script" argument.
    """
    # The arguments might be separated by "&amp;" or by "&": the query is
    # normalized by UrlNoAmp before being split.
    cgi_arguments = lib_util.UrlNoAmp(uprs.query).split("&")

    script_path = None
    kept_arguments = []
    for cgi_argument in cgi_arguments:
        # Only the first "=" is a delimiter: the value may contain others.
        arg_name, _, arg_value = cgi_argument.partition("=")
        if arg_name == "script":
            # If "script" appears several times, the last one wins.
            script_path = arg_value
        else:
            kept_arguments.append(cgi_argument)

    if not script_path:
        return (
            "Incomplete CGI script:" + str(uprs),
            "Unknown subjEntityGraphicClass",
            "Unknown entity_id")

    rebuilt_query = ("?" + "&".join(kept_arguments)) if kept_arguments else ""
    rebuilt_url = uprs.scheme + "://" + uprs.netloc + script_path + rebuilt_query
    return ParseEntityUri(rebuilt_url, longDisplay, force_entity_ip_addr)
def _display_class_objects_no_jinja(dict_subj_prop_obj):
    """Yield, fragment by fragment, the HTML rows of the objects table.

    dict_subj_prop_obj maps each subject to a mapping from predicate to a
    collection of objects. Subjects are ordered by their title, predicates
    with lib_util.natural_sorted and objects by their title.
    Each object gives one "<tr>" row. The subject cell and the predicate cell
    are written only with the first row they apply to, with a "rowspan"
    covering the rows of all their objects.

    Only rows are generated: the enclosing "<table>" tag is not emitted here.

    NOTE(review): titles and URLs are inserted in the HTML without escaping;
    confirm that they are escaped or trusted upstream.
    """
    def _has_debug_marker(*texts):
        # FIXME: This is a specific test to catch a specific condition:
        # a test case with accented characters. Such elements are skipped.
        return any(one_text.find("Boulogne") >= 0 for one_text in texts)

    # The subjects must be sorted by their title.
    tuples_subjects_list = []
    for a_subj in dict_subj_prop_obj:
        subj_str = str(a_subj)
        subj_title, entity_graphic_class, entity_id = lib_naming.ParseEntityUri(subj_str)
        if not subj_title:
            logging.debug("NO TITLE FOR %s", subj_str)
        elif subj_title[0] == 'Y' and subj_title.find("Boulogne") >= 0:
            # str.find() returns -1, which is true, when the text is absent:
            # the result must be compared to zero, otherwise all titles
            # starting with "Y" are dropped.
            logging.debug("_display_class_objects_no_jinja subj_str=%s", subj_str)
            logging.debug("_display_class_objects_no_jinja subj_title=%s", subj_title)
            continue
        tuples_subjects_list.append(
            (a_subj, subj_str, subj_title, entity_graphic_class, entity_id))

    # Sorted by the title of the subject, which is the third value of the tuple.
    lib_util.natural_sort_list(tuples_subjects_list, key=lambda tup: tup[2])

    # Apparently, a problem is that "%" gets transformed into an hexadecimal
    # number, preventing decoding.
    def _custom_decode_hex(the_str):
        the_str = lib_util.survol_unescape(the_str)
        return the_str.replace("%25", "%").replace("%2F", "/").replace(
            "%5C", "\\").replace("%3A", ":")

    # Now it iterates on the sorted list.
    # This reuses the intermediate values calculated for sorting.
    for a_subj, subj_str, subj_title, _, _ in tuples_subjects_list:
        if _has_debug_marker(a_subj, subj_str, subj_title):
            logging.debug("a_subj=%s", a_subj)
            logging.debug("subj_str=%s", subj_str)
            logging.debug("subj_title=%s", subj_title)
            continue

        dict_pred = dict_subj_prop_obj[a_subj]

        # Total number of lines: it is the "rowspan" of the subject cell.
        cnt_preds = sum(len(dict_pred[a_pred]) for a_pred in dict_pred)

        must_write_col_one_subj = True
        subj_str_with_mode = _url_in_html_mode(subj_str)

        # The predicates, i.e. the properties associating a subject with
        # an object, must be alphabetically sorted.
        for a_pred in lib_util.natural_sorted(dict_pred):
            lst_objs = dict_pred[a_pred]
            pred_str = lib_exports.AntiPredicateUri(str(a_pred))
            cnt_objs = len(lst_objs)
            must_write_col_one_pred = True

            # The objects must be sorted by title.
            lst_tuples_objs = []
            for an_obj in lst_objs:
                obj_str = _custom_decode_hex(str(an_obj))
                obj_title = lib_naming.ParseEntityUri(obj_str)[0]
                lst_tuples_objs.append((an_obj, obj_str, obj_title))

            # Sorted by the title of the object, the third value of the tuple.
            lib_util.natural_sort_list(lst_tuples_objs, key=lambda tup: tup[2])

            for an_obj, obj_str, obj_title in lst_tuples_objs:
                if _has_debug_marker(an_obj, obj_str, obj_title):
                    logging.debug("an_obj=%s", an_obj)
                    logging.debug("obj_str=%s", obj_str)
                    logging.debug("obj_title=%s", obj_title)
                    continue

                # Nasty encoding errors. Probably the string should have been
                # encoded before. This applies to Python 2 on Windows only.
                if lib_util.isPlatformWindows and not lib_util.is_py3:
                    try:
                        obj_title.decode('ascii')
                    except UnicodeDecodeError:
                        logging.debug("obj_title=%s", obj_title)
                        obj_title = obj_title.decode('cp1252')

                yield '<tr>'

                if must_write_col_one_subj:
                    yield (
                        '<td valign="top" rowspan="%s"><a href="%s">%s</a></td>'
                        % (str(cnt_preds), subj_str_with_mode, subj_title))
                    must_write_col_one_subj = False

                if must_write_col_one_pred:
                    # Predicates displayed on two columns have no cell of
                    # their own: their object cell takes the extra column.
                    if a_pred not in _list_props_td_double_col_span:
                        yield '<td valign="top" rowspan="%s">%s</td>' % (
                            str(cnt_objs), pred_str)
                    must_write_col_one_pred = False

                col_span = 2 if a_pred in _list_props_td_double_col_span else 1

                # Hard-coded switch: when set, the link depends on the MIME type.
                disp_mime_urls = True

                yield '<td colspan="%d">' % col_span
                if disp_mime_urls:
                    if lib_kbase.IsLink(an_obj):
                        obj_str_clean = lib_util.UrlNoAmp(obj_str)
                        mimeType = lib_mime.get_mime_type_from_url(obj_str_clean)
                        if mimeType:
                            if mimeType.startswith("image/"):
                                # Images are displayed as clickable thumbnails.
                                yield (
                                    """<a href="%s"><img src="%s" alt="%s" height="42" width="42"></a>"""
                                    % (obj_str, obj_str, obj_title))
                            else:
                                yield """<a href="%s">%s</a>""" % (obj_str, obj_title)
                        else:
                            url_with_mode = lib_util.url_mode_replace(obj_str, "html")
                            yield """<a href="%s">%s</a>""" % (url_with_mode, obj_title)
                    else:
                        yield '%s' % obj_str
                else:
                    if lib_kbase.IsLink(an_obj):
                        url_with_mode = _url_in_html_mode(obj_str)
                        yield '<a href="%s">%s</a>' % (url_with_mode, obj_title)
                    else:
                        yield '%s' % obj_str

                yield "</td>"
                yield "</tr>"
def DispClassObjects(dictSubjPropObj):
    """Write, with WrtAsUtf, an HTML table of subjects, predicates and objects.

    dictSubjPropObj maps each subject to a mapping from predicate to a
    collection of objects. Subjects are ordered by their title, predicates
    with lib_util.natural_sorted and objects by their title.
    Each object gives one table row. The subject cell and the predicate cell
    are written only with their first row, with a "rowspan" covering the
    rows of all their objects.
    """
    # Predicates whose object cell spans two columns: they get no cell of
    # their own.
    wide_predicates = [
        pc.property_information,
        pc.property_rdf_data_nolist2,
        pc.property_rdf_data_nolist1,
    ]

    WrtAsUtf('<table class="class_objects">')

    # Titles are calculated once, for sorting and then for display.
    subject_rows = []
    for subject in dictSubjPropObj:
        subject_url = str(subject)
        title, graphic_class, subject_id = lib_naming.ParseEntityUri(subject_url)
        subject_rows.append(
            (subject, subject_url, title, graphic_class, subject_id))

    # The title is the third element of each tuple.
    lib_util.natural_sort_list(subject_rows, key=lambda row: row[2])

    for subject, subject_url, subject_title, graphic_class, _ in subject_rows:
        predicates = dictSubjPropObj[subject]

        # Graphic parameters of the class: the element of index one is used
        # as the background color of the rows.
        row_color = lib_patterns.TypeToGraphParams(graphic_class)[1]

        # Number of rows of this subject, all predicates together.
        total_rows = sum(len(predicates[predicate]) for predicate in predicates)

        subject_cell_pending = True
        subject_href = UrlInHtmlMode(subject_url)

        for predicate in lib_util.natural_sorted(predicates):
            objects = predicates[predicate]
            predicate_label = lib_exports.AntiPredicateUri(str(predicate))
            object_count = len(objects)
            predicate_cell_pending = True
            is_wide = predicate in wide_predicates

            object_rows = []
            for obj in objects:
                object_url = DesHex(str(obj))
                object_title = lib_naming.ParseEntityUri(object_url)[0]
                object_rows.append((obj, object_url, object_title))
            lib_util.natural_sort_list(object_rows, key=lambda row: row[2])

            for obj, object_url, object_title in object_rows:
                WrtAsUtf('<tr bgcolor="%s">' % row_color)

                if subject_cell_pending:
                    WrtAsUtf(
                        '<td valign="top" rowspan="%s"><a href="%s">%s</a></td>'
                        % (str(total_rows), subject_href, subject_title))
                    subject_cell_pending = False

                if predicate_cell_pending:
                    if not is_wide:
                        WrtAsUtf('<td valign="top" rowspan="%s">%s</td>'
                                 % (str(object_count), predicate_label))
                    predicate_cell_pending = False

                # Hard-coded switch: when set, the link depends on the MIME type.
                show_mime_links = True

                WrtAsUtf('<td colspan="%d">' % (2 if is_wide else 1))

                if not lib_kbase.IsLink(obj):
                    # Plain values are written as they are.
                    WrtAsUtf('%s' % (object_url))
                elif not show_mime_links:
                    html_url = UrlInHtmlMode(object_url)
                    WrtAsUtf('<a href="%s">%s</a>' % (html_url, object_title))
                else:
                    mime_type = lib_mime.GetMimeTypeFromUrl(
                        lib_util.UrlNoAmp(object_url))
                    if not mime_type:
                        html_url = lib_util.AnyUriModed(object_url, "html")
                        WrtAsUtf('<a href="%s">%s</a>' % (html_url, object_title))
                    elif mime_type.startswith("image/"):
                        # Images are displayed as clickable thumbnails.
                        WrtAsUtf(
                            '<a href="%s"><img src="%s" alt="%s" height="42" width="42"></a>'
                            % (object_url, object_url, object_title))
                    else:
                        WrtAsUtf('<a href="%s">%s</a>' % (object_url, object_title))

                WrtAsUtf("</td>")
                WrtAsUtf("</tr>")

    WrtAsUtf(" </table>")
def parse_entity_uri_with_host(uri_with_mode, long_display=True, force_entity_ip_addr=None):
    """
    This does two different things from an instance URI:
    - Split an URL into its class, key-value params and host.
    - And calculate a displayable string for this instance.

    This is used notably when transforming RDF into dot documents.
    The returned entity type is used for example choosing graphic attributes
    and gives more information than the simple entity type.

    :param uri_with_mode: The URL, as a string, rdflib Literal or URIRef,
        or bytes with Python 3 (these are decoded as UTF-8).
    :param long_display: If true and if the URL has a "xid" argument,
        the label is completed by _known_script_to_title.
    :param force_entity_ip_addr: Forwarded to _calc_label.
    :return: The tuple (entity_label, entity_graphic_class, entity_id, entity_host).
        The host is an empty string unless it is given by the "xid" argument.

    Example:
    lab_text, entity_graphic_class, entity_id, entity_host = parse_entity_uri_with_host(the_url)

    TODO: It could be split into two different functions.
    """

    # Maybe there is a host name before the entity type. It can contain letters, numbers,
    # hyphens, dots etc... but no ":" or "@".
    # THIS CANNOT WORK WITH IPV6 ADDRESSES...
    # WE MAY USE SCP SYNTAX: scp -6 osis@\[2001:db8:0:1\]:/home/osis/test.file ./test.file

    # This conversion because it might be called with rdflib.term.URIRef or rdflib.term.Literal
    # without explicit conversion, or unicode if py2.
    if lib_util.is_py3 and isinstance(uri_with_mode, six.binary_type):
        # With Python 3, str() of bytes returns the representation "b'...'"
        # and not the text, so bytes must be explicitly decoded.
        uri_with_mode = uri_with_mode.decode("utf-8", "replace")
    elif isinstance(uri_with_mode, (rdflib.term.Literal, rdflib.term.URIRef, six.text_type)):
        uri_with_mode = str(uri_with_mode)
    assert isinstance(uri_with_mode, str)

    # This replaces "&amp;" by "&" up to two times if needed.
    uri_with_mode_clean = lib_util.UrlNoAmp(uri_with_mode)

    uprs = lib_util.survol_urlparse(uri_with_mode_clean)

    # Only the first "=" delimiter counts for a CGI variable: the value of "xid"
    # contains others. If an argument is repeated, its last value is kept.
    uprs_query_dict = {
        cgi_key: cgi_value
        for cgi_key, _, cgi_value in (
            cgi_argument.partition("=") for cgi_argument in uprs.query.split("&"))
    }

    cgi_arg_xid = uprs_query_dict.get("xid", None)
    uri_mode = uprs_query_dict.get("mode", "")
    associator_attribute = uprs_query_dict.get("__associator_attribute__", None)

    # Default value.
    entity_host = ""

    fil_script = os.path.basename(uprs.path)

    # This works for the scripts:
    # entity.py            xid=namespace/type:idGetNamespaceType
    # objtypes_wbem.py     Just extracts the namespace, as it prefixes the type: xid=namespace/type:id
    # See variable lib_util.xidCgiDelimiter="?xid="
    # Possibly, the "xid" parameter does not come at the beginning.
    if cgi_arg_xid is not None:
        # TODO: Maybe the chain contains HTML codes and therefore cannot be parsed.
        # Ex: "xid=%40%2F%3Aoracle_package." == "xid=@/:oracle_package."
        entity_type, entity_id, entity_host = lib_util.ParseXid(cgi_arg_xid)

        entity_graphic_class = entity_type

        entity_label = _calc_label(entity_host, entity_type, entity_id, force_entity_ip_addr, fil_script)

        # TODO: Consider external_url_to_title, similar logic with different results.
        if long_display:
            entity_label = _known_script_to_title(fil_script, uri_mode, entity_host, entity_label)

    # Maybe an internal script, but not entity.py
    # It has a special entity type as a display parameter
    elif uri_with_mode_clean.startswith(lib_util.uriRoot):
        # This is a bit of a special case which allows to display something if we know only
        # the type of the entity but its id is undefined. Instead of displaying nothing,
        # this attempts to display all available entities of this given type.
        # source_top/enumerate_process.py etc... Not "." because this has a special role in Python.
        mtch_enumerate = re.match(r"^.*/enumerate_([a-z0-9A-Z_]*)\.py$", uri_with_mode_clean)
        if mtch_enumerate:
            entity_graphic_class = mtch_enumerate.group(1)
            entity_id = ""
            # TODO: Change this label, not very nice.
            # This indicates that a specific script can list all objects of a given entity type.
            entity_label = entity_graphic_class + " enumeration"
        else:
            entity_graphic_class = "provider_script"
            entity_id = ""
            entity_label = _known_script_to_title(fil_script, uri_mode)

    elif uri_with_mode_clean.split(':')[0] in ("ftp", "http", "https", "urn", "mail"):
        # Standard URLs. Example: lib_common.NodeUrl( "http://www.google.com" )
        entity_graphic_class = ""
        entity_id = ""
        # Display the complete URL, otherwise it is not clickable.
        entity_label = uri_with_mode

    else:
        entity_graphic_class = ""
        # This specific keyword used when no class is specified and there is no object. It is easy to spot.
        # It happens for example for blank nodes, BNode, used to created literal values with a key:
        # Arguments of a function, successive values with a time-stamp.
        entity_id = "PLAINTEXTONLY"

        # Maybe an external URI sending data in RDF, HTML etc...
        # We could also load the URL and gets its title if it is in HTML.
        basna = lib_util.EncodeUri(fil_script)
        if uprs.netloc != "":
            entity_label = uprs.netloc + "/" + basna
        else:
            entity_label = basna

        # TODO: " " are replaced by "%20". Why ? So change back.
        entity_label = entity_label.replace("%20", " ")

    assert isinstance(entity_graphic_class, str)

    if associator_attribute:
        # The dots of the attribute name are displayed as spaces.
        entity_label = associator_attribute.replace(".", " ") + " of " + entity_label

    return entity_label, entity_graphic_class, entity_id, entity_host
def ParseEntityUri(uriWithMode, longDisplay=True, force_entity_ip_addr=None):
    """Calculate the label, the graphic class and the id of an instance URL.

    Four kinds of URLs are detected, in this order:
    - Query starting with "xid=": the entity is parsed by lib_util.ParseXid.
      The label comes from CalcLabel, completed by KnownScriptToTitle
      if longDisplay is set.
    - URL starting with lib_util.uriRoot: an internal script. If its name is
      "enumerate_<type>.py", it is labelled as the enumeration of this type,
      otherwise its class is "provider_script".
    - Scheme in ftp, http, https, urn or mail: a standard URL, which is its
      own label, so that it is clickable.
    - Anything else: the id is "PLAINTEXTONLY" and the label is given
      by UriToTitle.

    force_entity_ip_addr is forwarded to CalcLabel.
    Returns the tuple (entity_label, entity_graphic_class, entity_id).
    """
    # Maybe there is a host name before the entity type. It can contain
    # letters, numbers, hyphens, dots etc... but no ":" or "@".
    # THIS CANNOT WORK WITH IPV6 ADDRESSES...
    # WE MAY USE SCP SYNTAX: scp -6 osis@\[2001:db8:0:1\]:/home/osis/test.file ./test.file

    # The URI might contain the CGI parameter "&mode=json". It must be removed,
    # otherwise it could be taken in the entity id and the label would be wrong.
    clean_uri = lib_util.UrlNoAmp(uriWithMode)
    uri = lib_util.AnyUriModed(clean_uri, "")
    mode = lib_util.GetModeFromUrl(clean_uri)

    parsed = lib_util.survol_urlparse(uri)
    script_name = os.path.basename(parsed.path)

    # This works for the scripts:
    # entity.py            xid=namespace/type:idGetNamespaceType
    # objtypes_wbem.py     Just extracts the namespace, as it prefixes the type: xid=namespace/type:id
    # See variable lib_util.xidCgiDelimiter="?xid="
    if parsed.query.startswith("xid="):
        # TODO: Maybe the chain contains HTML codes and therefore cannot be parsed.
        # Ex: "xid=%40%2F%3Aoracle_package." == "xid=@/:oracle_package."
        entity_type, entity_id, entity_host = lib_util.ParseXid(parsed.query[4:])
        label = CalcLabel(entity_host, entity_type, entity_id, force_entity_ip_addr, script_name)
        # TODO: Consider external_url_to_title, similar logic with different results.
        if longDisplay:
            label = KnownScriptToTitle(script_name, mode, entity_host, label)
        # The entity type is also the graphic class.
        return (label, entity_type, entity_id)

    # Maybe an internal script, but not entity.py
    if uri.startswith(lib_util.uriRoot):
        # Special case which allows to display something if only the type of
        # the entity is known, but not its id: such a script lists all
        # available entities of the given type.
        # source_top/enumerate_process.py etc...
        enumerate_match = re.match(r"^.*/enumerate_([a-z0-9A-Z_]*)\.py$", uri)
        if enumerate_match:
            enumerated_class = enumerate_match.group(1)
            # TODO: Change this label, not very nice.
            return (enumerated_class + " enumeration", enumerated_class, "")
        return (KnownScriptToTitle(script_name, mode), "provider_script", "")

    # Standard URLs. Example: lib_common.NodeUrl( "http://www.google.com" )
    if uri.split(':')[0] in ["ftp", "http", "https", "urn", "mail"]:
        # Display the complete URL, otherwise it is not clickable.
        return (uriWithMode, "", "")

    # TODO: " " are replaced by "%20". Why ? So change back.
    plain_label = UriToTitle(parsed).replace("%20", " ")
    return (plain_label, "", "PLAINTEXTONLY")