def Main():
    """
    Scans the memory of a process for COM class ids (GUIDs in registry
    format such as "{DB7A405D-208F-4E88-BA0A-132ACFA0B5B6}") and links
    each one to the process node in the RDF graph.
    """
    cgiEnv = lib_common.ScriptEnvironment()
    pidint = int(cgiEnv.GetId())

    grph = cgiEnv.GetGraph()

    node_process = lib_uris.gUriGen.PidUri(pidint)

    try:
        # Registry-style GUID: "{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}".
        rgx_com_guid = r"\{[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\}"

        resu_com_classes = memory_regex_search.GetRegexMatches(pidint, rgx_com_guid)

        prop_com_class = lib_common.MakeProp("COM class")

        for class_idx in resu_com_classes:
            com_class_id = resu_com_classes[class_idx]
            # On Python 3 the match is a bytes object which must be decoded.
            # The previous str() call produced the repr "b'{...}'" instead
            # of the plain GUID, corrupting the generated URI.
            if isinstance(com_class_id, bytes):
                com_class_id = com_class_id.decode()

            typelib_node = lib_uris.gUriGen.ComRegisteredTypeLibUri(com_class_id)
            grph.add((node_process, prop_com_class, typelib_node))
    except Exception as exc:
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

    cgiEnv.OutCgiRdf()
def Main():
    """
    Scans the memory of a process with one pass per SQL regular expression,
    and inserts the SQL queries which were found into the RDF graph.
    """
    cgiEnv = lib_common.CgiEnv()
    pid_as_integer = int(cgiEnv.GetId())

    grph = cgiEnv.GetGraph()
    node_process = lib_common.gUriGen.PidUri(pid_as_integer)

    sql_patterns = lib_sql.SqlRegularExpressions()

    arr_props = []
    # TODO: Unfortunately it scans several times the memory process.
    for pattern_name in sql_patterns:
        sql_pattern = sql_patterns[pattern_name]
        regex_predicate = lib_common.MakeProp(pattern_name)
        arr_props.append(regex_predicate)
        try:
            # https://docs.python.org/3/library/re.html
            # re.MULTILINE | re.ASCII | re.IGNORECASE
            matched_sqls = memory_regex_search.GetRegexMatches(
                pid_as_integer, sql_pattern, re.IGNORECASE)
        except Exception as exc:
            lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

        all_queries_set = set()
        for match_index in matched_sqls:
            _process_scanned_sql_query(matched_sqls[match_index], all_queries_set)

        _generate_from_sql_queries(
            grph, node_process, regex_predicate, all_queries_set, pid_as_integer)

    cgiEnv.OutCgiRdf("LAYOUT_RECT", arr_props)
def _get_aggreg_dsns(pidint, rgx_dsn):
    r"""
    Takes a process id and a regular expression matching ODBC connection
    string keywords, and returns the keyword fragments found in the process
    memory, aggregated into full connection strings when contiguous.

    Example of ODBC connection string:
    "Driver={SQL Server};Server=.\SQLEXPRESS;Database=ExpressDB;Trusted_Connection=yes;"

    Example of offset=>match dict returned by the memory scan:
        34515015: "Driver={SQL Server}"
        34515035: "Server=.\SQLEXPRESS"
        34515055: "Database=ExpressDB"
        34515074: "Trusted_Connection=yes"
    """
    try:
        logging.debug("rgx_dsn=%s", rgx_dsn)

        # TODO: OPTIONALLY ADD NON-ASCII CHAR AT THE VERY BEGINNING. SLIGHTLY SAFER AND FASTER.
        # rgx_dsn = "[^a-zA-Z]" + regDSN

        # The matched keywords come back with their offsets in memory; the
        # aggregation glues together keywords whose offsets are contiguous,
        # which rebuilds most connection strings even if some rare keywords
        # are missing from the keyword set.
        # TODO: Instead of just displaying the DSN, connect to it, list tables etc...
        memory_hits = memory_regex_search.GetRegexMatches(pidint, rgx_dsn, re.IGNORECASE)
        return survol_odbc.aggregate_dsn_pieces(memory_hits)
    except Exception as exc:
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))
def Main():
    """
    Scans the memory of a process for HTTP/HTTPS URLs and links the
    plausible ones to the process node in the RDF graph.
    """
    cgiEnv = lib_common.ScriptEnvironment()
    pidint = int(cgiEnv.GetId())

    grph = cgiEnv.GetGraph()

    node_process = lib_uris.gUriGen.PidUri(pidint)

    try:
        # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
        rgx_http = r"https?://[a-z_0-9\.]+"
        matches_by_offset = memory_regex_search.GetRegexMatches(pidint, rgx_http)

        # The URLs which are detected in the process memory might be broken, invalid etc...
        # Only some of them are in valid strings. The other may come from deallocated memory etc...
        # In memory, we find strings such as "http://adblockplus.orgzzzzzzzzzzzz":
        # a top-level domain longer than four characters is a primitive way
        # to discard apparently broken URLs.
        resu_clean = set()
        for match_offset in matches_by_offset:
            url_as_string = matches_by_offset[match_offset].decode()
            if len(url_as_string.rsplit(".", 1)[-1]) <= 4:
                resu_clean.add(url_as_string)

        for url_as_string in resu_clean:
            node_portal_web = lib_common.NodeUrl(url_as_string)
            grph.add((node_process, pc.property_rdf_data_nolist1, node_portal_web))

        logging.debug("Added %d nodes, len_graph=%d", len(resu_clean), len(grph))
    except Exception as exc:
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

    cgiEnv.OutCgiRdf()
def Main():
    """
    Scans the memory of a process for HTTP URLs and adds the plausible
    ones to the RDF graph.
    """
    cgiEnv = lib_common.CgiEnv()
    pidint = int(cgiEnv.GetId())

    grph = cgiEnv.GetGraph()

    node_process = lib_common.gUriGen.PidUri(pidint)

    try:
        # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
        # rgxHttp = "http://[a-zA-Z_0-9\.]*"
        rgxHttp = "http://[a-z_0-9\.]*"

        resuUrls = memory_regex_search.GetRegexMatches(pidint, rgxHttp)

        resuClean = set()
        # The URLs which are detected in the process memory might be broken, invalid etc...
        # Only some of them are in valid strings. The other may come from deallocated memory etc...
        for urlIdx in resuUrls:
            urlHttp = resuUrls[urlIdx]
            # On Python 3 the match is a bytes object which must be decoded.
            # The previous str() call produced the repr "b'http://...'",
            # whose trailing quote defeated the top-level-domain check below.
            if isinstance(urlHttp, bytes):
                urlHttp = urlHttp.decode()

            # In memory, we find strings such as "http://adblockplus.orgzzzzzzzzzzzz"
            # or "activistpost.netzx".
            splitDots = urlHttp.split(".")
            topLevel = splitDots[-1]
            # Primitive way to remove apparently broken URLs.
            if len(topLevel) > 4:
                continue
            resuClean.add(urlHttp)

        for urlHttp in resuClean:
            nodePortalWbem = lib_common.NodeUrl(urlHttp)
            grph.add((node_process, pc.property_rdf_data_nolist1, nodePortalWbem))
    except Exception as exc:
        # "except ... as exc" replaces the Python2-era sys.exc_info() idiom.
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

    cgiEnv.OutCgiRdf()
def Main():
    """
    Scans the memory of a process with one pass per SQL regular expression,
    and adds the SQL queries which were found to the RDF graph.
    """
    cgiEnv = lib_common.CgiEnv()
    pidint = int(cgiEnv.GetId())

    grph = cgiEnv.GetGraph()

    node_process = lib_common.gUriGen.PidUri(pidint)

    dictRegexSQL = lib_sql.SqlRegularExpressions()

    arrProps = []
    # TODO: Unfortunately it scans several times the memory process.
    for rgxKey in dictRegexSQL:
        rgxSQL = dictRegexSQL[rgxKey]
        rgxProp = lib_common.MakeProp(rgxKey)
        arrProps.append(rgxProp)
        try:
            # https://docs.python.org/3/library/re.html
            # re.MULTILINE | re.ASCII | re.IGNORECASE
            matchedSqls = memory_regex_search.GetRegexMatches(pidint, rgxSQL, re.IGNORECASE)
        except Exception as exc:
            # "except ... as exc" replaces the Python2-era sys.exc_info() idiom.
            lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

        setQrys = set()
        for sqlIdx in matchedSqls:
            sqlQry = matchedSqls[sqlIdx]
            ProcessScannedSqlQuery(sqlQry, setQrys)

        GenerateFromSqlQrys(grph, node_process, rgxProp, setQrys, pidint)

    cgiEnv.OutCgiRdf("LAYOUT_RECT", arrProps)
def Main():
    """
    Scans the memory of a process for strings which look like file names
    and adds the ones passing the filtering criteria to the RDF graph.
    """
    # Parameter for the minimal depth of the regular expression.
    # min=3, otherwise any string with a "/" will match.
    keyMiniDepth = "Minimum filename depth"
    # Otherwise, only look for absolute filenames.
    keyWithRelative = "Search relative filenames"
    keyCheckExistence = "Check file existence"

    cgiEnv = lib_common.CgiEnv(parameters={
        keyMiniDepth: 3,
        keyWithRelative: False,
        keyCheckExistence: True})

    pid_as_integer = int(cgiEnv.GetId())
    paramMiniDepth = int(cgiEnv.get_parameters(keyMiniDepth))
    paramWithRelative = bool(cgiEnv.get_parameters(keyWithRelative))
    paramCheckExistence = bool(cgiEnv.get_parameters(keyCheckExistence))

    grph = cgiEnv.GetGraph()
    node_process = lib_common.gUriGen.PidUri(pid_as_integer)

    try:
        filename_parser = _filename_parser_generator()
        filename_regex = filename_parser.create_regex(paramMiniDepth, paramWithRelative)
        WARNING("rgx_fil_nam=%s", filename_regex)

        matches_by_offset = memory_regex_search.GetRegexMatches(
            pid_as_integer, filename_regex)

        # This avoids duplicates.
        unique_filenames = set()

        # The file names which are detected in the process memory might be broken, invalid etc...
        # Only some of them are in valid strings. The other may come from deallocated memory etc...
        for match_offset in matches_by_offset:
            raw_buffer = matches_by_offset[match_offset]
            if lib_util.is_py3:
                assert isinstance(raw_buffer, bytes)
            else:
                assert isinstance(raw_buffer, str)

            is_path, filename_candidates = filename_parser.cleanup_filename(raw_buffer)
            if is_path:
                # Just a list of directories. This could be interesting information,
                # but it does not imply the creation or access of actual files.
                continue

            # These files might actually be used.
            for one_filename in filename_candidates:
                if lib_util.is_py3:
                    assert isinstance(one_filename, str)
                else:
                    assert isinstance(one_filename, unicode)
                _check_unique_filenames(one_filename, unique_filenames, paramCheckExistence)

        for a_fil_nam in unique_filenames:
            node_filnam = lib_common.gUriGen.FileUri(a_fil_nam)
            grph.add((node_process, pc.property_rdf_data_nolist1, node_filnam))

        WARNING("unique file numbers=%d", len(unique_filenames))
    except Exception as exc:
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

    cgiEnv.OutCgiRdf()
def Main():
    """
    Scans the memory of a process for strings which look like file names
    and adds them to the RDF graph, optionally keeping only existing files.
    """
    # Parameter for the minimal depth of the regular expression.
    # min=3, otherwise any string with a "/" will match.
    keyMiniDepth = "Minimum filename depth"
    # Otherwise, only look for absolute filenames.
    keyWithRelative = "Search relative filenames"
    keyCheckExistence = "Check file existence"

    cgiEnv = lib_common.CgiEnv(
        parameters={keyMiniDepth: 3, keyWithRelative: False, keyCheckExistence: True})
    pidint = int(cgiEnv.GetId())
    paramMiniDepth = int(cgiEnv.GetParameters(keyMiniDepth))
    paramWithRelative = bool(cgiEnv.GetParameters(keyWithRelative))
    paramCheckExistence = bool(cgiEnv.GetParameters(keyCheckExistence))

    grph = cgiEnv.GetGraph()
    node_process = lib_common.gUriGen.PidUri(pidint)

    try:
        objParser = FilenameParserFunc()
        rgxFilNam = objParser.Regex(paramMiniDepth, paramWithRelative)
        sys.stderr.write("rgxFilNam=%s\n" % rgxFilNam)

        resuFilNams = memory_regex_search.GetRegexMatches(pidint, rgxFilNam)

        # This avoids duplicates.
        resuClean = set()

        # The file names which are detected in the process memory might be broken, invalid etc...
        # Only some of them are in valid strings. The other may come from deallocated memory etc...
        for idxFilNam in resuFilNams:
            aFilNam = resuFilNams[idxFilNam]
            # Depending on the regular expression, the result must be adapted.
            aFilNam = objParser.Cleanup(aFilNam)
            if aFilNam in resuClean:
                continue

            # The file must exist. If we cannot access it, it does not matter.
            # TODO: Must accept if we can access it or not.
            if paramCheckExistence:
                # TODO: Test existence of relative files by prefixing with current directory.
                try:
                    # "with" guarantees the handle is closed. The previous code
                    # also had a dead "if not oFil" branch: open() never returns
                    # a falsy object, it raises on failure.
                    with open(aFilNam, "r"):
                        pass
                except Exception as exc:
                    # Narrow "except Exception" instead of a bare "except:",
                    # so KeyboardInterrupt/SystemExit are not swallowed.
                    sys.stderr.write("open:%s throw:%s\n" % (aFilNam, str(exc)))
                    continue
            resuClean.add(aFilNam)

        for aFilNam in resuClean:
            sys.stderr.write("aFilNam=%s\n" % aFilNam)
            nodeFilnam = lib_common.gUriGen.FileUri(aFilNam)
            grph.add((node_process, pc.property_rdf_data_nolist1, nodeFilnam))
    except Exception as exc:
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

    cgiEnv.OutCgiRdf()
def GetAggregDsns(pidint, mapRgx):
    r"""
    Scans the memory of a process for ODBC connection-string fragments and
    glues contiguous fragments back into full connection strings.

    Example of a reassembled connection string:
    "Driver={SQL Server};Server=.\SQLEXPRESS;Database=ExpressDB;Trusted_Connection=yes;"

    The memory scan yields an offset=>fragment dict such as:
        34515015 = "Driver={SQL Server}"
        34515035 = "Server=.\SQLEXPRESS"
        34515055 = "Database=ExpressDB"
        34515074 = "Trusted_Connection=yes"
    Fragments whose offsets are exactly contiguous are concatenated, giving:
        34515016: Driver={SQL Server};Server=.\SQLEXPRESS;Database=ExpressDB;Trusted_Connection=yes

    TODO: Eliminate aggregated strings containing one or two tokens only,
    because they cannot be genuine DSNs.
    TODO: A last pass could merge aggregates separated by a few chars, since
    they likely belong to the same connection string; even with unknown
    keywords, a complete string can still be used to connect to ODBC.
    http://www.dofactory.com/reference/connection-strings
    TODO: Instead of just displaying the DSN, connect to it, list tables etc...
    """
    try:
        # Not a letter, then the keyword, then "=", then the value regex, then possibly the delimiter.
        dsn_regex = "|".join("[; ]*" + key + " *= *" + mapRgx[key] + " *" for key in mapRgx)
        # This works also. Both are very slow.
        # dsn_regex = "|".join([ ";? *" + key + " *= *" + survol_odbc.mapRgxODBC[key] + " *" for key in survol_odbc.mapRgxODBC ])
        DEBUG("rgxDSN=%s", dsn_regex)

        # TODO: OPTIONALLY ADD NON-ASCII CHAR AT THE VERY BEGINNING. SLIGHTLY SAFER AND FASTER.
        # dsn_regex = "[^a-zA-Z]" + regDSN

        fragments_by_offset = memory_regex_search.GetRegexMatches(
            pidint, dsn_regex, re.IGNORECASE)

        # Debug trace of each raw fragment with its start and end offsets.
        for fragment_offset in fragments_by_offset:
            fragment = fragments_by_offset[fragment_offset]
            token_descr = str(fragment_offset) + " = " + fragment + " = " + str(
                fragment_offset + len(fragment))
            DEBUG("dsnODBC=%s", token_descr)

        # Walk the fragments in memory order; whenever a fragment starts exactly
        # where the previous one ended, append it to the current aggregate,
        # otherwise start a new aggregate at the fragment's offset.
        aggregated = dict()
        previous_end = 0
        current_start = 0
        for fragment_offset in sorted(fragments_by_offset.keys()):
            fragment = fragments_by_offset[fragment_offset]
            fragment_end = fragment_offset + len(fragment)
            DEBUG("lastOffset=%d nextOffset=%d currMtch=%s", previous_end, fragment_end, fragment)
            if previous_end == fragment_offset:
                aggregated[current_start] += fragment
            else:
                # This starts a new DSN string.
                current_start = fragment_offset
                aggregated[current_start] = fragment
            previous_end = fragment_end

        return aggregated
    except Exception as exc:
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))