def Main():
    cgiEnv = lib_common.ScriptEnvironment()
    pidint = int(cgiEnv.GetId())

    grph = cgiEnv.GetGraph()

    node_process = lib_uris.gUriGen.PidUri(pidint)

    try:
        rgx_http = r"\{[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\}"

        resu_com_classes = memory_regex_search.GetRegexMatches(
            pidint, rgx_http)

        prop_com_class = lib_common.MakeProp("COM class")
        for class_idx in resu_com_classes:
            com_class_id = resu_com_classes[class_idx]
            # On Python 3 this is a bytes object which must be decoded to str.
            com_class_id = com_class_id.decode()

            # comClass = "{DB7A405D-208F-4E88-BA0A-132ACFA0B5B6}" for example.
            typelib_node = lib_uris.gUriGen.ComRegisteredTypeLibUri(
                com_class_id)
            grph.add((node_process, prop_com_class, typelib_node))

    except Exception as exc:
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

    cgiEnv.OutCgiRdf()
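# Side note: the GUID pattern used above can be checked on its own with the standard re module.
# This is only a minimal sketch with a made-up buffer; the script above scans the live process
# memory through memory_regex_search.GetRegexMatches instead.
import re

rgx_guid = br"\{[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\}"
fake_memory = b"...junk...{DB7A405D-208F-4E88-BA0A-132ACFA0B5B6}...junk..."
for mtch in re.finditer(rgx_guid, fake_memory):
    print(mtch.start(), mtch.group().decode())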
Example #2
def Main():
	cgiEnv = lib_common.CgiEnv()
	pid_as_integer = int(cgiEnv.GetId())

	grph = cgiEnv.GetGraph()

	node_process = lib_common.gUriGen.PidUri(pid_as_integer)

	dict_regex_sql = lib_sql.SqlRegularExpressions()

	arr_props = []

	# TODO: Unfortunately this scans the process memory several times.
	for rgx_key in dict_regex_sql:
		rgx_sql = dict_regex_sql[rgx_key]
		regex_predicate = lib_common.MakeProp(rgx_key)
		arr_props.append(regex_predicate)

		try:
			# https://docs.python.org/3/library/re.html
			# re.MULTILINE | re.ASCII | re.IGNORECASE
			matched_sqls = memory_regex_search.GetRegexMatches(pid_as_integer, rgx_sql, re.IGNORECASE)
		except Exception as exc:
			lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

		all_queries_set = set()

		for sql_idx in matched_sqls:
			sql_qry = matched_sqls[sql_idx]
			_process_scanned_sql_query(sql_qry, all_queries_set)

		_generate_from_sql_queries(grph, node_process, regex_predicate, all_queries_set, pid_as_integer)

	cgiEnv.OutCgiRdf("LAYOUT_RECT",arr_props)
def _get_aggreg_dsns(pidint, rgx_dsn):
    r"""
    The input is a process id and a regular expression for finding ODBC connection strings.

    Example of ODBC connection string:
        "Driver={SQL Server};Server=.\SQLEXPRESS;Database=ExpressDB;Trusted_Connection=yes;"

    Example of offset=>match dict:
        34515015: "Driver={SQL Server}"
        34515035: "Server=.\SQLEXPRESS"
        34515055: "Database=ExpressDB"
        34515074: "Trusted_Connection=yes"
        35634903: "Driver={SQL Server}"
        35634923: "Server=.\SQLEXPRESS"
        35634943: "Database=ExpressDB"
        35634962: "Trusted_Connection=yes"
    """

    try:
        # Not letter, then the keyword, then "=", then the value regex, then possibly the delimiter.
        #rgx_dsn = "|".join(["[; ]*" + key + " *= *" + map_rgx[key] + " *" for key in map_rgx])
        # This works also. Both are very slow.
        # rgx_dsn = "|".join([ ";? *" + key + " *= *" + survol_odbc.mapRgxODBC[key] + " *" for key in survol_odbc.mapRgxODBC ])
        logging.debug("rgx_dsn=%s", rgx_dsn)

        # TODO: OPTIONALLY ADD NON-ASCII CHAR AT THE VERY BEGINNING. SLIGHTLY SAFER AND FASTER.
        # rgx_dsn = "[^a-zA-Z]" + regDSN

        # Here we receive the matched keywords and their offset in memory.
        # We try to aggregate them if they are contiguous.
        # This will work less well if we use a smaller set of DSN connection-string keywords.
        # This could be fixed with these remarks:
        # (1) If the difference of offsets is small.
        # (2) Try to extensively scan the memory (All DSN keywords) in the interval of detected common keywords.
        resu_matches = memory_regex_search.GetRegexMatches(
            pidint, rgx_dsn, re.IGNORECASE)

        aggreg_dsns = survol_odbc.aggregate_dsn_pieces(resu_matches)

        # Last pass after aggregation:
        # If several tokens were aggregated and are still separated by a few chars (20, 30 etc...),
        # we can assume that they are part of the same connection string,
        # especially if they contain complementary keywords (UID, then PWD, etc...).
        # So, it does not really matter if some rare keywords are not known.
        # We could have a last pass to extract these keywords: although we are by definition
        # unable to use their content explicitly, a complete connection string can still be used
        # to connect to ODBC.

        # http://www.dofactory.com/reference/connection-strings

        # TODO: Instead of just displaying the DSN, connect to it, list tables etc...
        return aggreg_dsns
    except Exception as exc:
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))
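# survol_odbc.aggregate_dsn_pieces is not shown in this file. A minimal sketch of the idea,
# assuming the offset => match dict from the docstring above: pieces whose offsets are contiguous
# (give or take the one-character ";" delimiter) are glued back into one connection string.
# The real helper may behave differently.
def _aggregate_contiguous_matches_sketch(resu_matches):
    aggregated = {}
    current_start = None
    expected_next = None
    for offset in sorted(resu_matches):
        piece = resu_matches[offset]
        if expected_next is not None and 0 <= offset - expected_next <= 1:
            # Contiguous with the previous piece: extend the current connection string.
            aggregated[current_start] += ";" + piece
        else:
            # Gap: this starts a new connection string.
            current_start = offset
            aggregated[current_start] = piece
        expected_next = offset + len(piece)
    return aggregated

# With the sample offsets from the docstring, this yields two strings starting at 34515015
# and 35634903, each looking like
# "Driver={SQL Server};Server=.\SQLEXPRESS;Database=ExpressDB;Trusted_Connection=yes".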
Example #4
def Main():
    cgiEnv = lib_common.ScriptEnvironment()
    pidint = int(cgiEnv.GetId())

    grph = cgiEnv.GetGraph()

    node_process = lib_uris.gUriGen.PidUri(pidint)

    try:
        # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
        rgx_http = r"https?://[a-z_0-9\.]+"

        resu_urls = memory_regex_search.GetRegexMatches(pidint, rgx_http)

        resu_clean = set()

        # The URLs which are detected in the process memory might be broken, invalid etc...
        # Only some of them are in valid strings. The others may come from deallocated memory etc...
        for url_idx in resu_urls:
            url_http = resu_urls[url_idx]
            # In memory, we find strings such as "http://adblockplus.orgzzzzzzzzzzzz"
            # or "activistpost.netzx".

            url_http = url_http.decode()
            split_dots = url_http.split(".")
            top_level = split_dots[-1]
            # Primitive way to remove apparently broken URLs.
            if len(top_level) > 4:
                continue
            resu_clean.add(url_http)

        for url_http in resu_clean:
            node_portal_web = lib_common.NodeUrl(url_http)
            grph.add(
                (node_process, pc.property_rdf_data_nolist1, node_portal_web))
        logging.debug("Added %d nodes, len_graph=%d", len(resu_clean),
                      len(grph))

    except Exception as exc:
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

    cgiEnv.OutCgiRdf()
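# The top-level-domain length heuristic above can be checked in isolation; a tiny sketch
# using the broken strings quoted in the comments (not part of the original script):
for candidate in ["http://adblockplus.org", "http://adblockplus.orgzzzzzzzzzzzz", "activistpost.netzx"]:
    top_level = candidate.split(".")[-1]
    print(candidate, "kept" if len(top_level) <= 4 else "discarded")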
Example #5
def Main():
    cgiEnv = lib_common.CgiEnv()
    pidint = int(cgiEnv.GetId())

    grph = cgiEnv.GetGraph()

    node_process = lib_common.gUriGen.PidUri(pidint)

    try:
        # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
        # rgxHttp = "http://[a-zA-Z_0-9\.]*"
        rgxHttp = "http://[a-z_0-9\.]*"

        resuUrls = memory_regex_search.GetRegexMatches(pidint, rgxHttp)

        resuClean = set()

        # The URLs which are detected in the process memory might be broken, invalid etc...
        # Only some of them are in valid strings. The others may come from deallocated memory etc...
        for urlIdx in resuUrls:
            urlHttp = resuUrls[urlIdx]
            # In memory, we find strings such as "http://adblockplus.orgzzzzzzzzzzzz"
            # or "activistpost.netzx".
            urlHttp = urlHttp.decode()  # On Python 3, this is a bytes object.
            splitDots = urlHttp.split(".")
            topLevel = splitDots[-1]
            # Primitive way to remove apparently broken URLs.
            if len(topLevel) > 4:
                continue
            resuClean.add(urlHttp)

        for urlHttp in resuClean:
            # sys.stderr.write("urlHttp=%s\n"%urlHttp)
            nodePortalWeb = lib_common.NodeUrl(urlHttp)
            grph.add(
                (node_process, pc.property_rdf_data_nolist1, nodePortalWeb))

    except Exception as exc:
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

    cgiEnv.OutCgiRdf()
Example #6
def Main():
    cgiEnv = lib_common.CgiEnv()
    pidint = int(cgiEnv.GetId())

    grph = cgiEnv.GetGraph()

    node_process = lib_common.gUriGen.PidUri(pidint)

    dictRegexSQL = lib_sql.SqlRegularExpressions()

    arrProps = []

    # TODO: Unfortunately this scans the process memory several times.
    for rgxKey in dictRegexSQL:
        rgxSQL = dictRegexSQL[rgxKey]
        rgxProp = lib_common.MakeProp(rgxKey)
        arrProps.append(rgxProp)

        try:
            # https://docs.python.org/3/library/re.html
            # re.MULTILINE | re.ASCII | re.IGNORECASE
            matchedSqls = memory_regex_search.GetRegexMatches(
                pidint, rgxSQL, re.IGNORECASE)
        except Exception as exc:
            lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

        setQrys = set()

        for sqlIdx in matchedSqls:
            sqlQry = matchedSqls[sqlIdx]
            ProcessScannedSqlQuery(sqlQry, setQrys)

        GenerateFromSqlQrys(grph, node_process, rgxProp, setQrys, pidint)

    cgiEnv.OutCgiRdf("LAYOUT_RECT", arrProps)
Example #7
def Main():
    # Parameter for the minimal path depth matched by the regular expression.
    # min=3, otherwise any string containing a "/" would match.
    keyMiniDepth = "Minimum filename depth"

    # Otherwise, only look for absolute filenames.
    keyWithRelative = "Search relative filenames"

    keyCheckExistence = "Check file existence"

    cgiEnv = lib_common.CgiEnv(parameters={
        keyMiniDepth: 3,
        keyWithRelative: False,
        keyCheckExistence: True
    })

    pid_as_integer = int(cgiEnv.GetId())

    paramMiniDepth = int(cgiEnv.get_parameters(keyMiniDepth))
    paramWithRelative = bool(cgiEnv.get_parameters(keyWithRelative))
    paramCheckExistence = bool(cgiEnv.get_parameters(keyCheckExistence))

    grph = cgiEnv.GetGraph()

    node_process = lib_common.gUriGen.PidUri(pid_as_integer)

    try:
        obj_parser = _filename_parser_generator()
        rgx_fil_nam = obj_parser.create_regex(paramMiniDepth,
                                              paramWithRelative)
        WARNING("rgx_fil_nam=%s", rgx_fil_nam)

        resu_fil_nams = memory_regex_search.GetRegexMatches(
            pid_as_integer, rgx_fil_nam)

        # This avoids duplicates.
        unique_filenames = set()

        # The file names which are detected in the process memory might be broken, invalid etc...
        # Only some of them are in valid strings. The others may come from deallocated memory etc...
        for idx_fil_nam in resu_fil_nams:
            a_fil_nam_buffer = resu_fil_nams[idx_fil_nam]
            if lib_util.is_py3:
                assert isinstance(a_fil_nam_buffer, bytes)
            else:
                assert isinstance(a_fil_nam_buffer, str)
            is_path, a_fil_nam_list = obj_parser.cleanup_filename(
                a_fil_nam_buffer)

            # print("a_fil_nam_list %s", str(a_fil_nam_list))
            if is_path:
                # This is just a list of directories. This could be interesting information,
                # but it does not imply the creation or access of actual files and directories.
                #WARNING("THIS IS JUST A PATH:%s", str(a_fil_nam_list))
                pass
            else:
                # These files might actually be used.
                for one_filename in a_fil_nam_list:
                    #ERROR("ADDING %s", one_filename)
                    if lib_util.is_py3:
                        assert isinstance(one_filename, str)
                    else:
                        assert isinstance(one_filename, unicode)
                    _check_unique_filenames(one_filename, unique_filenames,
                                            paramCheckExistence)

        for a_fil_nam in unique_filenames:
            #DEBUG("a_fil_nam=%s",a_fil_nam)
            node_filnam = lib_common.gUriGen.FileUri(a_fil_nam)
            grph.add((node_process, pc.property_rdf_data_nolist1, node_filnam))

        WARNING("unique file numbers=%d", len(unique_filenames))

    except Exception as exc:
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))

    cgiEnv.OutCgiRdf()
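# _filename_parser_generator() and its create_regex() method are not shown in this file.
# Purely hypothetical sketch of what a depth-limited filename regex could look like, assuming
# the "minimum depth" parameter counts path components as described in the comments above:
def _make_filename_regex_sketch(mini_depth, with_relative):
    component = br"[-\w.]+"
    prefix = br"/?" if with_relative else br"/"
    # At least mini_depth components separated by "/", optionally anchored at the root.
    return prefix + (component + br"/") * (mini_depth - 1) + component

# Example: _make_filename_regex_sketch(3, False) only accepts shapes like b"/usr/lib/libc.so",
# so a lone "/" inside random memory does not match.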
Example #8
def Main():
	# Parameter for the minimal path depth matched by the regular expression.
	# min=3, otherwise any string containing a "/" would match.
	keyMiniDepth = "Minimum filename depth"

	# Otherwise, only look for absolute filenames.
	keyWithRelative = "Search relative filenames"

	keyCheckExistence = "Check file existence"

	cgiEnv = lib_common.CgiEnv( parameters = { keyMiniDepth : 3, keyWithRelative : False, keyCheckExistence : True })

	pidint = int( cgiEnv.GetId() )

	paramMiniDepth = int(cgiEnv.GetParameters( keyMiniDepth ))
	paramWithRelative = bool(cgiEnv.GetParameters( keyWithRelative ))
	paramCheckExistence = bool(cgiEnv.GetParameters( keyCheckExistence ))


	grph = cgiEnv.GetGraph()

	node_process = lib_common.gUriGen.PidUri(pidint)

	try:
		objParser = FilenameParserFunc()
		rgxFilNam = objParser.Regex(paramMiniDepth,paramWithRelative)
		sys.stderr.write("rgxFilNam=%s\n"%rgxFilNam)

		resuFilNams = memory_regex_search.GetRegexMatches(pidint,rgxFilNam)

		# This avoids duplicates.
		resuClean = set()

		# The file names which are detected in the process memory might be broken, invalid etc...
		# Only some of them are in valid strings. The others may come from deallocated memory etc...
		for idxFilNam in resuFilNams:
			aFilNam = resuFilNams[idxFilNam]

			# Depending on the regular expression, the result must be adapted.
			aFilNam = objParser.Cleanup(aFilNam)

			if aFilNam in resuClean:
				continue

			# The file must exist. Whether we can access it does not matter.
			# TODO: Must accept the file whether or not we can access it.
			if paramCheckExistence:

				# TODO: Test existence of relative files by prefixing with current directory.
				try:
					oFil = open(aFilNam,"r")
				except Exception as exc:
					sys.stderr.write("open:%s throw:%s\n"%(aFilNam,str(exc)))
					continue
				if not oFil:
					continue

				oFil.close()


			resuClean.add( aFilNam )

		for aFilNam in resuClean:
			sys.stderr.write("aFilNam=%s\n"%aFilNam)
			nodeFilnam = lib_common.gUriGen.FileUri( aFilNam )
			grph.add( ( node_process, pc.property_rdf_data_nolist1, nodeFilnam ) )

	except Exception as exc:
		lib_common.ErrorMessageHtml("Error:%s. Protection ?"%str(exc))

	cgiEnv.OutCgiRdf()
Example #9
def GetAggregDsns(pidint, mapRgx):
    # "Driver={SQL Server};Server=.\SQLEXPRESS;Database=ExpressDB;Trusted_Connection=yes;"
    # 34515015 = "Driver={SQL Server}"
    # 34515035 = "Server=.\SQLEXPRESS"
    # 34515055 = "Database=ExpressDB"
    # 34515074 = "Trusted_Connection=yes"
    # 35634903 = "Driver={SQL Server}"
    # 35634923 = "Server=.\SQLEXPRESS"
    # 35634943 = "Database=ExpressDB"
    # 35634962 = "Trusted_Connection=yes"

    try:

        # Not letter, then the keyword, then "=", then the value regex, then possibly the delimiter.
        rgxDSN = "|".join(
            ["[; ]*" + key + " *= *" + mapRgx[key] + " *" for key in mapRgx])
        # This works also. Both are very slow.
        # rgxDSN = "|".join([ ";? *" + key + " *= *" + survol_odbc.mapRgxODBC[key] + " *" for key in survol_odbc.mapRgxODBC ])
        DEBUG("rgxDSN=%s", rgxDSN)

        # TODO: OPTIONALLY ADD NON-ASCII CHAR AT THE VERY BEGINNING. SLIGHTLY SAFER AND FASTER.
        # rgxDSN = "[^a-zA-Z]" + regDSN

        # Here we receive the matched keywords and their offset in memory.
        # We try to aggregate them if they are contiguous.
        # This will work less well if we use a smaller set of DSN connection-string keywords.
        # This could be fixed with these remarks:
        # (1) If the difference of offsets is small.
        # (2) Try to extensively scan the memory (All DSN keywords) in the interval of detected common keywords.
        resuMatches = memory_regex_search.GetRegexMatches(
            pidint, rgxDSN, re.IGNORECASE)

        for matchedOffset in resuMatches:
            matchedStr = resuMatches[matchedOffset]
            dsnToken = str(matchedOffset) + " = " + matchedStr + " = " + str(
                matchedOffset + len(matchedStr))
            DEBUG("dsnODBC=%s", dsnToken)

        sortedKeys = sorted(resuMatches.keys())
        aggregDsns = dict()
        lastOffset = 0
        currOffset = 0
        for theOff in sortedKeys:
            currMtch = resuMatches[theOff]
            nextOffset = theOff + len(currMtch)
            DEBUG("lastOffset=%d nextOffset=%d currMtch=%s", lastOffset,
                  nextOffset, currMtch)
            #if lastOffset == 0:
            #	lastOffset = nextOffset
            #	aggregDsns[lastOffset] = currMtch
            #	continue
            if lastOffset == theOff:
                aggregDsns[currOffset] += currMtch
            else:
                # This starts a new DSN string.
                currOffset = theOff
                aggregDsns[currOffset] = currMtch
            lastOffset = nextOffset

        # TODO: Eliminate aggregated strings containing only one or two tokens,
        # because they cannot be genuine DSNs (see the sketch after this function).
        # 29812569: SERVER=\RCHATEAU-HP
        # 34515016: Driver={SQL Server};Server=.\SQLEXPRESS;Database=ExpressDB;Trusted_Connection=yes
        # 34801013: SERVER=\RCHATEAU-HP
        # 35634904: Driver={SQL Server};Server=.\SQLEXPRESS;Database=ExpressDB;Trusted_Connection=yes

        return aggregDsns

        # Last pass after aggregation:
        # If several tokens were aggregated and are still separated by a few chars (20, 30 etc...),
        # we can assume that they are part of the same connection string,
        # especially if they contain complementary keywords (UID, then PWD, etc...).
        # So, it does not really matter if some rare keywords are not known.
        # We could have a last pass to extract these keywords: although we are by definition
        # unable to use their content explicitly, a complete connection string can still be used
        # to connect to ODBC.

        # http://www.dofactory.com/reference/connection-strings

        # TODO: Instead of just displaying the DSN, connect to it, list tables etc...

    except Exception as exc:
        lib_common.ErrorMessageHtml("Error:%s. Protection ?" % str(exc))
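# A possible implementation of the TODO above (dropping aggregated strings that hold only one or
# two tokens): hypothetical helper, not part of the original code, assuming the aggregated values
# are plain "key=value;..." strings as in the sample comments.
def _keep_plausible_dsns_sketch(aggregDsns, minTokens=3):
    return {
        offset: dsn for offset, dsn in aggregDsns.items()
        if dsn.count("=") >= minTokens
    }

# With the sample above, "SERVER=\RCHATEAU-HP" (one token) would be dropped while
# "Driver={SQL Server};Server=.\SQLEXPRESS;Database=ExpressDB;Trusted_Connection=yes" is kept.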