def main(xml_input_file, json_output_file, pretty_print_json):
    """Given an XML input file, create an output JSON file.  Also returns a dictionary object if used as a module.

    Arguments:
        input_file {string} -- Input XML file.
        output_file {string} -- Output XML file.
    """

    json_data = []

    try:
        with open(xml_input_file, "r") as fh_xml:
            with open(json_output_file, "w") as fh_json:

                xmljson_object = xmljson.XMLData()

                if pretty_print_json:
                    json.dump(xmljson_object.data(parse(fh_xml).getroot()),
                              fh_json,
                              indent=4)
                else:
                    json.dump(xmljson_object.data(parse(fh_xml).getroot()),
                              fh_json)

        # Load file as a json object to return.
        with open(json_output_file, "r") as fh:
            json_data = json.load(fh)

    except Exception as e:
        print(f"[-] Exception: {e}")

    return json_data
Esempio n. 2
0
def get_xml_data(xml_path: str) -> Dict[str, Any]:
    if not os.path.exists(xml_path):
        return {}

    with open(xml_path, 'r') as xmlfile_ctx:
        xml_content = xmlfile_ctx.read()

    # Remove variables from xml.in files
    xml_content = re.sub(r'@([A-Za-z0-9_]+)@', r'AT_\1_AT', xml_content)

    # Like Yahoo converter, but don't omit 'content' if
    # there are no attributes.
    converter = xmljson.XMLData(xml_fromstring=False,
                                simple_text=False,
                                text_content="content")
    root = xml.etree.ElementTree.fromstring(xml_content)
    xml_data: Dict[str, Any] = converter.data(root)

    # Parsed XML Data will contain OrderedDict() as empty
    # value which converts to 'OrderedDict()' instead of ''
    # in the templates. Remove empty fields instead.
    xml_data = purify(xml_data)

    return xml_data
Esempio n. 3
0
    def process(cls, template_dir, destination, template_vars):
        """ Process templates """
        class _TreeUndefined(jinja2.Undefined):
            def __getitem__(self, key):
                return self

            def __getattr__(self, key):
                return self

        template_dir = os.path.join(TEMPLATE_DIR, template_dir)
        template_env = jinja2.Environment(
            loader=jinja2.FileSystemLoader(template_dir),
            trim_blocks=True,
            lstrip_blocks=True,
            keep_trailing_newline=True,
            undefined=_TreeUndefined)

        def surround(items, string, prepend=True, append=True):
            """ Surrounds each element in a list by the given string """
            return [
                '{}{}{}'.format(string if prepend is True else '', element,
                                string if append is True else '')
                for element in items
            ]

        template_env.filters["surround"] = surround

        def regex_replace(string, find, replace, multiline=False):
            """ Replaces regex in string """
            flags = 0
            if multiline:
                flags += re.MULTILINE
            return re.sub(find, replace, string, flags=flags)

        template_env.filters["regex_replace"] = regex_replace

        def get_list(value):
            """ Returns a list (existing list or new single elemented list) """
            return value if isinstance(value, list) else [value]

        template_env.filters["get_list"] = get_list

        # Loop over all templates
        for infile in utils.list_all_files(template_dir):

            # Files may have templatized names
            if '{{' in infile and '}}' in infile:
                outfile = jinja2.Template(infile).render(template_vars)
            else:
                outfile = infile
            outfile_name, extension = os.path.splitext(outfile)

            # Create directories if necessary
            utils.ensure_directory_exists(
                os.path.dirname(os.path.join(destination, outfile)))

            # Files that end with .j2 are templates
            if extension == '.j2':
                print("  Generating {}".format(outfile_name))
                outfile_path = os.path.join(destination, outfile_name)

                # Make content of already existing XML files available in
                # the template. That way templates can decide what data to keep
                # or override.
                if '.xml' in infile and os.path.isfile(outfile_path):
                    with open(outfile_path, 'r') as xmlfile_ctx:
                        xml_content = xmlfile_ctx.read()

                    # Remove variables from xml.in files
                    xml_content = re.sub(r'@([A-Za-z0-9_]+)@', r'AT_\1_AT',
                                         xml_content)
                    xml_data = {}
                    try:
                        # Like Yahoo converter, but don't omit 'content' if
                        # there are no attributes.
                        converter = xmljson.XMLData(xml_fromstring=False,
                                                    simple_text=False,
                                                    text_content="content")
                        root = xml.etree.ElementTree.fromstring(xml_content)
                        xml_data = converter.data(root)

                        # Parsed XML Data will contain OrderedDict() as empty
                        # value which converts to 'OrderedDict()' instead of ''
                        # in the templates. Remove empty fields instead.
                        xml_data = utils.purify(xml_data)
                    except xml.etree.ElementTree.ParseError as err:
                        print("Failed to parse {}: {}".format(
                            outfile_path, err))
                    template_vars.update({'xml': xml_data})

                # Make the datetime of strings files the existing datetime
                if '.po' in infile and os.path.isfile(outfile_path):
                    with open(outfile_path, 'r') as stringsfile_ctx:
                        strings_content = stringsfile_ctx.read()

                    datere = re.compile(r'"POT-Creation-Date: (.*)\\n"')
                    try:
                        timestamp = datere.search(strings_content).group(1)
                        template_vars.update({'datetime': timestamp})
                    except AttributeError:
                        raise

                template = template_env.get_template(infile)
                content = template.render(template_vars)
                if content:
                    with open(outfile_path, 'w') as outfile_ctx:
                        outfile_ctx.write(content)

            # Other files are just copied
            else:
                print("     Copying {}{}".format(outfile_name, extension))
                shutil.copyfile(os.path.join(template_dir, infile),
                                os.path.join(destination, outfile))
Esempio n. 4
0
def generate_diff(
    xml_file_a,
    xml_file_b,
    ignore_udp_open_filtered=False,
    output_type="xml",
    write_summary_to_disk_for_xml_output_type=False,
    verbose=False,
    step_debug=False,
):

    start_time = time.time()

    if output_type not in ["txt", "xml"]:
        print(f'Invalid output type: "{output_type}".  Must be "txt" or "xml"')
        return None

    if not os.path.exists(xml_file_a):
        print(f"File does not exist: {xml_file_a}")
        return None

    if not os.path.exists(xml_file_b):
        print(f"File does not exist: {xml_file_b}")
        return None

    if xml_file_a == xml_file_b:
        print("Provided scan files are the same, they must be different.")
        return None

    # Load XML scan data from files.
    try:
        print(f'Loading scan "a" file: {xml_file_a}')
        scan_a = ndiff.Scan()
        scan_a.load_from_file(xml_file_a)
        print(f'Successfully loaded scan "a" file: {xml_file_a}')
    except Exception as e:
        print(f"Exception loading {xml_file_a}: {e}")
        return None

    try:
        print(f'Loading scan "b" file: {xml_file_b}')
        scan_b = ndiff.Scan()
        scan_b.load_from_file(xml_file_b)
        print(f'Successfully loaded scan "b" file: {xml_file_b}')
    except Exception as e:
        print(f"Exception loading {xml_file_b}: {e}")
        return None

    # Generate the diff file basename to be used.
    xml_file_a_basefile_name = os.path.splitext(
        os.path.basename(xml_file_a))[0]
    xml_file_b_basefile_name = os.path.splitext(
        os.path.basename(xml_file_b))[0]
    diff_file_basename = f"{xml_file_a_basefile_name}_DIFF_{xml_file_b_basefile_name}"

    # TXT
    if output_type == "txt":
        # Text output like the ndiff --text switch uses.  Not easy to analyze for large diffs, but including this as an
        # option to return. Easiest to write to a file, read it, and return the string.

        try:
            txt_stream = io.StringIO()
            diff = ndiff.ScanDiffText(scan_a, scan_b, txt_stream)
            # The .output() function writes output to txt_stream.
            cost = diff.output()

            print(f"ndiff cost: {cost}")

        except Exception as e:
            print(
                f"Exception: {e}.  Issue writting ndiff TXT output to a io.StringIO object."
            )
            return None

        scan_summary = txt_stream.getvalue()

        print(scan_summary)

        return scan_summary

    # XML
    try:
        xml_stream = io.StringIO()
        diff = ndiff.ScanDiffXML(scan_a, scan_b, xml_stream)
        # The .output() function writes output to xml_stream.
        cost = diff.output()

        print(f"ndiff cost: {cost}")

    except Exception as e:
        print(
            f"Exception: {e}.  Issue writting ndiff XML output to a io.StringIO object."
        )
        return None

    # Convert from XML to a JSON object for easier parsing.
    try:
        xmljson_object = xmljson.XMLData()
        xmljson_object_as_string = xmljson_object.data(
            fromstring(xml_stream.getvalue()))
        json_data = json.loads(json.dumps(xmljson_object_as_string))

    except Exception as e:
        print(f"Exception: {e}.  Issue converting XML to JSON.")
        return None

    # Extract Nmap stats.
    a_stats = json_data["nmapdiff"]["scandiff"]["a"]["nmaprun"]
    b_stats = json_data["nmapdiff"]["scandiff"]["b"]["nmaprun"]

    # Calculate time math.
    scan_delta_seconds = b_stats["start"] - a_stats["start"]
    scan_delta_minutes = round(scan_delta_seconds / 60, 2)
    scan_delta_hours = round(scan_delta_minutes / 60, 2)
    scan_delta_days = round(scan_delta_hours / 24, 2)

    # Initialize change summary text with scan stat info.
    change_summary = f"""Scan summary

Previous scan start time: {a_stats['startstr']}
Latest scan start time:   {b_stats['startstr']}
Time between scans: {scan_delta_seconds} seconds / {scan_delta_minutes} minutes / {scan_delta_hours} hours / {scan_delta_days} days

"""

    hostdiff_raw = json_data["nmapdiff"]["scandiff"]["hostdiff"]

    # Standardize hostdiff to be a list so it can be iterated through.
    # Single host (dict object) - add hostdiff dict to an empty list so it can be iterated through.
    if type(hostdiff_raw) is dict:
        hostdiff = []
        hostdiff.append(hostdiff_raw)
    # Multiple hosts (list of dicts) - hostdiff is already a list of dicts.
    else:
        hostdiff = hostdiff_raw
    """
    Order of diff checks:

    1) Host online/offline state detection with updated port status.
    2) Service name change detection on port.
    3) Port status change detection on host.

    This was a beast to get to a decent state.  There may be some fringe edge cases that may not work or provide a
    correct assessment of the scan diff.  Best way to analyze is to use the -d and -v switches to step through each diff
    and inspect manually.
    """

    # Track any scan events in this variable.
    scan_events = ""

    # Iterate through the hosts.
    for host in hostdiff:

        if verbose:
            print("=" * 10)
            print(f"Host:\n{json.dumps(host, indent=4)}")
            print("Events:")

        try:

            # 1) Host online/offline state detection.
            # Host returned a port status (closed, open) for scan "a", but not for scan "b", so it's offline.
            if "a" in host:

                if host["a"]["host"]["status"]["state"] == "up":
                    target = host["a"]["host"]["address"]["addr"]

                    # 1) Determine which ports are now closed, if any.
                    try:
                        port_raw = host["a"]["host"]["ports"]["port"]
                    # Host could be up from an ICMP echo reply and not necessarily have any closed ports.
                    except KeyError:
                        continue

                    # For a single port, add port_raw dict to empty list so it can iterated through.
                    if type(port_raw) is dict:
                        port_raw_list = []
                        port_raw_list.append(port_raw)
                    # For multiple ports, port_raw is already a list of dicts.
                    else:
                        port_raw_list = port_raw

                    for port in port_raw_list:

                        protocol = port["protocol"].upper()
                        portid = port["portid"]
                        state = port["state"]["state"]

                        if ignore_udp_open_filtered:
                            if protocol == "UDP" and state == "open|filtered":
                                continue

                        # Don't bother showing explicit closed ports.
                        if state == "closed":
                            continue

                        # Host is down, so every port is now closed.
                        event = f"{target} port {protocol} {portid} is closed"
                        if verbose:
                            print(f"1a - {event}")
                            if step_debug:
                                input("Press Enter to continue...")
                        scan_events += f"{event}\n"

                    # 2) Just notify that the host is offline with non-specified ports.  Not as useful.
                    # event = f"{target} is offline"
                    # if verbose:
                    #     print(f"1a - {event}")
                    #     input("Press Enter to continue...")
                    # scan_events += f"{event}\n"

                    continue

            # Host returned a port status (closed, open) for scan "b", but not for scan "a", so it's online.
            if "b" in host:

                if host["b"]["host"]["status"]["state"] == "up":
                    target = host["b"]["host"]["address"]["addr"]

                    # 1) Determine which ports are now open, if any.
                    try:
                        port_raw = host["b"]["host"]["ports"]["port"]
                    # Host could be up from an ICMP echo reply and not necessarily have any open ports.
                    except KeyError:
                        continue

                    # For a single port, add port_raw dict to empty list so it can iterated through.
                    if type(port_raw) is dict:
                        port_raw_list = []
                        port_raw_list.append(port_raw)
                    # For multiple ports, port_raw is already a list of dicts.
                    else:
                        port_raw_list = port_raw

                    for port in port_raw_list:

                        protocol = port["protocol"].upper()
                        portid = port["portid"]
                        state = port["state"]["state"]

                        if ignore_udp_open_filtered:
                            if protocol == "UDP" and state == "open|filtered":
                                continue

                        # Don't bother showing explicit closed ports.
                        if state == "closed":
                            continue

                        # Build the service name of the new online port.
                        service_dict = port["service"]
                        nmap_service_name = build_nmap_service_name(
                            service_dict)

                        event = f'{target} port {protocol} {portid} is {state} with service "{nmap_service_name}"'
                        if verbose:
                            print(f"1b - {event}")
                            if step_debug:
                                input("Press Enter to continue...")
                        scan_events += f"{event}\n"

                    # 2) Just notify that the host is online with non-specified ports.  Not as useful.
                    # event = f"{target} is online"
                    # if verbose:
                    #     print(f"1b - {event}")
                    #     input("Press Enter to continue...")
                    # scan_events += f"{event}\n"

                    continue

            # Move on if there is no portdiff to analyze.  This could include an operating system change which might
            # be added later.
            if "portdiff" not in host["host"]["ports"]:
                continue

            # Extract the target.
            target = host["host"]["address"]["addr"]

            # Extract the portdiff dict.
            portdiff_raw = host["host"]["ports"]["portdiff"]

            # For a single port, add portdiff dict to empty list so it can be iterated through.
            if type(portdiff_raw) is dict:
                portdiffs = []
                portdiffs.append(portdiff_raw)
            # For multiple ports, portdiff is already a list of dicts.
            else:
                portdiffs = portdiff_raw

            for portdiff in portdiffs:

                # 2) Service name change detection.
                if "b" not in portdiff:

                    port = portdiff["port"]["portid"]
                    protocol = portdiff["port"]["protocol"].upper()

                    # Build complete "a" service name.
                    service_dict_a = portdiff["port"]["a"]["service"]
                    portdiff_a_service = build_nmap_service_name(
                        service_dict_a)

                    # Build complete "b" service name.
                    service_dict_b = portdiff["port"]["b"]["service"]
                    portdiff_b_service = build_nmap_service_name(
                        service_dict_b)

                    event = f'{target} {protocol} {port} service changed from "{portdiff_a_service}" to "{portdiff_b_service}"'

                    if verbose:
                        print(f"2 - {event}")
                        if step_debug:
                            input("Press Enter to continue...")

                    scan_events += f"{event}\n"

                    continue

                # 3) Port status change detection.  Use "b" scan as source of truth to determine if a port state changed.
                b_dict = portdiff["b"]["port"]
                b_dict_port = b_dict["portid"]
                b_dict_protocol = b_dict["protocol"].upper()

                # Add explicit "closed" state to b_dict, having the dictionary reflect what is in the
                # portdiff["a"]["port"] dictionary.
                a_dict = portdiff["a"]["port"]

                # TODO doublecheck this logic.
                if "state" not in b_dict and "state" in a_dict:
                    if a_dict["state"]["state"] != "closed":
                        b_dict["state"] = {}
                        b_dict["state"]["state"] = "closed"

                    else:
                        continue

                b_dict_state = b_dict["state"]["state"]

                # Ignore UDP ports that aren't definitively "open".  Ignores if either scan a or b have a UDP port with
                # the "open|filtered" state.
                if ignore_udp_open_filtered:

                    if b_dict_protocol == "UDP" and b_dict_state == "open|filtered":
                        continue

                    # Ignore state changes from when a's scan is UDP "open|filtered".
                    a_dict = portdiff["a"]["port"]
                    a_dict_protocol = a_dict["protocol"].upper()

                    try:
                        a_dict_state = a_dict["state"]["state"]
                    except KeyError:
                        a_dict_state = "closed"

                    if a_dict_protocol == "UDP" and a_dict_state == "open|filtered":
                        continue

                event = f"{target} {b_dict_protocol} {b_dict_port} changed state to {b_dict_state}"

                # Build the service name of the new online port.
                if b_dict_state == "open":
                    service_dict = b_dict["service"]
                    nmap_service_name = build_nmap_service_name(service_dict)
                    event += f' with service "{nmap_service_name}"'

                if verbose:
                    print(f"3 - {event}")
                    if step_debug:
                        input("Press Enter to continue...")

                scan_events += f"{event}\n"

        except Exception as e:
            print(
                f"Exception: {e}.  Issue processing: {json.dumps(host, indent=4)}"
            )
            traceback.print_exc()
            return None

    # Determine total processing time.
    finish_time = time.time()
    total_processing_time = round(finish_time - start_time, 2)
    total_processing_time_message = f"Total processing time: {total_processing_time} seconds\n\n"
    print(total_processing_time_message)
    change_summary += f"{total_processing_time_message}"

    # Determine final change summary results.
    if not scan_events:
        change_summary += "No scan diff detected between scans."
    else:
        change_summary += scan_events

    # Print out change summary.
    print("=" * 30)
    print(change_summary)

    # Optionally write the results to a file.
    if write_summary_to_disk_for_xml_output_type:
        change_summary_file_name = f"{diff_file_basename}.summary"
        with open(change_summary_file_name, "w") as fh:
            fh.write(change_summary)

    return change_summary