Ejemplo n.º 1
0
def graph_dns(g, df_dns):
    """Populate graph *g* with DNS transaction data from *df_dns*.

    For each DNS log row this creates a dnsTransaction node and connects it:
      * to the queried FQDN ("lookedUp"),
      * to each answer, as a host or fqdn node ("answer" / "resolvedTo"),
      * to the client host ("queried") and the DNS server ("queriedServer"),
      * to the flow that contains it ("contains"),
    and maintains a weighted "resolved" edge from the client host to the
    FQDN it looked up (weight counts repeat resolutions).
    """
    # Iterate through all the DNS log rows
    for idx in df_dns.index:
        # Hoist the row lookup; the original repeated df_dns.loc[i] per field.
        row = df_dns.loc[idx]

        # Human-readable transaction name: "<trans_id> - <qtype> - <query>"
        name = "%d - %s - %s" % (row["trans_id"], row["qtype_name"],
                                 row["query"])
        timestamp = row["ts"]
        flowname = row["uid"]

        # Pick out the properties that belong on the transaction node.
        transaction = g.dnsTransaction.create(name=name,
                                              ts=row["ts"],
                                              proto=row["proto"],
                                              orig_p=row["id.orig_p"],
                                              resp_p=row["id.resp_p"],
                                              qclass=row["qclass"],
                                              qclass_name=row["qclass_name"],
                                              qtype=row["qtype"],
                                              qtype_name=row["qtype_name"],
                                              rcode=row["rcode"],
                                              rcode_name=row["rcode_name"],
                                              AA=row["AA"],
                                              TC=row["TC"],
                                              RD=row["RD"],
                                              RA=row["RA"],
                                              Z=row["Z"],
                                              rejected=row["rejected"])

        # Create a node + edge for the query, if there is one in the log.
        fqdn = None
        if row["query"]:
            fqdn = g.fqdn.get_or_create("name", row["query"],
                                        {"name": row["query"],
                                         "domain": row["query"]})
            g.lookedUp.create(transaction, fqdn)

            # One node + edge per answer.  There should be one TTL per
            # answer, but the lists occasionally differ in length, so pad
            # the TTL list with its last value up to the answer count.
            if row["answers"]:
                addrs = row["answers"].split(",")
                ttls = row["TTLs"].split(",")
                ttls = extend_list(ttls, ttls[-1], len(addrs))

                # BUGFIX: the original reused the outer loop variable "i"
                # here, so every df_dns.loc[i] lookup after this loop read
                # the wrong row.  zip() avoids index bookkeeping entirely.
                for ans, raw_ttl in zip(addrs, ttls):
                    ttl = float(raw_ttl)
                    # Answers can be IPs or names; pick the node type.
                    if is_IP(ans):
                        node = g.host.get_or_create("name", ans,
                                                    {"name": ans,
                                                     "address": ans})
                    else:
                        node = g.fqdn.get_or_create("name", ans,
                                                    {"name": ans,
                                                     "address": ans})

                    g.resolvedTo.create(fqdn, node, {"ts": timestamp})
                    g.answer.create(transaction, node, {"TTL": ttl})

        # Create a node + edge for the source of the DNS transaction
        # (the client host).
        src = None
        if row["id.orig_h"]:
            src = g.host.get_or_create("name", row["id.orig_h"],
                                       {"name": row["id.orig_h"],
                                        "address": row["id.orig_h"]})
            g.queried.create(src, transaction)

        # Create a node + edge for the destination of the DNS transaction
        # (the DNS server).
        if row["id.resp_h"]:
            dst = g.host.get_or_create("name", row["id.resp_h"],
                                       {"name": row["id.resp_h"],
                                        "address": row["id.resp_h"]})
            g.queriedServer.create(transaction, dst)

        # Now connect this transaction to the correct flow.
        flows = g.flow.index.lookup(name=flowname)
        if flows is None:
            # Flow does not exist (yet); nothing to link.
            pass
        else:
            # lookup returns a generator, but there should only be one
            # flow with this name, so just take the first one.
            flow = next(flows)
            nodes = flow.outV("contains")
            if nodes is None or transaction not in nodes:
                g.contains.create(flow, transaction)

        # Associate the src host with the FQDN it resolved, counting
        # repeat resolutions in a "weight" property.  Guard on src/fqdn
        # so values left over from a previous iteration are never used.
        if row["query"] and src is not None and fqdn is not None:
            neighbors = src.outV("resolved")
            if neighbors is None or fqdn not in neighbors:
                e = g.resolved.create(src, fqdn)
                e.weight = 1
                e.save()
            else:
                edges = edge_list(g, src._id, fqdn._id, "resolved")
                # There should only be one of these edges, and we already
                # know it exists, so it's safe to just take the first one.
                edge = next(edges)
                g.resolved.update(edge._id, weight=(edge.weight + 1))
Ejemplo n.º 2
0
def graph_dns(g, df_dns):
    """Populate graph *g* with DNS transaction data from *df_dns*.

    Each DNS log row becomes a dnsTransaction node linked to the queried
    FQDN ("lookedUp"), each answer node ("answer"/"resolvedTo"), the
    client ("queried") and server ("queriedServer") hosts, and its flow
    ("contains").  A weighted "resolved" edge from client to FQDN counts
    how many times that client resolved that domain.
    """
    # Iterate through all the DNS log rows
    for idx in df_dns.index:
        # Hoist the repeated df_dns.loc[...] lookup into one local row.
        row = df_dns.loc[idx]

        # Human-readable transaction name: "<trans_id> - <qtype> - <query>"
        name = "%d - %s - %s" % (row["trans_id"], row["qtype_name"],
                                 row["query"])
        timestamp = row["ts"]
        flowname = row["uid"]

        # Pick out the properties that belong on the transaction node.
        transaction = g.dnsTransaction.create(
            name=name,
            ts=row["ts"],
            proto=row["proto"],
            orig_p=row["id.orig_p"],
            resp_p=row["id.resp_p"],
            qclass=row["qclass"],
            qclass_name=row["qclass_name"],
            qtype=row["qtype"],
            qtype_name=row["qtype_name"],
            rcode=row["rcode"],
            rcode_name=row["rcode_name"],
            AA=row["AA"],
            TC=row["TC"],
            RD=row["RD"],
            RA=row["RA"],
            Z=row["Z"],
            rejected=row["rejected"])

        # Create a node + edge for the query, if there is one in the log.
        fqdn = None
        if row["query"]:
            fqdn = g.fqdn.get_or_create("name", row["query"], {
                "name": row["query"],
                "domain": row["query"]
            })
            g.lookedUp.create(transaction, fqdn)

            # One node + edge per answer.  There should be one TTL per
            # answer, but the two lists occasionally differ in length, so
            # pad the TTL list with its last value up to the answer count.
            if row["answers"]:
                addrs = row["answers"].split(",")
                ttls = row["TTLs"].split(",")
                ttls = extend_list(ttls, ttls[-1], len(addrs))

                # BUGFIX: the original reused the outer loop variable "i"
                # here, corrupting every df_dns.loc[i] lookup later in the
                # same outer iteration.  zip() removes the index entirely.
                for ans, raw_ttl in zip(addrs, ttls):
                    ttl = float(raw_ttl)
                    # Answers can be IPs or other names; pick the node type.
                    if is_IP(ans):
                        node = g.host.get_or_create("name", ans, {
                            "name": ans,
                            "address": ans
                        })
                    else:
                        node = g.fqdn.get_or_create("name", ans, {
                            "name": ans,
                            "address": ans
                        })

                    g.resolvedTo.create(fqdn, node, {"ts": timestamp})
                    g.answer.create(transaction, node, {"TTL": ttl})

        # Create a node + edge for the source of the DNS transaction
        # (the client host).
        src = None
        if row["id.orig_h"]:
            src = g.host.get_or_create(
                "name", row["id.orig_h"], {
                    "name": row["id.orig_h"],
                    "address": row["id.orig_h"]
                })
            g.queried.create(src, transaction)

        # Create a node + edge for the destination of the DNS transaction
        # (the DNS server).
        if row["id.resp_h"]:
            dst = g.host.get_or_create(
                "name", row["id.resp_h"], {
                    "name": row["id.resp_h"],
                    "address": row["id.resp_h"]
                })
            g.queriedServer.create(transaction, dst)

        # Now connect this transaction to the correct flow.
        flows = g.flow.index.lookup(name=flowname)
        if flows is None:
            # Flow does not exist (yet); nothing to link.
            pass
        else:
            # lookup returns a generator, but there should only be one
            # flow with this name, so just take the first one.
            flow = next(flows)
            nodes = flow.outV("contains")
            if nodes is None or transaction not in nodes:
                g.contains.create(flow, transaction)

        # Associate the src host with the FQDN it resolved, counting
        # repeat resolutions in a "weight" property.  Guard on src/fqdn
        # so values left over from a previous iteration are never used.
        if row["query"] and src is not None and fqdn is not None:
            neighbors = src.outV("resolved")
            if neighbors is None or fqdn not in neighbors:
                e = g.resolved.create(src, fqdn)
                e.weight = 1
                e.save()
            else:
                edges = edge_list(g, src._id, fqdn._id, "resolved")
                # There should only be one of these edges, and we already
                # know it exists, so it's safe to just take the first one.
                edge = next(edges)
                g.resolved.update(edge._id, weight=(edge.weight + 1))
Ejemplo n.º 3
0
def graph_flows(g, df_conn):
    """Populate graph *g* with connection (flow) data from *df_conn*.

    For each conn log row this creates (or reuses) Host nodes for the two
    endpoints, a Flow node keyed by the log uid, "source"/"dest" edges
    tying hosts to the flow, and a weighted "connectedTo" edge directly
    between the hosts (weight counts repeat connections).
    """
    # Iterate through all the flows
    for con in df_conn.index:
        # Hoist the repeated df_conn.loc[...] lookup into one local row.
        row = df_conn.loc[con]

        # Create the source & dest host nodes (reused if already present).
        src_host = g.host.get_or_create("name", row["id.orig_h"],
                                        {"name": row["id.orig_h"],
                                         "address": row["id.orig_h"]})
        dst_host = g.host.get_or_create("name", row["id.resp_h"],
                                        {"name": row["id.resp_h"],
                                         "address": row["id.resp_h"]})

        # If the flow is marked "local_orig", update this feature on the
        # source host.  This can't be done at creation time because the
        # host may have been created earlier in a context where we didn't
        # know it was local.
        if row["local_orig"] == "T":
            src_host.local = "T"
            src_host.save()

        # Flows with the same uid are the same flow (the same log can be
        # processed twice, or the flow observed in several logs), so key
        # the flow node on its uid via get_or_create.
        flowname = row["uid"]
        properties = dict(row)
        # Manually assign the "name" property used as the index key.
        properties["name"] = flowname
        # The endpoint IPs belong on the connected host nodes, not here.
        del properties["id.orig_h"]
        del properties["id.resp_h"]

        flow = g.flow.get_or_create("name", flowname, properties)

        # Create the host<->flow edges, if they don't already exist.
        nodes = flow.inV("source")
        if nodes is None or src_host not in nodes:
            g.source.create(src_host, flow)

        nodes = flow.outV("dest")
        if nodes is None or dst_host not in nodes:
            g.dest.create(flow, dst_host)

        # Direct weighted src -> dst edge: a common analysis shortcut
        # that avoids walking through the flow nodes.
        neighbors = src_host.outV("connectedTo")
        if neighbors is None or dst_host not in neighbors:
            e = g.connectedTo.create(src_host, dst_host)
            e.weight = 1
            e.save()
        else:
            edges = edge_list(g, src_host._id, dst_host._id, "connectedTo")
            # There should only be one of these edges, and we already know
            # it exists, so it's safe to just take the first one.
            edge = next(edges)
            g.connectedTo.update(edge._id, weight=(edge.weight + 1))
Ejemplo n.º 4
0
def graph_flows(g, df_conn):
    """Populate graph *g* with connection (flow) data from *df_conn*.

    Each conn log row produces: Host nodes for both endpoints, a Flow node
    keyed by the log uid, "source"/"dest" edges between hosts and the
    flow, and a direct weighted "connectedTo" edge between the two hosts.
    """
    # Iterate through all the flows
    for con in df_conn.index:
        # Hoist the repeated df_conn.loc[...] lookup into one local row.
        row = df_conn.loc[con]

        # Create the source & dest host nodes (reused if already present).
        src_host = g.host.get_or_create(
            "name", row["id.orig_h"], {
                "name": row["id.orig_h"],
                "address": row["id.orig_h"]
            })
        dst_host = g.host.get_or_create(
            "name", row["id.resp_h"], {
                "name": row["id.resp_h"],
                "address": row["id.resp_h"]
            })

        # If the flow is marked "local_orig", update this feature on the
        # source host.  This can't be done at creation time because the
        # host may have been created earlier in a context where we didn't
        # know it was local.
        if row["local_orig"] == "T":
            src_host.local = "T"
            src_host.save()

        # Flows with the same uid are the same flow (the same log can be
        # processed twice, or the flow observed in several logs), so key
        # the flow node on its uid via get_or_create.
        flowname = row["uid"]
        properties = dict(row)
        # Manually assign the "name" property used as the index key.
        properties["name"] = flowname
        # The endpoint IPs belong on the connected host nodes, not here.
        del properties["id.orig_h"]
        del properties["id.resp_h"]

        flow = g.flow.get_or_create("name", flowname, properties)

        # Create the host<->flow edges, if they don't already exist.
        nodes = flow.inV("source")
        if nodes is None or src_host not in nodes:
            g.source.create(src_host, flow)

        nodes = flow.outV("dest")
        if nodes is None or dst_host not in nodes:
            g.dest.create(flow, dst_host)

        # Direct weighted src -> dst edge: a common analysis shortcut
        # that avoids walking through the flow nodes.
        neighbors = src_host.outV("connectedTo")
        if neighbors is None or dst_host not in neighbors:
            e = g.connectedTo.create(src_host, dst_host)
            e.weight = 1
            e.save()
        else:
            edges = edge_list(g, src_host._id, dst_host._id, "connectedTo")
            # There should only be one of these edges, and we already know
            # it exists, so it's safe to just take the first one.
            edge = next(edges)
            g.connectedTo.update(edge._id, weight=(edge.weight + 1))