def test_packetizer():
    """Check packetizer output for the IPv4 sample capture."""
    packet_dict, highest_layers = networkml.parsers.pcap.reader.packetizer(
        'tests/trace_ab12_2001-01-01_02_03-client-ip-1-2-3-4.pcap')

    # Highest protocol layers expected for a handful of endpoints.
    expected_layers = {
        '172.16.0.1:67': {'BOOTP_RAW'},
        '192.168.3.131:56255': {'HTTP_RAW', 'IMAGE-JFIF_RAW', 'TCP_RAW'},
        '172.16.255.1:10659': {'SSL_RAW', 'TCP_RAW'},
        '67.215.65.132:0': {'ICMP_RAW'},
        '192.168.3.131:60629': {'DNS_RAW'},
    }
    for endpoint, layers in expected_layers.items():
        assert layers == highest_layers[endpoint]
    assert len(packet_dict) == 14169

    def check_packet(entry, macs, size, hex_length, hex_prefix, endpoints):
        # We have to drop date from comparison because reader.py doesn't
        # use UTC consistently. Migrate to UTC in the future.
        header, payload = entry
        assert macs == extract_macs(payload)
        assert size == packet_size([0, payload])
        assert hex_length == len(payload)
        assert hex_prefix == payload[:16]
        _, first_key, second_key = header
        assert endpoints == (first_key, second_key)

    entries = list(packet_dict.items())
    check_packet(
        entries[0],
        ('40:61:86:9a:f1:f5', '00:1a:8c:15:f9:80'),
        983,
        108,
        '001a8c15f9804061',
        ('192.168.3.131:57011', '72.14.213.138:80'),
    )
    check_packet(
        entries[-1],
        ('40:61:86:9a:f1:f5', 'ff:ff:ff:ff:ff:ff'),
        148,
        68,
        'ffffffffffff4061',
        ('192.168.3.131:17500', '192.168.3.255:17500'),
    )
def test_extract_macs():
    """Check extract_macs on well-formed and truncated hex input.

    The expectations below pin the contract: the first 12 hex digits of
    the input are the destination MAC, the next 12 are the source MAC,
    and both come back lower-cased and colon-separated as
    (source, dest).
    """
    test_payload = '00' * 20

    macs = extract_macs('123456789ABCDEF123456780' + test_payload)
    assert macs is not None
    source, dest = macs
    assert dest == '12:34:56:78:9a:bc'
    assert source == 'de:f1:23:45:67:80'

    macs = extract_macs('020406080A0C0E0103050700' + test_payload)
    assert macs is not None
    source, dest = macs
    assert dest == '02:04:06:08:0a:0c'
    assert source == '0e:01:03:05:07:00'

    # Inputs with fewer than 24 hex digits must yield None (presumably
    # because they cannot hold two MAC addresses -- confirm against
    # extract_macs). Identity comparison is the idiomatic None check.
    assert extract_macs('0000') is None
    assert extract_macs('01005e0000fc0050') is None
def test_ipv6_packetizer():
    """Check packetizer output for the IPv6 sample capture."""
    pcap_path = 'tests/trace_ab12_2001-01-01_02_03-client-ip6-1-2-3-4.pcap'
    packet_dict, highest_layers = networkml.parsers.pcap.reader.packetizer(
        pcap_path)

    assert {'ICMPV6_RAW'} == highest_layers['::1:0']
    assert {'DATA_RAW', 'TCP_RAW'} == highest_layers['::1:5201']

    # Only the payload of the first packet is inspected; its header is unused.
    _, first_payload = list(packet_dict.items())[0]
    all_zero_mac = '00:00:00:00:00:00'
    assert (all_zero_mac, all_zero_mac) == extract_macs(first_payload)
    assert 40 == packet_size([0, first_payload])
    assert 188 == len(first_payload)
def extract_features(session_dict, capture_source=None, max_port=1024):
    '''
    Extracts netflow level features from a dictionary of sessions.

    Args:
        session_dict: Mapping of session key to session. The first two
                      elements of each key are parsed with get_ip_port()
                      into an (address, port) pair, and the second field
                      of the session's first packet is handed to
                      extract_macs().
        capture_source: Address identifying the capture source. It is
                        compared against both the packet MAC addresses
                        and the session addresses. When None, it
                        defaults to
                        get_source(session_dict, address_type='MAC').
        max_port: Only ports strictly below this value are counted
                  (default 1024).

    Returns:
        A 4-tuple (feature_vector, capture_source, other_ips,
        capture_ip_source):
            feature_vector: 1-D array of length 4 * max_port + 8. The
                first 4 * max_port entries are per-port session counts
                (source/destination ports of sessions initiated by the
                capture source, then of sessions received by it),
                divided by the total number of initiated plus received
                sessions. The last 8 entries are the external, TCP, UDP
                and ICMP session fractions for initiated and then for
                received sessions.
            capture_source: The capture source that was used.
            other_ips: List of addresses accepted by is_private() that
                were the peer of the capture source in some session.
            capture_ip_source: get_source(session_dict,
                address_type='IP').
    '''

    address_type = 'MAC'

    # If the capture source isn't specified, default to the most used address
    if capture_source is None:
        capture_source = get_source(session_dict, address_type=address_type)
    capture_ip_source = get_source(session_dict, address_type='IP')

    # Per-port session counters (index == port number)
    num_sport_init = [0] * max_port
    num_dport_init = [0] * max_port
    num_sport_rec = [0] * max_port
    num_dport_rec = [0] * max_port

    # Counters for sessions initiated by the capture source
    num_sessions_init = 0
    num_external_init = 0
    num_tcp_sess_init = 0
    num_udp_sess_init = 0
    num_icmp_sess_init = 0

    # Counters for sessions received by the capture source
    num_sessions_rec = 0
    num_external_rec = 0
    num_tcp_sess_rec = 0
    num_udp_sess_rec = 0
    num_icmp_sess_rec = 0

    # Iterate over all sessions and aggregate the info
    other_ips = defaultdict(int)
    for key, session in session_dict.items():
        address_1, port_1 = get_ip_port(key[0])
        address_2, port_2 = get_ip_port(key[1])

        # A session without packets has no MACs to inspect; skip it the
        # same way as a session whose MACs cannot be extracted.
        if not session:
            continue

        # Get the first packet and grab the macs from it
        first_packet = session[0][1]
        macs = extract_macs(first_packet)
        if macs is None:
            continue
        source_mac, destination_mac = macs

        # If the source is the capture source
        if source_mac == capture_source or address_1 == capture_source:
            if is_private(address_2):
                other_ips[address_2] += 1

            num_sessions_init += 1
            num_external_init += is_external(address_1, address_2)
            # Protocol codes are passed as two-digit hex strings
            num_tcp_sess_init += is_protocol(session, '06')
            num_udp_sess_init += is_protocol(session, '11')
            num_icmp_sess_init += is_protocol(session, '01')

            source_port, destination_port = int(port_1), int(port_2)
            if source_port < max_port:
                num_sport_init[source_port] += 1
            if destination_port < max_port:
                num_dport_init[destination_port] += 1

        # If the destination is the capture source. This is deliberately
        # not an elif: a session can match both directions.
        if destination_mac == capture_source or address_2 == capture_source:
            if is_private(address_1):
                other_ips[address_1] += 1

            num_sessions_rec += 1
            num_external_rec += is_external(address_2, address_1)
            num_tcp_sess_rec += is_protocol(session, '06')
            num_udp_sess_rec += is_protocol(session, '11')
            num_icmp_sess_rec += is_protocol(session, '01')

            source_port, destination_port = int(port_1), int(port_2)
            if source_port < max_port:
                num_sport_rec[source_port] += 1
            if destination_port < max_port:
                num_dport_rec[destination_port] += 1

    num_port_sess = np.concatenate(
        (num_sport_init, num_dport_init, num_sport_rec, num_dport_rec), axis=0)

    # Avoid division by zero: an empty direction is treated as one session,
    # which leaves all of its (zero) counts at zero after normalization.
    num_sessions_init = max(num_sessions_init, 1)
    num_sessions_rec = max(num_sessions_rec, 1)

    num_port_sess = num_port_sess / (num_sessions_init + num_sessions_rec)

    extra_features = [
        num_external_init / num_sessions_init,
        num_tcp_sess_init / num_sessions_init,
        num_udp_sess_init / num_sessions_init,
        num_icmp_sess_init / num_sessions_init,
        num_external_rec / num_sessions_rec,
        num_tcp_sess_rec / num_sessions_rec,
        num_udp_sess_rec / num_sessions_rec,
        num_icmp_sess_rec / num_sessions_rec,
    ]

    feature_vector = np.concatenate((num_port_sess, extra_features), axis=0)
    return feature_vector, capture_source, list(
        other_ips.keys()), capture_ip_source
def test_extract_macs():
    """Check that extract_macs splits a 24-digit hex string into two MACs."""
    # NOTE(review): an earlier test_extract_macs exists in this file. If both
    # live in the same module, this later definition replaces the earlier one
    # and only this one is collected. The earlier test expects lower-case
    # MACs while this one expects upper-case -- confirm which is current.
    macs = extract_macs('123456789ABCDEF123456780')
    source, dest = macs
    assert (source, dest) == ('DE:F1:23:45:67:80', '12:34:56:78:9A:BC')