Esempio n. 1
0
def process_file(rootdir, filename, fieldfilter, b_redis, disable_json, ck):
    """Run tshark on a capture file and store its packets in redis or JSON.

    With ``disable_json`` set, every packet is ranked directly into redis
    through the module-level ``red`` connection and no JSON file is written.
    Otherwise all packets are collected, written with
    ``potiron.store_packet`` and, when ``b_redis`` is set, handed to
    ``potiron_redis.process_storage``.

    Args:
        rootdir: Output root directory passed to ``potiron.store_packet``.
        filename: Path of the capture file read by tshark.
        fieldfilter: Optional list of tshark field names to extract. When it
            is empty, every field of ``potiron.tshark_fields`` is extracted.
            The list is modified in place so that it always contains
            ``frame.time_epoch`` and ``ip.proto``.
        b_redis: In JSON mode, also import the written JSON file into redis.
        disable_json: Store directly into redis instead of writing JSON.
        ck: Whether combined keys (``sensor:protocol:timestamp``) are used.

    Raises:
        OSError: If tshark is not installed or exits with a non-zero status.

    The function calls ``sys.exit(0)`` in redis mode when the file was already
    imported or when the stored dataset uses another BPF / combined-key
    setting.
    """
    import shlex  # local import: only used to quote the tshark arguments

    if disable_json:
        fn = os.path.basename(filename)
        # A file that is already listed in FILES is not imported again
        if red.sismember("FILES", fn):
            sys.stderr.write('[INFO] Filename ' + fn + ' was already imported ... skip ...\n')
            sys.exit(0)
        # FIXME Users have to be careful with the file extensions so that the same sample is
        # FIXME not processed from capture files (potiron-json-tshark module) and then again
        # FIXME from json files (potiron_redis module)

        # List of fields that are included in the json documents that should not be ranked
        # FIXME Put this as argument to the program as this list depends on the documents that are introduced
        non_index = ['', 'filename', 'sensorname', 'timestamp', 'packet_id']

    # If tshark is not installed, raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # FIXME Put in config file

    tshark_fields = potiron.tshark_fields
    cmd = "tshark -n -q -Tfields "
    if fieldfilter:
        # frame.time_epoch and ip.proto are always extracted
        if 'frame.time_epoch' not in fieldfilter:
            fieldfilter.insert(0, 'frame.time_epoch')
        if 'ip.proto' not in fieldfilter:
            fieldfilter.insert(1, 'ip.proto')
        selected_fields = fieldfilter
    else:
        selected_fields = tshark_fields
    for field in selected_fields:
        cmd += "-e {} ".format(field)
    # The command goes through a shell, so the filter and the file name are
    # quoted to survive spaces, quotes and other shell metacharacters.
    cmd += "-E header=n -E separator=/s -E occurrence=f -Y {} -r {} -o tcp.relative_sequence_numbers:FALSE".format(
        shlex.quote(str(bpf)), shlex.quote(str(filename)))
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(filename)

    json_fields = potiron.json_fields
    # Integer fields and the value stored when tshark printed no number
    special_fields = {'length': -1, 'ipttl': -1, 'iptos': 0, 'tcpseq': -1, 'tcpack': -1, 'icmpcode': 255, 'icmptype': 255}

    def parse_line(raw):
        """Turn one raw line of tshark output into a packet dictionary."""
        packet = {}
        # Drop the trailing newline, then split on the /s (space) separator
        for i, value in enumerate(raw[:-1].decode().split(' ')):
            if fieldfilter:
                valname = json_fields[tshark_fields.index(fieldfilter[i])]
            else:
                valname = json_fields[i]
            if valname in special_fields:
                try:
                    packet[valname] = int(value)
                except ValueError:
                    packet[valname] = special_fields[valname]
            else:
                packet[valname] = value
        fill_packet(packet, disable_json)
        return packet

    def wait_for_tshark():
        """Reap the tshark process and raise OSError if it failed."""
        # FIXME Implement polling because wait can last forever
        proc.wait()
        if proc.returncode != 0:
            errmsg = b"".join(proc.stderr.readlines())
            raise OSError("tshark failed. Return code {}. {}".format(proc.returncode, errmsg))

    if disable_json:
        # If redis key 'BPF' already exists
        if red.keys('BPF'):
            # Check that the current bpf is the same as the one previously used
            if not red.sismember('BPF', bpf):
                bpf_string = str(red.smembers('BPF'))
                sys.stderr.write('[INFO] BPF for the current data is not the same as the one used in the data already stored here : {}\n'.format(bpf_string[3:-2]))
                sys.exit(0)
        # Otherwise, add the bpf in the key 'BPF'
        else:
            red.sadd('BPF', bpf)

        # If combined keys are used
        if ck:
            # If redis key 'CK' already exists ...
            if red.keys('CK'):
                # ... BUT contains 'NO', then combined keys are not used in the data already stored in redis
                if red.sismember('CK', 'NO'):
                    sys.stderr.write('[INFO] Combined key are not used in this redis dataset.\n')
                    sys.exit(0)
            # If redis key 'CK' does not exist ...
            else:
                red.sadd('CK', 'YES')
        # If combined keys are not used, the key 'CK' should exist anyway, with the value 'NO'
        else:
            # If redis key 'CK' already exists ...
            if red.keys('CK'):
                # ... BUT contains 'YES', then combined keys are used in the data already stored in redis
                if red.sismember('CK', 'YES'):
                    sys.stderr.write('[INFO] Combined key are used in this redis dataset.\n')
                    sys.exit(0)
            # Otherwise, we add it
            else:
                red.sadd('CK', 'NO')

        red.sadd("FILES", fn)

        potiron_path = os.path.dirname(os.path.realpath(__file__))[:-3]
        protocols_path = "{}doc/protocols".format(potiron_path)
        protocols = potiron.define_protocols(protocols_path)

        lastday = None
        prot = []
        for line in proc.stdout.readlines():
            packet = parse_line(line)
            timestamp = packet['timestamp']
            if ck:
                protocol = protocols[str(packet['protocol'])]
                rKey = "{}:{}:{}".format(sensorname, protocol, timestamp)
                if protocol not in prot:
                    prot.append(protocol)
            else:
                rKey = "{}:{}".format(sensorname, timestamp)
            pipe = red.pipeline()
            if timestamp != lastday:
                pipe.sadd("DAYS", timestamp)
                lastday = timestamp
            for f in packet:
                if f not in non_index:
                    feature = packet[f]
                    redisKey = "{}:{}".format(rKey, f)
                    pipe.sadd("FIELDS", f)
                    # NOTE(review): (name, value, amount) is the redis-py < 3.0
                    # argument order; redis-py >= 3.0 expects (name, amount,
                    # value) - confirm against the installed version.
                    pipe.zincrby(redisKey, feature, 1)
            pipe.execute()
        if ck:
            for pr in prot:
                red.sadd("PROTOCOLS", pr)
        # Reap tshark and report a failure, as the JSON mode does, instead of
        # leaving the child unwaited and announcing success unconditionally.
        wait_for_tshark()
        potiron.infomsg('Data from {} stored into redis'.format(filename))

    else:
        allpackets = []
        # Describe the source
        allpackets.append({"type": potiron.TYPE_SOURCE, "sensorname": sensorname,
                           "filename": os.path.basename(filename), "bpf": bpf})
        # Each packet has an incremental numeric id
        # A packet is identified with its sensorname, filename and packet id for
        # further aggregation with meta data.
        # Assumption: Each program processes the pcap file the same way?
        packet_id = 0

        for line in proc.stdout.readlines():
            packet_id = packet_id + 1
            packet = parse_line(line)
            packet['packet_id'] = packet_id
            packet['type'] = potiron.TYPE_PACKET
            packet['state'] = potiron.STATE_NOT_ANNOTATE
            # FIXME might consume a lot of memory
            allpackets.append(packet)

        wait_for_tshark()
        # Write and save the json file
        jsonfilename = potiron.store_packet(rootdir, filename, json.dumps(allpackets))
        if b_redis:
            # If redis option, store data into redis
            potiron_redis.process_storage(jsonfilename, red, ck)
Esempio n. 2
0
def process_file(outputdir, filename):
    """Extract TCP ports and sequence numbers with tshark; store them in redis and JSON.

    For every line printed by tshark (``frame.time_epoch``, ``tcp.srcport``,
    ``tcp.dstport``, ``tcp.seq``, ``tcp.ack``) a packet dictionary is built,
    its fields are written to a redis hash through the module-level ``red``
    connection, and the packet is appended to the list that is finally
    written with ``potiron.store_packet``.

    Args:
        outputdir: Output directory passed to ``potiron.store_packet``.
        filename: Path of the capture file read by tshark.

    Raises:
        OSError: If tshark is not installed or exits with a non-zero status.
    """
    import shlex  # local import: only used to quote the tshark arguments

    def to_int(text, default=-1):
        """Return ``int(text)``, or ``default`` when tshark printed no number."""
        try:
            return int(text)
        except ValueError:
            return default

    # If tshark is not installed, raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(filename)
    lastday = None
    local_dicts = dict()
    rev_dics = None
    allpackets = []
    # Describe the source
    allpackets.append({
        "type": potiron.TYPE_SOURCE,
        "sensorname": sensorname,
        "filename": os.path.basename(filename),
        "bpf": bpf
    })
    cmd = "tshark -n -q -Tfields -e frame.time_epoch -e tcp.srcport -e tcp.dstport -e tcp.seq -e tcp.ack "
    # The command goes through a shell, so the filter and the file name are
    # quoted to survive spaces, quotes and other shell metacharacters.
    cmd += "-E header=n -E separator=/s -E occurrence=f -Y {} -r {} -o tcp.relative_sequence_numbers:FALSE".format(
        shlex.quote(str(bpf)), shlex.quote(str(filename)))
    proc = subprocess.Popen(cmd,
                            shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    for line in proc.stdout.readlines():
        # Drop the trailing newline before splitting on the space separator
        line = line[:-1].decode()
        timestamp, sport, dport, tcpseq, tcpack = line.split(' ')
        isport = to_int(sport)
        idport = to_int(dport)
        itcpseq = to_int(tcpseq)
        itcpack = to_int(tcpack)
        # Convert timestamp
        a, b = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(a))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        # The last three digits of the fractional part are dropped
        stime = stime + "." + b[:-3]
        packet = {
            'timestamp': stime,
            'sport': isport,
            'dport': idport,
            'tcpseq': itcpseq,
            'tcpack': itcpack,
            'type': potiron.TYPE_PACKET,
            'state': potiron.STATE_NOT_ANNOTATE
        }
        allpackets.append(packet)
        if rev_dics is None:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            rev_dics = potiron.create_reverse_local_dicts(local_dicts)
        (day, clock) = stime.split(' ')
        timestamp = "{}_{}".format(day, clock)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
            # Remember the day so that it is sent to redis only when it changes
            lastday = day
        # Store data into redis
        keyname = "{}_src{}_dst{}_{}".format(sensorname, isport,
                                             idport, timestamp)
        p = red.pipeline()
        for k in packet:
            # non_index is a module-level collection of fields that are not stored
            if k not in non_index:
                feature = packet[k]
                if k.startswith(potiron.ANNOTATION_PREFIX):
                    feature = potiron.translate_dictionaries(
                        rev_dics, red, k, packet[k])
                    # Create the links between annotations and their objects
                    idn = potiron.get_dictionary_id(k)
                    obj = potiron.get_annotation_origin(packet, k)
                    if obj is not None and idn is not None:
                        kn = "AR_{}_{}".format(idn, obj)
                        p.set(kn, feature)
                p.hset(keyname, k, feature)
        p.execute()
    proc.wait()
    if proc.returncode != 0:
        errmsg = b"".join(proc.stderr.readlines())
        raise OSError("tshark failed. Return code {}. {}".format(
            proc.returncode, errmsg))
    # Write data into the json output file
    potiron.store_packet(outputdir, filename, json.dumps(allpackets))
Esempio n. 3
0
def process_file(rootdir, filename):
    """Convert a capture file into a JSON packet list using ipsumdump.

    ipsumdump is run on ``filename`` with ``potiron.bpfilter`` as filter.
    Every output line becomes one packet dictionary, and the whole list,
    preceded by a source description, is written with
    ``potiron.store_packet``.

    Args:
        rootdir: Output root directory; when it is not ``None`` the
            directories are created with ``potiron.create_dirs``.
        filename: Path of the capture file read by ipsumdump.

    Raises:
        OSError: If ipsumdump is not installed or exits with a non-zero
            status.
    """
    def to_int(text, default=-1):
        """Return ``int(text)``, or ``default`` when the field is not a number."""
        try:
            return int(text)
        except ValueError:
            return default

    if not potiron.check_program("ipsumdump"):
        raise OSError("The program ipsumdump is not installed")
    # FIXME Put in config file
    if rootdir is not None:
        potiron.create_dirs(rootdir, filename)
    sensorname = potiron.derive_sensor_name(filename)
    allpackets = []
    # Describe the source
    allpackets.append({"type": potiron.TYPE_SOURCE, "sensorname": sensorname,
                       "filename": os.path.basename(filename)})
    # Each packet has an incremental numeric id
    # A packet is identified with its sensorname, filename and packet id for
    # further aggregation with meta data.
    # Assumption: Each program processes the pcap file the same way?
    packet_id = 0
    proc = subprocess.Popen(["ipsumdump", "--no-headers", "--quiet", "--timestamp",
                             "--length", "--protocol", "--ip-src", "--ip-dst", "--ip-opt",
                             "--ip-ttl", "--ip-tos", "--sport", "--dport", "--tcp-seq", "--tcp-ack",
                             "--icmp-code", "--icmp-type", "-f", potiron.bpfilter, "-r", filename],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    for line in proc.stdout.readlines():
        packet_id = packet_id + 1
        # Drop the trailing newline before splitting on spaces
        line = line[:-1].decode()
        (timestamp, length, protocol, ipsrc, ipdst, ipop, ipttl, iptos, sport,
         dport, tcpseq, tcpack, icmpcode, icmptype) = line.split(' ')

        # '-' marks a missing address
        if ipsrc == '-':
            ipsrc = None
        if ipdst == '-':
            ipdst = None
        # Convert timestamp
        a, b = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(a))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        stime = stime + "." + b
        # Numeric fields fall back to -1 (255 for the ICMP code and type)
        # when they cannot be parsed as integers.
        packet = {'timestamp': stime,
                  'length': to_int(length),
                  'protocol': numerize_proto(protocol),
                  'ipsrc': ipsrc,
                  'ipdst': ipdst,
                  'ipop': ipop,
                  'ipttl': to_int(ipttl),
                  'iptos': to_int(iptos),
                  'sport': to_int(sport),
                  'dport': to_int(dport),
                  'tcpseq': to_int(tcpseq),
                  'tcpack': to_int(tcpack),
                  'icmpcode': to_int(icmpcode, 255),
                  'icmptype': to_int(icmptype, 255),
                  'packet_id': packet_id,
                  'type': potiron.TYPE_PACKET,
                  # NOTE(review): spelled STATE_NOT_ANNOATE here; confirm the
                  # constant name against the potiron module.
                  'state': potiron.STATE_NOT_ANNOATE
                  }
        # FIXME might consume a lot of memory
        allpackets.append(packet)

    # FIXME Implement polling because wait can last forever
    proc.wait()

    if proc.returncode != 0:
        # stderr is a byte stream: join as bytes, then decode for the message
        # (joining with a str separator raises TypeError and hides the error).
        errmsg = b"".join(proc.stderr.readlines()).decode(errors="replace")
        raise OSError("ipsumdump failed. Return code {}. {}".format(proc.returncode, errmsg))
    potiron.store_packet(rootdir, filename, json.dumps(allpackets))
Esempio n. 4
0
def process_file(outputdir, inputfile):
    """Extract ARP packets with tshark; store them in redis and in a JSON file.

    tshark is run on ``inputfile`` with the display filter
    ``eth.type == 0x806``. Requests (opcode ``'1'``) and all other opcodes
    (handled as replies) are written to redis hashes through the module-level
    ``red`` connection and counted per sensor and day. All packets are finally
    written with ``potiron.store_packet``.

    Args:
        outputdir: Output directory passed to ``potiron.store_packet``.
        inputfile: Path of the capture file read by tshark.

    Raises:
        OSError: If tshark is not installed or exits with a non-zero status.
    """
    import shlex  # local import: only used to quote the tshark argument

    # If tshark is not installed, raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(inputfile)
    revcreated = False
    lastday = None
    local_dicts = dict()
    allpackets = []
    # Describe the source
    allpackets.append({"type": potiron.TYPE_SOURCE, "sensorname": sensorname,
                       "filename": os.path.basename(inputfile)})
    # Timestamp of the most recent request; replies are stored under it
    timestampKey = None
    cmd = "tshark -n -q -Tfields -e frame.time_epoch -e eth.src -e eth.dst -e arp.src.proto_ipv4 -e arp.dst.proto_ipv4 -e arp.src.hw_mac "
    # The command goes through a shell, so the file name is quoted to survive
    # spaces and other shell metacharacters.
    cmd += "-e arp.dst.hw_mac -e arp.opcode -E header=n -E separator='|' -Y 'eth.type == 0x806' -r {}".format(
        shlex.quote(str(inputfile)))
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    for line in proc.stdout.readlines():
        # Drop the trailing newline before splitting on the '|' separator
        line = line[:-1].decode()
        timestamp, ethsrc, ethdst, ipsrc, ipdst, arpsrc, arpdst, opcode = line.split('|')
        # Convert timestamp
        a, b = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(a))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        # The last three digits of the fractional part are dropped
        stime += ".{}".format(b[:-3])
        packet = {'timestamp': stime,
                  'ethsrc': ethsrc,
                  'ethdst': ethdst,
                  'ipsrc': ipsrc,
                  'ipdst': ipdst,
                  'arpsrc': arpsrc,
                  'arpdst': arpdst,
                  'opcode': opcode,
                  'type': potiron.TYPE_PACKET,
                  'state': potiron.STATE_NOT_ANNOTATE
                  }
        allpackets.append(packet)
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            # NOTE(review): the returned dictionaries are not used in this
            # function; the call is kept in case it has side effects.
            potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        (day, clock) = stime.split(' ')
        timestamp = "{}_{}".format(day, clock)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
            # Remember the day so that it is sent to redis only when it changes
            lastday = day
        # Store data into redis
        p = red.pipeline()
        countKeyname = '{}_{}_count'.format(sensorname, day)
        if opcode == '1':
            keyname = '{}_{}_{}'.format(sensorname, ipdst, timestamp)
            p.hset(keyname, 'req_src_mac', ethsrc)
            p.hset(keyname, 'req_src_ip', ipsrc)
            p.hset(keyname, 'req_src_arp_mac', arpsrc)
            # NOTE(review): (name, value, amount) is the redis-py < 3.0
            # argument order - confirm against the installed version.
            p.zincrby(countKeyname, 'request', 1)
            timestampKey = timestamp
        else:
            # NOTE(review): when no request was seen before, timestampKey is
            # still None and the key ends with "_None".
            keyname = '{}_{}_{}'.format(sensorname, ipsrc, timestampKey)
            p.hset(keyname, 'rep_timestamp', stime)
            p.hset(keyname, 'rep_dst_ip', ipdst)
            p.hset(keyname, 'rep_src_mac', ethsrc)
            p.hset(keyname, 'rep_dst_mac', ethdst)
            p.hset(keyname, 'rep_src_arp_mac', arpsrc)
            p.hset(keyname, 'rep_dst_arp_mac', arpdst)
            p.zincrby(countKeyname, 'reply', 1)
        p.execute()
    proc.wait()
    if proc.returncode != 0:
        errmsg = b"".join(proc.stderr.readlines())
        raise OSError("tshark failed. Return code {}. {}".format(proc.returncode, errmsg))
    # Write data into the json output file
    potiron.store_packet(outputdir, inputfile, json.dumps(allpackets))
Esempio n. 5
0
def process_file(outputdir, inputfile):
    """Import the ARP traffic of a capture file into redis and a JSON file.

    The capture is read by tshark with the display filter
    ``eth.type == 0x806``. A packet with opcode ``'1'`` is stored as a
    request; any other opcode is stored as a reply under the key of the last
    request timestamp. Redis is accessed through the module-level ``red``
    connection, and the collected packets are written with
    ``potiron.store_packet``.

    Args:
        outputdir: Output directory passed to ``potiron.store_packet``.
        inputfile: Path of the capture file read by tshark.

    Raises:
        OSError: If tshark is not installed or exits with a non-zero status.
    """
    import shlex  # local import: only used to quote the tshark argument

    # If tshark is not installed, raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(inputfile)
    revcreated = False
    lastday = None
    local_dicts = dict()
    # Describe the source
    allpackets = [{
        "type": potiron.TYPE_SOURCE,
        "sensorname": sensorname,
        "filename": os.path.basename(inputfile)
    }]
    # Timestamp of the most recent request; replies are stored under it
    timestampKey = None
    cmd = "tshark -n -q -Tfields -e frame.time_epoch -e eth.src -e eth.dst -e arp.src.proto_ipv4 -e arp.dst.proto_ipv4 -e arp.src.hw_mac "
    # The file name is quoted because the command is executed by a shell
    cmd += "-e arp.dst.hw_mac -e arp.opcode -E header=n -E separator='|' -Y 'eth.type == 0x806' -r {}".format(
        shlex.quote(str(inputfile)))
    proc = subprocess.Popen(cmd,
                            shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    for line in proc.stdout.readlines():
        # Drop the trailing newline before splitting on the '|' separator
        line = line[:-1].decode()
        timestamp, ethsrc, ethdst, ipsrc, ipdst, arpsrc, arpdst, opcode = line.split(
            '|')
        # Convert timestamp
        a, b = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(a))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        # The last three digits of the fractional part are dropped
        stime += ".{}".format(b[:-3])
        packet = {
            'timestamp': stime,
            'ethsrc': ethsrc,
            'ethdst': ethdst,
            'ipsrc': ipsrc,
            'ipdst': ipdst,
            'arpsrc': arpsrc,
            'arpdst': arpdst,
            'opcode': opcode,
            'type': potiron.TYPE_PACKET,
            'state': potiron.STATE_NOT_ANNOTATE
        }
        allpackets.append(packet)
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            # NOTE(review): the result is not used in this function; the call
            # is kept in case it has side effects.
            potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        (day, clock) = stime.split(' ')
        timestamp = "{}_{}".format(day, clock)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
            # Without this update every packet re-sent its day to redis
            lastday = day
        # Store data into redis
        p = red.pipeline()
        countKeyname = '{}_{}_count'.format(sensorname, day)
        if opcode == '1':
            keyname = '{}_{}_{}'.format(sensorname, ipdst, timestamp)
            p.hset(keyname, 'req_src_mac', ethsrc)
            p.hset(keyname, 'req_src_ip', ipsrc)
            p.hset(keyname, 'req_src_arp_mac', arpsrc)
            # NOTE(review): (name, value, amount) is the redis-py < 3.0
            # argument order - confirm against the installed version.
            p.zincrby(countKeyname, 'request', 1)
            timestampKey = timestamp
        else:
            # NOTE(review): timestampKey is still None when a reply precedes
            # every request, which yields a key ending with "_None".
            keyname = '{}_{}_{}'.format(sensorname, ipsrc, timestampKey)
            p.hset(keyname, 'rep_timestamp', stime)
            p.hset(keyname, 'rep_dst_ip', ipdst)
            p.hset(keyname, 'rep_src_mac', ethsrc)
            p.hset(keyname, 'rep_dst_mac', ethdst)
            p.hset(keyname, 'rep_src_arp_mac', arpsrc)
            p.hset(keyname, 'rep_dst_arp_mac', arpdst)
            p.zincrby(countKeyname, 'reply', 1)
        p.execute()
    proc.wait()
    if proc.returncode != 0:
        errmsg = b"".join(proc.stderr.readlines())
        raise OSError("tshark failed. Return code {}. {}".format(
            proc.returncode, errmsg))
    # Write data into the json output file
    potiron.store_packet(outputdir, inputfile, json.dumps(allpackets))
def process_file(rootdir, filename):
    """Convert a capture file into a JSON packet list using ipsumdump.

    ipsumdump is run on ``filename`` with ``potiron.bpffilter`` as filter.
    Every output line becomes one packet dictionary, and the whole list,
    preceded by a source description, is written with ``store_packet``.

    Args:
        rootdir: Output root directory; when it is not ``None`` the
            directories are created with ``create_dirs``.
        filename: Path of the capture file read by ipsumdump.

    Raises:
        OSError: If ipsumdump is not installed or exits with a non-zero
            status.
    """
    def to_int(text, default=-1):
        """Return ``int(text)``, or ``default`` when the field is not a number."""
        try:
            return int(text)
        except ValueError:
            return default

    if check_program("ipsumdump") == False:
        raise OSError("The program ipsumdump is not installed")
    #FIXME Put in config file
    if rootdir is not None:
        create_dirs(rootdir, filename)
    sensorname = potiron.derive_sensor_name(filename)
    allpackets = []
    #Describe the source
    allpackets.append({
        "type": potiron.TYPE_SOURCE,
        "sensorname": sensorname,
        "filename": os.path.basename(filename)
    })
    #Each packet has an incremental numeric id
    #A packet is identified with its sensorname, filename and packet id for
    #further aggregation with meta data.
    #Assumption: Each program processes the pcap file the same way?
    packet_id = 0
    proc = subprocess.Popen([
        "ipsumdump", "--no-headers", "--quiet", "--timestamp", "--length",
        "--protocol", "--ip-src", "--ip-dst", "--ip-opt", "--ip-ttl",
        "--ip-tos", "--sport", "--dport", "--icmp-code", "--icmp-type", "-f",
        potiron.bpffilter, "-r", filename
    ],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    for line in proc.stdout.readlines():
        packet_id = packet_id + 1
        # Drop the trailing newline
        # NOTE(review): the line is not decoded, so splitting on a str
        # separator only works where the pipe yields str - confirm the
        # targeted Python version.
        line = line[:-1]
        (timestamp, length, protocol, ipsrc, ipdst, ipop, ipttl, iptos, sport,
         dport, icmpcode, icmptype) = line.split(' ')
        # Each field is converted on its own so that one unparsable value does
        # not discard the following ones, and the ICMP code and type are read
        # from the parsed fields rather than from their defaults.
        ilength = to_int(length)
        iipttl = to_int(ipttl)
        iiptos = to_int(iptos)
        isport = to_int(sport)
        idport = to_int(dport)
        iicmpcode = to_int(icmpcode, 255)
        iicmptype = to_int(icmptype, 255)
        # '-' marks a missing address
        if ipsrc == '-':
            ipsrc = None
        if ipdst == '-':
            ipdst = None
        #Convert timestamp
        (a, b) = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(a))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        stime = stime + "." + b

        packet = {
            'timestamp': stime,
            'length': ilength,
            'protocol': numerize_proto(protocol),
            'ipsrc': ipsrc,
            'ipdst': ipdst,
            'ipop': ipop,
            'ipttl': iipttl,
            'iptos': iiptos,
            'sport': isport,
            'dport': idport,
            'icmpcode': iicmpcode,
            'icmptype': iicmptype,
            'packet_id': packet_id,
            'type': potiron.TYPE_PACKET,
            'state': potiron.STATE_NOT_ANNOATE
        }
        #FIXME might consume a lot of memory
        allpackets.append(packet)

    #FIXME Implement polling because wait can last forever
    proc.wait()

    if proc.returncode != 0:
        # NOTE(review): joining with a str separator assumes stderr yields
        # str lines - confirm the targeted Python version.
        errmsg = "".join(proc.stderr.readlines())
        raise OSError("ipsumdump failed. Return code " + str(proc.returncode) +
                      ". " + errmsg)
    store_packet(rootdir, filename, json.dumps(allpackets))
Esempio n. 7
0
def process_file(outputdir, filename):
    """Extract TCP ports and sequence numbers with tshark and store them in redis.

    For every line printed by tshark (``frame.time_epoch``, ``tcp.srcport``,
    ``tcp.dstport``, ``tcp.seq``, ``tcp.ack``) a packet dictionary is built
    and its fields are written to a redis hash through the module-level
    ``red`` connection. Unless the module-level ``disable_json`` flag is set,
    the packets are also written with ``potiron.store_packet``.

    Args:
        outputdir: Output directory passed to ``potiron.store_packet``.
        filename: Path of the capture file read by tshark.

    Raises:
        OSError: If tshark is not installed or exits with a non-zero status.
    """
    import shlex  # local import: only used to quote the tshark arguments

    def to_int(text, default=-1):
        """Return ``int(text)``, or ``default`` when tshark printed no number."""
        try:
            return int(text)
        except ValueError:
            return default

    # If tshark is not installed, raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(filename)
    revcreated = False
    lastday = None
    local_dicts = dict()
    rev_dics = dict()
    allpackets = []
    # Describe the source
    allpackets.append({"type": potiron.TYPE_SOURCE, "sensorname": sensorname,
                       "filename": os.path.basename(filename), "bpf": bpf})
    cmd = "tshark -n -q -Tfields -e frame.time_epoch -e tcp.srcport -e tcp.dstport -e tcp.seq -e tcp.ack "
    # The command goes through a shell, so the filter and the file name are
    # quoted to survive spaces, quotes and other shell metacharacters.
    cmd += "-E header=n -E separator=/s -E occurrence=f -Y {} -r {} -o tcp.relative_sequence_numbers:FALSE".format(
        shlex.quote(str(bpf)), shlex.quote(str(filename)))
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    for line in proc.stdout.readlines():
        # Drop the trailing newline before splitting on the space separator
        line = line[:-1].decode()
        timestamp, sport, dport, tcpseq, tcpack = line.split(' ')
        isport = to_int(sport)
        idport = to_int(dport)
        itcpseq = to_int(tcpseq)
        itcpack = to_int(tcpack)
        # Convert timestamp
        a, b = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(a))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        # The last three digits of the fractional part are dropped
        stime = stime + "." + b[:-3]
        packet = {'timestamp': stime,
                  'sport': isport,
                  'dport': idport,
                  'tcpseq': itcpseq,
                  'tcpack': itcpack,
                  'type': potiron.TYPE_PACKET,
                  'state': potiron.STATE_NOT_ANNOTATE
                  }
        allpackets.append(packet)
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            rev_dics = potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        (day, clock) = stime.split(' ')
        timestamp = "{}_{}".format(day, clock)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
            # Remember the day so that it is sent to redis only when it changes
            lastday = day
        # Store data into redis
        keyname = "{}_src{}_dst{}_{}".format(sensorname, isport, idport, timestamp)
        p = red.pipeline()
        for k in packet:
            # non_index is a module-level collection of fields that are not stored
            if k not in non_index:
                feature = packet[k]
                if k.startswith(potiron.ANNOTATION_PREFIX):
                    feature = potiron.translate_dictionaries(rev_dics, red, k, packet[k])
                    # Create the links between annotations and their objects
                    idn = potiron.get_dictionary_id(k)
                    obj = potiron.get_annotation_origin(packet, k)
                    if obj is not None and idn is not None:
                        kn = "AR_{}_{}".format(idn, obj)
                        p.set(kn, feature)
                p.hset(keyname, k, feature)
        p.execute()
    proc.wait()
    if proc.returncode != 0:
        errmsg = b"".join(proc.stderr.readlines())
        raise OSError("tshark failed. Return code {}. {}".format(proc.returncode, errmsg))
    if not disable_json:
        # Write data into the json output file
        potiron.store_packet(outputdir, filename, json.dumps(allpackets))
Esempio n. 8
0
def process_file(rootdir, filename, fieldfilter, b_redis, ck):
    """Convert a capture file into a JSON packet list using tshark.

    tshark is run on ``filename`` with the module-level ``bpf`` display
    filter. Every output line becomes one packet dictionary, and the whole
    list, preceded by a source description, is written with
    ``potiron.store_packet``. When ``b_redis`` is set, the written file is
    then handed to ``potiron_redis.process_storage``.

    Args:
        rootdir: Output root directory passed to ``potiron.store_packet``.
        filename: Path of the capture file read by tshark.
        fieldfilter: Optional list of tshark field names to extract. When it
            is empty, every field of ``potiron.tshark_fields`` is extracted.
            The list is modified in place so that it always contains
            ``frame.time_epoch`` and ``ip.proto``.
        b_redis: Also import the written JSON file into redis.
        ck: Passed through to ``potiron_redis.process_storage``.

    Raises:
        OSError: If tshark is not installed or exits with a non-zero status.
    """
    import shlex  # local import: only used to quote the tshark arguments

    # If tshark is not installed, raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # FIXME Put in config file
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(filename)
    allpackets = []
    # Describe the source
    allpackets.append({
        "type": potiron.TYPE_SOURCE,
        "sensorname": sensorname,
        "filename": os.path.basename(filename),
        "bpf": bpf
    })
    # Each packet has an incremental numeric id
    # A packet is identified with its sensorname, filename and packet id for
    # further aggregation with meta data.
    # Assumption: Each program processes the pcap file the same way?
    packet_id = 0
    tshark_fields = potiron.tshark_fields
    cmd = "tshark -n -q -Tfields "
    if fieldfilter:
        # frame.time_epoch and ip.proto are always extracted
        if 'frame.time_epoch' not in fieldfilter:
            fieldfilter.insert(0, 'frame.time_epoch')
        if 'ip.proto' not in fieldfilter:
            fieldfilter.insert(1, 'ip.proto')
        selected_fields = fieldfilter
    else:
        selected_fields = tshark_fields
    for field in selected_fields:
        cmd += "-e {} ".format(field)
    # The command goes through a shell, so the filter and the file name are
    # quoted to survive spaces, quotes and other shell metacharacters.
    cmd += "-E header=n -E separator=/s -E occurrence=f -Y {} -r {} -o tcp.relative_sequence_numbers:FALSE".format(
        shlex.quote(str(bpf)), shlex.quote(str(filename)))

    proc = subprocess.Popen(cmd,
                            shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    json_fields = potiron.json_fields
    # Integer fields and the value stored when tshark printed no number
    special_fields = {
        'length': -1,
        'ipttl': -1,
        'iptos': 0,
        'tcpseq': -1,
        'tcpack': -1,
        'icmpcode': 255,
        'icmptype': 255
    }
    for line in proc.stdout.readlines():
        packet_id = packet_id + 1
        # Drop the trailing newline before splitting on the space separator
        line = line[:-1].decode()
        packet = {}
        for i, value in enumerate(line.split(' ')):
            # Map the column position to the name used in the json documents
            if fieldfilter:
                valname = json_fields[tshark_fields.index(fieldfilter[i])]
            else:
                valname = json_fields[i]
            if valname in special_fields:
                try:
                    packet[valname] = int(value)
                except ValueError:
                    packet[valname] = special_fields[valname]
            else:
                packet[valname] = value
        # fill_packet is defined elsewhere in this module
        fill_packet(packet)
        packet['packet_id'] = packet_id
        packet['type'] = potiron.TYPE_PACKET
        packet['state'] = potiron.STATE_NOT_ANNOTATE
        # FIXME might consume a lot of memory
        allpackets.append(packet)

    # FIXME Implement polling because wait can last forever
    proc.wait()

    if proc.returncode != 0:
        errmsg = b"".join(proc.stderr.readlines())
        raise OSError("tshark failed. Return code {}. {}".format(
            proc.returncode, errmsg))
    # Write and save the json file
    jsonfilename = potiron.store_packet(rootdir, filename,
                                        json.dumps(allpackets))
    if b_redis:
        # If redis option, store data into redis
        potiron_redis.process_storage(jsonfilename, red, ck)