Ejemplo n.º 1
0
    red.sadd('BPF', bpf)
# FIXME documents must include at least a sensorname and a timestamp
# FIXME check timestamp format
sensorname = potiron.get_sensor_name(doc)
lastday = None
revcreated = False

for di in doc:
    if di["type"] > potiron.DICT_LOWER_BOUNDARY:
        local_dicts[di["type"]] = di
    if di["type"] == potiron.TYPE_PACKET:
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            rev_dics = potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        timestamp = di['timestamp']
        sport = di['sport']
        dport = di['dport']
        (day, time) = timestamp.split(' ')
        timestamp = "{}_{}".format(day,time)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
        p = red.pipeline()
        for k in list(di.keys()):
            if k not in non_index:
                feature = di[k]
                if k.startswith(potiron.ANNOTATION_PREFIX):
                    feature = potiron.translate_dictionaries(rev_dics, red, k, di[k])
Ejemplo n.º 2
0
def process_file(outputdir, filename):
    """Extract per-packet TCP features from a capture file with tshark,
    index them into redis and dump every packet to a JSON file.

    Relies on module-level globals: ``red`` (redis client), ``bpf``
    (capture filter string), ``non_index`` (packet keys excluded from
    indexing) and the ``potiron`` helper module.

    :param outputdir: directory where the JSON output is stored
    :param filename: path of the capture file to process
    :raises OSError: if tshark is not installed or exits with an error
    """
    # If tshark is not installed, exit and raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(filename)
    revcreated = False
    lastday = None
    local_dicts = dict()
    rev_dics = dict()
    allpackets = []
    # Describe the source
    allpackets.append({
        "type": potiron.TYPE_SOURCE,
        "sensorname": sensorname,
        "filename": os.path.basename(filename),
        "bpf": bpf
    })
    # Each packet has a incremental numeric id
    # A packet is identified with its sensorname filename and packet id for
    # further aggregation with meta data.
    # Assumption: Each program process the pcap file the same way?
    packet_id = 0
    cmd = "tshark -n -q -Tfields -e frame.time_epoch -e tcp.srcport -e tcp.dstport -e tcp.seq -e tcp.ack "
    cmd += "-E header=n -E separator=/s -E occurrence=f -Y '{}' -r {} -o tcp.relative_sequence_numbers:FALSE".format(
        bpf, filename)
    proc = subprocess.Popen(cmd,
                            shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    for line in proc.stdout.readlines():
        packet_id = packet_id + 1
        # Strip the trailing newline (and a possible carriage return);
        # the previous `line[:-1]` chopped a data byte when the last line
        # had no newline terminator.
        line = line.decode().rstrip('\r\n')
        timestamp, sport, dport, tcpseq, tcpack = line.split(' ')
        # tshark emits an empty field for frames without these TCP fields;
        # default such values to -1.
        isport = -1
        idport = -1
        itcpseq = -1
        itcpack = -1
        try:
            isport = int(sport)
        except ValueError:
            pass
        try:
            idport = int(dport)
        except ValueError:
            pass
        try:
            itcpseq = int(tcpseq)
        except ValueError:
            pass
        try:
            itcpack = int(tcpack)
        except ValueError:
            pass
        # Convert timestamp (epoch seconds.fraction) to
        # "YYYY-MM-DD HH:MM:SS.mmm" (fraction truncated to milliseconds)
        a, b = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(a))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        stime = stime + "." + b[:-3]
        packet = {
            'timestamp': stime,
            'sport': isport,
            'dport': idport,
            'tcpseq': itcpseq,
            'tcpack': itcpack,
            'type': potiron.TYPE_PACKET,
            'state': potiron.STATE_NOT_ANNOTATE
        }
        allpackets.append(packet)
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            rev_dics = potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        (day, time) = stime.split(' ')
        timestamp = "{}_{}".format(day, time)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
            # Remember the day so we do not SADD once per packet
            lastday = day
        # Store data into redis
        p = red.pipeline()
        for k in packet:
            if k not in non_index:
                feature = packet[k]
                if k.startswith(potiron.ANNOTATION_PREFIX):
                    feature = potiron.translate_dictionaries(
                        rev_dics, red, k, packet[k])
                    # Create the links between annotations and their objects
                    idn = potiron.get_dictionary_id(k)
                    obj = potiron.get_annotation_origin(packet, k)
                    if obj is not None and idn is not None:
                        kn = "AR_{}_{}".format(idn, obj)
                        p.set(kn, feature)
                keyname = "{}_src{}_dst{}_{}".format(sensorname, isport,
                                                     idport, timestamp)
                p.hset(keyname, k, feature)
        p.execute()
    proc.wait()
    if proc.returncode != 0:
        errmsg = b"".join(proc.stderr.readlines())
        raise OSError("tshark failed. Return code {}. {}".format(
            proc.returncode, errmsg))
    # Write data into the json output file
    potiron.store_packet(outputdir, filename, json.dumps(allpackets))
Ejemplo n.º 3
0
def process_file(outputdir, inputfile):
    """Extract ARP request/reply information from a capture file with
    tshark, correlate them in redis and dump every packet to a JSON file.

    Relies on module-level globals: ``red`` (redis client) and the
    ``potiron`` helper module.

    :param outputdir: directory where the JSON output is stored
    :param inputfile: path of the capture file to process
    :raises OSError: if tshark is not installed or exits with an error
    """
    # If tshark is not installed, exit and raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(inputfile)
    revcreated = False
    lastday = None
    local_dicts = dict()
    rev_dics = dict()
    allpackets = []
    # Describe the source
    allpackets.append({"type": potiron.TYPE_SOURCE, "sensorname": sensorname,
                       "filename": os.path.basename(inputfile)})
    # Each packet has a incremental numeric id
    # A packet is identified with its sensorname filename and packet id for
    # further aggregation with meta data.
    # Assumption: Each program process the pcap file the same way?
    packet_id = 0
    timestampKey = None
    cmd = "tshark -n -q -Tfields -e frame.time_epoch -e eth.src -e eth.dst -e arp.src.proto_ipv4 -e arp.dst.proto_ipv4 -e arp.src.hw_mac "
    cmd += "-e arp.dst.hw_mac -e arp.opcode -E header=n -E separator='|' -Y 'eth.type == 0x806' -r {}".format(inputfile)
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    for line in proc.stdout.readlines():
        packet_id = packet_id + 1
        # Strip the trailing newline (and a possible carriage return);
        # the previous `line[:-1]` chopped a data byte when the last line
        # had no newline terminator.
        line = line.decode().rstrip('\r\n')
        timestamp, ethsrc, ethdst, ipsrc, ipdst, arpsrc, arpdst, opcode = line.split('|')
        # Convert timestamp (epoch seconds.fraction) to
        # "YYYY-MM-DD HH:MM:SS.mmm" (fraction truncated to milliseconds)
        a, b = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(a))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        stime += ".{}".format(b[:-3])
        packet = {'timestamp': stime,
                  'ethsrc': ethsrc,
                  'ethdst': ethdst,
                  'ipsrc': ipsrc,
                  'ipdst': ipdst,
                  'arpsrc': arpsrc,
                  'arpdst': arpdst,
                  'opcode': opcode,
                  'type': potiron.TYPE_PACKET,
                  'state': potiron.STATE_NOT_ANNOTATE
                  }
        allpackets.append(packet)
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            rev_dics = potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        (day, time) = stime.split(' ')
        timestamp = "{}_{}".format(day, time)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
            # Remember the day so we do not SADD once per packet
            lastday = day
        # Store data into redis
        p = red.pipeline()
        countKeyname = '{}_{}_count'.format(sensorname, day)
        if opcode == '1':
            # ARP request: index the requester under the queried IP
            keyname = '{}_{}_{}'.format(sensorname, ipdst, timestamp)
            p.hset(keyname, 'req_src_mac', ethsrc)
            p.hset(keyname, 'req_src_ip', ipsrc)
            p.hset(keyname, 'req_src_arp_mac', arpsrc)
            p.zincrby(countKeyname, 'request', 1)
            timestampKey = timestamp
        else:
            # ARP reply: attach it to the timestamp of the last request seen.
            # FIXME if a reply arrives before any request, timestampKey is
            # still None and the key name embeds the literal "None".
            keyname = '{}_{}_{}'.format(sensorname, ipsrc, timestampKey)
            p.hset(keyname, 'rep_timestamp', stime)
            p.hset(keyname, 'rep_dst_ip', ipdst)
            p.hset(keyname, 'rep_src_mac', ethsrc)
            p.hset(keyname, 'rep_dst_mac', ethdst)
            p.hset(keyname, 'rep_src_arp_mac', arpsrc)
            p.hset(keyname, 'rep_dst_arp_mac', arpdst)
            p.zincrby(countKeyname, 'reply', 1)
        p.execute()
    proc.wait()
    if proc.returncode != 0:
        errmsg = b"".join(proc.stderr.readlines())
        raise OSError("tshark failed. Return code {}. {}".format(proc.returncode, errmsg))
    # Write data into the json output file
    potiron.store_packet(outputdir, inputfile, json.dumps(allpackets))
Ejemplo n.º 4
0
def process_file(outputdir, inputfile):
    """Extract ARP request/reply information from a capture file with
    tshark, correlate them in redis and dump every packet to a JSON file.

    Relies on module-level globals: ``red`` (redis client) and the
    ``potiron`` helper module.

    :param outputdir: directory where the JSON output is stored
    :param inputfile: path of the capture file to process
    :raises OSError: if tshark is not installed or exits with an error
    """
    # If tshark is not installed, exit and raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(inputfile)
    revcreated = False
    lastday = None
    local_dicts = dict()
    rev_dics = dict()
    allpackets = []
    # Describe the source
    allpackets.append({
        "type": potiron.TYPE_SOURCE,
        "sensorname": sensorname,
        "filename": os.path.basename(inputfile)
    })
    # Each packet has a incremental numeric id
    # A packet is identified with its sensorname filename and packet id for
    # further aggregation with meta data.
    # Assumption: Each program process the pcap file the same way?
    packet_id = 0
    timestampKey = None
    cmd = "tshark -n -q -Tfields -e frame.time_epoch -e eth.src -e eth.dst -e arp.src.proto_ipv4 -e arp.dst.proto_ipv4 -e arp.src.hw_mac "
    cmd += "-e arp.dst.hw_mac -e arp.opcode -E header=n -E separator='|' -Y 'eth.type == 0x806' -r {}".format(
        inputfile)
    proc = subprocess.Popen(cmd,
                            shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    for line in proc.stdout.readlines():
        packet_id = packet_id + 1
        # Strip the trailing newline (and a possible carriage return);
        # the previous `line[:-1]` chopped a data byte when the last line
        # had no newline terminator.
        line = line.decode().rstrip('\r\n')
        timestamp, ethsrc, ethdst, ipsrc, ipdst, arpsrc, arpdst, opcode = line.split(
            '|')
        # Convert timestamp (epoch seconds.fraction) to
        # "YYYY-MM-DD HH:MM:SS.mmm" (fraction truncated to milliseconds)
        a, b = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(a))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        stime += ".{}".format(b[:-3])
        packet = {
            'timestamp': stime,
            'ethsrc': ethsrc,
            'ethdst': ethdst,
            'ipsrc': ipsrc,
            'ipdst': ipdst,
            'arpsrc': arpsrc,
            'arpdst': arpdst,
            'opcode': opcode,
            'type': potiron.TYPE_PACKET,
            'state': potiron.STATE_NOT_ANNOTATE
        }
        allpackets.append(packet)
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            rev_dics = potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        (day, time) = stime.split(' ')
        timestamp = "{}_{}".format(day, time)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
            # Remember the day so we do not SADD once per packet
            lastday = day
        # Store data into redis
        p = red.pipeline()
        countKeyname = '{}_{}_count'.format(sensorname, day)
        if opcode == '1':
            # ARP request: index the requester under the queried IP
            keyname = '{}_{}_{}'.format(sensorname, ipdst, timestamp)
            p.hset(keyname, 'req_src_mac', ethsrc)
            p.hset(keyname, 'req_src_ip', ipsrc)
            p.hset(keyname, 'req_src_arp_mac', arpsrc)
            p.zincrby(countKeyname, 'request', 1)
            timestampKey = timestamp
        else:
            # ARP reply: attach it to the timestamp of the last request seen.
            # FIXME if a reply arrives before any request, timestampKey is
            # still None and the key name embeds the literal "None".
            keyname = '{}_{}_{}'.format(sensorname, ipsrc, timestampKey)
            p.hset(keyname, 'rep_timestamp', stime)
            p.hset(keyname, 'rep_dst_ip', ipdst)
            p.hset(keyname, 'rep_src_mac', ethsrc)
            p.hset(keyname, 'rep_dst_mac', ethdst)
            p.hset(keyname, 'rep_src_arp_mac', arpsrc)
            p.hset(keyname, 'rep_dst_arp_mac', arpdst)
            p.zincrby(countKeyname, 'reply', 1)
        p.execute()
    proc.wait()
    if proc.returncode != 0:
        errmsg = b"".join(proc.stderr.readlines())
        raise OSError("tshark failed. Return code {}. {}".format(
            proc.returncode, errmsg))
    # Write data into the json output file
    potiron.store_packet(outputdir, inputfile, json.dumps(allpackets))
Ejemplo n.º 5
0
def process_file(outputdir, filename):
    """Extract per-packet TCP features from a capture file with tshark,
    index them into redis and, unless JSON output is disabled, dump every
    packet to a JSON file.

    Relies on module-level globals: ``red`` (redis client), ``bpf``
    (capture filter string), ``non_index`` (packet keys excluded from
    indexing), ``disable_json`` (skip JSON output when true) and the
    ``potiron`` helper module.

    :param outputdir: directory where the JSON output is stored
    :param filename: path of the capture file to process
    :raises OSError: if tshark is not installed or exits with an error
    """
    # If tshark is not installed, exit and raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(filename)
    revcreated = False
    lastday = None
    local_dicts = dict()
    rev_dics = dict()
    allpackets = []
    # Describe the source
    allpackets.append({"type": potiron.TYPE_SOURCE, "sensorname": sensorname,
                       "filename": os.path.basename(filename), "bpf": bpf})
    # Each packet has a incremental numeric id
    # A packet is identified with its sensorname filename and packet id for
    # further aggregation with meta data.
    # Assumption: Each program process the pcap file the same way?
    packet_id = 0
    cmd = "tshark -n -q -Tfields -e frame.time_epoch -e tcp.srcport -e tcp.dstport -e tcp.seq -e tcp.ack "
    cmd += "-E header=n -E separator=/s -E occurrence=f -Y '{}' -r {} -o tcp.relative_sequence_numbers:FALSE".format(bpf, filename)
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    for line in proc.stdout.readlines():
        packet_id = packet_id + 1
        # Strip the trailing newline (and a possible carriage return);
        # the previous `line[:-1]` chopped a data byte when the last line
        # had no newline terminator.
        line = line.decode().rstrip('\r\n')
        timestamp, sport, dport, tcpseq, tcpack = line.split(' ')
        # tshark emits an empty field for frames without these TCP fields;
        # default such values to -1.
        isport = -1
        idport = -1
        itcpseq = -1
        itcpack = -1
        try:
            isport = int(sport)
        except ValueError:
            pass
        try:
            idport = int(dport)
        except ValueError:
            pass
        try:
            itcpseq = int(tcpseq)
        except ValueError:
            pass
        try:
            itcpack = int(tcpack)
        except ValueError:
            pass
        # Convert timestamp (epoch seconds.fraction) to
        # "YYYY-MM-DD HH:MM:SS.mmm" (fraction truncated to milliseconds)
        a, b = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(a))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        stime = stime + "." + b[:-3]
        packet = {'timestamp': stime,
                  'sport': isport,
                  'dport': idport,
                  'tcpseq': itcpseq,
                  'tcpack': itcpack,
                  'type': potiron.TYPE_PACKET,
                  'state': potiron.STATE_NOT_ANNOTATE
                  }
        allpackets.append(packet)
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            rev_dics = potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        (day, time) = stime.split(' ')
        timestamp = "{}_{}".format(day, time)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
            # Remember the day so we do not SADD once per packet
            lastday = day
        # Store data into redis
        p = red.pipeline()
        for k in packet:
            if k not in non_index:
                feature = packet[k]
                if k.startswith(potiron.ANNOTATION_PREFIX):
                    feature = potiron.translate_dictionaries(rev_dics, red, k, packet[k])
                    # Create the links between annotations and their objects
                    idn = potiron.get_dictionary_id(k)
                    obj = potiron.get_annotation_origin(packet, k)
                    if obj is not None and idn is not None:
                        kn = "AR_{}_{}".format(idn, obj)
                        p.set(kn, feature)
                keyname = "{}_src{}_dst{}_{}".format(sensorname, isport,
                                                     idport, timestamp)
                p.hset(keyname, k, feature)
        p.execute()
    proc.wait()
    if proc.returncode != 0:
        errmsg = b"".join(proc.stderr.readlines())
        raise OSError("tshark failed. Return code {}. {}".format(proc.returncode, errmsg))
    if not disable_json:
        # Write data into the json output file
        potiron.store_packet(outputdir, filename, json.dumps(allpackets))
Ejemplo n.º 6
0
def process_storage(filename, red, ck):
    """Import an annotated potiron JSON document into redis.

    Verifies that the file was not already imported, that its BPF filter
    and combined-key setting match the data already stored, then indexes
    every packet dictionary into per-sensor/per-day sorted sets.

    Relies on module-level globals: ``non_index`` (keys excluded from
    indexing) and the ``potiron`` helper module.

    :param filename: path of the JSON document to import
    :param red: redis client used for all storage operations
    :param ck: True when combined keys (sensor:protocol) are used
    """
    # Check if file was already imported
    fn = os.path.basename(filename)
    if red.sismember("FILES", fn):
        sys.stderr.write('[INFO] Filename ' + fn +
                         ' was already imported ... skip ...\n')
        sys.exit(0)
    # FIXME Users have to be carefull with the files extensions to not process data from capture files
    # FIXME (potiron-json-tshark module), and the same sample again from json files (potiron_redis module)

    # Use a context manager so the file is closed even if json.load fails
    with open(filename, 'r') as f:
        doc = json.load(f)

    # Record local dictionaries
    local_dicts = dict()
    rev_dics = dict()

    # Get sensorname assume one document per sensor name
    item = doc[0]
    bpf = item['bpf']
    # If redis key 'BPF' already exists
    if red.keys('BPF'):
        # Check is the current bpf is the same as the one previously used
        if not red.sismember('BPF', bpf):
            bpf_string = str(red.smembers('BPF'))
            sys.stderr.write(
                '[INFO] BPF for the current data is not the same as the one used in the data already stored here : {}\n'
                .format(bpf_string[3:-2]))
            sys.exit(0)
    # On the other case, add the bpf in the key 'BPF'
    else:
        red.sadd('BPF', bpf)

    # If combined keys are used
    if ck:
        # If redis key 'CK' already exists ...
        if red.keys('CK'):
            # ... BUT is set to 'NO', then combined keys are not used in the data already stored in redis
            if red.sismember('CK', 'NO'):
                sys.stderr.write(
                    '[INFO] Combined key are not used in this redis dataset.\n'
                )
                sys.exit(0)
        # If redis key 'CK' does not exist ...
        else:
            red.sadd('CK', 'YES')
    # If combined key are not used, the key 'CK' should exist anyway, with the value 'NO'
    else:
        # If redis key 'CK' already exists ...
        if red.keys('CK'):
            # ... BUT is set to 'YES', then combined keys are used in the data already stored in redis
            if red.sismember('CK', 'YES'):
                sys.stderr.write(
                    '[INFO] Combined key are used in this redis dataset.\n')
                sys.exit(0)
        # On the other case, we add it
        else:
            red.sadd('CK', 'NO')

    red.sadd("FILES", fn)

    # Project directory (strip the trailing "bin" component of this script's path)
    potiron_path = os.path.dirname(os.path.realpath(__file__))[:-3]
    protocols_path = "{}doc/protocols".format(potiron_path)
    protocols = potiron.define_protocols(protocols_path)

    # FIXME documents must include at least a sensorname and a timestamp
    # FIXME check timestamp format
    sensorname = potiron.get_sensor_name(doc)
    lastday = None
    revcreated = False
    prot = []
    for di in doc:
        if di["type"] > potiron.DICT_LOWER_BOUNDARY:
            local_dicts[di["type"]] = di
        if di["type"] == potiron.TYPE_PACKET:
            if not revcreated:
                # FIXME if a json file was annotated twice the resulting json file
                # includes two dictionaries of the same type
                # Only the last one is considered
                rev_dics = potiron.create_reverse_local_dicts(local_dicts)
                revcreated = True
            key = sensorname
            if ck:
                protocol = protocols[str(di['protocol'])]
                key += ":{}".format(protocol)
                if protocol not in prot:
                    prot.append(protocol)
            timestamp = di['timestamp']
            (day, time) = timestamp.split(' ')
            day = day.replace('-', '')
            p = red.pipeline()
            if day != lastday:
                p.sadd("DAYS", day)
                lastday = day
            for k in list(di.keys()):
                if k not in non_index:
                    feature = di[k]
                    if k.startswith(potiron.ANNOTATION_PREFIX):
                        feature = potiron.translate_dictionaries(
                            rev_dics, red, k, di[k])
                        # Create the links between annotations and their objects
                        idn = potiron.get_dictionary_id(k)
                        obj = potiron.get_annotation_origin(di, k)
                        if obj is not None and idn is not None:
                            kn = "AR_{}_{}".format(idn, obj)
                            p.set(kn, feature)
                    keyname = "{}:{}:{}".format(key, day, k)
                    p.sadd("FIELDS", k)
                    p.zincrby(keyname, feature, 1)
            # FIXME the pipe might be to big peridocially flush them
            p.execute()
    if ck:
        for pr in prot:
            red.sadd("PROTOCOLS", pr)
    potiron.infomsg('Data from {} stored into redis'.format(filename))