red.sadd('BPF', bpf) # FIXME documents must include at least a sensorname and a timestamp # FIXME check timestamp format sensorname = potiron.get_sensor_name(doc) lastday = None revcreated = False for di in doc: if di["type"] > potiron.DICT_LOWER_BOUNDARY: local_dicts[di["type"]] = di if di["type"] == potiron.TYPE_PACKET: if not revcreated: # FIXME if a json file was annotated twice the resulting json file # includes two dictionaries of the same type # Only the last one is considered rev_dics = potiron.create_reverse_local_dicts(local_dicts) revcreated = True timestamp = di['timestamp'] sport = di['sport'] dport = di['dport'] (day, time) = timestamp.split(' ') timestamp = "{}_{}".format(day,time) day = day.replace('-', '') if day != lastday: red.sadd("DAYS", day) p = red.pipeline() for k in list(di.keys()): if k not in non_index: feature = di[k] if k.startswith(potiron.ANNOTATION_PREFIX): feature = potiron.translate_dictionaries(rev_dics, red, k, di[k])
def process_file(outputdir, filename):
    """Dissect a capture with tshark and store its TCP packets.

    tshark is run on *filename* with the global ``bpf`` as its ``-Y``
    filter. Every output line becomes a packet dictionary whose indexed
    fields (keys not listed in the global ``non_index``) are written to a
    redis hash through the global ``red`` client. The JSON-serialised
    list of all packets is finally handed to ``potiron.store_packet``.

    Args:
        outputdir: Passed through to ``potiron.store_packet``.
        filename: Path of the capture file read by tshark (``-r``).

    Raises:
        OSError: If tshark is not installed or exits with a non-zero
            return code.
    """
    # If tshark is not installed, exit and raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")

    def _to_int(text):
        """Return *text* as an int, or -1 when it is not a valid integer."""
        try:
            return int(text)
        except ValueError:
            return -1

    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(filename)
    revcreated = False
    lastday = None
    local_dicts = dict()
    rev_dics = dict()
    # The first entry describes the source
    allpackets = [{
        "type": potiron.TYPE_SOURCE,
        "sensorname": sensorname,
        "filename": os.path.basename(filename),
        "bpf": bpf
    }]
    # Arguments are passed as a list (no shell) so the filter and the file
    # path are never subject to shell quoting or word splitting.
    cmd = [
        "tshark", "-n", "-q", "-Tfields",
        "-e", "frame.time_epoch",
        "-e", "tcp.srcport",
        "-e", "tcp.dstport",
        "-e", "tcp.seq",
        "-e", "tcp.ack",
        "-E", "header=n",
        "-E", "separator=/s",
        "-E", "occurrence=f",
        "-Y", bpf,
        "-r", filename,
        "-o", "tcp.relative_sequence_numbers:FALSE",
    ]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    # communicate() drains stdout and stderr together; reading only stdout
    # could block forever once tshark fills the stderr pipe.
    stdout_data, stderr_data = proc.communicate()
    for raw in stdout_data.splitlines():
        line = raw.decode()
        timestamp, sport, dport, tcpseq, tcpack = line.split(' ')
        isport = _to_int(sport)
        idport = _to_int(dport)
        itcpseq = _to_int(tcpseq)
        itcpack = _to_int(tcpack)
        # Convert timestamp: "<seconds>.<fraction>" becomes a local
        # date-time string; the last three fraction digits are dropped.
        seconds, fraction = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(seconds))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S") + "." + fraction[:-3]
        packet = {
            'timestamp': stime,
            'sport': isport,
            'dport': idport,
            'tcpseq': itcpseq,
            'tcpack': itcpack,
            'type': potiron.TYPE_PACKET,
            'state': potiron.STATE_NOT_ANNOTATE
        }
        allpackets.append(packet)
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            rev_dics = potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        (day, clock) = stime.split(' ')
        timestamp = "{}_{}".format(day, clock)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
            # Remember the day so it is only registered when it changes
            lastday = day
        # Store data into redis: one hash per packet
        keyname = "{}_src{}_dst{}_{}".format(sensorname, isport, idport,
                                             timestamp)
        p = red.pipeline()
        for k in packet:
            if k in non_index:
                continue
            feature = packet[k]
            if k.startswith(potiron.ANNOTATION_PREFIX):
                feature = potiron.translate_dictionaries(
                    rev_dics, red, k, packet[k])
                # Create the links between annotations and their objects
                idn = potiron.get_dictionary_id(k)
                obj = potiron.get_annotation_origin(packet, k)
                if obj is not None and idn is not None:
                    kn = "AR_{}_{}".format(idn, obj)
                    p.set(kn, feature)
            p.hset(keyname, k, feature)
        p.execute()
    # The return code is checked only after the parsed output was stored
    if proc.returncode != 0:
        errmsg = stderr_data
        raise OSError("tshark failed. Return code {}. {}".format(
            proc.returncode, errmsg))
    # Write data into the json output file
    potiron.store_packet(outputdir, filename, json.dumps(allpackets))
def process_file(outputdir, inputfile):
    """Dissect a capture with tshark and store its ARP packets.

    tshark is run on *inputfile* with the filter ``eth.type == 0x806``.
    Every output line becomes a packet dictionary. Packets with opcode
    ``'1'`` are stored as requests, all others as replies, in redis hashes
    through the global ``red`` client, and a per-day counter is
    incremented for each. The JSON-serialised list of all packets is
    finally handed to ``potiron.store_packet``.

    Args:
        outputdir: Passed through to ``potiron.store_packet``.
        inputfile: Path of the capture file read by tshark (``-r``).

    Raises:
        OSError: If tshark is not installed or exits with a non-zero
            return code.
    """
    # If tshark is not installed, exit and raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(inputfile)
    revcreated = False
    lastday = None
    local_dicts = dict()
    rev_dics = dict()
    # The first entry describes the source
    allpackets = [{"type": potiron.TYPE_SOURCE,
                   "sensorname": sensorname,
                   "filename": os.path.basename(inputfile)}]
    # Timestamp part of the key of the most recent request; replies are
    # stored under it. It stays None until a first request was seen.
    timestampKey = None
    # Arguments are passed as a list (no shell) so the file path is never
    # subject to shell quoting or word splitting.
    cmd = [
        "tshark", "-n", "-q", "-Tfields",
        "-e", "frame.time_epoch",
        "-e", "eth.src",
        "-e", "eth.dst",
        "-e", "arp.src.proto_ipv4",
        "-e", "arp.dst.proto_ipv4",
        "-e", "arp.src.hw_mac",
        "-e", "arp.dst.hw_mac",
        "-e", "arp.opcode",
        "-E", "header=n",
        "-E", "separator=|",
        "-Y", "eth.type == 0x806",
        "-r", inputfile,
    ]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    # communicate() drains stdout and stderr together; reading only stdout
    # could block forever once tshark fills the stderr pipe.
    stdout_data, stderr_data = proc.communicate()
    for raw in stdout_data.splitlines():
        line = raw.decode()
        (timestamp, ethsrc, ethdst, ipsrc, ipdst,
         arpsrc, arpdst, opcode) = line.split('|')
        # Convert timestamp: "<seconds>.<fraction>" becomes a local
        # date-time string; the last three fraction digits are dropped.
        seconds, fraction = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(seconds))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        stime += ".{}".format(fraction[:-3])
        packet = {'timestamp': stime,
                  'ethsrc': ethsrc,
                  'ethdst': ethdst,
                  'ipsrc': ipsrc,
                  'ipdst': ipdst,
                  'arpsrc': arpsrc,
                  'arpdst': arpdst,
                  'opcode': opcode,
                  'type': potiron.TYPE_PACKET,
                  'state': potiron.STATE_NOT_ANNOTATE}
        allpackets.append(packet)
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            # NOTE(review): the result is not read anywhere in this function.
            rev_dics = potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        (day, clock) = stime.split(' ')
        timestamp = "{}_{}".format(day, clock)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
            # Remember the day so it is only registered when it changes
            lastday = day
        # Store data into redis
        p = red.pipeline()
        countKeyname = '{}_{}_count'.format(sensorname, day)
        if opcode == '1':
            # Request: keyed by the requested address and its own timestamp
            keyname = '{}_{}_{}'.format(sensorname, ipdst, timestamp)
            p.hset(keyname, 'req_src_mac', ethsrc)
            p.hset(keyname, 'req_src_ip', ipsrc)
            p.hset(keyname, 'req_src_arp_mac', arpsrc)
            p.zincrby(countKeyname, 'request', 1)
            timestampKey = timestamp
        else:
            # Any other opcode: keyed by the sender address and the
            # timestamp of the last request seen
            keyname = '{}_{}_{}'.format(sensorname, ipsrc, timestampKey)
            p.hset(keyname, 'rep_timestamp', stime)
            p.hset(keyname, 'rep_dst_ip', ipdst)
            p.hset(keyname, 'rep_src_mac', ethsrc)
            p.hset(keyname, 'rep_dst_mac', ethdst)
            p.hset(keyname, 'rep_src_arp_mac', arpsrc)
            p.hset(keyname, 'rep_dst_arp_mac', arpdst)
            p.zincrby(countKeyname, 'reply', 1)
        p.execute()
    # The return code is checked only after the parsed output was stored
    if proc.returncode != 0:
        errmsg = stderr_data
        raise OSError("tshark failed. Return code {}. {}".format(proc.returncode, errmsg))
    # Write data into the json output file
    potiron.store_packet(outputdir, inputfile, json.dumps(allpackets))
def process_file(outputdir, inputfile):
    """Import the ARP traffic of a capture file into redis and JSON.

    The capture is dissected by tshark (filter ``eth.type == 0x806``).
    For each output line a packet dictionary is built; opcode ``'1'`` is
    recorded as a request and every other opcode as a reply, using the
    global ``red`` client. A sorted-set counter named
    ``<sensor>_<day>_count`` is incremented per packet. At the end the
    packet list is serialised to JSON and given to
    ``potiron.store_packet``.

    Args:
        outputdir: Passed through to ``potiron.store_packet``.
        inputfile: Path of the capture file read by tshark (``-r``).

    Raises:
        OSError: If tshark is not installed or exits with a non-zero
            return code.
    """
    # If tshark is not installed, exit and raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(inputfile)
    revcreated = False
    lastday = None
    local_dicts = dict()
    rev_dics = dict()
    allpackets = []
    # Describe the source
    allpackets.append({
        "type": potiron.TYPE_SOURCE,
        "sensorname": sensorname,
        "filename": os.path.basename(inputfile)
    })
    # Key suffix of the latest request; None until one was seen, in which
    # case replies are stored under a key ending in "None".
    timestampKey = None
    fields = [
        "frame.time_epoch", "eth.src", "eth.dst",
        "arp.src.proto_ipv4", "arp.dst.proto_ipv4",
        "arp.src.hw_mac", "arp.dst.hw_mac", "arp.opcode",
    ]
    # Build an argument vector instead of a shell string so that the file
    # path reaches tshark verbatim.
    cmd = ["tshark", "-n", "-q", "-Tfields"]
    for field in fields:
        cmd += ["-e", field]
    cmd += ["-E", "header=n", "-E", "separator=|",
            "-Y", "eth.type == 0x806", "-r", inputfile]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    # Read both pipes at once: reading stdout alone can deadlock when the
    # child blocks on a full stderr pipe.
    stdout_data, stderr_data = proc.communicate()
    for raw in stdout_data.splitlines():
        line = raw.decode()
        timestamp, ethsrc, ethdst, ipsrc, ipdst, arpsrc, arpdst, opcode = \
            line.split('|')
        # Convert timestamp; the last three fraction digits are dropped
        seconds, fraction = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(seconds))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        stime += ".{}".format(fraction[:-3])
        packet = {
            'timestamp': stime,
            'ethsrc': ethsrc,
            'ethdst': ethdst,
            'ipsrc': ipsrc,
            'ipdst': ipdst,
            'arpsrc': arpsrc,
            'arpdst': arpdst,
            'opcode': opcode,
            'type': potiron.TYPE_PACKET,
            'state': potiron.STATE_NOT_ANNOTATE
        }
        allpackets.append(packet)
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            # NOTE(review): the result is not read anywhere in this function.
            rev_dics = potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        (day, clock) = stime.split(' ')
        timestamp = "{}_{}".format(day, clock)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
            # Track the current day so DAYS is touched only on a change
            lastday = day
        # Store data into redis
        p = red.pipeline()
        countKeyname = '{}_{}_count'.format(sensorname, day)
        if opcode == '1':
            keyname = '{}_{}_{}'.format(sensorname, ipdst, timestamp)
            p.hset(keyname, 'req_src_mac', ethsrc)
            p.hset(keyname, 'req_src_ip', ipsrc)
            p.hset(keyname, 'req_src_arp_mac', arpsrc)
            p.zincrby(countKeyname, 'request', 1)
            timestampKey = timestamp
        else:
            keyname = '{}_{}_{}'.format(sensorname, ipsrc, timestampKey)
            p.hset(keyname, 'rep_timestamp', stime)
            p.hset(keyname, 'rep_dst_ip', ipdst)
            p.hset(keyname, 'rep_src_mac', ethsrc)
            p.hset(keyname, 'rep_dst_mac', ethdst)
            p.hset(keyname, 'rep_src_arp_mac', arpsrc)
            p.hset(keyname, 'rep_dst_arp_mac', arpdst)
            p.zincrby(countKeyname, 'reply', 1)
        p.execute()
    # The return code is checked only after the parsed output was stored
    if proc.returncode != 0:
        errmsg = stderr_data
        raise OSError("tshark failed. Return code {}. {}".format(
            proc.returncode, errmsg))
    # Write data into the json output file
    potiron.store_packet(outputdir, inputfile, json.dumps(allpackets))
def process_file(outputdir, filename):
    """Import the TCP traffic of a capture file into redis and, optionally, JSON.

    tshark dissects *filename* using the global ``bpf`` as ``-Y`` filter.
    Each output line is turned into a packet dictionary; every key of the
    packet that is not in the global ``non_index`` is written to a redis
    hash named ``<sensor>_src<sport>_dst<dport>_<timestamp>`` through the
    global ``red`` client. Unless the global ``disable_json`` is truthy,
    the packet list is serialised to JSON and given to
    ``potiron.store_packet``.

    Args:
        outputdir: Passed through to ``potiron.store_packet``.
        filename: Path of the capture file read by tshark (``-r``).

    Raises:
        OSError: If tshark is not installed or exits with a non-zero
            return code.
    """
    # If tshark is not installed, exit and raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")

    def _int_or_default(text, default=-1):
        """Parse *text* as an int; return *default* when it is not one."""
        try:
            return int(text)
        except ValueError:
            return default

    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(filename)
    revcreated = False
    lastday = None
    local_dicts = dict()
    rev_dics = dict()
    allpackets = []
    # Describe the source
    allpackets.append({"type": potiron.TYPE_SOURCE,
                       "sensorname": sensorname,
                       "filename": os.path.basename(filename),
                       "bpf": bpf})
    # Build an argument vector instead of a shell string so that the
    # filter expression and the file path reach tshark verbatim.
    cmd = ["tshark", "-n", "-q", "-Tfields"]
    for field in ("frame.time_epoch", "tcp.srcport", "tcp.dstport",
                  "tcp.seq", "tcp.ack"):
        cmd += ["-e", field]
    cmd += ["-E", "header=n", "-E", "separator=/s", "-E", "occurrence=f",
            "-Y", bpf, "-r", filename,
            "-o", "tcp.relative_sequence_numbers:FALSE"]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    # Read both pipes at once: reading stdout alone can deadlock when the
    # child blocks on a full stderr pipe.
    stdout_data, stderr_data = proc.communicate()
    for raw in stdout_data.splitlines():
        line = raw.decode()
        timestamp, sport, dport, tcpseq, tcpack = line.split(' ')
        isport = _int_or_default(sport)
        idport = _int_or_default(dport)
        itcpseq = _int_or_default(tcpseq)
        itcpack = _int_or_default(tcpack)
        # Convert timestamp; the last three fraction digits are dropped
        seconds, fraction = timestamp.split('.')
        dobj = datetime.datetime.fromtimestamp(float(seconds))
        stime = dobj.strftime("%Y-%m-%d %H:%M:%S")
        stime = stime + "." + fraction[:-3]
        packet = {'timestamp': stime,
                  'sport': isport,
                  'dport': idport,
                  'tcpseq': itcpseq,
                  'tcpack': itcpack,
                  'type': potiron.TYPE_PACKET,
                  'state': potiron.STATE_NOT_ANNOTATE}
        allpackets.append(packet)
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            rev_dics = potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        (day, clock) = stime.split(' ')
        timestamp = "{}_{}".format(day, clock)
        day = day.replace('-', '')
        if day != lastday:
            red.sadd("DAYS", day)
            # Track the current day so DAYS is touched only on a change
            lastday = day
        # Store data into redis; the hash name is the same for all fields
        keyname = "{}_src{}_dst{}_{}".format(sensorname, isport, idport, timestamp)
        p = red.pipeline()
        for k in packet:
            if k in non_index:
                continue
            feature = packet[k]
            if k.startswith(potiron.ANNOTATION_PREFIX):
                feature = potiron.translate_dictionaries(rev_dics, red, k, packet[k])
                # Create the links between annotations and their objects
                idn = potiron.get_dictionary_id(k)
                obj = potiron.get_annotation_origin(packet, k)
                if obj is not None and idn is not None:
                    kn = "AR_{}_{}".format(idn, obj)
                    p.set(kn, feature)
            p.hset(keyname, k, feature)
        p.execute()
    # The return code is checked only after the parsed output was stored
    if proc.returncode != 0:
        errmsg = stderr_data
        raise OSError("tshark failed. Return code {}. {}".format(proc.returncode, errmsg))
    if not disable_json:
        # Write data into the json output file
        potiron.store_packet(outputdir, filename, json.dumps(allpackets))
def process_storage(filename, red, ck):
    """Load an annotated JSON file and aggregate its packets into redis.

    The file is skipped (process exits with status 0) when its base name
    is already a member of the redis set ``FILES``, when its ``bpf``
    differs from the one recorded under ``BPF``, or when *ck* disagrees
    with the value recorded under ``CK``. Otherwise, for every packet
    document and every field not listed in the global ``non_index``, the
    sorted set ``<key>:<day>:<field>`` gets the field value incremented
    by one, where ``<key>`` is the sensor name, followed by
    ``:<protocol>`` when *ck* is truthy.

    Args:
        filename: Path of the JSON file to import.
        red: Redis client used for all reads and writes.
        ck: Truthy when combined keys (sensor name plus protocol) are used.
    """
    # Check if file was already imported
    fn = os.path.basename(filename)
    if red.sismember("FILES", fn):
        sys.stderr.write('[INFO] Filename ' + fn + ' was already imported ... skip ...\n')
        sys.exit(0)
    # FIXME Users have to be careful with the file extensions to not process data from capture files
    # FIXME (potiron-json-tshark module), and the same sample again from json files (potiron_redis module)
    # The context manager closes the file even when parsing fails
    with open(filename, 'r') as f:
        doc = json.load(f)
    # Record local dictionaries
    local_dicts = dict()
    rev_dics = dict()
    # Get sensorname assume one document per sensor name
    item = doc[0]
    bpf = item['bpf']
    # If redis key 'BPF' already exists
    if red.exists('BPF'):
        # Check if the current bpf is the same as the one previously used
        if not red.sismember('BPF', bpf):
            bpf_string = str(red.smembers('BPF'))
            sys.stderr.write(
                '[INFO] BPF for the current data is not the same as the one used in the data already stored here : {}\n'
                .format(bpf_string[3:-2]))
            sys.exit(0)
    # On the other case, add the bpf in the key 'BPF'
    else:
        red.sadd('BPF', bpf)
    # If combined keys are used
    if ck:
        # If redis key 'CK' already exists ...
        if red.exists('CK'):
            # ... BUT is set to 'NO', then combined keys are not used in
            # the data already stored in redis
            if red.sismember('CK', 'NO'):
                sys.stderr.write(
                    '[INFO] Combined key are not used in this redis dataset.\n'
                )
                sys.exit(0)
        # If redis key 'CK' does not exist ...
        else:
            red.sadd('CK', 'YES')
    # If combined keys are not used, the key 'CK' should exist anyway,
    # with the value 'NO'
    else:
        # If redis key 'CK' already exists ...
        if red.exists('CK'):
            # ... BUT is set to 'YES', then combined keys are used in the
            # data already stored in redis
            if red.sismember('CK', 'YES'):
                sys.stderr.write(
                    '[INFO] Combined key are used in this redis dataset.\n')
                sys.exit(0)
        # On the other case, we add it
        else:
            red.sadd('CK', 'NO')
    red.sadd("FILES", fn)
    # Project directory: the directory of this file minus its last three
    # characters (presumably a trailing "bin" -- TODO confirm)
    potiron_path = os.path.dirname(os.path.realpath(__file__))[:-3]
    protocols_path = "{}doc/protocols".format(potiron_path)
    protocols = potiron.define_protocols(protocols_path)
    # FIXME documents must include at least a sensorname and a timestamp
    # FIXME check timestamp format
    sensorname = potiron.get_sensor_name(doc)
    lastday = None
    revcreated = False
    # Protocol names met in this file; only filled when ck is truthy
    prot = set()
    for di in doc:
        if di["type"] > potiron.DICT_LOWER_BOUNDARY:
            local_dicts[di["type"]] = di
        if di["type"] != potiron.TYPE_PACKET:
            continue
        if not revcreated:
            # FIXME if a json file was annotated twice the resulting json file
            # includes two dictionaries of the same type
            # Only the last one is considered
            rev_dics = potiron.create_reverse_local_dicts(local_dicts)
            revcreated = True
        key = sensorname
        if ck:
            protocol = protocols[str(di['protocol'])]
            key += ":{}".format(protocol)
            prot.add(protocol)
        # The timestamp must be "<date> <time>"; only the date is used
        day, _ = di['timestamp'].split(' ')
        day = day.replace('-', '')
        p = red.pipeline()
        if day != lastday:
            p.sadd("DAYS", day)
            lastday = day
        for k in list(di.keys()):
            if k in non_index:
                continue
            feature = di[k]
            if k.startswith(potiron.ANNOTATION_PREFIX):
                feature = potiron.translate_dictionaries(
                    rev_dics, red, k, di[k])
                # Create the links between annotations and their objects
                idn = potiron.get_dictionary_id(k)
                obj = potiron.get_annotation_origin(di, k)
                if obj is not None and idn is not None:
                    kn = "AR_{}_{}".format(idn, obj)
                    p.set(kn, feature)
            keyname = "{}:{}:{}".format(key, day, k)
            p.sadd("FIELDS", k)
            p.zincrby(keyname, feature, 1)
        # FIXME the pipe might be too big, periodically flush them
        p.execute()
    if ck:
        for pr in prot:
            red.sadd("PROTOCOLS", pr)
    potiron.infomsg('Data from {} stored into redis'.format(filename))