def check_mandatory_fields(self, doc):
    """Return True when every mandatory field is present in *doc*.

    Collects all absent fields first so that one log line is emitted per
    missing field (the check never short-circuits on the first miss).
    """
    missing = [field for field in self.mfields if field not in doc]
    for field in missing:
        infomsg("Field " + field + " is missing for annotations")
    return not missing
def check_mandatory_fields(self, doc):
    """Check that *doc* carries every field listed in ``self.mfields``.

    Logs a message for each absent field instead of stopping at the
    first one, then reports overall completeness as a boolean.
    """
    complete = True
    for required in self.mfields:
        if required in doc:
            continue
        infomsg("Field " + required + " is missing for annotations")
        complete = False
    return complete
def store_packet(rootdir, pcapfilename, obj):
    """Persist *obj* (an already-serialized JSON string) for *pcapfilename*.

    When *rootdir* is given, the payload is written to the path computed
    by ``potiron.get_file_struct``; otherwise it is written to stdout.

    :param rootdir: destination root directory, or None to write to stdout
    :param pcapfilename: capture filename used to derive the output path
    :param obj: serialized document to write
    """
    if rootdir is not None:
        jsonfilename = potiron.get_file_struct(rootdir, pcapfilename)
        # Context manager guarantees the handle is closed even when the
        # write raises (the original open/write/close leaked on error).
        with open(jsonfilename, "w") as f:
            f.write(obj)
        infomsg("Created filename " + jsonfilename)
    else:
        sys.stdout.write(obj)
def store_packet(rootdir, pcapfilename, obj):
    """Write the serialized document *obj* either under *rootdir* (using
    the potiron file layout) or, when no root directory is given, to
    standard output."""
    if rootdir is None:
        sys.stdout.write(obj)
        return
    jsonfilename = potiron.get_file_struct(rootdir, pcapfilename)
    f = open(jsonfilename, "w")
    f.write(obj)
    f.close()
    infomsg("Created filename " + jsonfilename)
def handle_docs(self, docs):
    """Annotate each document that carries all mandatory fields.

    Documents failing the mandatory-field check are logged (with their
    1-based position) and kept unchanged, so the returned list always
    has exactly one entry per input document.
    """
    annotated = []
    for number, doc in enumerate(docs, start=1):
        if self.check_mandatory_fields(doc):
            annotated.append(self.annoate_doc(doc))
        else:
            infomsg("Document number {} cannot be annotated due to missing mandatory fields".format(number))
            # Incomplete documents are passed through untouched.
            annotated.append(doc)
    return annotated
def handle_docs(self, docs):
    """Run annotation over *docs*, leaving incomplete documents intact.

    Returns a new list the same length as *docs*: complete documents are
    replaced by their annotated version, incomplete ones are appended
    unmodified after a log message identifying their 1-based position.
    """
    result = []
    count = 0
    for doc in docs:
        count += 1
        if not self.check_mandatory_fields(doc):
            infomsg("Document number " + str(count) + " cannot be annotated due to missing mandatory fields")
            # If the document is not complete or could not be annotated
            # it is left intact.
            result.append(doc)
        else:
            result.append(self.annoate_doc(doc))
    return result
def process_file(rootdir, filename, fieldfilter, b_redis, disable_json, ck):
    """Run tshark over *filename* and store the decoded packets.

    Two modes, selected by *disable_json*:
      * True  -> packets are written straight into redis (per-day keys,
        optionally combined with the protocol when *ck* is set).
      * False -> packets are accumulated into a JSON document saved via
        potiron.store_packet(), then optionally pushed to redis when
        *b_redis* is set.

    NOTE(review): `red`, `bpf` and `fill_packet` are module-level names
    defined elsewhere in this file — confirm they are initialised before
    this function runs.
    """
    if disable_json:
        # Skip files that were already imported into this redis instance.
        fn = os.path.basename(filename)
        if red.sismember("FILES", fn):
            sys.stderr.write('[INFO] Filename ' + fn + ' was already imported ... skip ...\n')
            sys.exit(0)
    # FIXME Users have to be carefull with the files extensions to not process data from capture files
    # FIXME (potiron-json-tshark module), and the same sample again from json files (potiron_redis module)
    # List of fields that are included in the json documents that should not be ranked
    # FIXME Put this as argument to the program as this list depends on the documents that is introduced
    non_index = ['', 'filename', 'sensorname', 'timestamp', 'packet_id']
    # If tshark is not installed, exit and raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # FIXME Put in config file
    tshark_fields = potiron.tshark_fields
    cmd = "tshark -n -q -Tfields "
    if fieldfilter:
        # frame.time_epoch and ip.proto are always required (they feed the
        # redis keys below), so force them to the front of the field list.
        if 'frame.time_epoch' not in fieldfilter:
            fieldfilter.insert(0, 'frame.time_epoch')
        if 'ip.proto' not in fieldfilter:
            fieldfilter.insert(1, 'ip.proto')
        for p in fieldfilter:
            cmd += "-e {} ".format(p)
    else:
        for f in tshark_fields:
            cmd += "-e {} ".format(f)
    cmd += "-E header=n -E separator=/s -E occurrence=f -Y '{}' -r {} -o tcp.relative_sequence_numbers:FALSE".format(bpf, filename)
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(filename)
    json_fields = potiron.json_fields
    # Fields parsed as integers; the value here is the default kept when
    # tshark produced a non-numeric token for that column.
    special_fields = {'length': -1, 'ipttl': -1, 'iptos': 0, 'tcpseq': -1, 'tcpack': -1, 'icmpcode': 255, 'icmptype': 255}
    if disable_json:
        # If redis key 'BPF' already exists
        if red.keys('BPF'):
            # Check is the current bpf is the same as the one previously used
            if not red.sismember('BPF', bpf):
                bpf_string = str(red.smembers('BPF'))
                sys.stderr.write('[INFO] BPF for the current data is not the same as the one used in the data already stored here : {}\n'.format(bpf_string[3:-2]))
                sys.exit(0)
        # On the other case, add the bpf in the key 'BPF'
        else:
            red.sadd('BPF', bpf)
        # If combined keys are used
        if ck:
            # If redis key 'CK' already exists ...
            if red.keys('CK'):
                # ... BUT is set to 'NO', then combined keys are not used in the data already stored in redis
                if red.sismember('CK','NO'):
                    sys.stderr.write('[INFO] Combined key are not used in this redis dataset.\n')
                    sys.exit(0)
            # If redis key 'CK' does not exist ...
            else:
                red.sadd('CK','YES')
        # If combined keys are not used, the key 'CK' should exist anyway, with the value 'NO'
        else:
            # If redis key 'CK' already exists ...
            if red.keys('CK'):
                # ... BUT is set to 'YES', then combined keys are used in the data already stored in redis
                if red.sismember('CK','YES'):
                    sys.stderr.write('[INFO] Combined key are used in this redis dataset.\n')
                    sys.exit(0)
            # On the other case, we add it
            else:
                red.sadd('CK','NO')
        red.sadd("FILES", fn)
        potiron_path = os.path.dirname(os.path.realpath(__file__))[:-3]
        protocols_path = "{}doc/protocols".format(potiron_path)
        protocols = potiron.define_protocols(protocols_path)
        lastday = None
        prot = []
        for line in proc.stdout.readlines():
            line = line[:-1].decode()
            packet = {}
            tab_line = line.split(' ')
            for i in range(len(tab_line)):
                # Map the i-th tshark column back to its json field name.
                if fieldfilter:
                    valname = json_fields[tshark_fields.index(fieldfilter[i])]
                else:
                    valname = json_fields[i]
                if valname in special_fields:
                    # Numeric field: keep the documented default when the
                    # token is not an integer.
                    v = special_fields[valname]
                    try:
                        v = int(tab_line[i])
                    except ValueError:
                        pass
                    packet[valname] = v
                else:
                    packet[valname] = tab_line[i]
            fill_packet(packet, disable_json)
            timestamp = packet['timestamp']
            if ck:
                # Combined key: sensor:protocol:day
                protocol = protocols[str(packet['protocol'])]
                rKey = "{}:{}:{}".format(sensorname, protocol, timestamp)
                if protocol not in prot:
                    prot.append(protocol)
            else:
                rKey = "{}:{}".format(sensorname, timestamp)
            p = red.pipeline()
            if timestamp != lastday:
                p.sadd("DAYS", timestamp)
                lastday = timestamp
            for f in packet:
                if f not in non_index:
                    feature = packet[f]
                    redisKey = "{}:{}".format(rKey, f)
                    p.sadd("FIELDS", f)
                    # NOTE(review): argument order matches redis-py 2.x
                    # zincrby(name, value, amount) — verify installed version.
                    p.zincrby(redisKey, feature, 1)
            p.execute()
        if ck:
            for pr in prot:
                red.sadd("PROTOCOLS", pr)
        potiron.infomsg('Data from {} stored into redis'.format(filename))
    else:
        allpackets = []
        # Describe the source
        allpackets.append({"type": potiron.TYPE_SOURCE, "sensorname": sensorname, "filename": os.path.basename(filename), "bpf": bpf})
        # Each packet has a incremental numeric id
        # A packet is identified with its sensorname filename and packet id for
        # further aggregation with meta data.
        # Assumption: Each program process the pcap file the same way?
        packet_id = 0
        for line in proc.stdout.readlines():
            packet_id = packet_id + 1
            line = line[:-1].decode()
            packet = {}
            tab_line = line.split(' ')
            for i in range(len(tab_line)):
                if fieldfilter:
                    valname = json_fields[tshark_fields.index(fieldfilter[i])]
                else:
                    valname = json_fields[i]
                if valname in special_fields:
                    v = special_fields[valname]
                    try:
                        v = int(tab_line[i])
                    except ValueError:
                        pass
                    packet[valname] = v
                else:
                    packet[valname] = tab_line[i]
            fill_packet(packet, disable_json)
            packet['packet_id'] = packet_id
            packet['type'] = potiron.TYPE_PACKET
            packet['state'] = potiron.STATE_NOT_ANNOTATE
            # FIXME might consume a lot of memory
            allpackets.append(packet)
        # FIXME Implement polling because wait can last forever
        proc.wait()
        if proc.returncode != 0:
            errmsg = b"".join(proc.stderr.readlines())
            raise OSError("tshark failed. Return code {}. {}".format(proc.returncode, errmsg))
        # Write and save the json file
        jsonfilename = potiron.store_packet(rootdir, filename, json.dumps(allpackets))
        if b_redis:
            # If redis option, store data into redis
            potiron_redis.process_storage(jsonfilename, red, ck)
# Command-line entry: import json documents into redis time series.
parser = argparse.ArgumentParser(description='Import json documents into redis time series')
parser.add_argument('-i', '--input', type=str, nargs=1, help='Filename of a json document that should be imported.')
parser.add_argument('-u', '--unix', type=str, nargs=1, help='Unix socket to connect to redis-server.')
# NOTE: store_false means args.reverse defaults to True and becomes False
# only when --reverse is passed; the dictionaries are therefore built
# exactly when the flag IS given (see the `if not args.reverse` test below).
parser.add_argument('--reverse', action='store_false', help='Create global reverse dictionaries')
args = parser.parse_args()
# A redis unix socket is mandatory for every mode of operation.
if args.unix is None:
    sys.stderr.write('A unix socket must be specified\n')
    sys.exit(1)
usocket = args.unix[0]
red = redis.Redis(unix_socket_path=usocket)
# --reverse mode: only (re)build the global reverse annotation
# dictionaries, then exit without importing any document.
if not args.reverse:
    potiron.create_reverse_global_dicts(red)
    potiron.infomsg("Created global reverse annotation dictionaries")
    sys.exit(0)
if args.input is None:
    sys.stderr.write('A filename must be specified\n')
    sys.exit(1)
filename = args.input[0]
# Check if file was already imported
fn = os.path.basename(filename)
if red.sismember("FILES", fn):
    sys.stderr.write('[INFO] Filename ' + fn + ' was already imported ... skip ...\n')
    sys.exit(0)
red.sadd("FILES", fn)
# NOTE(review): the handle is consumed (and presumably closed) by code
# after this excerpt.
f = open(filename, 'r')
# NOTE(review): chunk starts mid-construct — the matching `if` branch of
# this `else` lies above this excerpt.
else:
    # Derive the output path for the annotated document from the input
    # filename and the requested destination directory.
    filename = args.read[0]
    fn = get_file_struct(args.directory[0], filename)
    t = fn.split('/')
    d = "/".join(t[0:-1])
    # When processing in parallel the directory could have been created
    # Between the directory test and makedirs
    try:
        if not os.path.exists(d):
            os.makedirs(d)
    except OSError as e:
        if e.errno != 17:
            # Something else happened propagate exception
            # (errno 17 == EEXIST: the directory already exists)
            raise OSError(e)
        potiron.infomsg("Someone else created the directory")
    fd = open(fn, "w")
newdocs = []
for doc in docs:
    # If the mandatory fields are not present the document should be left intact
    mod_doc = doc
    if 'type' in doc:
        if doc['type'] == potiron.TYPE_PACKET:
            # Do all the annotations
            # if obj.check_mandatory_fields(doc):
            #     mod_doc = obj.annoate_doc(doc)
            # Chain the pdns and asn annotators; each runs only when its
            # own mandatory fields are present in the (possibly already
            # annotated) document.
            if pdns.check_mandatory_fields(doc):
                mod_doc = pdns.annoate_doc(mod_doc)
            if asn.check_mandatory_fields(mod_doc):
                mod_doc = asn.annoate_doc(mod_doc)
    newdocs.append(mod_doc)
# Destination directory for the output file
if args.outputdir is None:
    outputdir = "./out/"
else:
    outputdir = args.outputdir[0]
    if not outputdir.endswith('/'):
        outputdir = "{}/".format(outputdir)
if not os.path.exists(outputdir):
    os.makedirs(outputdir)
# Defines if scores should be displayed for protocols together or for each protocol
with_protocols = args.without_protocols
# Defines if combined keys are used in the current redis database
if red.sismember("CK", "YES"):
    ck = True
else:
    if with_protocols:
        # If combined keys are not used, it is not possible to display scores
        # for each protocol, and they will be displayed for protocols together
        with_protocols = False
        potiron.infomsg('You did not choose to use the parameter "without_protocols" but your redis database is not currently supporting combined keys.\
It will continue anyway without specifying each protocol..')
    ck = False
# Defines if bokeh plots should be processed for each value in bubbles
links = args.links
# Defines if charts should be auto-generated from datafiles
gen = args.generate
# Project directory
potiron_path = potiron.potiron_path
# Module directory
current_path = potiron.current_path
# Define path of circl logo, based on potiron path
if args.logo is None:
    logofile = "{}doc/circl.png".format(potiron_path)
else:
    logofile = args.logo[0]
# NOTE(review): chunk ends mid-statement — the body of this `if`
# continues beyond this excerpt.
if links:
def process_storage(filename, red, ck):
    """Load the json document *filename* and store its packets in redis.

    :param filename: path of a json file produced by the tshark export
    :param red: redis connection the data is written to
    :param ck: True when combined keys (sensor:protocol:day) are in use

    NOTE(review): relies on the module-level `non_index` list defined
    elsewhere in this file — confirm it is in scope.
    """
    # Check if file was already imported
    fn = os.path.basename(filename)
    if red.sismember("FILES", fn):
        sys.stderr.write('[INFO] Filename ' + fn + ' was already imported ... skip ...\n')
        sys.exit(0)
    # FIXME Users have to be carefull with the files extensions to not process data from capture files
    # FIXME (potiron-json-tshark module), and the same sample again from json files (potiron_redis module)
    f = open(filename, 'r')
    doc = json.load(f)
    f.close()
    # Record local dictionaries
    local_dicts = dict()
    rev_dics = dict()
    # Get sensorname assume one document per sensor name
    item = doc[0]
    bpf = item['bpf']
    # If redis key 'BPF' already exists
    if red.keys('BPF'):
        # Check is the current bpf is the same as the one previously used
        if not red.sismember('BPF', bpf):
            bpf_string = str(red.smembers('BPF'))
            sys.stderr.write('[INFO] BPF for the current data is not the same as the one used in the data already stored here : {}\n'.format(bpf_string[3:-2]))
            sys.exit(0)
    # On the other case, add the bpf in the key 'BPF'
    else:
        red.sadd('BPF', bpf)
    # If combined keys are used
    if ck:
        # If redis key 'CK' already exists ...
        if red.keys('CK'):
            # ... BUT is set to 'NO', then combined keys are not used in the data already stored in redis
            if red.sismember('CK', 'NO'):
                sys.stderr.write('[INFO] Combined key are not used in this redis dataset.\n')
                sys.exit(0)
        # If redis key 'CK' does not exist ...
        else:
            red.sadd('CK', 'YES')
    # If combined keys are not used, the key 'CK' should exist anyway, with the value 'NO'
    else:
        # If redis key 'CK' already exists ...
        if red.keys('CK'):
            # ... BUT is set to 'YES', then combined keys are used in the data already stored in redis
            if red.sismember('CK', 'YES'):
                sys.stderr.write('[INFO] Combined key are used in this redis dataset.\n')
                sys.exit(0)
        # On the other case, we add it
        else:
            red.sadd('CK', 'NO')
    red.sadd("FILES", fn)
    # Project directory
    potiron_path = os.path.dirname(os.path.realpath(__file__))[:-3]
    protocols_path = "{}doc/protocols".format(potiron_path)
    protocols = potiron.define_protocols(protocols_path)
    # FIXME documents must include at least a sensorname and a timestamp
    # FIXME check timestamp format
    sensorname = potiron.get_sensor_name(doc)
    lastday = None
    revcreated = False
    prot = []
    for di in doc:
        if di["type"] > potiron.DICT_LOWER_BOUNDARY:
            local_dicts[di["type"]] = di
        if di["type"] == potiron.TYPE_PACKET:
            if not revcreated:
                # FIXME if a json file was annotated twice the resulting json file
                # includes two dictionaries of the same type
                # Only the last one is considered
                rev_dics = potiron.create_reverse_local_dicts(local_dicts)
                revcreated = True
            key = sensorname
            if ck:
                # Combined key: append the protocol name to the sensor name.
                protocol = protocols[str(di['protocol'])]
                key += ":{}".format(protocol)
                if protocol not in prot:
                    prot.append(protocol)
            timestamp = di['timestamp']
            (day, time) = timestamp.split(' ')
            day = day.replace('-', '')
            p = red.pipeline()
            if day != lastday:
                p.sadd("DAYS", day)
                lastday = day
            for k in list(di.keys()):
                if k not in non_index:
                    feature = di[k]
                    if k.startswith(potiron.ANNOTATION_PREFIX):
                        feature = potiron.translate_dictionaries(rev_dics, red, k, di[k])
                        # Create the links between annotations and their objects
                        idn = potiron.get_dictionary_id(k)
                        obj = potiron.get_annotation_origin(di, k)
                        if obj is not None and idn is not None:
                            kn = "AR_{}_{}".format(idn, obj)
                            p.set(kn, feature)
                    keyname = "{}:{}:{}".format(key, day, k)
                    p.sadd("FIELDS", k)
                    # NOTE(review): argument order matches redis-py 2.x
                    # zincrby(name, value, amount) — verify installed version.
                    p.zincrby(keyname, feature, 1)
            # FIXME the pipe might be to big peridocially flush them
            p.execute()
    if ck:
        for pr in prot:
            red.sadd("PROTOCOLS", pr)
    potiron.infomsg('Data from {} stored into redis'.format(filename))
# Command-line entry: import data from json documents into redis.
parser = argparse.ArgumentParser(description='Import data from json documents into redis.')
parser.add_argument('-i', '--input', type=str, nargs=1, help='Filename of a json document that should be imported.')
parser.add_argument('-u', '--unix', type=str, nargs=1, help='Unix socket to connect to redis-server.')
# NOTE: store_false means args.reverse defaults to True and becomes False
# only when --reverse is passed; the dictionaries are therefore built
# exactly when the flag IS given (see the `if not args.reverse` test below).
parser.add_argument('--reverse', action='store_false', help='Create global reverse dictionaries')
args = parser.parse_args()
# A redis unix socket is mandatory for every mode of operation.
if args.unix is None:
    sys.stderr.write('A unix socket must be specified\n')
    sys.exit(1)
usocket = args.unix[0]
red = redis.Redis(unix_socket_path=usocket)
# --reverse mode: only (re)build the global reverse annotation
# dictionaries, then exit without importing any document.
if not args.reverse:
    potiron.create_reverse_global_dicts(red)
    potiron.infomsg("Created global reverse annotation dictionaries")
    sys.exit(0)
if args.input is None:
    sys.stderr.write('A filename must be specified\n')
    sys.exit(1)
filename = args.input[0]
# Check if file was already imported
fn = os.path.basename(filename)
if red.sismember("FILES", fn):
    sys.stderr.write('[INFO] Filename ' + fn + ' was already imported ... skip ...\n')
    sys.exit(0)
red.sadd("FILES", fn)
# NOTE(review): the handle is consumed (and presumably closed) by code
# after this excerpt.
f = open(filename, 'r')