Example #1
 def check_mandatory_fields(self, doc):
     complete = True
     for field in self.mfields:
         if field not in doc:
             infomsg("Field "+ field +" is missing for annotations")
             complete = False
     return complete
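A minimal, self-contained sketch of how this checker might be used; the Checker class, the field names and the infomsg helper below are illustrative stand-ins, not the potiron API:

def infomsg(msg):
    print('[INFO] ' + msg)

class Checker:
    # Hypothetical annotator exposing only the mandatory-field check
    def __init__(self, mfields):
        self.mfields = mfields

    def check_mandatory_fields(self, doc):
        complete = True
        for field in self.mfields:
            if field not in doc:
                infomsg("Field " + field + " is missing for annotations")
                complete = False
        return complete

print(Checker(['ipsrc', 'ipdst']).check_mandatory_fields({'ipsrc': '10.0.0.1'}))  # False: 'ipdst' is missing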
Example #3
def store_packet(rootdir, pcapfilename, obj):
    if rootdir is not None:
        jsonfilename = potiron.get_file_struct(rootdir, pcapfilename)
        f = open(jsonfilename, "w")
        f.write(obj)
        f.close()
        infomsg("Created filename " + jsonfilename)
    else:
        sys.stdout.write(obj)
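A hedged usage sketch for store_packet, assuming the function above and the potiron helpers it relies on are importable; with rootdir=None the JSON goes straight to stdout:

import json

obj = json.dumps([{'sensorname': 'honeypot-1'}])  # illustrative payload
store_packet(None, 'capture.pcap', obj)           # writes the JSON string to stdout
# store_packet('/tmp/out', 'capture.pcap', obj)   # would derive a path via potiron.get_file_struct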
Example #5
 def handle_docs(self, docs):
     i = 0
     newdocs = []
     for doc in docs:
         i = i + 1
         if self.check_mandatory_fields(doc):
             doc = self.annoate_doc(doc)
         else:
             infomsg("Document number {} cannot be annotated due to missing mandatory fields".format(i))
         # If the document is not complete or could not be annotated it should be
         # left intact
         newdocs.append(doc)
     return newdocs
Example #7
def process_file(rootdir, filename, fieldfilter, b_redis, disable_json, ck):
    if disable_json:
        fn = os.path.basename(filename)
        if red.sismember("FILES", fn):
            sys.stderr.write('[INFO] Filename ' + fn + ' was already imported ... skip ...\n')
            sys.exit(0)
        # FIXME Users have to be careful with file extensions so as not to process data from capture files
        # FIXME (potiron-json-tshark module) and then the same sample again from json files (potiron_redis module)
        
        # List of fields that are included in the json documents that should not be ranked
        # FIXME Put this as an argument to the program as this list depends on the documents that are introduced
        non_index = ['', 'filename', 'sensorname', 'timestamp', 'packet_id']
    
    # If tshark is not installed, exit and raise the error
    if not potiron.check_program("tshark"):
        raise OSError("The program tshark is not installed")
    # FIXME Put in config file
    
    tshark_fields = potiron.tshark_fields
    cmd = "tshark -n -q -Tfields "
    if fieldfilter:
        if 'frame.time_epoch' not in fieldfilter:
            fieldfilter.insert(0, 'frame.time_epoch')
        if 'ip.proto' not in fieldfilter:
            fieldfilter.insert(1, 'ip.proto')
        for p in fieldfilter:
            cmd += "-e {} ".format(p)
    else:
        for f in tshark_fields:
            cmd += "-e {} ".format(f)
    cmd += "-E header=n -E separator=/s -E occurrence=f -Y '{}' -r {} -o tcp.relative_sequence_numbers:FALSE".format(bpf, filename)
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    
    # Name of the honeypot
    sensorname = potiron.derive_sensor_name(filename)
    
    json_fields = potiron.json_fields
    special_fields = {'length': -1, 'ipttl': -1, 'iptos': 0, 'tcpseq': -1, 'tcpack': -1, 'icmpcode': 255, 'icmptype': 255}
    
    if disable_json:
        # If the redis key 'BPF' already exists
        if red.keys('BPF'):
            # Check if the current bpf is the same as the one previously used
            if not red.sismember('BPF', bpf):
                bpf_string = str(red.smembers('BPF'))
                sys.stderr.write('[INFO] BPF for the current data is not the same as the one used for the data already stored here: {}\n'.format(bpf_string[3:-2]))
                sys.exit(0)
        # Otherwise, add the bpf to the key 'BPF'
        else:
            red.sadd('BPF', bpf)
    
        # If combined keys are used
        if ck:
            # If the redis key 'CK' already exists ...
            if red.keys('CK'):
                # ... BUT contains 'NO', then combined keys were not used in the data already stored in redis
                if red.sismember('CK', 'NO'):
                    sys.stderr.write('[INFO] Combined keys are not used in this redis dataset.\n')
                    sys.exit(0)
            # If the redis key 'CK' does not exist ...
            else:
                red.sadd('CK', 'YES')
        # If combined keys are not used, the key 'CK' should exist anyway, with the value 'NO'
        else:
            # If the redis key 'CK' already exists ...
            if red.keys('CK'):
                # ... BUT contains 'YES', then combined keys were used in the data already stored in redis
                if red.sismember('CK', 'YES'):
                    sys.stderr.write('[INFO] Combined keys are used in this redis dataset.\n')
                    sys.exit(0)
            # Otherwise, add it
            else:
                red.sadd('CK', 'NO')
    
        red.sadd("FILES", fn)
        
        potiron_path = os.path.dirname(os.path.realpath(__file__))[:-3]
        protocols_path = "{}doc/protocols".format(potiron_path)
        protocols = potiron.define_protocols(protocols_path)
        
        lastday = None
        prot = []
        for line in proc.stdout.readlines():
            line = line[:-1].decode()
            packet = {}
            tab_line = line.split(' ')
            for i in range(len(tab_line)):
                if fieldfilter:
                    valname = json_fields[tshark_fields.index(fieldfilter[i])]
                else:
                    valname = json_fields[i]
                if valname in special_fields:
                    v = special_fields[valname]
                    try:
                        v = int(tab_line[i])
                    except ValueError:
                        pass
                    packet[valname] = v
                else:
                    packet[valname] = tab_line[i]
            fill_packet(packet, disable_json)
            timestamp = packet['timestamp']
            if ck:
                protocol = protocols[str(packet['protocol'])]
                rKey = "{}:{}:{}".format(sensorname, protocol, timestamp)
                if protocol not in prot:
                    prot.append(protocol)
            else:
                rKey = "{}:{}".format(sensorname, timestamp)
            p = red.pipeline()
            if timestamp != lastday:
                p.sadd("DAYS", timestamp)
                lastday = timestamp
            for f in packet:
                if f not in non_index:
                    feature = packet[f]
                    redisKey = "{}:{}".format(rKey, f)
                    p.sadd("FIELDS", f)
                    p.zincrby(redisKey, feature, 1)
            p.execute()
        if ck:
            for pr in prot:
                red.sadd("PROTOCOLS", pr)
        potiron.infomsg('Data from {} stored into redis'.format(filename))
        
    else:
        allpackets = []
        # Describe the source
        allpackets.append({"type": potiron.TYPE_SOURCE, "sensorname": sensorname,
                           "filename": os.path.basename(filename), "bpf": bpf})
        # Each packet has an incremental numeric id
        # A packet is identified by its sensorname, filename and packet id for
        # further aggregation with metadata.
        # Assumption: each program processes the pcap file the same way?
        packet_id = 0
        
        for line in proc.stdout.readlines():
            packet_id = packet_id + 1
            line = line[:-1].decode()
            packet = {}
            tab_line = line.split(' ')
            for i in range(len(tab_line)):
                if fieldfilter:
                    valname = json_fields[tshark_fields.index(fieldfilter[i])]
                else:
                    valname = json_fields[i]
                if valname in special_fields:
                    v = special_fields[valname]
                    try:
                        v = int(tab_line[i])
                    except ValueError:
                        pass
                    packet[valname] = v
                else:
                    packet[valname] = tab_line[i]
            fill_packet(packet, disable_json)
            packet['packet_id'] = packet_id
            packet['type'] = potiron.TYPE_PACKET
            packet['state'] = potiron.STATE_NOT_ANNOTATE
            # FIXME might consume a lot of memory
            allpackets.append(packet)
    
        # FIXME Implement polling because wait can last forever
        proc.wait()
    
        if proc.returncode != 0:
            errmsg = b"".join(proc.stderr.readlines())
            raise OSError("tshark failed. Return code {}. {}".format(proc.returncode, errmsg))
        # Write and save the json file
        jsonfilename = potiron.store_packet(rootdir, filename, json.dumps(allpackets))
        if b_redis:
            # If redis option, store data into redis
            potiron_redis.process_storage(jsonfilename, red, ck)
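A short, self-contained sketch of just the tshark command construction used in process_file; the bpf value and the field filter below are assumptions standing in for module-level values in potiron:

fieldfilter = ['ip.src', 'ip.dst']             # illustrative field filter
if 'frame.time_epoch' not in fieldfilter:
    fieldfilter.insert(0, 'frame.time_epoch')  # timestamp always comes first
if 'ip.proto' not in fieldfilter:
    fieldfilter.insert(1, 'ip.proto')          # protocol always comes second
bpf = 'tcp'                                    # assumed filter expression
cmd = "tshark -n -q -Tfields "
for p in fieldfilter:
    cmd += "-e {} ".format(p)
cmd += "-E header=n -E separator=/s -E occurrence=f -Y '{}' -r {}".format(bpf, 'capture.pcap')
print(cmd)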
Example #8
parser = argparse.ArgumentParser(description='Import json documents into redis time series')
parser.add_argument('-i', '--input', type=str, nargs=1, help='Filename of a json document that should be imported.')
parser.add_argument('-u', '--unix', type=str, nargs=1, help='Unix socket to connect to redis-server.')
parser.add_argument('--reverse', action='store_false', help='Create global reverse dictionaries')
args = parser.parse_args()

if args.unix is None:
    sys.stderr.write('A unix socket must be specified\n')
    sys.exit(1)
usocket = args.unix[0]

red = redis.Redis(unix_socket_path=usocket)

if not args.reverse:
    potiron.create_reverse_global_dicts(red)
    potiron.infomsg("Created global reverse annotation dictionaries")
    sys.exit(0)

if args.input is None:
    sys.stderr.write('A filename must be specified\n')
    sys.exit(1)
filename = args.input[0]

# Check if file was already imported
fn = os.path.basename(filename)
if red.sismember("FILES", fn):
    sys.stderr.write('[INFO] Filename ' + fn + ' was already imported ... skip ...\n')
    sys.exit(0)
red.sadd("FILES", fn)

f = open(filename, 'r')
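Note the inverted flag logic above: action='store_false' makes args.reverse default to True, so the "if not args.reverse:" branch fires exactly when --reverse is passed. A minimal demonstration:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--reverse', action='store_false')
print(parser.parse_args([]).reverse)             # True  (flag absent)
print(parser.parse_args(['--reverse']).reverse)  # False (flag present), triggers the reverse-dict branch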
Example #9
    else:
        filename = args.read[0]

    fn = get_file_struct(args.directory[0], filename)
    t = fn.split('/')
    d = "/".join(t[0:-1])
    # When processing in parallel, the directory could have been created
    # between the directory test and makedirs
    try:
        if not os.path.exists(d):
            os.makedirs(d)
    except OSError as e:
        if e.errno != 17:  # 17 == errno.EEXIST
            # Something else happened, propagate the exception
            raise
        potiron.infomsg("Someone else created the directory")
    fd = open(fn, "w")
newdocs = []
for doc in docs:
    # If the mandatory fields are not present the document should be left intact
    mod_doc = doc
    if 'type' in doc:
        if doc['type'] == potiron.TYPE_PACKET:
            # Do all the annotations
            # if obj.check_mandatory_fields(doc):
            #    mod_doc = obj.annoate_doc(doc)
            if pdns.check_mandatory_fields(doc):
                mod_doc = pdns.annoate_doc(mod_doc)
            if asn.check_mandatory_fields(mod_doc):
                mod_doc = asn.annoate_doc(mod_doc)
            newdocs.append(mod_doc)
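A self-contained sketch of the chaining pattern above: each annotator runs only when its own mandatory fields are present, and the output of one feeds the next. The stub class is illustrative (annoate_doc keeps the potiron spelling):

class StubAnnotator:
    def __init__(self, mfields, tag):
        self.mfields, self.tag = mfields, tag

    def check_mandatory_fields(self, doc):
        return all(f in doc for f in self.mfields)

    def annoate_doc(self, doc):
        doc[self.tag] = True  # stand-in for the PDNS/ASN enrichment
        return doc

mod_doc = {'ipsrc': '1.2.3.4'}
for annotator in (StubAnnotator(['ipdst'], 'pdns'), StubAnnotator(['ipsrc'], 'asn')):
    if annotator.check_mandatory_fields(mod_doc):
        mod_doc = annotator.annoate_doc(mod_doc)
print(mod_doc)  # -> {'ipsrc': '1.2.3.4', 'asn': True}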
Example #10
if args.outputdir is None: # Destination directory for the output file
    outputdir = "./out/"
else:
    outputdir = args.outputdir[0]
    if not outputdir.endswith('/'):
        outputdir = "{}/".format(outputdir)
if not os.path.exists(outputdir):
    os.makedirs(outputdir)

with_protocols = args.without_protocols # Defines if scores should be displayed for protocols together or for each protocol
if red.sismember("CK", "YES"): # Defines if combined keys are used in the current redis database
    ck = True
else:
    if with_protocols: # If combined keys are not used, it is not possible to display scores for each protocol,
        with_protocols = False # and they will be displayed for all protocols together
        potiron.infomsg('You did not choose the parameter "without_protocols" but your redis database does not '
                        'currently support combined keys. Continuing without specifying each protocol...')
    ck = False
 
links = args.links # Defines if bokeh plots should be processed for each value in bubbles

gen = args.generate # Defines if charts should be auto-generated from datafiles

potiron_path = potiron.potiron_path # Project directory
current_path = potiron.current_path # Module directory

if args.logo is None: # Define path of circl logo, based on potiron path
    logofile = "{}doc/circl.png".format(potiron_path)
else:
    logofile = args.logo[0]

if links:
Example #12
def process_storage(filename, red, ck):
    # Check if file was already imported
    fn = os.path.basename(filename)
    if red.sismember("FILES", fn):
        sys.stderr.write('[INFO] Filename ' + fn +
                         ' was already imported ... skip ...\n')
        sys.exit(0)
    # FIXME Users have to be careful with file extensions so as not to process data from capture files
    # FIXME (potiron-json-tshark module) and then the same sample again from json files (potiron_redis module)

    f = open(filename, 'r')
    doc = json.load(f)
    f.close()

    # Record local dictionaries
    local_dicts = dict()
    rev_dics = dict()

    # Get the sensorname; assume one document per sensor name
    item = doc[0]
    bpf = item['bpf']
    # If the redis key 'BPF' already exists
    if red.keys('BPF'):
        # Check if the current bpf is the same as the one previously used
        if not red.sismember('BPF', bpf):
            bpf_string = str(red.smembers('BPF'))
            sys.stderr.write(
                '[INFO] BPF for the current data is not the same as the one used for the data already stored here: {}\n'
                .format(bpf_string[3:-2]))
            sys.exit(0)
    # Otherwise, add the bpf to the key 'BPF'
    else:
        red.sadd('BPF', bpf)

    # If combined keys are used
    if ck:
        # If the redis key 'CK' already exists ...
        if red.keys('CK'):
            # ... BUT contains 'NO', then combined keys were not used in the data already stored in redis
            if red.sismember('CK', 'NO'):
                sys.stderr.write(
                    '[INFO] Combined keys are not used in this redis dataset.\n')
                sys.exit(0)
        # If the redis key 'CK' does not exist ...
        else:
            red.sadd('CK', 'YES')
    # If combined keys are not used, the key 'CK' should exist anyway, with the value 'NO'
    else:
        # If the redis key 'CK' already exists ...
        if red.keys('CK'):
            # ... BUT contains 'YES', then combined keys were used in the data already stored in redis
            if red.sismember('CK', 'YES'):
                sys.stderr.write(
                    '[INFO] Combined keys are used in this redis dataset.\n')
                sys.exit(0)
        # Otherwise, add it
        else:
            red.sadd('CK', 'NO')

    red.sadd("FILES", fn)

    # Project directory
    potiron_path = os.path.dirname(os.path.realpath(__file__))[:-3]
    protocols_path = "{}doc/protocols".format(potiron_path)
    protocols = potiron.define_protocols(protocols_path)

    # FIXME documents must include at least a sensorname and a timestamp
    # FIXME check timestamp format
    sensorname = potiron.get_sensor_name(doc)
    lastday = None
    revcreated = False
    prot = []
    for di in doc:
        if di["type"] > potiron.DICT_LOWER_BOUNDARY:
            local_dicts[di["type"]] = di
        if di["type"] == potiron.TYPE_PACKET:
            if not revcreated:
                # FIXME if a json file was annotated twice the resulting json file
                # includes two dictionaries of the same type
                # Only the last one is considered
                rev_dics = potiron.create_reverse_local_dicts(local_dicts)
                revcreated = True
            key = sensorname
            if ck:
                protocol = protocols[str(di['protocol'])]
                key += ":{}".format(protocol)
                if protocol not in prot:
                    prot.append(protocol)
            timestamp = di['timestamp']
            (day, time) = timestamp.split(' ')
            day = day.replace('-', '')
            p = red.pipeline()
            if day != lastday:
                p.sadd("DAYS", day)
                lastday = day
            for k in list(di.keys()):
                if k not in non_index:
                    feature = di[k]
                    if k.startswith(potiron.ANNOTATION_PREFIX):
                        feature = potiron.translate_dictionaries(
                            rev_dics, red, k, di[k])
                        # Create the links between annotations and their objects
                        idn = potiron.get_dictionary_id(k)
                        obj = potiron.get_annotation_origin(di, k)
                        if obj is not None and idn is not None:
                            kn = "AR_{}_{}".format(idn, obj)
                            p.set(kn, feature)
                    keyname = "{}:{}:{}".format(key, day, k)
                    p.sadd("FIELDS", k)
                    p.zincrby(keyname, feature, 1)
            # FIXME the pipe might be too big, periodically flush it
            p.execute()
    if ck:
        for pr in prot:
            red.sadd("PROTOCOLS", pr)
    potiron.infomsg('Data from {} stored into redis'.format(filename))
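A small sketch of the redis key layout this loop produces, with and without combined keys; all of the values below are illustrative:

sensorname, protocol, day, field = 'honeypot-1', 'tcp', '20170101', 'ipsrc'
print("{}:{}:{}:{}".format(sensorname, protocol, day, field))  # ck=True:  honeypot-1:tcp:20170101:ipsrc
print("{}:{}:{}".format(sensorname, day, field))               # ck=False: honeypot-1:20170101:ipsrc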
Example #13
    parser = argparse.ArgumentParser(description='Import data from json documents into redis.')
    parser.add_argument('-i', '--input', type=str, nargs=1, help='Filename of a json document that should be imported.')
    parser.add_argument('-u', '--unix', type=str, nargs=1, help='Unix socket to connect to redis-server.')
    parser.add_argument('--reverse', action='store_false', help='Create global reverse dictionaries')
    args = parser.parse_args()
    if args.unix is None:
        sys.stderr.write('A unix socket must be specified\n')
        sys.exit(1)

    usocket = args.unix[0]

    red = redis.Redis(unix_socket_path=usocket)
    
    if not args.reverse:
        potiron.create_reverse_global_dicts(red)
        potiron.infomsg("Created global reverse annotation dictionaries")
        sys.exit(0)

    if args.input is None:
        sys.stderr.write('A filename must be specified\n')
        sys.exit(1)
    filename = args.input[0]
    
    # Check if file was already imported
    fn = os.path.basename(filename)
    if red.sismember("FILES", fn):
        sys.stderr.write('[INFO] Filename ' + fn + ' was already imported ... skip ...\n')
        sys.exit(0)
    red.sadd("FILES", fn)
    
    f = open(filename, 'r')