Beispiel #1
0
class Log(object):
    """Write free-form log messages to the HBase 'log' table.

    Every message becomes one row. The rowkey is a 2-byte big-endian salt
    followed by the current epoch time in seconds as a 4-byte big-endian
    integer. The row stores the originating host in 'b:hostname' and the
    message text in 'b:msg'.
    """

    def __init__ (self, connectionPool, myhost = None, debug = 0):
        """Set up the logger.

        connectionPool -- pool object whose connection() context manager
                          yields an HBase connection
        myhost         -- hostname recorded with each message; defaults to
                          socket.gethostname()
        debug          -- passed through to Registry and Salt
        """
        self.debug = debug
        self.pool = connectionPool

        if myhost is not None:
            self.myhost = myhost
        else:
            self.myhost = socket.gethostname()

        self.registry = Registry(connectionPool, debug)

        # Fall back to a single server when the registry has no entry.
        self.num_servers = self.registry.get('hadoop.num_servers')
        if self.num_servers is None:
            self.num_servers = 1

        self.salt = Salt(self.num_servers, self.debug)

    def L(self, msg):
        """Store msg as a new row in the 'log' table.

        Logging is best effort: any failure is reported on stdout and then
        swallowed so that a logging problem never aborts the caller.
        """
        # Bound before the try block so the failure report below can always
        # refer to it, even when building the row itself is what failed.
        rowdict = None
        try:
            rowdict = {
                'b:hostname': str(self.myhost),
                'b:msg': str(msg),
            }
            # Advance the salt exactly once per message.
            rowkey = struct.pack(">HI", self.salt.next(), int(time.time()))
            with self.pool.connection() as connection:
                connection.table('log').put(rowkey, rowdict)
        except Exception as e:
            print("failed to put record to 'log' table:  %s (%s)" % (rowdict, e))

            
Beispiel #2
0
class Indexer(object):
    """Build secondary-index rows for IODEF documents in HBase.

    Index rows go to the table ``index_<index_type>`` (created on demand).
    For every index row a back-reference cell is also written to the
    ``cif_objs`` row of the indexed document.

    Rowkeys are ``salt (2 bytes, big-endian) + type code (1 byte) + packed
    value``. The type code is looked up in the primary index map and the
    packed value is produced by the packer class of the same name loaded
    from ``DB.PrimaryIndex.PackUnpack``.
    """

    def __init__ (self, connectionPool, index_type, num_servers = 1, table_batch_size = 1000, debug = 0):
        """Load the packers and open batch writers for the index tables.

        connectionPool   -- pool whose connection() context manager yields
                            an HBase connection
        index_type       -- suffix of the index table name
        num_servers      -- passed to Salt
        table_batch_size -- currently ignored, see the note below
        debug            -- see L(); also passed to PrimaryIndex and Salt
        """
        self.debug = debug
        print("indexer connect")
        self.pool = connectionPool
        print("indexer load primary index map")
        self.primary_index = PrimaryIndex(connectionPool, debug)
        print("index init log")
        self.log = Log(connectionPool)

        self.num_servers = num_servers
        self.packers = {}

        package = 'DB.PrimaryIndex.PackUnpack'
        for packer in self.primary_index.names():
            module_name = package + "." + packer
            try:
                self.L("loading packer " + module_name)
                __import__(module_name)
                # The packer class carries the same name as its module.
                self.packers[packer] = getattr(sys.modules[module_name], packer)
            except (ImportError, AttributeError):
                self.L("warning: failed to load " + packer)

        with self.pool.connection() as dbh:
            t = dbh.tables()

            self.table_name = "index_" + index_type

            if self.table_name not in t:
                self.L("index table %s doesnt exist, creating it" % (self.table_name))
                dbh.create_table(self.table_name, {'b': {'COMPRESSION': 'SNAPPY'}})

            # NOTE(review): the table_batch_size argument is deliberately
            # left overridden. Nothing in this class flushes the batches
            # explicitly, so honouring a large caller-supplied value could
            # leave rows unwritten. Confirm before removing the override.
            table_batch_size = 5

            # NOTE(review): both batch objects are used after this
            # connection goes back to the pool - confirm that is intended.
            self.table = dbh.table(self.table_name).batch(batch_size=table_batch_size)
            self.co_table = dbh.table("cif_objs").batch(batch_size=table_batch_size)

            self.reset()
            self.md5 = hashlib.md5()
            self.salt = Salt(self.num_servers, self.debug)

    def L(self, msg):
        """Emit msg prefixed with '<module>.<calling function>: '.

        The message is printed unless self.debug is None, in which case it
        is written through the Log instance instead.
        """
        caller = ".".join([str(__name__), sys._getframe(1).f_code.co_name])
        # NOTE(review): debug defaults to 0, which is not None, so the
        # default configuration prints rather than logging to HBase.
        if self.debug is not None:
            print(caller + ": " + msg)
        else:
            self.log.L(caller + ": " + msg)

    def _pack_rowkey(self, salt, type_code, packer_name, value):
        """Return salt + type code + value packed by the named packer."""
        return struct.pack(">HB", salt, type_code) + self.packers[packer_name].pack(value)

    # Each pack_rowkey_* method uses the salt handed in by the caller.
    # Callers already pass self.salt.next(), so drawing another value here
    # would consume two salts per rowkey.

    def pack_rowkey_ipv4(self, salt, addr):
        """Return the rowkey for an IPv4 address or network."""
        return self._pack_rowkey(salt, self.TYPE_IPV4(), 'ipv4', addr)

    def pack_rowkey_ipv6(self, salt, addr):
        """Return the rowkey for an IPv6 address or network."""
        return self._pack_rowkey(salt, self.TYPE_IPV6(), 'ipv6', addr)

    def pack_rowkey_fqdn(self, salt, fqdn):
        """Return the rowkey for a fully qualified domain name."""
        return self._pack_rowkey(salt, self.TYPE_FQDN(), 'domain', fqdn)

    def pack_rowkey_url(self, salt, url):
        """Return the rowkey for a URL."""
        return self._pack_rowkey(salt, self.TYPE_URL(), 'url', url)

    def pack_rowkey_email(self, salt, email):
        """Return the rowkey for an e-mail address."""
        return self._pack_rowkey(salt, self.TYPE_EMAIL(), 'email', email)

    def pack_rowkey_search(self, salt, search):
        """Return the rowkey for a search term."""
        return self._pack_rowkey(salt, self.TYPE_SEARCH(), 'search', search)

    def pack_rowkey_malware(self, salt, malware_hash):
        """Return the rowkey for a malware hash."""
        return self._pack_rowkey(salt, self.TYPE_MALWARE(), 'malware', malware_hash)

    def pack_rowkey_asn(self, salt, asn):
        """Return the rowkey for an autonomous system number."""
        return self._pack_rowkey(salt, self.TYPE_ASN(), 'asn', asn)

    def reset(self):
        """Clear the per-record state used by commit()."""
        self.empty = True
        self.addr = None
        self.rowkey = None
        self.confidence = None
        self.addr_type = None
        self.iodef_rowkey = None

    def commit(self):
        """
        Commit the current record to the index_* table and add the
        back-reference cell 'cf:{self.table_name}_{self.rowkey}' = "1" to
        the cif_objs row identified by self.iodef_rowkey.
        Purger will remove the reference when this feed record is purged.

        With hbase, you can put an addt'l cell value into a table/row without having to
        merge. Existing cells won't be affected.

        Failures are reported and swallowed. The per-record state is reset
        in every case.
        """
        try:
            rowdict = {
                'b:confidence': str(self.confidence),
                'b:addr_type': str(self.addr_type),
                'b:iodef_rowkey': str(self.iodef_rowkey),
            }

            self.table.put(self.rowkey, rowdict)

            prefix = "cf:" + str(self.table_name) + "_"
            # No-op on Python 2, where str already is a byte string.
            if not isinstance(prefix, bytes):
                prefix = prefix.encode("utf-8")
            self.co_table.put(self.iodef_rowkey, {prefix + self.rowkey: "1"})

        except Exception as e:
            self.L("failed to put record to %s table: " % self.table_name)
            print(e)

        self.reset()

    def extract(self, iodef_rowkey, iodef):
        """
        Index every indicator found in the first Incident of iodef.

        iodef_rowkey -- rowkey of the document in cif_objs
        iodef        -- IODEF protobuf document

        FIX atm this is iodef specific. ideally we will be able to index other document types
        """
        self.reset()

        self.iodef_rowkey = iodef_rowkey

        # Use a fresh digest per document. A shared md5 object would hash
        # the concatenation of every document seen so far.
        self.md5 = hashlib.md5()
        self.md5.update(iodef.SerializeToString())
        self.hash = self.md5.digest()

        ii = iodef.Incident[0]

        confidence = ii.Assessment[0].Confidence.content
        self.confidence = confidence
        self.severity = ii.Assessment[0].Impact[0].severity

        def commit_key(rowkey, addr_type=None):
            # commit() ends with reset(), which wipes the per-document
            # fields, so restore them before every single commit.
            self.iodef_rowkey = iodef_rowkey
            self.confidence = confidence
            self.addr_type = addr_type
            self.rowkey = rowkey
            self.commit()

        # for malware hashes, they appear at the top level for now
        # iodef.incident[].additionaldata.meaning = "malware hash"
        # iodef.incident[].additionaldata.content = "[the hash]"

        for ed in getattr(ii, 'AdditionalData', ()):
            if ed.meaning == "malware hash":
                self.L("\tIndexing for malware hash")
                commit_key(self.pack_rowkey_malware(self.salt.next(), ed.content))

        # addresses and networks are in the EventData[].Flow[].System[] tree

        for ed in getattr(ii, 'EventData', ()):
            for fl in ed.Flow:
                for sy in fl.System:
                    for i in sy.Node.Address:
                        category = i.category
                        address_type = RFC5070_IODEF_v1_pb2.AddressType

                        if category in (address_type.Address_category_ipv4_addr,
                                        address_type.Address_category_ipv4_net):
                            rowkey = self.pack_rowkey_ipv4(self.salt.next(), i.content)
                            self.L("Indexing for ipv4")
                            commit_key(rowkey, category)

                        # ipv6 addresses and networks

                        elif category in (address_type.Address_category_ipv6_addr,
                                          address_type.Address_category_ipv6_net):
                            rowkey = self.pack_rowkey_ipv6(self.salt.next(), i.content)
                            self.L("Indexing for ipv6")
                            commit_key(rowkey, category)

                        elif category == address_type.Address_category_asn:
                            rowkey = self.pack_rowkey_asn(self.salt.next(), i.content)
                            self.L("Indexing for ASN")
                            commit_key(rowkey, category)

                        elif category == address_type.Address_category_ext_value:
                            if i.ext_category == "fqdn":
                                self.fqdn = i.content
                                rowkey = self.pack_rowkey_fqdn(self.salt.next(), self.fqdn)
                                self.L("Indexing for FQDDN")
                                commit_key(rowkey, category)

                            elif i.ext_category == "url":
                                rowkey = self.pack_rowkey_url(self.salt.next(), i.content)
                                self.L("Indexing for URL")
                                commit_key(rowkey, category)

                            else:
                                # NOTE(review): enum() is indexed here but
                                # its result is packed directly by the
                                # TYPE_* helpers - confirm its return type.
                                e = self.primary_index.enum(i.ext_category)
                                if len(e) > 0:
                                    rowkey = struct.pack(">HB", self.salt.next(), e[0]) + self.packers[i.ext_category].pack(i.content)
                                    commit_key(rowkey, category)
                                else:
                                    self.L("Unknown primary index given " + i.ext_category)

                        else:
                            print("unhandled category:  %s" % (i,))

    def TYPE_IPV4(self):
        """Return the primary index code registered for 'ipv4'."""
        return self.primary_index.enum('ipv4')

    def TYPE_IPV6(self):
        """Return the primary index code registered for 'ipv6'."""
        return self.primary_index.enum('ipv6')

    def TYPE_FQDN(self):
        """Return the primary index code registered for 'domain'."""
        return self.primary_index.enum('domain')

    def TYPE_URL(self):
        """Return the primary index code registered for 'url'."""
        return self.primary_index.enum('url')

    def TYPE_EMAIL(self):
        """Return the primary index code registered for 'email'."""
        return self.primary_index.enum('email')

    def TYPE_SEARCH(self):
        """Return the primary index code registered for 'search'."""
        return self.primary_index.enum('search')

    def TYPE_MALWARE(self):
        """Return the primary index code registered for 'malware'."""
        return self.primary_index.enum('malware')

    def TYPE_ASN(self):
        """Return the primary index code registered for 'asn'."""
        return self.primary_index.enum('asn')
Beispiel #3
0
     print "Initializing Exploder"
     exploder = Exploder.Exploder(connectionPool, thread_tracker, False)
     
     print "Initializing Purger"
     purger = Purger.Purger(connectionPool, num_servers, thread_tracker, True)
     
     while True:
         msg = msg_pb2.MessageType()
         msg.ParseFromString(subscriber.recv())
 
         
         if apikeys.is_valid(msg.apikey):
             if msg.type == msg_pb2.MessageType.SUBMISSION and len(msg.submissionRequest) > 0:
                 #print "Got a SUBMISSION. Saving."
                 for i in range(0, len(msg.submissionRequest)):
                     writeToDb(cif_objs, cif_idl, msg.submissionRequest[i], salt.next())
             
             # ignore QUERY logic at present, see controlmessagehandler, above, instead
             # we arent processing QUERYs recvd via this PUB/SUB connection 
             elif msg.type == msg_pb2.MessageType.QUERY and len(msg.queryRequest) > 0:
                 print "Got an unexected QUERY on PUB/SUB interface"
             else:
                 print "Wrong or empty message recvd on subscriber port. Expected submission or query (" + \
                     str(msg_pb2.MessageType.SUBMISSION) + " or " +                               \
                     str(msg_pb2.MessageType.QUERY) + ")  got " +                                 \
                     str(msg.type) + " number of parts (should be > 0) SR:" +                     \
                     str(len(msg.submissionRequest)) + " / QR:" + str(len(msg.queryRequest)) 
         else:
             print "message has an invalid apikey"
             
     cf.unregister()
Beispiel #4
0
class Botnet(object):
    """
    tablename: infrastructure_botnet
    key: salt + address or salt + fqdn
         address is left padded with nulls into a 16 byte field
         fqdn is simply appended
    columns:
        b:prefix, asn, asn_desc, rir, cc, confidence, addr_type, port, ip_proto
    """
    def __init__ (self, connection, num_servers = 1, debug = 0):
        self.debug = debug
        self.dbh = connection
        self.num_servers = num_servers
        
        t = self.dbh.tables()
        
        if not "infrastructure_botnet" in t:
            raise Exception("missing infrastructure_botnet table")

        self.table = connection.table('infrastructure_botnet').batch(batch_size=5)
        
        self.reset()
        self.md5 = hashlib.md5()
        self.salt = Salt(self.num_servers, self.debug)
        
    def L(self, msg):
        caller =  ".".join([str(__name__), sys._getframe(1).f_code.co_name])
        if self.debug != None:
            print caller + ": " + msg
        else:
            syslog.syslog(caller + ": " + msg)
            
    def pack_rowkey_ipv4(self, salt, addr):
        """
        rowkey: salt (2 bytes) + keytype(0x0=ipv4) + packedaddr(4 bytes)
        """
        if re.match(r'^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$', addr) != None:
            a = addr.split(".")
            b = int(a[0])<<24 | int(a[1])<<16 | int(a[2])<<8 | int(a[3])
            print "making rowkey for ", self.addr, " int=", b
            return struct.pack(">HBI", self.salt.next(), 0x0, b) 
        else:
            raise Exception("Not an ipv4 addr: " + addr)
        
    def pack_rowkey_ipv6(self, salt, addr):
        """
        rowkey: salt (2 bytes) + keytype(0x1=ipv6) + packedaddr(16 bytes)
        """
        return struct.pack(">HBIIII", self.salt.next(), 0x1, self.addr) 
    
    def pack_rowkey_fqdn(self, salt, fqdn):
        """
        rowkey: salt (2 bytes) + keytype(0x2=fqdn) + string
        """
        return struct.pack(">HBs", self.salt.next(), 0x2, fqdn) 
    
    def reset(self):
        self.empty = True
        self.prefix = None
        self.asn = None
        self.asn_desc = None
        self.rir = None
        self.cc = None
        self.addr = None
        self.rowkey = None
        self.confidence = None
        self.addr_type = None
        self.port = None
        self.proto = None
        self.hash = None
    
    def commit(self):
        if self.empty == False:
            self.L("committing")
            try:
                self.table.put(self.rowkey, 
                               {
                                    'b:prefix': str(self.prefix),
                                    'b:asn': str(self.asn),
                                    'b:asn_desc': str(self.asn_desc),
                                    'b:rir': str(self.rir),
                                    'b:cc': str(self.cc),
                                    'b:confidence': str(self.confidence),
                                    'b:addr_type': str(self.addr_type),
                                    'b:port': str(self.port),
                                    'b:proto': str(self.proto)
                                })
            except Exception as e:
                self.L("failed to put record to infra_botnet table: ")
                print e
        else:
            self.L("nothing to commit")
            
    def extract(self, iodef):
        self.reset()
        
        self.md5.update(iodef.SerializeToString())
        self.hash = self.md5.digest()
    
        ii = iodef.Incident[0]
        
        self.confidence = ii.Assessment[0].Confidence.content
        self.severity = ii.Assessment[0].Impact[0].severity
        self.addr_type = ii.EventData[0].Flow[0].System[0].Node.Address[0].category
        
        # ipv4 addresses and networks
        
        if self.addr_type == RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv4_addr or self.addr_type == RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv4_net:
            self.addr = ii.EventData[0].Flow[0].System[0].Node.Address[0].content
            self.rowkey = self.pack_rowkey_ipv4(self.salt.next(), self.addr)

            if 'Port' in ii.EventData[0].Flow[0].System[0].Service:
                self.port = ii.EventData[0].Flow[0].System[0].Service.Port
            if 'ip_proto' in ii.EventData[0].Flow[0].System[0].Service:
                self.proto = ii.EventData[0].Flow[0].System[0].Service.ip_protocol
                
            for i in ii.EventData[0].Flow[0].System[0].AdditionalData:
                    if i.meaning == 'prefix':
                        self.prefix = i.content
                    elif i.meaning == 'asn':
                        self.asn = i.content
                    elif i.meaning == 'asn_desc':
                        self.asn_desc = i.content
                    elif i.meaning == 'rir':
                        self.rir = i.content
                    elif i.meaning == 'cc':
                        self.cc = i.content
        
            self.empty = False
            
        # ipv6 addresses and networks
        
        elif self.addr_type == RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv6_addr or self.addr_type == RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv6_net:
            self.addr = ii.EventData[0].Flow[0].System[0].Node.Address[0].content
            self.rowkey = self.pack_rowkey_ipv6(self.salt.next(), self.addr)
            
            for i in ii.EventData[0].Flow[0].System[0].AdditionalData:
                    if i.meaning == 'prefix':
                        self.prefix = i.content
                    elif i.meaning == 'asn':
                        self.asn = i.content
                    elif i.meaning == 'asn_desc':
                        self.asn_desc = i.content
                    elif i.meaning == 'rir':
                        self.rir = i.content
                    elif i.meaning == 'cc':
                        self.cc = i.content
        
            self.empty = False