Example #1
0
class Log(object):
    """
    Write log messages to the HBase 'log' table.

    Each message becomes one row. The row key is a 2-byte salt followed by
    the current UNIX time as a 4-byte unsigned integer (both big-endian);
    the hostname and message text are stored in the 'b' column family.
    """

    def __init__(self, connectionPool, myhost=None, debug=0):
        """
        connectionPool -- pool whose connection() is used as a context manager
        myhost         -- hostname stored with every message; when None the
                          value of socket.gethostname() is used
        debug          -- debug value passed on to Registry and Salt
        """
        self.debug = debug
        self.pool = connectionPool

        if myhost is not None:
            self.myhost = myhost
        else:
            self.myhost = socket.gethostname()

        self.registry = Registry(connectionPool, debug)
        self.num_servers = self.registry.get('hadoop.num_servers')

        # Fall back to a single server when the registry has no value.
        if self.num_servers is None:
            self.num_servers = 1

        self.salt = Salt(self.num_servers, self.debug)

    def L(self, msg):
        """
        Store msg as a new row in the 'log' table.

        Logging is best effort: any failure is reported on stdout and
        swallowed so that a logging problem never propagates to the caller.
        """
        # Bound before the try so the handler can always report it, even
        # when the failure happens while the row key is being built.
        rowdict = None
        try:
            # One salt value per row (the salt is advanced exactly once).
            rowkey = struct.pack(">HI", self.salt.next(), int(time.time()))
            rowdict = {
                'b:hostname': str(self.myhost),
                'b:msg': str(msg),
            }
            with self.pool.connection() as connection:
                connection.table('log').put(rowkey, rowdict)
        except Exception as e:
            print("failed to put record to 'log' table: %s (%s)" % (rowdict, e))

            
Example #2
0
    def __init__ (self, connection, num_servers = 1, debug = 0):
        """
        Attach to the 'infrastructure_botnet' table on the given connection.

        connection  -- object providing tables() and table(name)
        num_servers -- handed to Salt together with debug
        debug       -- stored on the instance and handed to Salt

        Raises Exception when the connection does not list an
        'infrastructure_botnet' table.
        """
        self.dbh = connection
        self.num_servers = num_servers
        self.debug = debug

        # Refuse to continue when the backing table is absent.
        existing_tables = self.dbh.tables()
        if "infrastructure_botnet" not in existing_tables:
            raise Exception("missing infrastructure_botnet table")

        # All writes go through a batch object created with batch_size=5.
        botnet_table = connection.table('infrastructure_botnet')
        self.table = botnet_table.batch(batch_size=5)

        self.reset()
        self.md5 = hashlib.md5()
        self.salt = Salt(self.num_servers, self.debug)
Example #3
0
    def __init__ (self, connectionPool, myhost = None, debug = 0):
        """
        connectionPool -- connection pool; stored on the instance and passed
                          to Registry
        myhost         -- hostname to record; when None the value of
                          socket.gethostname() is used
        debug          -- debug value passed on to Registry and Salt
        """
        self.debug = debug
        self.pool = connectionPool

        # Identity tests: None is a singleton, and `is` cannot be fooled by
        # objects that override equality.
        if myhost is not None:
            self.myhost = myhost
        else:
            self.myhost = socket.gethostname()

        self.registry = Registry(connectionPool, debug)
        self.num_servers = self.registry.get('hadoop.num_servers')

        # Fall back to a single server when the registry has no value.
        if self.num_servers is None:
            self.num_servers = 1

        self.salt = Salt(self.num_servers, self.debug)
Example #4
0
 def __init__ (self, connectionPool, index_type, num_servers = 1, table_batch_size = 1000, debug = 0):
     """
     Load the packers for every primary index and open the index tables.

     connectionPool   -- pool whose connection() is used as a context manager
     index_type       -- suffix of the index table name ("index_" + index_type)
     num_servers      -- handed to Salt together with debug
     table_batch_size -- accepted for compatibility; see the NOTE below
     debug            -- stored on the instance, handed to PrimaryIndex and Salt

     The index table is created (column family 'b', SNAPPY compression)
     when the connection does not list it yet.
     """
     import importlib

     self.debug = debug
     print("indexer connect")
     self.pool = connectionPool
     print("indexer load primary index map")
     self.primary_index = PrimaryIndex(connectionPool, debug)
     print("index init log")
     self.log = Log(connectionPool)

     self.num_servers = num_servers
     self.packers = {}

     # Each primary index "x" is expected to live in module
     # DB.PrimaryIndex.PackUnpack.x and expose an attribute also named "x".
     package = 'DB.PrimaryIndex.PackUnpack'
     for packer in self.primary_index.names():
         module_name = package + "." + packer
         self.L("loading packer " + module_name)
         try:
             module = importlib.import_module(module_name)
             self.packers[packer] = getattr(module, packer)
         except (ImportError, AttributeError) as e:
             # A missing module or a module without the expected attribute
             # only disables that one packer.
             self.L("warning: failed to load " + packer + ": " + str(e))

     with self.pool.connection() as dbh:
         t = dbh.tables()

         self.table_name = "index_" + index_type

         if self.table_name not in t:
             self.L("index table %s doesnt exist, creating it" % (self.table_name))
             dbh.create_table(self.table_name, {'b': {'COMPRESSION': 'SNAPPY'}})

         # NOTE(review): the table_batch_size argument is overridden here, so
         # the caller's value is never used. Kept as-is because nothing in
         # this block flushes the batches explicitly; confirm before
         # honouring the parameter.
         table_batch_size = 5

         # NOTE(review): these batch objects are kept and used after the
         # connection leaves this with-block -- confirm the pool allows that.
         self.table = dbh.table(self.table_name).batch(batch_size=table_batch_size)
         self.co_table = dbh.table("cif_objs").batch(batch_size=table_batch_size)

         self.reset()
         self.md5 = hashlib.md5()
         self.salt = Salt(self.num_servers, self.debug)
Example #5
0
class Indexer(object):
    """
    Build index rows in an "index_<index_type>" HBase table.

    extract() walks one IODEF document and, for every indexable value it
    finds (malware hashes, IPv4/IPv6 addresses, ASNs, FQDNs, URLs and any
    other registered primary index), packs a row key and commits it.
    commit() writes the index row and a back-reference cell on the
    'cif_objs' row of the source document.
    """

    def __init__ (self, connectionPool, index_type, num_servers = 1, table_batch_size = 1000, debug = 0):
        """
        Load the packers for every primary index and open the index tables.

        connectionPool   -- pool whose connection() is used as a context manager
        index_type       -- suffix of the index table name ("index_" + index_type)
        num_servers      -- handed to Salt together with debug
        table_batch_size -- accepted for compatibility; see the NOTE below
        debug            -- stored on the instance, handed to PrimaryIndex and Salt

        The index table is created (column family 'b', SNAPPY compression)
        when the connection does not list it yet.
        """
        import importlib

        self.debug = debug
        print("indexer connect")
        self.pool = connectionPool
        print("indexer load primary index map")
        self.primary_index = PrimaryIndex(connectionPool, debug)
        print("index init log")
        self.log = Log(connectionPool)

        self.num_servers = num_servers
        self.packers = {}

        # Each primary index "x" is expected to live in module
        # DB.PrimaryIndex.PackUnpack.x and expose an attribute also named "x".
        package = 'DB.PrimaryIndex.PackUnpack'
        for packer in self.primary_index.names():
            module_name = package + "." + packer
            self.L("loading packer " + module_name)
            try:
                module = importlib.import_module(module_name)
                self.packers[packer] = getattr(module, packer)
            except (ImportError, AttributeError) as e:
                # A missing module or a module without the expected
                # attribute only disables that one packer.
                self.L("warning: failed to load " + packer + ": " + str(e))

        with self.pool.connection() as dbh:
            t = dbh.tables()

            self.table_name = "index_" + index_type

            if self.table_name not in t:
                self.L("index table %s doesnt exist, creating it" % (self.table_name))
                dbh.create_table(self.table_name, {'b': {'COMPRESSION': 'SNAPPY'}})

            # NOTE(review): the table_batch_size argument is overridden
            # here, so the caller's value is never used. Kept as-is because
            # nothing in this class flushes the batches explicitly; confirm
            # before honouring the parameter.
            table_batch_size = 5

            # NOTE(review): these batch objects are kept and used after the
            # connection leaves this with-block -- confirm the pool allows it.
            self.table = dbh.table(self.table_name).batch(batch_size=table_batch_size)
            self.co_table = dbh.table("cif_objs").batch(batch_size=table_batch_size)

            self.reset()
            self.md5 = hashlib.md5()
            self.salt = Salt(self.num_servers, self.debug)

    def L(self, msg):
        """Emit msg prefixed with "<module>.<calling function>"."""
        caller = ".".join([str(__name__), sys._getframe(1).f_code.co_name])
        # NOTE(review): debug defaults to 0, which is not None, so messages
        # are printed unless the caller passes debug=None explicitly.
        if self.debug is not None:
            print(caller + ": " + msg)
        else:
            self.log.L(caller + ": " + msg)

    def _pack_rowkey(self, salt, type_enum, packer_name, value):
        """Return salt (2 bytes) + index type (1 byte) + the packed value."""
        # Use the salt the caller supplies; only draw a new one when the
        # caller passes None, so the salt advances once per key.
        if salt is None:
            salt = self.salt.next()
        return struct.pack(">HB", salt, type_enum) + self.packers[packer_name].pack(value)

    def pack_rowkey_ipv4(self, salt, addr):
        """Row key for an IPv4 address, packed by the 'ipv4' packer."""
        return self._pack_rowkey(salt, self.TYPE_IPV4(), 'ipv4', addr)

    def pack_rowkey_ipv6(self, salt, addr):
        """Row key for an IPv6 address, packed by the 'ipv6' packer."""
        return self._pack_rowkey(salt, self.TYPE_IPV6(), 'ipv6', addr)

    def pack_rowkey_fqdn(self, salt, fqdn):
        """Row key for a domain name, packed by the 'domain' packer."""
        return self._pack_rowkey(salt, self.TYPE_FQDN(), 'domain', fqdn)

    def pack_rowkey_url(self, salt, url):
        """Row key for a URL, packed by the 'url' packer."""
        return self._pack_rowkey(salt, self.TYPE_URL(), 'url', url)

    def pack_rowkey_email(self, salt, email):
        """Row key for an email address, packed by the 'email' packer."""
        # Tagged with the email type (it used to be tagged as a URL).
        return self._pack_rowkey(salt, self.TYPE_EMAIL(), 'email', email)

    def pack_rowkey_search(self, salt, search):
        """Row key for a search term, packed by the 'search' packer."""
        return self._pack_rowkey(salt, self.TYPE_SEARCH(), 'search', search)

    def pack_rowkey_malware(self, salt, malware_hash):
        """Row key for a malware hash, packed by the 'malware' packer."""
        return self._pack_rowkey(salt, self.TYPE_MALWARE(), 'malware', malware_hash)

    def pack_rowkey_asn(self, salt, asn):
        """Row key for an ASN, packed by the 'asn' packer."""
        return self._pack_rowkey(salt, self.TYPE_ASN(), 'asn', asn)

    def reset(self):
        """Clear the per-record fields that commit() writes."""
        self.empty = True
        self.addr = None
        self.rowkey = None
        self.confidence = None
        self.addr_type = None
        self.iodef_rowkey = None

    def commit(self):
        """
        Commit the record to the index_* table
        Update cif_objs(rowkey=self.iodef_rowkey) so that
        'cf:{self.table_name}_{self.rowkey}' = 1
        Purger will remove the reference when this feed record is purged.

        With hbase, you can put an addt'l cell value into a table/row without having to
        merge. Existing cells won't be affected.

        Failures are logged and swallowed; the per-record fields are reset
        in either case.
        """
        try:
            rowdict = {
                'b:confidence': str(self.confidence),
                'b:addr_type': str(self.addr_type),
                'b:iodef_rowkey': str(self.iodef_rowkey),
            }

            self.table.put(self.rowkey, rowdict)

            # Back-reference column: "cf:<table name>_" followed by the raw
            # row key bytes. HBase table names are ASCII.
            prefix = ("cf:" + str(self.table_name) + "_").encode("ascii")
            self.co_table.put(self.iodef_rowkey, {prefix + self.rowkey: "1"})

        except Exception as e:
            self.L("failed to put record to %s table: %s" % (self.table_name, e))

        self.reset()

    def _commit_rowkey(self, rowkey, iodef_rowkey, confidence, addr_type=None):
        """
        Load the record fields and commit one index row.

        commit() finishes with reset(), which clears every record field, so
        the document-level values (iodef_rowkey, confidence) have to be
        re-applied before each commit of the same document.
        """
        self.rowkey = rowkey
        self.iodef_rowkey = iodef_rowkey
        self.confidence = confidence
        self.addr_type = addr_type
        self.commit()

    def _address_rowkey(self, address):
        """
        Build the row key for one Node.Address entry.

        Returns (rowkey, log message). rowkey is None when nothing should be
        committed; the message is None when nothing should be logged.
        """
        kinds = RFC5070_IODEF_v1_pb2.AddressType
        category = address.category

        if category in (kinds.Address_category_ipv4_addr, kinds.Address_category_ipv4_net):
            return self.pack_rowkey_ipv4(self.salt.next(), address.content), "Indexing for ipv4"

        if category in (kinds.Address_category_ipv6_addr, kinds.Address_category_ipv6_net):
            return self.pack_rowkey_ipv6(self.salt.next(), address.content), "Indexing for ipv6"

        if category == kinds.Address_category_asn:
            # ASNs get their own key type (they used to be packed as IPv6).
            return self.pack_rowkey_asn(self.salt.next(), address.content), "Indexing for ASN"

        if category == kinds.Address_category_ext_value:
            ext_category = address.ext_category

            if ext_category == "fqdn":
                self.fqdn = address.content
                return self.pack_rowkey_fqdn(self.salt.next(), self.fqdn), "Indexing for FQDN"

            if ext_category == "url":
                return self.pack_rowkey_url(self.salt.next(), address.content), "Indexing for URL"

            # Any other registered primary index.
            # NOTE(review): enum() is treated as a sequence here but as a
            # single value by the TYPE_*() helpers -- confirm which is right.
            e = self.primary_index.enum(ext_category)
            if len(e) > 0:
                rowkey = struct.pack(">HB", self.salt.next(), e[0]) + self.packers[ext_category].pack(address.content)
                return rowkey, None
            return None, "Unknown primary index given " + ext_category

        print("unhandled category:  %s" % (address,))
        return None, None

    def extract(self, iodef_rowkey, iodef):
        """
        Index every indexable value found in one IODEF document.

        iodef_rowkey -- row key of the source document in 'cif_objs'
        iodef        -- the document; only Incident[0] is examined

        FIX atm this is iodef specific. ideally we will be able to index other document types
        """
        self.reset()

        self.iodef_rowkey = iodef_rowkey

        # Hash this document only: a fresh hasher per call, because a shared
        # one would accumulate the bytes of every earlier document.
        self.md5 = hashlib.md5()
        self.md5.update(iodef.SerializeToString())
        self.hash = self.md5.digest()

        ii = iodef.Incident[0]

        confidence = ii.Assessment[0].Confidence.content
        self.confidence = confidence
        self.severity = ii.Assessment[0].Impact[0].severity

        # for malware hashes, they appear at the top level for now
        # iodef.incident[].additionaldata.meaning = "malware hash"
        # iodef.incident[].additionaldata.content = "[the hash]"

        for ed in getattr(ii, 'AdditionalData', ()):
            if ed.meaning == "malware hash":
                self.L("\tIndexing for malware hash")
                rowkey = self.pack_rowkey_malware(self.salt.next(), ed.content)
                self._commit_rowkey(rowkey, iodef_rowkey, confidence)

        # addresses and networks are in the EventData[].Flow[].System[] tree

        for ed in getattr(ii, 'EventData', ()):
            for fl in ed.Flow:
                for sy in fl.System:
                    for address in sy.Node.Address:
                        self.addr_type = address.category
                        rowkey, message = self._address_rowkey(address)
                        if message is not None:
                            self.L(message)
                        if rowkey is not None:
                            self._commit_rowkey(rowkey, iodef_rowkey, confidence, address.category)

    def TYPE_IPV4(self):
        """Enum registered for the 'ipv4' primary index."""
        return self.primary_index.enum('ipv4')

    def TYPE_IPV6(self):
        """Enum registered for the 'ipv6' primary index."""
        return self.primary_index.enum('ipv6')

    def TYPE_FQDN(self):
        """Enum registered for the 'domain' primary index."""
        return self.primary_index.enum('domain')

    def TYPE_URL(self):
        """Enum registered for the 'url' primary index."""
        return self.primary_index.enum('url')

    def TYPE_EMAIL(self):
        """Enum registered for the 'email' primary index."""
        return self.primary_index.enum('email')

    def TYPE_SEARCH(self):
        """Enum registered for the 'search' primary index."""
        return self.primary_index.enum('search')

    def TYPE_MALWARE(self):
        """Enum registered for the 'malware' primary index."""
        return self.primary_index.enum('malware')

    def TYPE_ASN(self):
        """Enum registered for the 'asn' primary index."""
        return self.primary_index.enum('asn')
Example #6
0
# Start-up sequence: connect to HBase, read the server count from the
# registry, then initialise logging and the API-key lookup.
# NOTE(review): hbhost, debug and myid are not defined in this excerpt, and
# the except/finally clause of this try is not shown -- confirm against the
# full file.
try:
    
    print "Connect to HBase"
    connectionPool = HBConnection(hbhost)
    with connectionPool.connection() as connection:
        cif_objs = connection.table('cif_objs').batch(batch_size=5) # set very low for development, set to 1000+ for test/qa/prod
        cif_idl = connection.table('cif_idl')
        
        print "Init Registry"
        registry = Registry(connectionPool, debug)
        num_servers = registry.get('hadoop.num_servers')
        # Fall back to a single server when the registry has no value.
        if num_servers == None:
            num_servers = 1
            print "hadoop.num_servers not set. defaulting."
        print "hadoop.num_servers = ", num_servers
        salt = Salt(num_servers, debug)
    
        thread_tracker = ThreadTracker(debug)
        
        # apikeys is bound as a module-level name by the assignment below.
        global apikeys
        
        log = Log(connectionPool)
        log.L("cif-db initializing")
        
        print "Initializing APIKeys object"
        # APIKeys is given the pooled connection itself, not the pool.
        apikeys = APIKeys(connection, True)
        
        print "Resolving our APIKey: " + myid
        
        apikey = apikeys.get_by_alias(myid)
        
Example #7
0
class Botnet(object):
    """
    tablename: infrastructure_botnet
    key: salt + address or salt + fqdn
         address is left padded with nulls into a 16 byte field
         fqdn is simply appended
    columns:
        b:prefix, asn, asn_desc, rir, cc, confidence, addr_type, port, ip_proto
    """

    # AdditionalData 'meaning' values that are copied onto the attribute of
    # the same name.
    _ADDITIONAL_DATA_FIELDS = ('prefix', 'asn', 'asn_desc', 'rir', 'cc')

    def __init__ (self, connection, num_servers = 1, debug = 0):
        """
        connection  -- object providing tables() and table(name)
        num_servers -- handed to Salt together with debug
        debug       -- stored on the instance and handed to Salt

        Raises Exception when the connection does not list an
        'infrastructure_botnet' table.
        """
        self.debug = debug
        self.dbh = connection
        self.num_servers = num_servers

        t = self.dbh.tables()

        if "infrastructure_botnet" not in t:
            raise Exception("missing infrastructure_botnet table")

        self.table = connection.table('infrastructure_botnet').batch(batch_size=5)

        self.reset()
        self.md5 = hashlib.md5()
        self.salt = Salt(self.num_servers, self.debug)

    def L(self, msg):
        """Emit msg prefixed with "<module>.<calling function>"."""
        caller = ".".join([str(__name__), sys._getframe(1).f_code.co_name])
        # NOTE(review): debug defaults to 0, which is not None, so messages
        # are printed unless the caller passes debug=None explicitly.
        if self.debug is not None:
            print(caller + ": " + msg)
        else:
            syslog.syslog(caller + ": " + msg)

    def _salt_value(self, salt):
        """Return the caller's salt, drawing a new one only when it is None."""
        if salt is None:
            return self.salt.next()
        return salt

    def pack_rowkey_ipv4(self, salt, addr):
        """
        rowkey: salt (2 bytes) + keytype(0x0=ipv4) + packedaddr(4 bytes)

        Raises Exception when addr is not a dotted quad with every octet in
        the range 0..255.
        """
        if re.match(r'^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$', addr) is None:
            raise Exception("Not an ipv4 addr: " + addr)

        octets = [int(part) for part in addr.split(".")]
        if max(octets) > 255:
            # Would not fit the 4-byte field (and is not a valid address).
            raise Exception("Not an ipv4 addr: " + addr)

        b = octets[0] << 24 | octets[1] << 16 | octets[2] << 8 | octets[3]
        print("making rowkey for  %s  int= %s" % (addr, b))
        return struct.pack(">HBI", self._salt_value(salt), 0x0, b)

    def pack_rowkey_ipv6(self, salt, addr):
        """
        rowkey: salt (2 bytes) + keytype(0x1=ipv6) + packedaddr(16 bytes)

        Raises Exception when addr is not a valid IPv6 address.
        """
        import socket

        try:
            # 16 bytes in network (big-endian) order.
            packed = socket.inet_pton(socket.AF_INET6, addr)
        except (socket.error, TypeError, ValueError):
            raise Exception("Not an ipv6 addr: " + str(addr))
        return struct.pack(">HB", self._salt_value(salt), 0x1) + packed

    def pack_rowkey_fqdn(self, salt, fqdn):
        """
        rowkey: salt (2 bytes) + keytype(0x2=fqdn) + string
        """
        # The whole name is appended as bytes; a bare "s" struct field would
        # keep only its first byte.
        if not isinstance(fqdn, bytes):
            fqdn = fqdn.encode("utf-8")
        return struct.pack(">HB", self._salt_value(salt), 0x2) + fqdn

    def reset(self):
        """Clear every per-record field and mark the record as empty."""
        self.empty = True
        self.prefix = None
        self.asn = None
        self.asn_desc = None
        self.rir = None
        self.cc = None
        self.addr = None
        self.rowkey = None
        self.confidence = None
        self.addr_type = None
        self.port = None
        self.proto = None
        self.hash = None

    def commit(self):
        """
        Write the current record to the table batch.

        Nothing is written while the record is empty. A failing put is
        logged and swallowed.
        """
        if self.empty:
            self.L("nothing to commit")
            return

        self.L("committing")
        try:
            self.table.put(self.rowkey,
                           {
                                'b:prefix': str(self.prefix),
                                'b:asn': str(self.asn),
                                'b:asn_desc': str(self.asn_desc),
                                'b:rir': str(self.rir),
                                'b:cc': str(self.cc),
                                'b:confidence': str(self.confidence),
                                'b:addr_type': str(self.addr_type),
                                'b:port': str(self.port),
                                'b:proto': str(self.proto)
                            })
        except Exception as e:
            self.L("failed to put record to infra_botnet table: %s" % (e,))

    def _apply_additional_data(self, additional_data):
        """Copy the recognised AdditionalData entries onto this record."""
        for item in additional_data:
            if item.meaning in self._ADDITIONAL_DATA_FIELDS:
                setattr(self, item.meaning, item.content)

    def extract(self, iodef):
        """
        Load the record fields from the first address of an IODEF document.

        Only Incident[0] / EventData[0] / Flow[0] / System[0] /
        Node.Address[0] is examined. The record is marked non-empty for
        IPv4 and IPv6 addresses and networks.
        """
        self.reset()

        # Hash this document only: a fresh hasher per call, because a shared
        # one would accumulate the bytes of every earlier document.
        self.md5 = hashlib.md5()
        self.md5.update(iodef.SerializeToString())
        self.hash = self.md5.digest()

        ii = iodef.Incident[0]

        self.confidence = ii.Assessment[0].Confidence.content
        self.severity = ii.Assessment[0].Impact[0].severity
        self.addr_type = ii.EventData[0].Flow[0].System[0].Node.Address[0].category

        # ipv4 addresses and networks

        if self.addr_type == RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv4_addr or self.addr_type == RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv4_net:
            self.addr = ii.EventData[0].Flow[0].System[0].Node.Address[0].content
            self.rowkey = self.pack_rowkey_ipv4(self.salt.next(), self.addr)

            # NOTE(review): the second check tests 'ip_proto' but reads
            # .ip_protocol, and `in` on Service may not test field presence
            # -- confirm against the IODEF message definition.
            if 'Port' in ii.EventData[0].Flow[0].System[0].Service:
                self.port = ii.EventData[0].Flow[0].System[0].Service.Port
            if 'ip_proto' in ii.EventData[0].Flow[0].System[0].Service:
                self.proto = ii.EventData[0].Flow[0].System[0].Service.ip_protocol

            self._apply_additional_data(ii.EventData[0].Flow[0].System[0].AdditionalData)

            self.empty = False

        # ipv6 addresses and networks

        elif self.addr_type == RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv6_addr or self.addr_type == RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv6_net:
            self.addr = ii.EventData[0].Flow[0].System[0].Node.Address[0].content
            self.rowkey = self.pack_rowkey_ipv6(self.salt.next(), self.addr)

            self._apply_additional_data(ii.EventData[0].Flow[0].System[0].AdditionalData)

            self.empty = False