class Log(object):
    def __init__(self, connectionPool, myhost=None, debug=0):
        self.debug = debug
        self.pool = connectionPool
        if myhost is not None:
            self.myhost = myhost
        else:
            self.myhost = socket.gethostname()
        self.registry = Registry(connectionPool, debug)
        self.num_servers = self.registry.get('hadoop.num_servers')
        if self.num_servers is None:
            self.num_servers = 1
        self.salt = Salt(self.num_servers, self.debug)

    def L(self, msg):
        # rowkey: salt (2 bytes) + unix timestamp (4 bytes); built outside the
        # try block so the except handler can always print rowdict (the
        # original also advanced the salt twice per call; once is enough)
        rowkey = struct.pack(">HI", self.salt.next(), int(time.time()))
        rowdict = {
            'b:hostname': str(self.myhost),
            'b:msg': str(msg)
        }
        try:
            with self.pool.connection() as connection:
                connection.table('log').put(rowkey, rowdict)
        except Exception as e:
            print "failed to put record to 'log' table: ", rowdict
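# Usage sketch for Log (assumptions: a connection pool as created in the
# startup code below, and an existing 'log' table; `pool` and the demo
# function name are hypothetical):
def _demo_log_usage(pool):
    log = Log(pool)               # resolves hostname and salt itself
    log.L("cif-db demo message")  # writes one salted, timestamped row per call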
class Indexer(object):
    """
    Indexes documents into the per-type index_* tables.
    """
    def __init__(self, connectionPool, index_type, num_servers=1, table_batch_size=1000, debug=0):
        self.debug = debug
        print "indexer connect"
        self.pool = connectionPool
        print "indexer load primary index map"
        self.primary_index = PrimaryIndex(connectionPool, debug)
        print "index init log"
        self.log = Log(connectionPool)
        self.num_servers = num_servers
        self.packers = {}
        for packer in self.primary_index.names():
            try:
                package = 'DB.PrimaryIndex.PackUnpack'
                self.L("loading packer " + package + "." + packer)
                __import__(package + "." + packer)
                pkg = sys.modules[package + "." + packer]
                self.packers[packer] = getattr(pkg, packer)
            except ImportError as e:
                self.L("warning: failed to load " + packer)

        with self.pool.connection() as dbh:
            t = dbh.tables()
            self.table_name = "index_" + index_type
            if not self.table_name in t:
                self.L("index table %s doesn't exist, creating it" % (self.table_name))
                dbh.create_table(self.table_name, {'b': {'COMPRESSION': 'SNAPPY'}})
            table_batch_size = 5  # set very low for development; raise for test/qa/prod
            self.table = dbh.table(self.table_name).batch(batch_size=table_batch_size)
            self.co_table = dbh.table("cif_objs").batch(batch_size=table_batch_size)

        self.reset()
        self.md5 = hashlib.md5()
        self.salt = Salt(self.num_servers, self.debug)

    def L(self, msg):
        caller = ".".join([str(__name__), sys._getframe(1).f_code.co_name])
        # was "if self.debug != None", which always printed (debug defaults
        # to 0); route to the Log table when not debugging
        if self.debug:
            print caller + ": " + msg
        else:
            self.log.L(caller + ": " + msg)

    # rowkey layout: salt (2 bytes, big-endian) + index-type enum (1 byte) +
    # packed value; each method uses the salt passed in by the caller (the
    # original ignored it and advanced the salt a second time)
    def pack_rowkey_ipv4(self, salt, addr):
        return struct.pack(">HB", salt, self.TYPE_IPV4()) + self.packers['ipv4'].pack(addr)

    def pack_rowkey_ipv6(self, salt, addr):
        return struct.pack(">HB", salt, self.TYPE_IPV6()) + self.packers['ipv6'].pack(addr)

    def pack_rowkey_fqdn(self, salt, fqdn):
        return struct.pack(">HB", salt, self.TYPE_FQDN()) + self.packers['domain'].pack(fqdn)

    def pack_rowkey_url(self, salt, url):
        return struct.pack(">HB", salt, self.TYPE_URL()) + self.packers['url'].pack(url)

    def pack_rowkey_email(self, salt, email):
        # was TYPE_URL(), which mislabeled email rowkeys
        return struct.pack(">HB", salt, self.TYPE_EMAIL()) + self.packers['email'].pack(email)

    def pack_rowkey_search(self, salt, search):
        return struct.pack(">HB", salt, self.TYPE_SEARCH()) + self.packers['search'].pack(search)

    def pack_rowkey_malware(self, salt, malware_hash):
        return struct.pack(">HB", salt, self.TYPE_MALWARE()) + self.packers['malware'].pack(malware_hash)

    def pack_rowkey_asn(self, salt, asn):
        return struct.pack(">HB", salt, self.TYPE_ASN()) + self.packers['asn'].pack(asn)

    def reset(self):
        self.empty = True
        self.addr = None
        self.rowkey = None
        self.confidence = None
        self.addr_type = None
        self.iodef_rowkey = None

    def commit(self):
        """
        Commit the record to the index_* table.

        Also update cif_objs (rowkey=self.iodef_rowkey) so that
        'b:{self.table_name}_{self.rowkey}' = 1. The purger removes that
        reference when this feed record is purged. With HBase you can put an
        additional cell value into a table/row without having to merge;
        existing cells won't be affected.
        """
        try:
            rowdict = {
                'b:confidence': str(self.confidence),
                'b:addr_type': str(self.addr_type),
                'b:iodef_rowkey': str(self.iodef_rowkey)
            }
            self.table.put(self.rowkey, rowdict)
            fmt = "%ds" % (len(self.table_name) + 4)
            prk = struct.pack(fmt, "cf:" + str(self.table_name) + "_") + self.rowkey
            self.co_table.put(self.iodef_rowkey, {prk: "1"})
        except Exception as e:
            self.L("failed to put record to %s table: " % self.table_name)
            print e
        self.reset()

    def extract(self, iodef_rowkey, iodef):
        """
        FIX: at the moment this is IODEF specific; ideally we will be able
        to index other document types.
        """
        self.reset()
        self.iodef_rowkey = iodef_rowkey
        self.md5.update(iodef.SerializeToString())
        self.hash = self.md5.digest()
        ii = iodef.Incident[0]
        self.confidence = ii.Assessment[0].Confidence.content
        self.severity = ii.Assessment[0].Impact[0].severity

        # Malware hashes appear at the top level for now:
        #   iodef.Incident[].AdditionalData.meaning = "malware hash"
        #   iodef.Incident[].AdditionalData.content = "[the hash]"
        if hasattr(ii, 'AdditionalData'):
            for ed in ii.AdditionalData:
                if ed.meaning == "malware hash":
                    self.L("\tIndexing for malware hash")
                    self.rowkey = self.pack_rowkey_malware(self.salt.next(), ed.content)
                    self.commit()

        # Addresses and networks live in the EventData[].Flow[].System[] tree
        # (the original tested "len(...) > 0 or hasattr(...)", backwards)
        if hasattr(ii, 'EventData') and len(ii.EventData) > 0:
            for ed in ii.EventData:
                for fl in ed.Flow:
                    for sy in fl.System:
                        for i in sy.Node.Address:
                            self.addr_type = i.category
                            # ipv4 addresses and networks
                            if self.addr_type in (RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv4_addr,
                                                  RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv4_net):
                                self.addr = i.content
                                self.rowkey = self.pack_rowkey_ipv4(self.salt.next(), self.addr)
                                self.L("Indexing for ipv4")
                                self.commit()
                            # ipv6 addresses and networks
                            elif self.addr_type in (RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv6_addr,
                                                    RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv6_net):
                                self.addr = i.content
                                self.rowkey = self.pack_rowkey_ipv6(self.salt.next(), self.addr)
                                self.L("Indexing for ipv6")
                                self.commit()
                            elif self.addr_type == RFC5070_IODEF_v1_pb2.AddressType.Address_category_asn:
                                self.addr = i.content
                                # was pack_rowkey_ipv6(); ASNs need the asn packer
                                self.rowkey = self.pack_rowkey_asn(self.salt.next(), self.addr)
                                self.L("Indexing for ASN")
                                self.commit()
                            elif self.addr_type == RFC5070_IODEF_v1_pb2.AddressType.Address_category_ext_value:
                                if i.ext_category == "fqdn":
                                    self.fqdn = i.content
                                    self.rowkey = self.pack_rowkey_fqdn(self.salt.next(), self.fqdn)
                                    self.L("Indexing for FQDN")
                                    self.commit()
                                elif i.ext_category == "url":
                                    self.rowkey = self.pack_rowkey_url(self.salt.next(), i.content)
                                    self.L("Indexing for URL")
                                    self.commit()
                                else:
                                    e = self.primary_index.enum(i.ext_category)
                                    if len(e) > 0:
                                        self.rowkey = struct.pack(">HB", self.salt.next(), e[0]) + \
                                            self.packers[i.ext_category].pack(i.content)
                                        self.commit()
                                    else:
                                        self.L("Unknown primary index given " + i.ext_category)
                            else:
                                print "unhandled category: ", i

    def TYPE_IPV4(self):
        return self.primary_index.enum('ipv4')

    def TYPE_IPV6(self):
        return self.primary_index.enum('ipv6')

    def TYPE_FQDN(self):
        return self.primary_index.enum('domain')

    def TYPE_URL(self):
        return self.primary_index.enum('url')

    def TYPE_EMAIL(self):
        return self.primary_index.enum('email')

    def TYPE_SEARCH(self):
        return self.primary_index.enum('search')

    def TYPE_MALWARE(self):
        return self.primary_index.enum('malware')

    def TYPE_ASN(self):
        return self.primary_index.enum('asn')
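# Rowkey layout sketch for the index_* tables: salt (2 bytes, big-endian) +
# primary-index enum (1 byte) + type-specific packed value. The values below
# are made up for illustration; real enums come from PrimaryIndex.enum() and
# real packing from the per-type packers:
def _demo_index_rowkey():
    salt_value = 7                          # hypothetical salt prefix
    ipv4_enum = 0x0                         # hypothetical enum for 'ipv4'
    packed = struct.pack(">I", 0x0A000001)  # 10.0.0.1 as 4 big-endian bytes
    return struct.pack(">HB", salt_value, ipv4_enum) + packed  # 7-byte rowkey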
try:
    print "Connect to HBase"
    connectionPool = HBConnection(hbhost)
    with connectionPool.connection() as connection:
        # batch_size set very low for development; set to 1000+ for test/qa/prod
        cif_objs = connection.table('cif_objs').batch(batch_size=5)
        cif_idl = connection.table('cif_idl')

        print "Init Registry"
        registry = Registry(connectionPool, debug)
        num_servers = registry.get('hadoop.num_servers')
        if num_servers is None:
            num_servers = 1
            print "hadoop.num_servers not set. defaulting."
        print "hadoop.num_servers = ", num_servers

        salt = Salt(num_servers, debug)
        thread_tracker = ThreadTracker(debug)
        global apikeys

        log = Log(connectionPool)
        log.L("cif-db initializing")

        print "Initializing APIKeys object"
        apikeys = APIKeys(connection, True)
        print "Resolving our APIKey: " + myid
        apikey = apikeys.get_by_alias(myid)
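    # Why the salt: rowkeys in these tables are time- or value-ordered, so
    # without a prefix sequential writes would all land on one HBase region.
    # Salt emits a small integer, used as a 2-byte big-endian key prefix,
    # spread over the region-server count. A sketch of the idea (not
    # necessarily the actual Salt implementation):
    #
    #   salt = Salt(2, debug)
    #   [salt.next() for _ in range(4)]   # values cycling within [0, 2)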
class Botnet(object):
    """
    tablename: infrastructure_botnet
    key: salt + address or salt + fqdn
         address is left padded with nulls into a 16 byte field
         fqdn is simply appended
    columns:
        b:prefix, asn, asn_desc, rir, cc, confidence, addr_type, port, ip_proto
    """
    def __init__(self, connection, num_servers=1, debug=0):
        self.debug = debug
        self.dbh = connection
        self.num_servers = num_servers
        t = self.dbh.tables()
        if not "infrastructure_botnet" in t:
            raise Exception("missing infrastructure_botnet table")
        self.table = connection.table('infrastructure_botnet').batch(batch_size=5)
        self.reset()
        self.md5 = hashlib.md5()
        self.salt = Salt(self.num_servers, self.debug)

    def L(self, msg):
        caller = ".".join([str(__name__), sys._getframe(1).f_code.co_name])
        # was "if self.debug != None", which always printed (debug defaults
        # to 0); route to syslog when not debugging
        if self.debug:
            print caller + ": " + msg
        else:
            syslog.syslog(caller + ": " + msg)

    def pack_rowkey_ipv4(self, salt, addr):
        """
        rowkey: salt (2 bytes) + keytype (0x0 = ipv4) + packed addr (4 bytes)
        """
        if re.match(r'^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$', addr) is not None:
            a = addr.split(".")
            b = int(a[0]) << 24 | int(a[1]) << 16 | int(a[2]) << 8 | int(a[3])
            print "making rowkey for ", addr, " int=", b
            return struct.pack(">HBI", salt, 0x0, b)
        else:
            raise Exception("Not an ipv4 addr: " + addr)

    def pack_rowkey_ipv6(self, salt, addr):
        """
        rowkey: salt (2 bytes) + keytype (0x1 = ipv6) + packed addr (16 bytes)
        """
        # was struct.pack(">HBIIII", ...) with the string address, which
        # raises struct.error; pack the address into its 16-byte binary form
        # instead (assumes a bare address, not a network with a /prefix)
        return struct.pack(">HB", salt, 0x1) + socket.inet_pton(socket.AF_INET6, addr)

    def pack_rowkey_fqdn(self, salt, fqdn):
        """
        rowkey: salt (2 bytes) + keytype (0x2 = fqdn) + string
        """
        # was struct.pack(">HBs", ...), where a bare "s" truncates the fqdn
        # to one byte; append the string instead
        return struct.pack(">HB", salt, 0x2) + fqdn

    def reset(self):
        self.empty = True
        self.prefix = None
        self.asn = None
        self.asn_desc = None
        self.rir = None
        self.cc = None
        self.addr = None
        self.rowkey = None
        self.confidence = None
        self.addr_type = None
        self.port = None
        self.proto = None
        self.hash = None

    def commit(self):
        if self.empty == False:
            self.L("committing")
            try:
                self.table.put(self.rowkey, {
                    'b:prefix': str(self.prefix),
                    'b:asn': str(self.asn),
                    'b:asn_desc': str(self.asn_desc),
                    'b:rir': str(self.rir),
                    'b:cc': str(self.cc),
                    'b:confidence': str(self.confidence),
                    'b:addr_type': str(self.addr_type),
                    'b:port': str(self.port),
                    'b:proto': str(self.proto)
                })
            except Exception as e:
                self.L("failed to put record to infrastructure_botnet table: ")
                print e
        else:
            self.L("nothing to commit")

    def extract(self, iodef):
        self.reset()
        self.md5.update(iodef.SerializeToString())
        self.hash = self.md5.digest()
        ii = iodef.Incident[0]
        self.confidence = ii.Assessment[0].Confidence.content
        self.severity = ii.Assessment[0].Impact[0].severity
        self.addr_type = ii.EventData[0].Flow[0].System[0].Node.Address[0].category

        # ipv4 addresses and networks
        if self.addr_type in (RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv4_addr,
                              RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv4_net):
            self.addr = ii.EventData[0].Flow[0].System[0].Node.Address[0].content
            self.rowkey = self.pack_rowkey_ipv4(self.salt.next(), self.addr)
            # the original tested "'Port' in ...Service", which isn't valid
            # for protobuf messages, and checked 'ip_proto' while reading
            # .ip_protocol; HasField with the real field names is assumed here
            if ii.EventData[0].Flow[0].System[0].Service.HasField('Port'):
                self.port = ii.EventData[0].Flow[0].System[0].Service.Port
            if ii.EventData[0].Flow[0].System[0].Service.HasField('ip_protocol'):
                self.proto = ii.EventData[0].Flow[0].System[0].Service.ip_protocol
            for i in ii.EventData[0].Flow[0].System[0].AdditionalData:
                if i.meaning == 'prefix':
                    self.prefix = i.content
                elif i.meaning == 'asn':
                    self.asn = i.content
                elif i.meaning == 'asn_desc':
                    self.asn_desc = i.content
                elif i.meaning == 'rir':
                    self.rir = i.content
                elif i.meaning == 'cc':
                    self.cc = i.content
            self.empty = False

        # ipv6 addresses and networks
        elif self.addr_type in (RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv6_addr,
                                RFC5070_IODEF_v1_pb2.AddressType.Address_category_ipv6_net):
            self.addr = ii.EventData[0].Flow[0].System[0].Node.Address[0].content
            self.rowkey = self.pack_rowkey_ipv6(self.salt.next(), self.addr)
            for i in ii.EventData[0].Flow[0].System[0].AdditionalData:
                if i.meaning == 'prefix':
                    self.prefix = i.content
                elif i.meaning == 'asn':
                    self.asn = i.content
                elif i.meaning == 'asn_desc':
                    self.asn_desc = i.content
                elif i.meaning == 'rir':
                    self.rir = i.content
                elif i.meaning == 'cc':
                    self.cc = i.content
            self.empty = False
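# Round-trip sketch for the ipv4 botnet rowkey above (salt 2 bytes + keytype
# 1 byte + packed address 4 bytes); the helper name is hypothetical:
def _demo_unpack_botnet_ipv4(rowkey):
    salt, keytype, addr_int = struct.unpack(">HBI", rowkey)
    addr = "%d.%d.%d.%d" % ((addr_int >> 24) & 0xff, (addr_int >> 16) & 0xff,
                            (addr_int >> 8) & 0xff, addr_int & 0xff)
    return salt, keytype, addr    # e.g. (7, 0x0, '10.0.0.1')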