import string
import time
import logging
from datetime import datetime

from bson.objectid import ObjectId
from gevent import Greenlet
from pymongo import MongoClient
from pymongo.errors import InvalidStringData

# ReportGenerator is a project-local helper; the import path shown here is an
# assumption - adjust it to the actual repository layout.
from report_generator import ReportGenerator

logger = logging.getLogger(__name__)


# Note: this module is written against the Python 2 / pymongo 2.x API
# (auto_start_request, ensure_index, collection.update/insert); the $pushAll
# update operator used below was removed in later MongoDB releases.
class MnemoDB(object):
    def __init__(self, mongo_host, mongo_port, database_name, mongo_user=None,
                 mongo_password=None, mongo_auth_mechanism=None):
        logger.info('Connecting to mongodb, using "{0}" as database.'.format(database_name))
        conn = MongoClient(host=mongo_host, port=mongo_port, auto_start_request=False)
        self.db = conn[database_name]
        if mongo_user is not None:
            self.rg = ReportGenerator(mongo_host, mongo_port, database_name,
                                      mongo_user, mongo_password, mongo_auth_mechanism)
            self.db.authenticate(mongo_user, mongo_password, mechanism=mongo_auth_mechanism)
        else:
            self.rg = ReportGenerator(mongo_host, mongo_port, database_name)
        self.ensure_index()
    def ensure_index(self):
        self.db.hpfeed.ensure_index([('normalized', 1), ('last_error', 1)], unique=False, background=True)
        self.db.url.ensure_index('url', unique=True, background=True)
        self.db.url.ensure_index('extractions.hashes.md5', unique=False, background=True)
        self.db.url.ensure_index('extractions.hashes.sha1', unique=False, background=True)
        self.db.url.ensure_index('extractions.hashes.sha512', unique=False, background=True)
        self.db.file.ensure_index('hashes', unique=True, background=True)
        self.db.dork.ensure_index('content', unique=False, background=True)
        self.db.session.ensure_index('protocol', unique=False, background=True)
        self.db.session.ensure_index('source_ip', unique=False, background=True)
        self.db.session.ensure_index('source_port', unique=False, background=True)
        self.db.session.ensure_index('destination_port', unique=False, background=True)
        self.db.session.ensure_index('destination_ip', unique=False, background=True)
        self.db.session.ensure_index('honeypot', unique=False, background=True)
        self.db.session.ensure_index('timestamp', unique=False, background=True)
        self.db.session.ensure_index('identifier', unique=False, background=True)
        self.db.daily_stats.ensure_index([('channel', 1), ('date', 1)])

    def insert_normalized(self, ndata, hpfeed_id, identifier=None):
        assert isinstance(hpfeed_id, ObjectId)
        # ndata is a list of dictionaries: key = collection name, value = document content
        for item in ndata:
            for collection, document in item.items():
                if collection == 'url':
                    if 'extractions' in document:
                        self.db[collection].update({'url': document['url']},
                                                   {'$pushAll': {'extractions': document['extractions']},
                                                    '$push': {'hpfeeds_ids': hpfeed_id}},
                                                   upsert=True)
                    else:
                        self.db[collection].update({'url': document['url']},
                                                   {'$push': {'hpfeeds_ids': hpfeed_id}},
                                                   upsert=True)
                elif collection == 'file':
                    self.db[collection].update({'hashes.sha512': document['hashes']['sha512']},
                                               {'$set': document,
                                                '$push': {'hpfeed_ids': hpfeed_id}},
                                               upsert=True)
                elif collection == 'session':
                    document['hpfeed_id'] = hpfeed_id
                    if identifier:
                        document['identifier'] = identifier
                    self.db[collection].insert(document)
                elif collection == 'dork':
                    self.db[collection].update({'content': document['content'], 'type': document['type']},
                                               {'$set': {'lasttime': document['timestamp']},
                                                '$inc': {'count': document['count']}},
                                               upsert=True)
                else:
                    raise Warning('{0} is not a known collection type.'.format(collection))
        # If we end up here everything is ok - mark the hpfeed entry as normalized.
        self.db.hpfeed.update({'_id': hpfeed_id},
                              {'$set': {'normalized': True},
                               '$unset': {'last_error': 1, 'last_error_timestamp': 1}})

    def insert_hpfeed(self, ident, channel, payload):
        # thanks rep!
        # mongo can't handle non-utf-8 strings, therefore we hex-encode raw binaries
        if [i for i in payload[:20] if i not in string.printable]:
            payload = str(payload).encode('hex')
        else:
            payload = str(payload)
        entry = {'channel': channel,
                 'ident': ident,
                 'payload': payload,
                 'timestamp': datetime.utcnow(),
                 'normalized': False}
        try:
            self.db.hpfeed.insert(entry)
        except InvalidStringData as err:
            logger.error('Failed to insert hpfeed data on {0} channel due to invalid string data. ({1})'.format(
                entry['channel'], err))
        self.rg.hpfeeds(entry)

    def hpfeed_set_errors(self, items):
        """Marks hpfeeds entries in the datastore as having errored while normalizing.

        :param items: a list of hpfeed entries.
        """
        for item in items:
            self.db.hpfeed.update({'_id': item['_id']},
                                  {'$set': {'last_error': str(item['last_error']),
                                            'last_error_timestamp': item['last_error_timestamp']}})

    def get_hpfeed_data(self, get_before_id, max=250, max_scan=10000):
        """Fetches unnormalized hpfeed items from the datastore.

        :param get_before_id: only return entries whose _id is below this ObjectId
        :param max: maximum number of entries to return
        :param max_scan: maximum number of documents to examine
        :return: a list of dictionaries
        """
        data = list(self.db.hpfeed.find({'_id': {'$lt': get_before_id},
                                         'normalized': False,
                                         'last_error': {'$exists': False}},
                                        limit=max, sort=[('_id', -1)], max_scan=max_scan))
        return data

    def reset_normalized(self):
        """Deletes all normalized data from the datastore."""
        logger.info('Initiating database reset - all normalized data will be deleted. (Starting timer)')
        start = time.time()
        for collection in self.db.collection_names():
            if collection not in ['system.indexes', 'hpfeed', 'hpfeeds']:
                logger.warning('Dropping collection: {0}.'.format(collection))
                self.db.drop_collection(collection)
        logger.info('All collections dropped. (Elapsed: {0})'.format(time.time() - start))

        logger.info('Dropping indexes before bulk operation.')
        self.db.hpfeed.drop_indexes()
        logger.info('Indexes dropped. (Elapsed: {0})'.format(time.time() - start))

        logger.info('Resetting normalization flags on the hpfeeds collection.')
        self.db.hpfeed.update({}, {'$set': {'normalized': False},
                                   '$unset': {'last_error': 1, 'last_error_timestamp': 1}},
                              multi=True)
        logger.info('Done resetting normalization flags on the hpfeeds collection. (Elapsed: {0})'.format(
            time.time() - start))

        logger.info('Recreating indexes.')
        self.ensure_index()
        logger.info('Done recreating indexes. (Elapsed: {0})'.format(time.time() - start))
        logger.info('Full reset done in {0} seconds'.format(time.time() - start))
        # This is a one-off job to generate stats for hpfeeds, which takes a while.
        Greenlet.spawn(self.rg.do_legacy_hpfeeds)

    def collection_count(self):
        result = {}
        for collection in self.db.collection_names():
            if collection not in ['system.indexes']:
                count = self.db[collection].count()
                result[collection] = count
        return result

    def get_hpfeed_error_count(self):
        count = self.db.hpfeed.find({'last_error': {'$exists': 1}}).count()
        return count
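

# Minimal usage sketch in a hedged, illustrative form: the host, port, database
# name, ident, channel and payload below are assumptions, not values from the
# original module. It shows the intended flow: raw hpfeeds payloads enter via
# insert_hpfeed(), and the normalizer later pulls un-normalized batches with
# get_hpfeed_data() before writing results back through insert_normalized().
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    db = MnemoDB('localhost', 27017, 'mnemosyne')
    db.insert_hpfeed('test-ident', 'test.channel', 'GET /phpmyadmin HTTP/1.1')
    # A freshly generated ObjectId sorts after all existing ids, so this returns
    # every entry still waiting for normalization, up to the given limit.
    pending = db.get_hpfeed_data(get_before_id=ObjectId(), max=50)
    print('{0} hpfeed entries waiting for normalization'.format(len(pending)))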