def generate_backdoors():
    """
    Generate backdoors mapreduce.

    Tallies samples per backdoor name and writes each total into the
    backdoor details collection.
    """

    samples = mongo_connector(settings.COL_SAMPLES)
    # BUGFIX: the map function was malformed JavaScript
    # ('function() emit(...);}) }' — missing the opening brace and with
    # stray closers), so the mapreduce could never execute.
    m = Code('function() { emit({name: this.backdoor.name}, {count: 1}); }', {})
    r = Code('function(k,v) { var count = 0; v.forEach(function(v) { count += v["count"]; }); return {count: count}; }', {})
    try:
        backdoors = samples.inline_map_reduce(
            m, r, query={"backdoor.name": {"$ne": "None"}})
    except Exception:
        # stats generation is best-effort; a DB failure aborts quietly
        return
    backdoor_details = mongo_connector(settings.COL_BACKDOOR_DETAILS)
    for backdoor in backdoors:
        backdoor_details.update(
            {"name": backdoor["_id"]["name"]},
            {"$set": {"sample_count": backdoor["value"]["count"]}})
def generate_yara_hits():
    """
    Generate yara hits mapreduce.

    Counts yara results per (engine, version, result) tuple across all
    sample analysis entries and rebuilds the yara hits collection.
    """

    samples = mongo_connector(settings.COL_SAMPLES)
    map_code = """
    function() {
        this.analysis.forEach(function(z) {
            if ("results" in z && z.service_name == "yara") {
                z.results.forEach(function(x) {
                    emit({engine: z.service_name, version: x.version, result: x.result} ,{count: 1});
                })
            }
        })
    }
    """
    mapper = Code(map_code, {})
    reducer = Code('function(k,v) { var count=0; v.forEach(function(v) { count += v["count"]; }); return {count: count}; }', {})
    try:
        yarahits = samples.inline_map_reduce(
            mapper, reducer, query={'analysis.service_name': 'yara'})
    except:
        return
    yarahits_col = mongo_connector(settings.COL_YARAHITS)
    # rebuild from scratch so stale hits disappear
    yarahits_col.drop()
    sv = YaraHit._meta['latest_schema_version']
    for hit in yarahits:
        spec = {'engine': hit["_id"]["engine"],
                "version": hit["_id"]["version"],
                "result": hit["_id"]["result"]}
        doc = {"$set": {"sample_count": hit["value"]["count"],
                        "schema_version": sv}}
        yarahits_col.update(spec, doc, True, False)
def generate_sources():
    """
    Generate sources mapreduce.

    Counts samples per source name and stores the totals in the source
    access collection.
    """

    samples = mongo_connector(settings.COL_SAMPLES)
    mapper = Code('function() { this.source.forEach(function(z) {emit({name: z.name}, {count: 1});}) }', {})
    reducer = Code('function(k,v) { var count=0; v.forEach(function(v) { count += v["count"]; }); return {count: count}; }', {})
    try:
        sources = samples.inline_map_reduce(
            mapper, reducer, query={"source.name": {"$exists": 1}})
    except:
        return
    source_access = mongo_connector(settings.COL_SOURCE_ACCESS)
    for entry in sources:
        spec = {"name": entry["_id"]["name"]}
        doc = {"$set": {"sample_count": entry["value"]["count"]}}
        source_access.update(spec, doc)
def prep_indexes():
    """
    Update indexing.

    Ensures notification and screenshot indexes exist, then scans the
    GridFS ".chunks" collections for a legacy (files_id, n) index that
    was created with a 'background' option and recreates it as a plain
    unique index.
    """

    notifications = mongo_connector(settings.COL_NOTIFICATIONS)
    # auto-expire notifications after 30 days
    notifications.ensure_index("obj_id", background=True,
                               expireAfterSeconds=2592000)
    notifications.ensure_index("users", background=True)
    print "Notification indexes created."
    screenshots = mongo_connector(settings.COL_SCREENSHOTS)
    screenshots.ensure_index("tags", background=True)
    print "Screenshot indexes created."
    # check for old invalid chunk indexes and fix
    for col in ("%s.chunks" % settings.COL_OBJECTS,
                "%s.chunks" % settings.COL_PCAPS,
                "%s.chunks" % settings.COL_SAMPLES):
        c = mongo_connector(col)
        d = c.index_information()
        if d.get('files_id_1_n_1', False):
            b = d['files_id_1_n_1'].get('background', None)
            # background could be set to False or True in the DB
            if b is not None:
                # drop the tainted index and recreate it as the unique
                # compound index GridFS expects
                c.drop_index("files_id_1_n_1")
                c.ensure_index([("files_id", pymongo.ASCENDING),
                                ("n", pymongo.ASCENDING)],
                               unique=True)
                print "Found bad index for %s. Fixed it." % col
def generate_exploits():
    """
    Generate exploits mapreduce.

    Counts samples per exploit CVE and writes the totals to the exploit
    details collection.
    """

    samples = mongo_connector(settings.COL_SAMPLES)
    map_fn = Code('function() { this.exploit.forEach(function(z) {emit({cve: z.cve} ,{count: 1});}) }', {})
    reduce_fn = Code('function(k,v) { var count = 0; v.forEach(function(v) { count += v["count"]; }); return {count: count}; }', {})
    try:
        exploits = samples.inline_map_reduce(
            map_fn, reduce_fn, query={"exploit.cve": {"$exists": 1}})
    except:
        return
    exploit_details = mongo_connector(settings.COL_EXPLOIT_DETAILS)
    for hit in exploits:
        exploit_details.update(
            {"name": hit["_id"]["cve"]},
            {"$set": {"sample_count": hit["value"]["count"]}})
def campaign_date_stats():
    """
    Generate Campaign date stats.

    Map-reduces the email collection into per-campaign counts bucketed
    by month (keys are epoch milliseconds of the first of the month),
    then upserts the result into the statistics collection under the
    name 'campaign_monthly'.
    """

    emails = mongo_connector(settings.COL_EMAIL)
    # Map: emit {month-epoch: 1} keyed by the email's first campaign
    # name. Prefers 'isodate'; otherwise falls back to the first source
    # instance date. Any JS error (missing fields) is swallowed so one
    # bad document cannot abort the whole job.
    mapcode = """
    function () {
        try {
            if ("campaign" in this) {
                stats = {};
                if ("isodate" in this) {
                    var d = new Date(this.isodate);
                    stats[new Date(d.getFullYear(), d.getMonth()).getTime()] = 1;
                } else {
                    stats[new Date(this.source[0].instances[0].date.getFullYear(), this.source[0].instances[0].date.getMonth()).getTime()] = 1;
                }
                emit({campaign:this.campaign[0].name}, stats);
            }
        } catch (err) {}
    }
    """
    # Reduce: merge the per-month dictionaries, summing counts per key.
    reducecode = """
    function reduce(key, values) {
        var out = {};
        function merge(a, b) {
            for (var k in b) {
                if (!b.hasOwnProperty(k)) {
                    continue;
                }
                a[k] = (a[k] || 0) + b[k];
            }
        }
        for (var i=0; i < values.length; i++) {
            merge(out, values[i]);
        }
        return out;
    }
    """
    m = Code(mapcode, {})
    r = Code(reducecode, {})
    results = emails.inline_map_reduce(m, r)
    stat_coll = mongo_connector(settings.COL_STATISTICS)
    stats = {}
    stats["results"] = []
    for result in results:
        stats["results"].append({
            "campaign": result["_id"]["campaign"],
            "value": result["value"]
        })
    # single document keyed by name; upsert creates it on first run
    stat_coll.update({'name': 'campaign_monthly'}, {"$set": stats}, upsert=True)
def generate_campaign_stats(source_name=None):
    """
    Generate campaign stats.

    Counts domains, emails, events, indicators, IPs, PCAPs and samples
    per campaign and writes the totals onto each campaign document.

    :param source_name: Limit to a specific source.
    :type source_name: None, str
    """

    # build the query used in the mapreduces
    stat_query = {}
    stat_query["campaign.name"] = {"$exists": "true"}
    if source_name:
        stat_query["source.name"] = source_name
    campaigns = mongo_connector(settings.COL_CAMPAIGNS)
    domains = mongo_connector(settings.COL_DOMAINS)
    emails = mongo_connector(settings.COL_EMAIL)
    events = mongo_connector(settings.COL_EVENTS)
    indicators = mongo_connector(settings.COL_INDICATORS)
    ips = mongo_connector(settings.COL_IPS)
    pcaps = mongo_connector(settings.COL_PCAPS)
    samples = mongo_connector(settings.COL_SAMPLES)
    # generate an initial campaign listing so we can make sure all
    # campaigns get updated
    campaign_listing = campaigns.find({}, {'name': 1})
    # initialize each campaign to zeroed out stats
    campaign_stats = {}
    for campaign in campaign_listing:
        campaign_stats[campaign["name"]] = zero_campaign()
    # BUGFIX: the original map function assigned an unscoped global
    # 'campaign_list' and then read it unconditionally, so documents
    # without a campaign could re-emit the previous document's list (or
    # abort with a ReferenceError). The emit loop now lives inside the
    # membership guard.
    mapcode = """
    function() {
        if ("campaign" in this && this.campaign.length > 0) {
            this.campaign.forEach(function(c) {
                emit(c.name, {count: 1});
            });
        }
    }
    """
    m = Code(mapcode, {})
    r = Code('function(k,v) { var count = 0; v.forEach(function(v) { count += v["count"]; }); return {count: count}; }', {})
    campaign_stats = update_results(domains, m, r, stat_query,
                                    "domain_count", campaign_stats)
    campaign_stats = update_results(emails, m, r, stat_query,
                                    "email_count", campaign_stats)
    campaign_stats = update_results(events, m, r, stat_query,
                                    "event_count", campaign_stats)
    campaign_stats = update_results(indicators, m, r, stat_query,
                                    "indicator_count", campaign_stats)
    campaign_stats = update_results(ips, m, r, stat_query,
                                    "ip_count", campaign_stats)
    campaign_stats = update_results(pcaps, m, r, stat_query,
                                    "pcap_count", campaign_stats)
    campaign_stats = update_results(samples, m, r, stat_query,
                                    "sample_count", campaign_stats)
    # update all of the campaigns here
    for campaign in campaign_stats.keys():
        campaigns.update({"name": campaign},
                         {"$set": campaign_stats[campaign]},
                         upsert=True)
def run(self, argv): parser = OptionParser() parser.add_option("-i", "--input", action="store", dest="input", type="string", help="input filename containing list of hashes") parser.add_option("-o", "--output", action="store", dest="output", type="string", help="output filename") parser.add_option("-t", "--hash-type", action="store", dest="hash", type="string", help="hash type") (opts, args) = parser.parse_args(argv) if opts.input: hash_list = open(opts.input, 'rb').read() hash_list = hash_list.split() if opts.hash: query_string = "%s" % opts.hash else: query_string = "md5" if hash_list: results = [] samples = mongo_connector(settings.COL_SAMPLES) for h in hash_list: h = h.strip() result = samples.find_one({query_string: h}, {'md5': 1}) if result: results.append(result["md5"]) if opts.output: fout = open(opts.output, 'wb') for result in results: fout.write(result + '\n') fout.close() else: for result in results: print result
def run(self, argv): parser = OptionParser() parser.add_option("-f", "--filter", action="store", dest="filter", type="string", help="filetype filter") (opts, args) = parser.parse_args(argv) try: samples = mongo_connector(settings.COL_SAMPLES) if opts.filter: query = ast.literal_eval(opts.filter) else: query = {} md5_list = samples.find(query, {"md5": 1}) for item in md5_list: try: if item['md5'] != None: print item['md5'] except: pass except: pass
def run(self, argv): indicators = mongo_connector(settings.COL_INDICATORS) today = datetime.datetime.today() yesterday = today - datetime.timedelta(days=1) i = indicators.find({'created': {'$gte': yesterday, '$lt': today}}, {'type': 1, 'value': 1}) for a in i: print "%s, %s" % (a['type'], a['value'])
def remove_indexes(): """ Removes all indexes from all collections. """ coll_list = [settings.COL_BUCKET_LISTS, settings.COL_CAMPAIGNS, settings.COL_COMMENTS, settings.COL_DOMAINS, settings.COL_EMAIL, settings.COL_EVENTS, settings.COL_INDICATORS, settings.COL_IPS, settings.COL_NOTIFICATIONS, '%s.files' % settings.COL_OBJECTS, '%s.chunks' % settings.COL_OBJECTS, settings.COL_PCAPS, '%s.files' % settings.COL_PCAPS, '%s.chunks' % settings.COL_PCAPS, settings.COL_SAMPLES, '%s.files' % settings.COL_SAMPLES, '%s.chunks' % settings.COL_SAMPLES, settings.COL_TARGETS, ] for coll in coll_list: print "Removing index for: %s" % coll c = mongo_connector(coll) c.drop_indexes()
def get_sample_list(self, filter, meta):
    """
    Find samples matching a filter, reading from a secondary replica.

    :param filter: pymongo query document.
    :param meta: pymongo field projection.
    :returns: a pymongo cursor, or None when the lookup fails.
    """

    # BUGFIX: on failure md5_list was never assigned, so the silent
    # 'except: pass' led straight to an UnboundLocalError at the return.
    md5_list = None
    try:
        samples = mongo_connector(settings.COL_SAMPLES,
                                  preference="secondary")
        md5_list = samples.find(filter, meta)
    except Exception:
        # deliberate best-effort: caller receives None on any DB error
        pass
    return md5_list
def aggregate_object_for_collection(self, collection, sort_count):
    """
    Aggregate embedded object counts for one collection.

    Groups a collection's embedded 'objects' by a display key — the
    object's type, or "type - name" when a non-null name differs from
    the type — and counts occurrences.

    :param collection: name of the collection to aggregate.
    :param sort_count: sort by count when True, by key otherwise.
    :returns: whatever pymongo's aggregate() returns for the pipeline.
    """

    pipe = [
        # one output document per embedded object
        {"$unwind": "$objects"},
        {"$group" : {"_id":
                     # $cond picks the display key: "type - name" only
                     # when name is non-null AND differs from type
                     {"obj_type": {"$cond": {"if": {"$and": [{"$gt":["$objects.name", None] },
                                                             {"$ne": ["$objects.type", "$objects.name"]}] },
                                             "then": {"$concat": [ "$objects.type", " - ", "$objects.name" ]},
                                             "else": "$objects.type" } } },
                     "count": {"$sum": 1} } }
    ]
    if sort_count is True:
        pipe.append({"$sort": {"count": 1}})
    else:
        pipe.append({"$sort": {"_id": 1}})
    db = mongo_connector(collection)
    results = db.aggregate(pipeline=pipe)
    return results
def remove_indexes(): """ Removes all indexes from all collections. """ coll_list = [ settings.COL_BUCKET_LISTS, settings.COL_CAMPAIGNS, settings.COL_COMMENTS, settings.COL_DOMAINS, settings.COL_EMAIL, settings.COL_EVENTS, settings.COL_INDICATORS, settings.COL_IPS, settings.COL_NOTIFICATIONS, '%s.files' % settings.COL_OBJECTS, '%s.chunks' % settings.COL_OBJECTS, settings.COL_PCAPS, '%s.files' % settings.COL_PCAPS, '%s.chunks' % settings.COL_PCAPS, settings.COL_SAMPLES, '%s.files' % settings.COL_SAMPLES, '%s.chunks' % settings.COL_SAMPLES, settings.COL_TARGETS, ] for coll in coll_list: print "Removing index for: %s" % coll c = mongo_connector(coll) c.drop_indexes()
def run(self, argv): print "Removing old detection results..." samples = mongo_connector(settings.COL_SAMPLES) samples.update({}, {"$unset": {'detection': 1, 'unsupported_attrs.detection': 1}}, multi=True)
def generate_exploits():
    """
    Generate exploits mapreduce.
    """

    samples = mongo_connector(settings.COL_SAMPLES)
    mapper = Code('function() { this.exploit.forEach(function(z) {emit({cve: z.cve} ,{count: 1});}) }', {})
    reducer = Code('function(k,v) { var count = 0; v.forEach(function(v) { count += v["count"]; }); return {count: count}; }', {})
    exploit_query = {"exploit.cve": {"$exists": 1}}
    try:
        reduced = samples.inline_map_reduce(mapper, reducer,
                                            query=exploit_query)
    except:
        return
    details = mongo_connector(settings.COL_EXPLOIT_DETAILS)
    for row in reduced:
        details.update({"name": row["_id"]["cve"]},
                       {"$set": {"sample_count": row["value"]["count"]}})
def generate_backdoors():
    """
    Generate backdoors mapreduce.

    Counts samples per backdoor name and stores the totals in the
    backdoor details collection.
    """

    samples = mongo_connector(settings.COL_SAMPLES)
    # BUGFIX: the map function body was not valid JavaScript — it read
    # 'function() emit(...);}) }' (no opening brace, unmatched closers),
    # so the server-side map step always failed.
    m = Code('function() { emit({name: this.backdoor.name}, {count: 1}); }', {})
    r = Code('function(k,v) { var count = 0; v.forEach(function(v) { count += v["count"]; }); return {count: count}; }', {})
    try:
        backdoors = samples.inline_map_reduce(
            m, r, query={"backdoor.name": {"$ne": "None"}})
    except Exception:
        # non-critical statistics job; bail quietly on DB errors
        return
    backdoor_details = mongo_connector(settings.COL_BACKDOOR_DETAILS)
    for backdoor in backdoors:
        backdoor_details.update(
            {"name": backdoor["_id"]["name"]},
            {"$set": {"sample_count": backdoor["value"]["count"]}})
def generate_campaign_stats(source_name=None):
    """
    Generate campaign stats.

    Counts TLOs of each type per campaign and upserts the totals onto
    the campaign documents.

    :param source_name: Limit to a specific source.
    :type source_name: None, str
    """

    # build the query used in the mapreduces
    stat_query = {}
    stat_query["campaign.name"] = {"$exists": "true"}
    if source_name:
        stat_query["source.name"] = source_name
    campaigns = mongo_connector(settings.COL_CAMPAIGNS)
    domains = mongo_connector(settings.COL_DOMAINS)
    emails = mongo_connector(settings.COL_EMAIL)
    events = mongo_connector(settings.COL_EVENTS)
    indicators = mongo_connector(settings.COL_INDICATORS)
    ips = mongo_connector(settings.COL_IPS)
    pcaps = mongo_connector(settings.COL_PCAPS)
    samples = mongo_connector(settings.COL_SAMPLES)
    # generate an initial campaign listing so we can make sure all
    # campaigns get updated
    campaign_listing = campaigns.find({}, {'name': 1})
    # initialize each campaign to zeroed out stats
    campaign_stats = {}
    for campaign in campaign_listing:
        campaign_stats[campaign["name"]] = zero_campaign()
    # BUGFIX: the previous map function read an unscoped 'campaign_list'
    # even when the document had no campaign, which could emit a stale
    # value carried over from an earlier document or raise a
    # ReferenceError. Guard and emit in a single nested block instead.
    mapcode = """
    function() {
        if ("campaign" in this && this.campaign.length > 0) {
            this.campaign.forEach(function(c) {
                emit(c.name, {count: 1});
            });
        }
    }
    """
    m = Code(mapcode, {})
    r = Code('function(k,v) { var count = 0; v.forEach(function(v) { count += v["count"]; }); return {count: count}; }', {})
    campaign_stats = update_results(domains, m, r, stat_query,
                                    "domain_count", campaign_stats)
    campaign_stats = update_results(emails, m, r, stat_query,
                                    "email_count", campaign_stats)
    campaign_stats = update_results(events, m, r, stat_query,
                                    "event_count", campaign_stats)
    campaign_stats = update_results(indicators, m, r, stat_query,
                                    "indicator_count", campaign_stats)
    campaign_stats = update_results(ips, m, r, stat_query,
                                    "ip_count", campaign_stats)
    campaign_stats = update_results(pcaps, m, r, stat_query,
                                    "pcap_count", campaign_stats)
    campaign_stats = update_results(samples, m, r, stat_query,
                                    "sample_count", campaign_stats)
    # update all of the campaigns here
    for campaign in campaign_stats.keys():
        campaigns.update({"name": campaign},
                         {"$set": campaign_stats[campaign]},
                         upsert=True)
def generate_sources():
    """
    Generate sources mapreduce.
    """

    samples = mongo_connector(settings.COL_SAMPLES)
    map_fn = Code('function() { this.source.forEach(function(z) {emit({name: z.name}, {count: 1});}) }', {})
    reduce_fn = Code('function(k,v) { var count=0; v.forEach(function(v) { count += v["count"]; }); return {count: count}; }', {})
    source_query = {"source.name": {"$exists": 1}}
    try:
        reduced = samples.inline_map_reduce(map_fn, reduce_fn,
                                            query=source_query)
    except:
        return
    access = mongo_connector(settings.COL_SOURCE_ACCESS)
    for row in reduced:
        access.update({"name": row["_id"]["name"]},
                      {"$set": {"sample_count": row["value"]["count"]}})
def run(self, argv): print "Removing old detection results..." samples = mongo_connector(settings.COL_SAMPLES) samples.update( {}, {"$unset": { 'detection': 1, 'unsupported_attrs.detection': 1 }}, multi=True)
def generate_filetypes():
    """
    Generate filetypes mapreduce.

    Counts samples per mimetype and materializes the result into the
    filetypes collection.
    """

    samples = mongo_connector(settings.COL_SAMPLES)
    # BUGFIX: the map function was malformed JavaScript
    # ('function() emit(...);}) }' — missing the opening brace and with
    # stray closers), so the mapreduce always failed.
    m = Code('function() { emit({filetype: this.mimetype}, {count: 1}); }', {})
    r = Code('function(k,v) { var count = 0; v.forEach(function(v) { count += v["count"]; }); return {count: count}; }', {})
    try:
        samples.map_reduce(m, r, settings.COL_FILETYPES)
    except Exception:
        # statistics generation is best-effort
        return
def discover_binary(self):
    """
    Queries GridFS for a matching binary to this sample document.
    """

    from crits.core.mongo_tools import mongo_connector

    files_coll = mongo_connector("%s.files" % self._meta['collection'])
    match = files_coll.find_one({'md5': self.md5}, {'_id': 1})
    if not match:
        return
    self.filedata.grid_id = match['_id']
    self.filedata._mark_as_changed()
def discover_binary(self):
    """
    Queries GridFS for a matching binary to this pcap document.
    """

    from crits.core.mongo_tools import mongo_connector

    grid_files = mongo_connector("%s.files" % self._meta['collection'])
    doc = grid_files.find_one({'md5': self.md5}, {'_id': 1})
    if doc:
        self.filedata.grid_id = doc['_id']
        self.filedata._mark_as_changed()
def discover_binary(self):
    """
    Queries GridFS for a matching binary to this Certificate document.
    """

    from crits.core.mongo_tools import mongo_connector

    collection_name = "%s.files" % self._meta["collection"]
    metadata = mongo_connector(collection_name)
    found = metadata.find_one({"md5": self.md5}, {"_id": 1})
    if found:
        self.filedata.grid_id = found["_id"]
        self.filedata._mark_as_changed()
def generate_counts():
    """
    Generate dashboard counts.

    Collects total and windowed (today / last 7 / last 30 days) counts
    for the main collections and upserts them into the counts document.
    """

    counts = mongo_connector(settings.COL_COUNTS)
    samples = mongo_connector(settings.COL_SAMPLES)
    emails = mongo_connector(settings.COL_EMAIL)
    indicators = mongo_connector(settings.COL_INDICATORS)
    domains = mongo_connector(settings.COL_DOMAINS)
    pcaps = mongo_connector(settings.COL_PCAPS)
    today = datetime.datetime.fromordinal(datetime.datetime.now().toordinal())
    start = datetime.datetime.now()
    last_seven = start - datetime.timedelta(7)
    last_thirty = start - datetime.timedelta(30)
    count = {}
    # overall totals
    count['Samples'] = samples.find().count()
    count['Emails'] = emails.find().count()
    count['Indicators'] = indicators.find().count()
    count['PCAPs'] = pcaps.find().count()
    count['Domains'] = domains.find().count()
    # windowed totals, one query per (collection, window) pair
    windows = (('Today', today),
               ('Last 7', last_seven),
               ('Last 30', last_thirty))
    for label, since in windows:
        spec = {'source.instances.date': {'$gte': since}}
        count['Emails %s' % label] = emails.find(spec).count()
        count['Indicators %s' % label] = indicators.find(spec).count()
    counts.update({'name': "counts"}, {'$set': {'counts': count}}, upsert=True)
def run(self, argv):
    """
    Copy indicator documents from the local 'crits' database into the
    configured indicators collection.
    """

    parser = OptionParser()
    parser.add_option("-i", "--indicators", action="store_true",
                      dest="indicators", help="copy over indicators")
    (opts, args) = parser.parse_args(argv)
    indicators = mongo_connector(settings.COL_INDICATORS)
    if opts.indicators:
        # NOTE(review): pymongo.Connection is the legacy client class
        # (removed in pymongo 3.x) — confirm the pinned pymongo version
        # before reusing this script.
        conn = pymongo.Connection()
        db = conn.crits
        coll = db.indicators
        prod_indicators = coll.find()
        # straight document copy; _id values are preserved
        for i in prod_indicators:
            indicators.insert(i)
def run(self, argv):
    """
    Archive all of today's samples into a bzip2 tarball under
    /tmp/samples, named by date.
    """

    samples = mongo_connector(settings.COL_SAMPLES)
    today = datetime.datetime.fromordinal(datetime.datetime.now().toordinal())
    cursor = samples.find({"source.instances.date": {"$gte": today}})
    filename = "%s/%s.tar.bz2" % ("/tmp/samples", today.strftime("%Y-%m-%d"))
    tar = tarfile.open(filename, "w:bz2")
    for doc in cursor:
        payload = get_file(doc['md5'])
        entry = tarfile.TarInfo(name="%s" % doc['filename'])
        entry.mtime = time.time()
        entry.size = len(payload)
        tar.addfile(entry, BytesIO(payload))
    tar.close()
def run(self, argv):
    """
    Archive all of today's samples into a bzip2 tarball under
    /tmp/samples, named by date.
    """

    # local import keeps this fix self-contained regardless of the
    # module's top-of-file imports
    from io import BytesIO

    samples = mongo_connector(settings.COL_SAMPLES)
    today = datetime.datetime.fromordinal(datetime.datetime.now().toordinal())
    md5s = samples.find({"source.instances.date": {"$gte": today}})
    filename = "%s/%s.tar.bz2" % ("/tmp/samples", today.strftime("%Y-%m-%d"))
    tar = tarfile.open(filename, "w:bz2")
    for md5 in md5s:
        m = md5['md5']
        f = md5['filename']
        s = get_file(m)
        info = tarfile.TarInfo(name="%s" % f)
        info.mtime = time.time()
        info.size = len(s)
        # BUGFIX: sample content is binary; StringIO is a text buffer
        # and breaks on non-text bytes under Python 3. BytesIO is the
        # correct buffer and matches the sibling implementation of this
        # script elsewhere in the codebase.
        tar.addfile(info, BytesIO(s))
    tar.close()
def aggregate_indicator_types(self, sort_count, pp): collection = "indicators" pipe = [ { "$group": {"_id":"$type" , "count":{"$sum": 1}}}, {"$sort": {"_id": 1}} ] if sort_count is True: pipe.append({"$sort": {"count": 1}}) else: pipe.append({"$sort": {"_id": 1}}) db = mongo_connector(collection) results = db.aggregate(pipeline=pipe) print "INDICATOR TYPES IN COLLECTION [%s]" % collection pp.pprint(results) print
def delete_object_file(value):
    """
    In the event this is a file (but not PCAP), clean up after ourselves when
    deleting an object.

    :param value: The value of the object we are deleting.
    :type value: str
    """

    # GridFS-backed object files are keyed by their md5; any value that
    # is not a 32-hex-digit md5 cannot be a stored file, so skip it.
    if not re.match(r"^[a-f\d]{32}$", value, re.I):
        return

    #XXX: MongoEngine provides no direct GridFS access so we
    # need to use pymongo directly.
    obj_list = ( 'Actor', 'Backdoor', 'Campaign', 'Certificate', 'Domain', 'Email', 'Event', 'Exploit', 'Indicator', 'IP', 'PCAP', 'RawData', 'Sample', 'Target', )

    # In order to make sure this object isn't tied to more than one top-level
    # object, we need to check the rest of the database. We will at least find
    # one instance, which is the one we are going to be removing. If we find
    # another instance, then we should not remove the object from GridFS.
    count = 0
    query = {'objects.value': value}
    for obj in obj_list:
        obj_class = class_from_type(obj)
        count += len(obj_class.objects(__raw__=query))
        if count > 1:
            break
    else:
        # for/else: this branch runs only when the loop finished WITHOUT
        # hitting the break above, i.e. at most one reference exists —
        # safe to delete the GridFS file.
        col = settings.COL_OBJECTS
        grid = mongo_connector("%s.files" % col)
        grid.remove({'md5': value})
    return
def delete_object_file(value):
    """
    In the event this is a file (but not PCAP), clean up after ourselves when
    deleting an object.

    :param value: The value of the object we are deleting.
    :type value: str
    """

    # only md5-keyed values can correspond to a stored file
    if not re.match(r"^[a-f\d]{32}$", value, re.I):
        return
    #XXX: MongoEngine provides no direct GridFS access so we
    # need to use pymongo directly.
    tlo_types = ('Actor', 'Backdoor', 'Campaign', 'Certificate', 'Domain',
                 'Email', 'Event', 'Exploit', 'Indicator', 'IP', 'PCAP',
                 'RawData', 'Sample', 'Target', )
    # Count references across every TLO type. The object being deleted
    # accounts for exactly one hit, so a second hit means the file is
    # shared by another TLO and must be kept.
    references = 0
    query = {'objects.value': value}
    shared = False
    for tlo_type in tlo_types:
        references += len(class_from_type(tlo_type).objects(__raw__=query))
        if references > 1:
            shared = True
            break
    if not shared:
        grid = mongo_connector("%s.files" % settings.COL_OBJECTS)
        grid.remove({'md5': value})
    return
def run(self, argv):
    """
    Export matching emails, one metadata file per email, into an output
    directory (defaults to the current working directory).
    """

    parser = OptionParser()
    parser.add_option("-f", "--filter", action="store", dest="filter",
                      type="string", help="filetype filter")
    parser.add_option("-o", "--output-dir", action="store", dest="out",
                      type="string", help="output directory")
    parser.add_option("-y", "--yaml", action="store_true", dest="yaml",
                      default=False, help="export in YAML")
    parser.add_option("-j", "--json", action="store_true", dest="json",
                      default=False, help="export in JSON")
    (opts, args) = parser.parse_args(argv)
    emails = mongo_connector(settings.COL_EMAIL)
    if opts.filter:
        # filter is parsed as a Python literal, not eval'd
        query = ast.literal_eval(opts.filter)
    else:
        query = {}
    if opts.yaml:
        meta_format = "yaml"
    elif opts.json:
        meta_format = "json"
    else:
        # no output format chosen: print usage and bail
        print(parser.format_help().strip())
        return
    emails = emails.find(query, {})
    if opts.out:
        path = opts.out
    else:
        path = os.getcwd()
    for email in emails:
        email_id = str(email['_id'])
        # NOTE(review): format_object is always called with "json" even
        # when -y was requested, yet the output filename gets the
        # selected meta_format extension — this looks like it should
        # pass meta_format instead; confirm before changing.
        data = format_object("Email", email_id, "json", remove_source=True,
                             remove_rels=False, # should this be False?
                             remove_schema_version=True,
                             remove_campaign=True
                             )
        if data:
            pathname = os.path.join(path, email_id + "." + meta_format)
            print "[+] Writing %s" % pathname
            with open(pathname, "wb") as f:
                f.write(data)
def migrate_roles(): """ Migrate legacy role objects to new RBAC Role objects """ from crits.core.mongo_tools import mongo_connector import sys collection = mongo_connector(settings.COL_USERS) users = collection.find() for user in users: roles = [] role = None try: if 'role' in user: role = user['role'] elif 'unsupported_attrs' in user and 'role' in user['unsupported_attrs']: role = user['unsupported_attrs']['role'] else: print "Error migrating legacy roles for user %s. No legacy role found to migrate." % user sys.exit() except: print "Error migrating legacy roles for user %s. No legacy role found to migrate." % user sys.exit() if role == 'Administrator': roles.append('UberAdmin') elif role == 'Analyst': roles.append('Analyst') elif role == 'Read Only': roles.append('Read Only') user = CRITsUser.objects(username=user['username']).first() user.roles = roles user.save()
def migrate_roles(): """ Migrate legacy role objects to new RBAC Role objects """ from crits.core.mongo_tools import mongo_connector import sys collection = mongo_connector(settings.COL_USERS) users = collection.find() for user in users: roles = [] role = None try: if 'role' in user: role = user['role'] elif 'unsupported_attrs' in user and 'role' in user[ 'unsupported_attrs']: role = user['unsupported_attrs']['role'] else: print "Error migrating legacy roles for user %s. No legacy role found to migrate." % user sys.exit() except: print "Error migrating legacy roles for user %s. No legacy role found to migrate." % user sys.exit() if role == 'Administrator': roles.append('UberAdmin') elif role == 'Analyst': roles.append('Analyst') elif role == 'Read Only': roles.append('Read Only') user = CRITsUser.objects(username=user['username']).first() user.roles = roles user.save()
def handle(self, *args, **options):
    """
    Script Execution.

    Rebuilds the Bucket collection by aggregating per-TLO bucket_list
    tag counts across every TLO collection.
    """

    buckets = {}
    pipeline = [
        {'$match': {'$and': [{'bucket_list': {'$ne': []}},
                             {'bucket_list': {'$exists': True}}]}}, # only TLOs with buckets
        {'$unwind': '$bucket_list'}, # split each bucket out of list
        {'$match': {'bucket_list': {'$ne': ''}}}, # ignore any empty string buckets
        {'$group': {'_id': {'tag': '$bucket_list', 'id': '$_id'}}}, # get unique per TLO
        {'$group': {'_id': '$_id.tag', 'count': {'$sum': 1}}}, # total bucket counts
    ]
    tlo_types = [ 'Actor', 'Campaign', 'Certificate', 'Domain', 'Email', 'Event', 'Indicator', 'IP', 'PCAP', 'RawData', 'Signature', 'Sample', 'Target', ]
    for tlo_type in tlo_types:
        coll = class_from_type(tlo_type)._meta['collection']
        # NOTE(review): indexing result['result'] assumes the legacy
        # pymongo aggregate() dict return; pymongo 3.x returns a cursor
        # — confirm the pinned driver version.
        result = mongo_connector(coll).aggregate(pipeline)
        for x in result['result']:
            bucket = x['_id']
            if bucket not in buckets:
                buckets[bucket] = Bucket()
                buckets[bucket].name = bucket
            # per-TLO-type count stored as an attribute named after the
            # type (e.g. bucket.Sample = 12)
            setattr(buckets[bucket], tlo_type, x['count'])
    # Drop all existing buckets
    # NOTE(review): delete_one() despite the "all" wording above —
    # verify whether this should delete the whole queryset.
    Bucket.objects().delete_one()
    for bucket in buckets.values():
        bucket.save()
def get_campaign_stats(campaign):
    """
    Get the statistics for this Campaign generated by mapreduce.

    :param campaign: The name of the Campaign to get stats for.
    :type campaign: str
    :returns: list of dictionaries
    """

    # The Statistics collection has a bunch of documents which are not
    # in the same format, so we can't class it at this time.
    stats_coll = mongo_connector(settings.COL_STATISTICS)
    monthly = stats_coll.find_one({"name": "campaign_monthly"})
    data_list = []
    if not monthly:
        return data_list
    for result in monthly["results"]:
        # "all" selects every campaign's series
        if campaign != result["campaign"] and campaign != "all":
            continue
        series = [[k, result["value"][k]]
                  for k in sorted(result["value"].keys())]
        data_list.append({"label": result["campaign"], "data": series})
    return data_list
def add_object(type_, id_, object_type, source, method, reference, tlp, user,
               value=None, file_=None, add_indicator=False, get_objects=True,
               tlo=None, is_sort_relationships=False, is_validate_only=False,
               is_validate_locally=False, cache={}, **kwargs):
    """
    Add an object to the database.

    :param type_: The top-level object type.
    :type type_: str
    :param id_: The ObjectId of the top-level object.
    :type id_: str
    :param object_type: The type of the ObjectType being added.
    :type object_type: str
    :param source: The name of the source adding this object.
    :type source: str
    :param method: The method for this object.
    :type method: str
    :param reference: The reference for this object.
    :type reference: str
    :param user: The user adding this object.
    :type user: str
    :param value: The value of the object.
    :type value: str
    :param file_: The file if the object is a file upload.
    :type file_: file handle.
    :param add_indicator: Also add an indicator for this object.
    :type add_indicator: bool
    :param get_objects: Return the formatted list of objects when completed.
    :type get_objects: bool
    :param tlo: The CRITs top-level object we are adding objects to.
                This is an optional parameter used mainly for performance
                reasons (by not querying mongo if we already have the
                top level-object).
    :type tlo: :class:`crits.core.crits_mongoengine.CritsBaseAttributes`
    :param is_sort_relationships: Return all relationships and meta, sorted
    :type is_sort_relationships: bool
    :param is_validate_only: Validate, but do not add to TLO.
    :type is_validate_only: bool
    :param is_validate_locally: Validate, but do not add b/c there is no TLO.
    :type is_validate_locally: bool
    :param cache: Cached data, typically for performance enhancements
                  during bulk operations.
    :type cache: dict
    :returns: dict with keys:
              "success" (boolean),
              "message" (str),
              "objects" (list),
              "relationships" (list)
    """

    # NOTE(review): mutable default 'cache={}' is shared across calls
    # that omit the argument — confirm whether bulk operations rely on
    # that cross-call persistence before changing it.

    # if object_type is a validated indicator type, then validate value
    if value:
        from crits.indicators.handlers import validate_indicator_value
        (value, error) = validate_indicator_value(value, object_type)
        if error:
            return {"success": False, "message": error}

    if is_validate_locally: # no TLO provided
        return {"success": True}

    if not tlo:
        if type_ and id_:
            tlo = class_from_id(type_, id_)
        if not tlo:
            return {'success': False, 'message': "Failed to find TLO"}

    try:
        if file_:
            # file uploads are keyed by their md5; the md5 becomes the
            # object value and the filename the reference
            data = file_.read()
            filename = file_.name
            md5sum = md5(data).hexdigest()
            value = md5sum
            reference = filename
        ret = tlo.add_object(object_type, value, source, method, reference,
                             user)
        if not ret['success']:
            msg = '%s! [Type: "%s"][Value: "%s"]'
            return {"success": False,
                    "message": msg % (ret['message'], object_type, value)}
        else:
            results = {'success': True}

        if not is_validate_only: # save the object
            tlo.update(add_to_set__obj=ret['object'])
            results['message'] = "Object added successfully"

        if file_:
            # do we have a pcap?
            if detect_pcap(data):
                handle_pcap_file(filename, data, source, user=user,
                                 related_id=id_, related_type=type_)
            else:
                #XXX: MongoEngine provides no direct GridFS access so we
                # need to use pymongo directly.
                col = settings.COL_OBJECTS
                grid = mongo_connector("%s.files" % col)
                # only store the binary once per md5
                if grid.find({'md5': md5sum}).count() == 0:
                    put_file(filename, data, collection=col)

        if add_indicator and not is_validate_only:
            campaign = tlo.campaign if hasattr(tlo, 'campaign') else None
            from crits.indicators.handlers import handle_indicator_ind
            ind_res = handle_indicator_ind(value, source, object_type,
                                           IndicatorThreatTypes.UNKNOWN,
                                           IndicatorAttackTypes.UNKNOWN,
                                           user,
                                           source_method=method,
                                           source_reference=reference,
                                           source_tlp=tlp,
                                           add_domain=True,
                                           campaign=campaign,
                                           cache=cache)
            if ind_res['success']:
                forge_relationship(class_=tlo,
                                   right_class=ind_res['object'],
                                   rel_type=RelationshipTypes.RELATED_TO,
                                   user=user)
            else:
                # object stayed added; only the indicator step failed
                msg = "Object added, but failed to add Indicator.<br>Error: %s"
                results['message'] = msg % ind_res.get('message')

        if is_sort_relationships == True:
            results['relationships'] = tlo.sort_relationships(user, meta=True)

        if get_objects:
            results['objects'] = tlo.sort_objects()

        results['id'] = str(tlo.id)
        return results
    except ValidationError as e:
        return {'success': False, 'message': str(e)}
def run(self, argv):
    """
    Delete samples from a legacy samples collection, either by a query
    or by a file of md5s.

    NOTE(review): the -D/--delete-files, -f/--filter and -g/--gridfs
    options are parsed here but not used in this portion of the script —
    their handling presumably continues elsewhere; confirm before
    assuming this is the whole command.
    """

    parser = OptionParser()
    parser.add_option("-c", "--collection", action="store",
                      dest="collection", type="string", default="samples",
                      help="name of old samples collection if not 'samples'")
    parser.add_option("-d", "--delete-samples", action="store",
                      dest="sampledelete", type="string",
                      help="path to results file with sample md5s to delete")
    parser.add_option("-D", "--delete-files", action="store",
                      dest="griddelete", type="string",
                      help="path to results file with gridfs md5s to delete")
    parser.add_option("", "--delete-samples-query", action="store",
                      dest="sampledeletequery", type="string",
                      help="query for samples to delete")
    parser.add_option("-f", "--filter", action="store", dest="filter",
                      type="string",
                      help="filter for samples to migrate and output results file")
    parser.add_option("-g", "--gridfs", action="store", dest="gridfs",
                      type="string",
                      help="path to gridfs file with md5s to migrate")
    (opts, args) = parser.parse_args(argv)
    if opts.sampledelete or opts.sampledeletequery:
        print "Deleting matching md5s from %s..." % opts.collection
        count = 0
        failed = 0
        try:
            samples = mongo_connector(opts.collection)
        except Exception, e:
            print "Error: %s" % str(e)
            sys.exit(1)
        try:
            if opts.sampledeletequery:
                # query mode: one bulk remove using a literal-parsed dict
                query = ast.literal_eval(opts.sampledeletequery)
                samples.remove(query)
                print "Deleted samples matching query %s..." % query
            else:
                # file mode: one remove per md5 read from the input file
                with open(opts.sampledelete) as o:
                    for line in o:
                        md5 = line.strip()
                        samples.remove({'hashes.md5': md5})
                        count += 1
                print "Deleted %s md5s from old collection." % count
        except Exception, e:
            print "Mongo Error: %s" % str(e)
            sys.exit(1)
def __init__(self):
    """Grab handles to the samples collection and its GridFS files,
    then reset test state via _clean()."""
    col = settings.COL_SAMPLES
    self.samples = mongo_connector(col)
    self.grid = mongo_connector("%s.files" % col)
    # _clean() presumably sets up test_filename/sources used below -- confirm
    self._clean()
    print("[+] Initializing with fn='%s', source='%s'"
          % (self.test_filename, self.sources))
def create_indexes():
    """
    Creates the default set of indexes for the system. Depending
    on your use cases, as well as quantity of data, admins may wish
    to tweak these indexes to best fit their requirements.
    """

    print("Creating indexes (duplicates will be ignored automatically)")

    def _index_fields(col_name, fields):
        # Background single-field indexes on one collection.
        coll = mongo_connector(col_name)
        for field in fields:
            coll.ensure_index(field, background=True)

    def _index_gridfs(col_name):
        # GridFS collections need an md5 lookup index on .files plus the
        # unique (files_id, n) index GridFS requires on .chunks.
        files = mongo_connector('%s.files' % col_name)
        files.ensure_index("md5", background=True)
        chunks = mongo_connector('%s.chunks' % col_name)
        chunks.ensure_index([("files_id", pymongo.ASCENDING),
                             ("n", pymongo.ASCENDING)], unique=True)

    # Fields shared by most top-level object collections.
    tlo_fields = ["objects.value", "relationships.value", "campaign.name",
                  "bucket_list"]

    _index_fields(settings.COL_ANALYSIS_RESULTS,
                  ["service_name", "object_type", "object_id"])
    _index_fields(settings.COL_BUCKET_LISTS, ["name"])
    _index_fields(settings.COL_CAMPAIGNS,
                  ["objects.value", "relationships.value", "bucket_list"])
    _index_fields(settings.COL_COMMENTS,
                  ["obj_id", "users", "tags", "status"])
    _index_fields(settings.COL_DOMAINS, ["domain"] + tlo_fields)
    _index_fields(settings.COL_EMAIL, tlo_fields)
    _index_fields(settings.COL_EVENTS, tlo_fields)
    _index_fields(settings.COL_INDICATORS, ["value"] + tlo_fields)
    _index_fields(settings.COL_IPS, ["ip"] + tlo_fields)

    if settings.FILE_DB == settings.GRIDFS:
        _index_gridfs(settings.COL_OBJECTS)

    notifications = mongo_connector(settings.COL_NOTIFICATIONS)
    notifications.ensure_index("obj_id", background=True)
    # auto-expire notifications after 30 days
    notifications.ensure_index("date", background=True,
                               expireAfterSeconds=2592000)
    notifications.ensure_index("users", background=True)

    _index_fields(settings.COL_PCAPS, ["md5"] + tlo_fields)
    if settings.FILE_DB == settings.GRIDFS:
        _index_gridfs(settings.COL_PCAPS)

    _index_fields(settings.COL_RAW_DATA, ["link_id", "md5"] + tlo_fields)

    _index_fields(settings.COL_SAMPLES,
                  ["source.name", "md5", "sha1", "sha256", "ssdeep",
                   "mimetype", "filetype", "size", "filename",
                   "objects.value", "relationships.value", "campaign.name",
                   "backdoor.name", "exploit.cve",
                   "analysis.results.result", "analysis.results.md5",
                   "bucket_list"])
    if settings.FILE_DB == settings.GRIDFS:
        _index_gridfs(settings.COL_SAMPLES)

    _index_fields(settings.COL_SCREENSHOTS, ["tags"])
    _index_fields(settings.COL_TARGETS, tlo_fields)
def add_object(type_, id_, object_type, source, method, reference, user,
               value=None, file_=None, add_indicator=False, get_objects=True,
               tlo=None, is_sort_relationships=False, is_validate_only=False,
               is_validate_locally=False, cache=None, **kwargs):
    """
    Add an object to the database.

    :param type_: The top-level object type.
    :type type_: str
    :param id_: The ObjectId of the top-level object.
    :type id_: str
    :param object_type: The type of the ObjectType being added.
    :type object_type: str
    :param source: The name of the source adding this object.
    :type source: str
    :param method: The method for this object.
    :type method: str
    :param reference: The reference for this object.
    :type reference: str
    :param user: The user adding this object.
    :type user: str
    :param value: The value of the object.
    :type value: str
    :param file_: The file if the object is a file upload.
    :type file_: file handle.
    :param add_indicator: Also add an indicator for this object.
    :type add_indicator: bool
    :param get_objects: Return the formatted list of objects when completed.
    :type get_objects: bool
    :param tlo: The CRITs top-level object we are adding objects to.
        This is an optional parameter used mainly for performance reasons
        (by not querying mongo if we already have the top level-object).
    :type tlo: :class:`crits.core.crits_mongoengine.CritsBaseAttributes`
    :param is_sort_relationships: Return all relationships and meta, sorted.
    :type is_sort_relationships: bool
    :param is_validate_only: Validate, but do not add to TLO.
    :type is_validate_only: bool
    :param is_validate_locally: Validate, but do not add b/c there is no TLO.
    :type is_validate_locally: bool
    :param cache: Cached data, typically for performance enhancements
                  during bulk operations.
    :type cache: dict
    :returns: dict with keys:
              "success" (boolean),
              "message" (str),
              "objects" (list),
              "relationships" (list)
    """
    # FIX: avoid the shared mutable default argument for `cache`.
    if cache is None:
        cache = {}
    results = {}
    obj = tlo
    if id_ is None:
        id_ = ""
    if obj is None:
        obj = class_from_id(type_, id_)

    from crits.indicators.handlers import validate_indicator_value
    if value is not None:
        (value, error) = validate_indicator_value(value, object_type)
        if error:
            return {"success": False, "message": error}

    if is_validate_locally:  # no obj provided
        results['success'] = True
        return results

    if not obj:
        results['message'] = "TLO could not be found"
        results['success'] = False
        return results

    try:
        # obj.obj grows when the object is actually new; compare lengths to
        # detect a duplicate add.
        cur_len = len(obj.obj)
        if file_:
            data = file_.read()
            filename = file_.name
            md5sum = md5(data).hexdigest()
            value = md5sum
            reference = filename
        obj.add_object(object_type, value, source, method, reference, user)
        if not is_validate_only:
            obj.save(username=user)
        new_len = len(obj.obj)
        if new_len > cur_len:
            if not is_validate_only:
                results['message'] = "Object added successfully!"
            results['success'] = True
            if file_:
                # do we have a pcap? (libpcap/pcapng magic bytes)
                if data[:4] in ('\xa1\xb2\xc3\xd4',
                                '\xd4\xc3\xb2\xa1',
                                '\x0a\x0d\x0d\x0a'):
                    handle_pcap_file(filename, data, source, user=user,
                                     related_id=id_, related_type=type_)
                else:
                    #XXX: MongoEngine provides no direct GridFS access so we
                    # need to use pymongo directly.
                    col = settings.COL_OBJECTS
                    grid = mongo_connector("%s.files" % col)
                    # only store the file once
                    if grid.find({'md5': md5sum}).count() == 0:
                        put_file(filename, data, collection=col)
            if add_indicator and not is_validate_only:
                from crits.indicators.handlers import handle_indicator_ind
                campaign = obj.campaign if hasattr(obj, 'campaign') else None
                ind_res = handle_indicator_ind(value, source, object_type,
                                               IndicatorThreatTypes.UNKNOWN,
                                               IndicatorAttackTypes.UNKNOWN,
                                               user, method, reference,
                                               add_domain=True,
                                               campaign=campaign,
                                               cache=cache)
                if ind_res['success']:
                    ind = ind_res['object']
                    forge_relationship(class_=obj, right_class=ind,
                                       rel_type=RelationshipTypes.RELATED_TO,
                                       user=user,
                                       get_rels=is_sort_relationships)
                else:
                    # FIX: use %s so a missing message doesn't raise a
                    # TypeError on string concatenation with None.
                    results['message'] = ("Object was added, but failed to "
                                          "add Indicator.<br>Error: %s"
                                          % ind_res.get('message'))
            if is_sort_relationships:
                # FIX: both branches of the old file_/add_indicator split
                # made the identical call (a reload() was already commented
                # out), so a single call suffices.
                results['relationships'] = obj.sort_relationships(user,
                                                                  meta=True)
        else:
            results['message'] = ("Object already exists! [Type: %s]"
                                  "[Value: %s] " % (object_type, value))
            results['success'] = False
        if get_objects:
            results['objects'] = obj.sort_objects()
        results['id'] = str(obj.id)
        return results
    except ValidationError as e:
        return {'success': False, 'message': str(e)}
def add_object(type_, oid, object_type, name, source, method, reference,
               analyst, value=None, file_=None, add_indicator=False,
               get_objects=True, obj=None, is_sort_relationships=False,
               is_validate_only=False, is_validate_locally=False, cache=None):
    """
    Add an object to the database.

    :param type_: The top-level object type.
    :type type_: str
    :param oid: The ObjectId of the top-level object.
    :type oid: str
    :param object_type: The type of the ObjectType being added.
    :type object_type: str
    :param name: The name of the ObjectType being added.
    :type name: str
    :param source: The name of the source adding this object.
    :type source: str
    :param method: The method for this object.
    :type method: str
    :param reference: The reference for this object.
    :type reference: str
    :param analyst: The user adding this object.
    :type analyst: str
    :param value: The value of the object.
    :type value: str
    :param file_: The file if the object is a file upload.
    :type file_: file handle.
    :param add_indicator: Also add an indicator for this object.
    :type add_indicator: bool
    :param get_objects: Return the formatted list of objects when completed.
    :type get_objects: bool
    :param obj: The CRITs top-level object we are adding objects to.
        This is an optional parameter used mainly for performance reasons
        (by not querying mongo if we already have the top level-object).
    :type obj: :class:`crits.core.crits_mongoengine.CritsBaseAttributes`
    :param is_sort_relationships: Return all relationships and meta, sorted.
    :type is_sort_relationships: bool
    :param is_validate_only: Only validate, do not add.
    :type is_validate_only: bool
    :param is_validate_locally: Only validate, do not add.
    :type is_validate_locally: bool
    :param cache: Cached data, typically for performance enhancements
                  during bulk operations.
    :type cache: dict
    :returns: dict with keys:
              "success" (boolean),
              "message" (str),
              "objects" (list),
              "relationships" (list)
    """
    # FIX: avoid the shared mutable default argument for `cache`.
    if cache is None:
        cache = {}
    results = {}
    if oid is None:
        oid = ""
    if obj is None:
        obj = class_from_id(type_, oid)
    if not obj:
        if is_validate_locally:
            # TODO: Perform some form of validation
            results['success'] = True
            return results
        results['message'] = "Could not find item to add object to."
        results['success'] = False
        return results

    # Per-type value validation before touching the database.
    if name == "URL" and "://" not in value.split('.')[0]:
        return {"success": False,
                "message": "URI - URL must contain protocol prefix "
                           "(e.g. http://, https://, ftp://)"}
    elif object_type == "Address":
        if "ipv4" in name:
            try:
                validate_ipv4_address(value)
            except DjangoValidationError:
                return {"success": False, "message": "Invalid IPv4 address. "}
        elif "ipv6" in name:
            try:
                validate_ipv6_address(value)
            except DjangoValidationError:
                return {"success": False, "message": "Invalid IPv6 address. "}
        elif "cidr" in name:
            try:
                if '/' not in value:
                    raise ValidationError("")
                cidr_parts = value.split('/')
                # hoist the int() conversion; ValueError is caught below
                prefix_len = int(cidr_parts[1])
                if prefix_len < 0 or prefix_len > 128:
                    raise ValidationError("")
                # an IPv4 prefix cannot exceed 32 bits
                if ':' not in cidr_parts[0] and prefix_len > 32:
                    raise ValidationError("")
                validate_ipv46_address(cidr_parts[0])
            except (ValidationError, ValueError):
                return {"success": False, "message": "Invalid CIDR address. "}

    try:
        # obj.obj grows when the object is actually new; compare lengths to
        # detect a duplicate add.
        cur_len = len(obj.obj)
        if file_:
            data = file_.read()
            filename = file_.name
            md5sum = md5(data).hexdigest()
            value = md5sum
            reference = filename
        obj.add_object(object_type, name, value, source, method, reference,
                       analyst)
        if not is_validate_only:
            obj.save(username=analyst)
        new_len = len(obj.obj)
        if new_len > cur_len:
            results['message'] = "Object added successfully!"
            results['success'] = True
            if file_:
                # do we have a pcap? (libpcap/pcapng magic bytes)
                if data[:4] in ('\xa1\xb2\xc3\xd4',
                                '\xd4\xc3\xb2\xa1',
                                '\x0a\x0d\x0d\x0a'):
                    handle_pcap_file(filename, data, source, user=analyst,
                                     related_id=oid, related_type=type_)
                else:
                    #XXX: MongoEngine provides no direct GridFS access so we
                    # need to use pymongo directly.
                    col = settings.COL_OBJECTS
                    grid = mongo_connector("%s.files" % col)
                    # only store the file once
                    if grid.find({'md5': md5sum}).count() == 0:
                        put_file(filename, data, collection=col)
            if add_indicator and not is_validate_only:
                from crits.indicators.handlers import handle_indicator_ind
                if object_type != name:
                    object_type = "%s - %s" % (object_type, name)
                campaign = obj.campaign if hasattr(obj, 'campaign') else None
                ind_res = handle_indicator_ind(value, source, reference,
                                               object_type, analyst, method,
                                               add_domain=True,
                                               campaign=campaign,
                                               cache=cache)
                if ind_res['success']:
                    ind = ind_res['object']
                    forge_relationship(left_class=obj, right_class=ind,
                                       rel_type="Related_To", analyst=analyst,
                                       get_rels=is_sort_relationships)
                else:
                    # FIX: use %s so a missing message doesn't raise a
                    # TypeError on string concatenation with None.
                    results['message'] = ("Object was added, but failed to "
                                          "add Indicator.<br>Error: %s"
                                          % ind_res.get('message'))
            if is_sort_relationships:
                # FIX: both branches of the old file_/add_indicator split
                # made the identical call (a reload() was already commented
                # out), so a single call suffices.
                results['relationships'] = obj.sort_relationships(analyst,
                                                                  meta=True)
        else:
            results['message'] = ("Object already exists! [Type: %s]"
                                  "[Value: %s] " % (object_type, value))
            results['success'] = False
        if get_objects:
            results['objects'] = obj.sort_objects()
        results['id'] = str(obj.id)
        return results
    except ValidationError as e:
        return {'success': False, 'message': str(e)}
def create_indexes():
    """
    Creates the default set of indexes for the system. Depending
    on your use cases, as well as quantity of data, admins may wish
    to tweak these indexes to best fit their requirements.
    """
    print("Creating indexes (duplicates will be ignored automatically)")
    bucket_lists = mongo_connector(settings.COL_BUCKET_LISTS)
    bucket_lists.ensure_index("name", background=True)
    campaigns = mongo_connector(settings.COL_CAMPAIGNS)
    campaigns.ensure_index("objects.value", background=True)
    campaigns.ensure_index("relationships.value", background=True)
    campaigns.ensure_index("bucket_list", background=True)
    comments = mongo_connector(settings.COL_COMMENTS)
    comments.ensure_index("obj_id", background=True)
    comments.ensure_index("users", background=True)
    comments.ensure_index("tags", background=True)
    comments.ensure_index("status", background=True)
    domains = mongo_connector(settings.COL_DOMAINS)
    domains.ensure_index("domain", background=True)
    domains.ensure_index("objects.value", background=True)
    domains.ensure_index("relationships.value", background=True)
    domains.ensure_index("campaign.name", background=True)
    domains.ensure_index("bucket_list", background=True)
    emails = mongo_connector(settings.COL_EMAIL)
    emails.ensure_index("objects.value", background=True)
    emails.ensure_index("relationships.value", background=True)
    emails.ensure_index("campaign.name", background=True)
    emails.ensure_index("bucket_list", background=True)
    events = mongo_connector(settings.COL_EVENTS)
    events.ensure_index("objects.value", background=True)
    events.ensure_index("relationships.value", background=True)
    events.ensure_index("campaign.name", background=True)
    events.ensure_index("bucket_list", background=True)
    indicators = mongo_connector(settings.COL_INDICATORS)
    indicators.ensure_index("value", background=True)
    indicators.ensure_index("objects.value", background=True)
    indicators.ensure_index("relationships.value", background=True)
    indicators.ensure_index("campaign.name", background=True)
    indicators.ensure_index("bucket_list", background=True)
    ips = mongo_connector(settings.COL_IPS)
    ips.ensure_index("ip", background=True)
    ips.ensure_index("objects.value", background=True)
    ips.ensure_index("relationships.value", background=True)
    ips.ensure_index("campaign.name", background=True)
    ips.ensure_index("bucket_list", background=True)
    if settings.FILE_DB == settings.GRIDFS:
        objects_files = mongo_connector('%s.files' % settings.COL_OBJECTS)
        objects_files.ensure_index("md5", background=True)
        objects_chunks = mongo_connector('%s.chunks' % settings.COL_OBJECTS)
        objects_chunks.ensure_index([("files_id", pymongo.ASCENDING),
                                     ("n", pymongo.ASCENDING)],
                                    unique=True)
    notifications = mongo_connector(settings.COL_NOTIFICATIONS)
    notifications.ensure_index("obj_id", background=True)
    # TTL index: auto-expire notifications 30 days after their "date".
    # BUG FIX: expireAfterSeconds was previously attached to the "obj_id"
    # index; MongoDB TTL indexes only act on date-valued fields, so
    # notifications never expired.
    notifications.ensure_index("date", background=True,
                               expireAfterSeconds=2592000)
    notifications.ensure_index("users", background=True)
    pcaps = mongo_connector(settings.COL_PCAPS)
    pcaps.ensure_index("md5", background=True)
    pcaps.ensure_index("objects.value", background=True)
    pcaps.ensure_index("relationships.value", background=True)
    pcaps.ensure_index("campaign.name", background=True)
    pcaps.ensure_index("bucket_list", background=True)
    if settings.FILE_DB == settings.GRIDFS:
        pcaps_files = mongo_connector('%s.files' % settings.COL_PCAPS)
        pcaps_files.ensure_index("md5", background=True)
        pcaps_chunks = mongo_connector('%s.chunks' % settings.COL_PCAPS)
        pcaps_chunks.ensure_index([("files_id", pymongo.ASCENDING),
                                   ("n", pymongo.ASCENDING)],
                                  unique=True)
    raw_data = mongo_connector(settings.COL_RAW_DATA)
    raw_data.ensure_index("link_id", background=True)
    raw_data.ensure_index("md5", background=True)
    raw_data.ensure_index("objects.value", background=True)
    raw_data.ensure_index("relationships.value", background=True)
    raw_data.ensure_index("campaign.name", background=True)
    raw_data.ensure_index("bucket_list", background=True)
    samples = mongo_connector(settings.COL_SAMPLES)
    samples.ensure_index("source.name", background=True)
    samples.ensure_index("md5", background=True)
    samples.ensure_index("sha1", background=True)
    samples.ensure_index("sha256", background=True)
    samples.ensure_index("ssdeep", background=True)
    samples.ensure_index("mimetype", background=True)
    samples.ensure_index("filetype", background=True)
    samples.ensure_index("size", background=True)
    samples.ensure_index("filename", background=True)
    samples.ensure_index("objects.value", background=True)
    samples.ensure_index("relationships.value", background=True)
    samples.ensure_index("campaign.name", background=True)
    samples.ensure_index("backdoor.name", background=True)
    samples.ensure_index("exploit.cve", background=True)
    samples.ensure_index("analysis.results.result", background=True)
    samples.ensure_index("analysis.results.md5", background=True)
    samples.ensure_index("bucket_list", background=True)
    if settings.FILE_DB == settings.GRIDFS:
        samples_files = mongo_connector('%s.files' % settings.COL_SAMPLES)
        samples_files.ensure_index("md5", background=True)
        samples_chunks = mongo_connector('%s.chunks' % settings.COL_SAMPLES)
        samples_chunks.ensure_index([("files_id", pymongo.ASCENDING),
                                     ("n", pymongo.ASCENDING)],
                                    unique=True)
    targets = mongo_connector(settings.COL_TARGETS)
    targets.ensure_index("objects.value", background=True)
    targets.ensure_index("relationships.value", background=True)
    targets.ensure_index("campaign.name", background=True)
    targets.ensure_index("bucket_list", background=True)
def run(self, argv):
    """
    Export emails from the database, one metadata file per email, in the
    requested format (YAML or JSON).

    :param argv: Command-line arguments to parse.
    :type argv: list
    """
    parser = OptionParser()
    parser.add_option("-f", "--filter", action="store", dest="filter",
                      type="string", help="filetype filter")
    parser.add_option("-o", "--output-dir", action="store", dest="out",
                      type="string", help="output directory")
    parser.add_option("-y", "--yaml", action="store_true", dest="yaml",
                      default=False, help="export in YAML")
    parser.add_option("-j", "--json", action="store_true", dest="json",
                      default=False, help="export in JSON")
    (opts, args) = parser.parse_args(argv)
    emails = mongo_connector(settings.COL_EMAIL)
    if opts.filter:
        # literal_eval safely parses the user-supplied query dict
        query = ast.literal_eval(opts.filter)
    else:
        query = {}
    if opts.yaml:
        meta_format = "yaml"
    elif opts.json:
        meta_format = "json"
    else:
        # no format selected: show usage and bail
        print(parser.format_help().strip())
        return
    emails = emails.find(query, {})
    if opts.out:
        path = opts.out
    else:
        path = os.getcwd()
    for email in emails:
        email_id = str(email['_id'])
        # BUG FIX: format in the selected meta_format instead of the
        # hard-coded "json" (previously .yaml files contained JSON).
        data = format_object("Email",
                             email_id,
                             meta_format,
                             remove_source=True,
                             remove_rels=False, # should this be False?
                             remove_schema_version=True,
                             remove_campaign=True)
        if data:
            pathname = os.path.join(path, email_id + "." + meta_format)
            print("[+] Writing %s" % pathname)
            with open(pathname, "wb") as f:
                f.write(data)
def add_object(type_, id_, object_type, source, method, reference, tlp, user,
               value=None, file_=None, add_indicator=False, get_objects=True,
               tlo=None, is_sort_relationships=False, is_validate_only=False,
               is_validate_locally=False, cache=None, **kwargs):
    """
    Add an object to the database.

    :param type_: The top-level object type.
    :type type_: str
    :param id_: The ObjectId of the top-level object.
    :type id_: str
    :param object_type: The type of the ObjectType being added.
    :type object_type: str
    :param source: The name of the source adding this object.
    :type source: str
    :param method: The method for this object.
    :type method: str
    :param reference: The reference for this object.
    :type reference: str
    :param tlp: The TLP for this object's source.
    :type tlp: str
    :param user: The user adding this object.
    :type user: str
    :param value: The value of the object.
    :type value: str
    :param file_: The file if the object is a file upload.
    :type file_: file handle.
    :param add_indicator: Also add an indicator for this object.
    :type add_indicator: bool
    :param get_objects: Return the formatted list of objects when completed.
    :type get_objects: bool
    :param tlo: The CRITs top-level object we are adding objects to.
        This is an optional parameter used mainly for performance reasons
        (by not querying mongo if we already have the top level-object).
    :type tlo: :class:`crits.core.crits_mongoengine.CritsBaseAttributes`
    :param is_sort_relationships: Return all relationships and meta, sorted
    :type is_sort_relationships: bool
    :param is_validate_only: Validate, but do not add to TLO.
    :type is_validate_only: bool
    :param is_validate_locally: Validate, but do not add b/c there is no TLO.
    :type is_validate_locally: bool
    :param cache: Cached data, typically for performance enhancements
                  during bulk operations.
    :type cache: dict
    :returns: dict with keys:
              "success" (boolean),
              "message" (str),
              "objects" (list),
              "relationships" (list)
    """
    # FIX: avoid the shared mutable default argument for `cache`.
    if cache is None:
        cache = {}

    # if object_type is a validated indicator type, then validate value
    if value:
        from crits.indicators.handlers import validate_indicator_value
        (value, error) = validate_indicator_value(value, object_type)
        if error:
            return {"success": False, "message": error}

    if is_validate_locally:  # no TLO provided
        return {"success": True}

    if not tlo:
        if type_ and id_:
            tlo = class_from_id(type_, id_)
        if not tlo:
            return {'success': False, 'message': "Failed to find TLO"}

    try:
        if file_:
            data = file_.read()
            filename = file_.name
            md5sum = md5(data).hexdigest()
            value = md5sum
            reference = filename

        ret = tlo.add_object(object_type, value, source, method,
                             reference, user)
        # guard clause instead of an else after return
        if not ret['success']:
            msg = '%s! [Type: "%s"][Value: "%s"]'
            return {"success": False,
                    "message": msg % (ret['message'], object_type, value)}

        results = {'success': True}
        if not is_validate_only:
            # save the object
            tlo.update(add_to_set__obj=ret['object'])
            results['message'] = "Object added successfully"

        if file_:
            # do we have a pcap?
            if detect_pcap(data):
                handle_pcap_file(filename, data, source, user=user,
                                 related_id=id_, related_type=type_)
            else:
                #XXX: MongoEngine provides no direct GridFS access so we
                # need to use pymongo directly.
                col = settings.COL_OBJECTS
                grid = mongo_connector("%s.files" % col)
                # only store the file once
                if grid.find({'md5': md5sum}).count() == 0:
                    put_file(filename, data, collection=col)

        if add_indicator and not is_validate_only:
            campaign = tlo.campaign if hasattr(tlo, 'campaign') else None
            from crits.indicators.handlers import handle_indicator_ind
            ind_res = handle_indicator_ind(value, source, object_type,
                                           IndicatorThreatTypes.UNKNOWN,
                                           IndicatorAttackTypes.UNKNOWN, user,
                                           source_method=method,
                                           source_reference=reference,
                                           source_tlp=tlp, add_domain=True,
                                           campaign=campaign, cache=cache)
            if ind_res['success']:
                forge_relationship(class_=tlo, right_class=ind_res['object'],
                                   rel_type=RelationshipTypes.RELATED_TO,
                                   user=user)
            else:
                msg = "Object added, but failed to add Indicator.<br>Error: %s"
                results['message'] = msg % ind_res.get('message')

        if is_sort_relationships:
            results['relationships'] = tlo.sort_relationships(user, meta=True)
        if get_objects:
            results['objects'] = tlo.sort_objects()

        results['id'] = str(tlo.id)
        return results
    except ValidationError as e:
        return {'success': False, 'message': str(e)}
def add_object(type_, oid, object_type, name, source, method, reference,
               analyst, value=None, file_=None, add_indicator=False,
               get_objects=True, obj=None, is_sort_relationships=False,
               is_validate_only=False, is_validate_locally=False, cache=None):
    """
    Add an object to the database.

    :param type_: The top-level object type.
    :type type_: str
    :param oid: The ObjectId of the top-level object.
    :type oid: str
    :param object_type: The type of the ObjectType being added.
    :type object_type: str
    :param name: The name of the ObjectType being added.
    :type name: str
    :param source: The name of the source adding this object.
    :type source: str
    :param method: The method for this object.
    :type method: str
    :param reference: The reference for this object.
    :type reference: str
    :param analyst: The user adding this object.
    :type analyst: str
    :param value: The value of the object.
    :type value: str
    :param file_: The file if the object is a file upload.
    :type file_: file handle.
    :param add_indicator: Also add an indicator for this object.
    :type add_indicator: bool
    :param get_objects: Return the formatted list of objects when completed.
    :type get_objects: bool
    :param obj: The CRITs top-level object we are adding objects to.
        This is an optional parameter used mainly for performance reasons
        (by not querying mongo if we already have the top level-object).
    :type obj: :class:`crits.core.crits_mongoengine.CritsBaseAttributes`
    :param is_sort_relationships: Return all relationships and meta, sorted.
    :type is_sort_relationships: bool
    :param is_validate_only: Only validate, do not add.
    :type is_validate_only: bool
    :param is_validate_locally: Only validate, do not add.
    :type is_validate_locally: bool
    :param cache: Cached data, typically for performance enhancements
                  during bulk operations.
    :type cache: dict
    :returns: dict with keys:
              "success" (boolean),
              "message" (str),
              "objects" (list),
              "relationships" (list)
    """
    # FIX: avoid the shared mutable default argument for `cache`.
    if cache is None:
        cache = {}
    results = {}
    if oid is None:
        oid = ""
    if obj is None:
        obj = class_from_id(type_, oid)
    if not obj:
        if is_validate_locally:
            # TODO: Perform some form of validation
            results['success'] = True
            return results
        results['message'] = "Could not find item to add object to."
        results['success'] = False
        return results

    # Per-type value validation before touching the database.
    if name == "URL" and "://" not in value.split('.')[0]:
        return {"success": False,
                "message": "URI - URL must contain protocol prefix "
                           "(e.g. http://, https://, ftp://)"}
    elif object_type == "Address":
        if "ipv4" in name:
            try:
                validate_ipv4_address(value)
            except DjangoValidationError:
                return {"success": False, "message": "Invalid IPv4 address. "}
        elif "ipv6" in name:
            try:
                validate_ipv6_address(value)
            except DjangoValidationError:
                return {"success": False, "message": "Invalid IPv6 address. "}
        elif "cidr" in name:
            try:
                if '/' not in value:
                    raise ValidationError("")
                cidr_parts = value.split('/')
                # hoist the int() conversion; ValueError is caught below
                prefix_len = int(cidr_parts[1])
                if prefix_len < 0 or prefix_len > 128:
                    raise ValidationError("")
                # an IPv4 prefix cannot exceed 32 bits
                if ':' not in cidr_parts[0] and prefix_len > 32:
                    raise ValidationError("")
                validate_ipv46_address(cidr_parts[0])
            except (ValidationError, ValueError):
                return {"success": False, "message": "Invalid CIDR address. "}

    try:
        # obj.obj grows when the object is actually new; compare lengths to
        # detect a duplicate add.
        cur_len = len(obj.obj)
        if file_:
            data = file_.read()
            filename = file_.name
            md5sum = md5(data).hexdigest()
            value = md5sum
            reference = filename
        obj.add_object(object_type, name, value, source, method, reference,
                       analyst)
        if not is_validate_only:
            obj.save(username=analyst)
        new_len = len(obj.obj)
        if new_len > cur_len:
            results['message'] = "Object added successfully!"
            results['success'] = True
            if file_:
                # do we have a pcap? (libpcap/pcapng magic bytes)
                if data[:4] in ('\xa1\xb2\xc3\xd4',
                                '\xd4\xc3\xb2\xa1',
                                '\x0a\x0d\x0d\x0a'):
                    handle_pcap_file(filename, data, source, user=analyst,
                                     related_id=oid, related_type=type_)
                else:
                    #XXX: MongoEngine provides no direct GridFS access so we
                    # need to use pymongo directly.
                    col = settings.COL_OBJECTS
                    grid = mongo_connector("%s.files" % col)
                    # only store the file once
                    if grid.find({'md5': md5sum}).count() == 0:
                        put_file(filename, data, collection=col)
            if add_indicator and not is_validate_only:
                from crits.indicators.handlers import handle_indicator_ind
                if object_type != name:
                    object_type = "%s - %s" % (object_type, name)
                campaign = obj.campaign if hasattr(obj, 'campaign') else None
                ind_res = handle_indicator_ind(value, source, reference,
                                               object_type, analyst, method,
                                               add_domain=True,
                                               campaign=campaign,
                                               cache=cache)
                if ind_res['success']:
                    ind = ind_res['object']
                    forge_relationship(left_class=obj, right_class=ind,
                                       rel_type="Related_To", analyst=analyst,
                                       get_rels=is_sort_relationships)
                else:
                    # FIX: use %s so a missing message doesn't raise a
                    # TypeError on string concatenation with None.
                    results['message'] = ("Object was added, but failed to "
                                          "add Indicator.<br>Error: %s"
                                          % ind_res.get('message'))
            if is_sort_relationships:
                # FIX: both branches of the old file_/add_indicator split
                # made the identical call (a reload() was already commented
                # out), so a single call suffices.
                results['relationships'] = obj.sort_relationships(analyst,
                                                                  meta=True)
        else:
            results['message'] = ("Object already exists! [Type: %s]"
                                  "[Value: %s] " % (object_type, value))
            results['success'] = False
        if get_objects:
            results['objects'] = obj.sort_objects()
        results['id'] = str(obj.id)
        return results
    except ValidationError as e:
        return {'success': False, 'message': str(e)}
def create_indexes(): """ Creates the default set of indexes for the system. Depending on your use cases, as well as quantity of data, admins may wish to tweak these indexes to best fit their requirements. """ print "Creating indexes (duplicates will be ignored automatically)" analysis_results = mongo_connector(settings.COL_ANALYSIS_RESULTS) analysis_results.create_index("service_name", background=True) analysis_results.create_index("object_type", background=True) analysis_results.create_index("object_id", background=True) analysis_results.create_index("start_date", background=True) analysis_results.create_index("finish_date", background=True) analysis_results.create_index("version", background=True) analysis_results.create_index("analysis_id", background=True) bucket_lists = mongo_connector(settings.COL_BUCKET_LISTS) bucket_lists.create_index("name", background=True) backdoors = mongo_connector(settings.COL_BACKDOORS) backdoors.create_index("name", background=True) campaigns = mongo_connector(settings.COL_CAMPAIGNS) campaigns.create_index("objects.value", background=True) campaigns.create_index("relationships.value", background=True) campaigns.create_index("bucket_list", background=True) comments = mongo_connector(settings.COL_COMMENTS) comments.create_index("obj_id", background=True) comments.create_index("users", background=True) comments.create_index("tags", background=True) comments.create_index("status", background=True) domains = mongo_connector(settings.COL_DOMAINS) domains.create_index("domain", background=True) domains.create_index("objects.value", background=True) domains.create_index("relationships.value", background=True) domains.create_index("campaign.name", background=True) domains.create_index("bucket_list", background=True) emails = mongo_connector(settings.COL_EMAIL) emails.create_index("objects.value", background=True) emails.create_index("relationships.value", background=True) emails.create_index("campaign.name", background=True) 
emails.create_index("bucket_list", background=True) emails.create_index("favorite", background=True) emails.create_index("from", background=True) emails.create_index("source.name", background=True) emails.create_index("status", background=True) emails.create_index("subject", background=True) emails.create_index("isodate", background=True) events = mongo_connector(settings.COL_EVENTS) events.create_index("objects.value", background=True) events.create_index("title", background=True) events.create_index("relationships.value", background=True) events.create_index("campaign.name", background=True) events.create_index("source.name", background=True) events.create_index("-created", background=True) events.create_index("status", background=True) events.create_index("favorite", background=True) events.create_index("event_type", background=True) events.create_index("bucket_list", background=True) exploits = mongo_connector(settings.COL_EXPLOITS) exploits.create_index("name", background=True) indicators = mongo_connector(settings.COL_INDICATORS) indicators.create_index("value", background=True) indicators.create_index("lower", background=True) indicators.create_index("objects.value", background=True) indicators.create_index("relationships.value", background=True) indicators.create_index("campaign.name", background=True) indicators.create_index("-created", background=True) indicators.create_index("-modified", background=True) indicators.create_index("type", background=True) indicators.create_index("status", background=True) indicators.create_index("source.name", background=True) indicators.create_index("bucket_list", background=True) ips = mongo_connector(settings.COL_IPS) ips.create_index("ip", background=True) ips.create_index("objects.value", background=True) ips.create_index("relationships.value", background=True) ips.create_index("campaign.name", background=True) ips.create_index("-created", background=True) ips.create_index("-modified", background=True) 
ips.create_index("source.name", background=True) ips.create_index("status", background=True) ips.create_index("type", background=True) ips.create_index("favorite", background=True) ips.create_index("bucket_list", background=True) if settings.FILE_DB == settings.GRIDFS: objects_files = mongo_connector('%s.files' % settings.COL_OBJECTS) objects_files.create_index("md5", background=True) objects_chunks = mongo_connector('%s.chunks' % settings.COL_OBJECTS) objects_chunks.create_index([("files_id", pymongo.ASCENDING), ("n", pymongo.ASCENDING)], unique=True) notifications = mongo_connector(settings.COL_NOTIFICATIONS) notifications.create_index("obj_id", background=True) # auto-expire notifications after 30 days notifications.create_index("date", background=True, expireAfterSeconds=2592000) notifications.create_index("users", background=True) pcaps = mongo_connector(settings.COL_PCAPS) pcaps.create_index("md5", background=True) pcaps.create_index("objects.value", background=True) pcaps.create_index("relationships.value", background=True) pcaps.create_index("campaign.name", background=True) pcaps.create_index("filename", background=True) pcaps.create_index("description", background=True) pcaps.create_index("length", background=True) pcaps.create_index("-modified", background=True) pcaps.create_index("source.name", background=True) pcaps.create_index("status", background=True) pcaps.create_index("favorite", background=True) pcaps.create_index("bucket_list", background=True) if settings.FILE_DB == settings.GRIDFS: pcaps_files = mongo_connector('%s.files' % settings.COL_PCAPS) pcaps_files.create_index("md5", background=True) pcaps_chunks = mongo_connector('%s.chunks' % settings.COL_PCAPS) pcaps_chunks.create_index([("files_id", pymongo.ASCENDING), ("n", pymongo.ASCENDING)], unique=True) raw_data = mongo_connector(settings.COL_RAW_DATA) raw_data.create_index("link_id", background=True) raw_data.create_index("md5", background=True) raw_data.create_index("title", 
background=True) raw_data.create_index("data_type", background=True) raw_data.create_index("version", background=True) raw_data.create_index("-modified", background=True) raw_data.create_index("source.name", background=True) raw_data.create_index("objects.value", background=True) raw_data.create_index("relationships.value", background=True) raw_data.create_index("campaign.name", background=True) raw_data.create_index("status", background=True) raw_data.create_index("favorite", background=True) raw_data.create_index("bucket_list", background=True) samples = mongo_connector(settings.COL_SAMPLES) samples.create_index("source.name", background=True) samples.create_index("md5", background=True) samples.create_index("sha1", background=True) samples.create_index("sha256", background=True) samples.create_index("ssdeep", background=True) samples.create_index("impfuzzy", background=True) samples.create_index("mimetype", background=True) samples.create_index("filetype", background=True) samples.create_index("size", background=True) samples.create_index("filename", background=True) samples.create_index("objects.value", background=True) samples.create_index("relationships.value", background=True) samples.create_index("campaign.name", background=True) samples.create_index("analysis.results.result", background=True) samples.create_index("analysis.results.md5", background=True) samples.create_index("bucket_list", background=True) samples.create_index("-created", background=True) samples.create_index("-modified", background=True) samples.create_index("favorite", background=True) samples.create_index("status", background=True) if settings.FILE_DB == settings.GRIDFS: samples_files = mongo_connector('%s.files' % settings.COL_SAMPLES) samples_files.create_index("md5", background=True) samples_chunks = mongo_connector('%s.chunks' % settings.COL_SAMPLES) samples_chunks.create_index([("files_id", pymongo.ASCENDING), ("n", pymongo.ASCENDING)], unique=True) screenshots = 
mongo_connector(settings.COL_SCREENSHOTS) screenshots.create_index("tags", background=True) signature = mongo_connector(settings.COL_SIGNATURES) signature.create_index("link_id", background=True) signature.create_index("md5", background=True) signature.create_index("title", background=True) signature.create_index("data_type", background=True) signature.create_index("data_type_min_version", background=True) signature.create_index("data_type_max_version", background=True) signature.create_index("data_type_dependency", background=True) signature.create_index("version", background=True) signature.create_index("-modified", background=True) signature.create_index("source.name", background=True) signature.create_index("objects.value", background=True) signature.create_index("relationships.value", background=True) signature.create_index("campaign.name", background=True) signature.create_index("status", background=True) signature.create_index("favorite", background=True) signature.create_index("bucket_list", background=True) targets = mongo_connector(settings.COL_TARGETS) targets.create_index("objects.value", background=True) targets.create_index("relationships.value", background=True) targets.create_index("email_address", background=True) targets.create_index("firstname", background=True) targets.create_index("lastname", background=True) targets.create_index("email_count", background=True) targets.create_index("campaign.name", background=True) targets.create_index("department", background=True) targets.create_index("division", background=True) targets.create_index("status", background=True) targets.create_index("favorite", background=True) targets.create_index("bucket_list", background=True)
def add_object(type_, oid, object_type, name, source, method, reference, analyst, value=None, file_=None, add_indicator=False, get_objects=True, indicator_campaign=None, indicator_campaign_confidence=None, obj=None, is_sort_relationships=False, is_validate_only=False, is_validate_locally=False, cache={}): """ Add an object to the database. :param type_: The top-level object type. :type type_: str :param oid: The ObjectId of the top-level object. :type oid: str :param object_type: The type of the ObjectType being added. :type object_type: str :param name: The name of the ObjectType being added. :type name: str :param source: The name of the source adding this object. :type source: str :param method: The method for this object. :type method: str :param reference: The reference for this object. :type reference: str :param analyst: The user adding this object. :type analyst: str :param value: The value of the object. :type value: str :param file_: The file if the object is a file upload. :type file_: file handle. :param add_indicator: Also add an indicator for this object. :type add_indicator: bool :param get_objects: Return the formatted list of objects when completed. :type get_object: bool :param is_validate_only: Only validate, do not add. :type is_validate_only: bool :param is_validate_locally: Only validate, do not add. :type is_validate_locally: bool :param cache: Cached data, typically for performance enhancements during bulk operations. :type cache: dict :param obj: The CRITs top-level object we are adding objects to. This is an optional parameter used mainly for performance reasons (by not querying mongo if we already have the top level-object). 
:type obj: :class:`crits.core.crits_mongoengine.CritsBaseAttributes` :returns: dict with keys: "success" (boolean), "message" (str), "objects" (list), "relationships" (list) """ results = {} if oid == None: oid = "" if obj == None: obj = class_from_id(type_, oid) if not obj: if is_validate_locally == True: # TODO: Perform some form of validation results['success'] = True return results else: results['message'] = "Could not find item to add object to." results['success'] = False return results try: cur_len = len(obj.obj) if file_: data = file_.read() filename = file_.name md5sum = md5(data).hexdigest() value = md5sum reference = filename obj.add_object(object_type, name, value, source, method, reference, analyst) if is_validate_only == False: obj.save(username=analyst) new_len = len(obj.obj) if new_len > cur_len: results['message'] = "Object added successfully!" results['success'] = True if file_: # do we have a pcap? if data[:4] in ('\xa1\xb2\xc3\xd4', '\xd4\xc3\xb2\xa1', '\x0a\x0d\x0d\x0a'): handle_pcap_file(filename, data, source, user=analyst, related_id=oid, related_type=type_) else: #XXX: MongoEngine provides no direct GridFS access so we # need to use pymongo directly. 
col = settings.COL_OBJECTS grid = mongo_connector("%s.files" % col) if grid.find({'md5': md5sum}).count() == 0: put_file(filename, data, collection=col) if add_indicator and is_validate_only == False: from crits.indicators.handlers import handle_indicator_ind if object_type != name: object_type = "%s - %s" % (object_type, name) ind_res = handle_indicator_ind( value, source, reference, object_type, analyst, method=method, add_domain=True, campaign=indicator_campaign, campaign_confidence=indicator_campaign_confidence, cache=cache) if ind_res['success']: ind = ind_res['object'] # Inherit campaigns from top level item when creating # an indicator from an object if no campaigns were specified if indicator_campaign == None and ind != None: for campaign in obj.campaign: ec = EmbeddedCampaign( name=campaign.name, confidence=campaign.confidence, description="", analyst=analyst, date=datetime.datetime.now()) ind.add_campaign(ec) ind.save(username=analyst) forge_relationship(left_class=obj, right_class=ind, rel_type="Related_To", analyst=analyst, get_rels=is_sort_relationships) if is_sort_relationships == True: if file_ or add_indicator: # does this line need to be here? # obj.reload() results['relationships'] = obj.sort_relationships( analyst, meta=True) else: results['relationships'] = obj.sort_relationships( analyst, meta=True) else: results[ 'message'] = "Object already exists! [Type: " + object_type + "][Value: " + value + "] " results['success'] = False if (get_objects): results['objects'] = obj.sort_objects() return results except ValidationError, e: return {'success': False, 'message': e}