def hash(request, left_id, right_hash):
    if enabledconf["mongodb"]:
        left = results_db.analysis.find_one({"info.id": int(left_id)}, {"target": 1, "info": 1})
    if es_as_db:
        hits = es.search(index=get_analysis_index(), query=get_query_by_info_id(left_id))["hits"]["hits"]
        if hits:
            left = hits[-1]["_source"]
        else:
            left = None
    if not left:
        return render(request, "error.html", {"error": "No analysis found with specified ID"})

    # Select all analyses with the specified file hash, excluding the left analysis.
    if enabledconf["mongodb"]:
        records = results_db.analysis.find(
            {"$and": [{"target.file.md5": left["target"]["file"]["md5"]}, {"info.id": {"$ne": int(left_id)}}]},
            {"target": 1, "info": 1},
        )
    if es_as_db:
        records = []
        q = {
            "query": {
                "bool": {
                    "must": [{"match": {"target.file.md5": right_hash}}],
                    "must_not": [{"match": {"info.id": left_id}}],
                }
            }
        }
        results = es.search(index=get_analysis_index(), body=q)["hits"]["hits"]
        for item in results:
            records.append(item["_source"])

    return render(request, "compare/hash.html", {"left": left, "records": records, "hash": right_hash})
def both(request, left_id, right_id):
    if enabledconf["mongodb"]:
        left = results_db.analysis.find_one({"info.id": int(left_id)}, {"target": 1, "info": 1, "summary": 1})
        right = results_db.analysis.find_one({"info.id": int(right_id)}, {"target": 1, "info": 1, "summary": 1})
        # Execute comparison.
        counts = compare.helper_percentages_mongo(results_db, left_id, right_id)
        summary_compare = compare.helper_summary_mongo(results_db, left_id, right_id)
    elif es_as_db:
        left = es.search(index=get_analysis_index(), query=get_query_by_info_id(left_id), _source=["target", "info"])[
            "hits"
        ]["hits"][-1]["_source"]
        right = es.search(index=get_analysis_index(), query=get_query_by_info_id(right_id), _source=["target", "info"])[
            "hits"
        ]["hits"][-1]["_source"]
        counts = compare.helper_percentages_elastic(es, left_id, right_id)
        summary_compare = compare.helper_summary_elastic(es, left_id, right_id)

    return render(
        request,
        "compare/both.html",
        {
            "left": left,
            "right": right,
            "left_counts": counts[left_id],
            "right_counts": counts[right_id],
            "summary": summary_compare,
        },
    )
def helper_summary_elastic(es_obj, tid1, tid2):
    left_sum, right_sum = None, None
    buf = es_obj.search(index=get_analysis_index(), query=get_query_by_info_id(tid1))["hits"]["hits"]
    if buf:
        left_sum = buf[-1]["_source"]
    buf = es_obj.search(index=get_analysis_index(), query=get_query_by_info_id(tid2))["hits"]["hits"]
    if buf:
        right_sum = buf[-1]["_source"]
    return get_similar_summary(left_sum, right_sum) if left_sum and right_sum else {}
def helper_percentages_elastic(es_obj, tid1, tid2, ignore_categories=["misc"]):
    counts = {}
    for tid in [tid1, tid2]:
        counts[tid] = {}
        results = es_obj.search(index=get_analysis_index(), query=get_query_by_info_id(tid))["hits"]["hits"]
        pids_calls = results[-1]["_source"] if results else None
        if not pids_calls:
            continue
        for pdoc in pids_calls["behavior"]["processes"]:
            pid = pdoc["process_id"]
            counts[tid][pid] = {}
            for coid in pdoc["calls"]:
                chunk = es_obj.search(index=get_calls_index(), body={"query": {"match": {"_id": coid}}})["hits"]["hits"][-1][
                    "_source"
                ]
                category_counts = behavior_categories_percent(chunk["calls"])
                for cat, count in category_counts.items():
                    if cat in ignore_categories:
                        continue
                    counts[tid][pid][cat] = counts[tid][pid].get(cat, 0) + count
    return combine_behavior_percentages(counts)
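# Shape sketch for the structure built above (hypothetical values): counts is
# keyed by task id, then process id, then behavior category, e.g.
#
#     {101: {2044: {"registry": 12.5, "filesystem": 7.0}},
#      102: {1337: {"registry": 9.0, "network": 3.5}}}
#
# combine_behavior_percentages() then merges the two per-task dictionaries into
# the left/right percentages rendered by the compare views.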
def cuckoo_clean_lower_score(args):
    """Clean up tasks with score <= X.
    It deletes all stored data from the file system and the configured
    databases (SQL and MongoDB) for the matching tasks.
    """
    # Init logging.
    # This needs to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    if not args.malscore:
        log.info("No malscore argument provided, bailing")
        return

    create_structure()
    init_console_logging()
    id_arr = []
    if not is_reporting_db_connected():
        return

    if repconf.mongodb and repconf.mongodb.enabled:
        results_db = connect_to_mongo()[mdb]
        result = list(results_db.analysis.find({"malscore": {"$lte": args.malscore}}))
        id_arr = [entry["info"]["id"] for entry in result]
    elif repconf.elasticsearchdb.enabled:
        id_arr = [
            d["_source"]["info"]["id"]
            for d in all_docs(
                index=get_analysis_index(),
                query={"query": {"range": {"malscore": {"lte": args.malscore}}}},
                _source=["info.id"],
            )
        ]

    log.info("Number of matching records: %d", len(id_arr))
    resolver_pool.map(lambda tid: delete_data(tid), id_arr)
def static_config_lookup(file_path, sha256=False):
    if not sha256:
        with open(file_path, "rb") as f:
            sha256 = hashlib.sha256(f.read()).hexdigest()
    if repconf.mongodb.enabled:
        document_dict = results_db.analysis.find_one(
            {"target.file.sha256": sha256},
            {"CAPE.configs": 1, "info.id": 1, "_id": 0},
            sort=[("_id", pymongo.DESCENDING)],
        )
    elif repconf.elasticsearchdb.enabled:
        hits = es.search(
            index=get_analysis_index(),
            body={"query": {"match": {"target.file.sha256": sha256}}},
            _source=["CAPE.configs", "info.id"],
            sort={"_id": {"order": "desc"}},
        )["hits"]["hits"]
        document_dict = hits[0]["_source"] if hits else None
    else:
        document_dict = None

    if not document_dict:
        return

    has_config = document_dict.get("CAPE", {}).get("configs", [])
    if has_config:
        return document_dict["info"]
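# Usage sketch (hypothetical path; assumes a configured reporting backend):
# check whether a previous analysis of the same file already extracted a CAPE
# config, and reuse its task id if so.
#
#     info = static_config_lookup("/tmp/sample.bin")
#     if info:
#         print(f"Config already extracted in task {info['id']}")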
def cuckoo_clean():
    """Clean up the cuckoo setup.
    It deletes logs and all stored data from the file system and the
    configured databases (SQL and MongoDB).
    """
    # Init logging.
    # This needs to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    create_structure()
    init_console_logging()

    # Drop all tables.
    db.drop()

    if repconf.mongodb.enabled:
        mongo_drop_database(mdb)
    elif repconf.elasticsearchdb.enabled and not repconf.elasticsearchdb.searchonly:
        analyses = all_docs(index=get_analysis_index(), query={"query": {"match_all": {}}}, _source=["info.id"])
        if analyses:
            for analysis in analyses:
                delete_analysis_and_related_calls(analysis["_source"]["info"]["id"])

    # Paths to clean.
    paths = [
        os.path.join(CUCKOO_ROOT, "db"),
        os.path.join(CUCKOO_ROOT, "log"),
        os.path.join(CUCKOO_ROOT, "storage"),
    ]

    # Delete the various directories.
    for path in paths:
        if os.path.isdir(path):
            try:
                shutil.rmtree(path)
            except (IOError, OSError) as e:
                log.warning("Error removing directory %s: %s", path, e)

    # Delete all compiled Python objects ("*.pyc").
    for dirpath, dirnames, filenames in os.walk(CUCKOO_ROOT):
        for fname in filenames:
            if not fname.endswith(".pyc"):
                continue
            # dirpath is already rooted at CUCKOO_ROOT.
            path = os.path.join(dirpath, fname)
            try:
                os.unlink(path)
            except (IOError, OSError) as e:
                log.warning("Error removing file %s: %s", path, e)
def perform_malscore_search(value):
    if repconf.mongodb.enabled:
        return results_db.analysis.find({"malscore": {"$gte": float(value)}}, perform_search_filters).sort([["_id", -1]])
    elif repconf.elasticsearchdb.enabled:
        q = {"query": {"range": {"malscore": {"gte": float(value)}}}}
        _source_fields = list(perform_search_filters.keys())[:-1]
        return es.search(index=get_analysis_index(), body=q, _source=_source_fields)["hits"]["hits"]
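# Usage sketch (hypothetical threshold): fetch all analyses scored at or above
# 6.0. Note the MongoDB branch returns a cursor while the Elasticsearch branch
# returns a list of hit sources, so callers should only iterate the result.
#
#     for analysis in perform_malscore_search("6.0"):
#         print(analysis["info"]["id"])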
def cuckoo_clean_failed_url_tasks():
    """Clean up failed URL tasks.
    It deletes all stored data from the file system and the configured
    databases (SQL and MongoDB) for failed URL tasks.
    """
    # Init logging.
    # This needs to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    create_structure()
    init_console_logging()
    if not is_reporting_db_connected():
        return

    if repconf.mongodb.enabled:
        rtmp = mongo_find(
            "analysis",
            {"info.category": "url", "network.http.0": {"$exists": False}},
            {"info.id": 1},
            sort=[("_id", -1)],
        ).limit(100)
    elif repconf.elasticsearchdb.enabled:
        # Match URL tasks that captured no HTTP traffic, mirroring the MongoDB query.
        rtmp = [
            d["_source"]
            for d in all_docs(
                index=get_analysis_index(),
                query={
                    "query": {
                        "bool": {
                            "must": [{"match": {"info.category": "url"}}],
                            "must_not": [{"exists": {"field": "network.http"}}],
                        }
                    }
                },
                _source=["info.id"],
            )
        ]
    else:
        rtmp = []

    if rtmp:
        resolver_pool.map(lambda tid: delete_data(tid), rtmp)
def perform_ttps_search(value):
    if len(value) == 5 and value.upper().startswith("T") and value[1:].isdigit():
        if repconf.mongodb.enabled:
            return results_db.analysis.find({f"ttps.{value.upper()}": {"$exists": 1}}, {"info.id": 1, "_id": 0}).sort(
                [["_id", -1]]
            )
        elif repconf.elasticsearchdb.enabled:
            q = {"query": {"match": {"ttps.ttp": value.upper()}}}
            return es.search(index=get_analysis_index(), body=q)["hits"]["hits"]
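# Usage sketch: the guard above only accepts MITRE ATT&CK technique IDs of the
# form "T" plus four digits, so sub-technique IDs such as "T1055.012" fall
# through and return None.
#
#     hits = perform_ttps_search("T1055")  # process injection, for example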
def remove(task_id):
    if repconf.mongodb.enabled or repconf.elasticsearchdb.enabled:
        if repconf.mongodb.enabled:
            analyses = list(results_db.analysis.find({"info.id": int(task_id)}, {"_id": 1, "behavior.processes": 1}))
        elif repconf.elasticsearchdb.enabled:
            # info.id is needed below to delete the analysis and its related calls.
            analyses = [
                d["_source"]
                for d in es.search(
                    index=get_analysis_index(),
                    query=get_query_by_info_id(task_id),
                    _source=["behavior.processes", "info.id"],
                )["hits"]["hits"]
            ]
        else:
            analyses = []

        if len(analyses) > 1:
            message = "Multiple tasks with this ID deleted."
        elif len(analyses) == 1:
            message = "Task deleted."

        if analyses:
            # Delete dups too.
            for analysis in analyses:
                if repconf.mongodb.enabled:
                    # Delete calls.
                    for process in analysis.get("behavior", {}).get("processes", []):
                        for call in process["calls"]:
                            results_db.calls.delete_one({"_id": ObjectId(call)})
                    # Delete the analysis data.
                    results_db.analysis.delete_one({"_id": ObjectId(analysis["_id"])})
                elif repconf.elasticsearchdb.enabled:
                    delete_analysis_and_related_calls(analysis["info"]["id"])

            analyses_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(task_id))
            if os.path.exists(analyses_path):
                shutil.rmtree(analyses_path)
        else:
            print("nothing found")
def _load_report(task_id: int, return_one: bool = False):
    if repconf.mongodb.enabled:
        if return_one:
            analysis = mongo_find_one("analysis", {"info.id": int(task_id)}, sort=[("_id", -1)])
            if not analysis:
                return None
            # Inline the call chunks back into each process.
            for process in analysis.get("behavior", {}).get("processes", []):
                calls = [ObjectId(call) for call in process["calls"]]
                process["calls"] = []
                for call in mongo_find("calls", {"_id": {"$in": calls}}, sort=[("_id", 1)]) or []:
                    process["calls"] += call["calls"]
            return analysis
        else:
            return mongo_find("analysis", {"info.id": int(task_id)})

    if repconf.elasticsearchdb.enabled and not repconf.elasticsearchdb.searchonly:
        try:
            analyses = (
                es.search(index=get_analysis_index(), query=get_query_by_info_id(task_id), sort={"info.id": {"order": "desc"}})
                .get("hits", {})
                .get("hits", [])
            )
            if analyses:
                return analyses[0] if return_one else analyses
        except ESRequestError as e:
            print(e)

    return False
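# Usage sketch: with return_one=True the MongoDB branch reassembles the full
# single report by pulling the behavior call chunks back in; otherwise a
# cursor/list of matching analyses is returned.
#
#     report = _load_report(1234, return_one=True)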
def cuckoo_clean_sorted_pcap_dump():
    """Clean up sorted PCAP dumps.
    It deletes the dump_sorted.pcap files from the file system and removes
    the sorted PCAP references from the configured databases (MongoDB or
    Elasticsearch).
    """
    # Init logging.
    # This needs to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    create_structure()
    init_console_logging()
    if not is_reporting_db_connected():
        return

    if repconf.mongodb.enabled:
        results_db = connect_to_mongo()[mdb]
    elif repconf.elasticsearchdb.enabled:
        es = connect_to_es()

    done = False
    while not done:
        if repconf.mongodb and repconf.mongodb.enabled:
            rtmp = results_db.analysis.find(
                {"network.sorted_pcap_id": {"$exists": True}}, {"info.id": 1}, sort=[("_id", -1)]
            ).limit(100)
        elif repconf.elasticsearchdb.enabled:
            # Keep the source index alongside info so the ES update below can address the document.
            rtmp = [
                {"info": d["_source"]["info"], "index": d["_index"]}
                for d in all_docs(
                    index=get_analysis_index(),
                    query={"query": {"exists": {"field": "network.sorted_pcap_id"}}},
                    _source=["info.id"],
                )
            ]
        else:
            rtmp = []

        if rtmp:
            for e in rtmp:
                if e["info"]["id"]:
                    log.info(e["info"]["id"])
                    try:
                        if repconf.mongodb and repconf.mongodb.enabled:
                            results_db.analysis.update_one(
                                {"info.id": int(e["info"]["id"])}, {"$unset": {"network.sorted_pcap_id": ""}}
                            )
                        elif repconf.elasticsearchdb.enabled:
                            es.update(index=e["index"], id=e["info"]["id"], body={"doc": {"network.sorted_pcap_id": ""}})
                    except Exception:
                        log.info("failed to remove sorted pcap from db for id %s", e["info"]["id"])
                    try:
                        path = os.path.join(CUCKOO_ROOT, "storage", "analyses", "%s" % e["info"]["id"], "dump_sorted.pcap")
                        os.remove(path)
                    except Exception as err:
                        log.info("failed to remove sorted_pcap from disk: %s", err)
                else:
                    done = True
        else:
            done = True
def process(target=None, copy_path=None, task=None, report=False, auto=False, capeproc=False, memory_debugging=False):
    # This is the results container. It's what will be used by all the
    # reporting modules to make it consumable by humans and machines.
    # It will contain all the results generated by every processing
    # module available. Its structure can be observed through the JSON
    # dump in the analysis' reports folder. (If jsondump is enabled.)
    task_dict = task.to_dict() or {}
    task_id = task_dict.get("id") or 0
    results = {"statistics": {"processing": [], "signatures": [], "reporting": []}}

    if memory_debugging:
        gc.collect()
        log.info("[%s] (1) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))
    if memory_debugging:
        gc.collect()
        log.info("[%s] (2) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))
    RunProcessing(task=task_dict, results=results).run()
    if memory_debugging:
        gc.collect()
        log.info("[%s] (3) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))
    RunSignatures(task=task_dict, results=results).run()
    if memory_debugging:
        gc.collect()
        log.info("[%s] (4) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))

    if report:
        if repconf.mongodb.enabled:
            conn, mdata, analyses = _load_mongo_report(task_id)
            if analyses:
                log.debug("Deleting analysis data for Task %s", task_id)
                for analysis in analyses:
                    for process in analysis.get("behavior", {}).get("processes", []):
                        calls = [ObjectId(call) for call in process["calls"]]
                        mdata.calls.delete_many({"_id": {"$in": calls}})
                    mdata.analysis.delete_one({"_id": ObjectId(analysis["_id"])})
                conn.close()
                log.debug("Deleted previous MongoDB data for Task %s", task_id)

        if repconf.elasticsearchdb.enabled and not repconf.elasticsearchdb.searchonly:
            try:
                analyses = es.search(index=get_analysis_index(), query=get_query_by_info_id(task_id))["hits"]["hits"]
                if analyses:
                    for analysis in analyses:
                        delete_analysis_and_related_calls(analysis["_id"])
            except ESRequestError as e:
                print(e)

        if auto or capeproc:
            reprocess = False
        else:
            reprocess = report
        RunReporting(task=task.to_dict(), results=results, reprocess=reprocess).run()
        Database().set_status(task_id, TASK_REPORTED)

        if auto:
            if cfg.cuckoo.delete_original and os.path.exists(target):
                os.unlink(target)
            if copy_path is not None and cfg.cuckoo.delete_bin_copy and os.path.exists(copy_path):
                os.unlink(copy_path)

    if memory_debugging:
        gc.collect()
        log.info("[%s] (5) GC object counts: %d, %d", task_id, len(gc.get_objects()), len(gc.garbage))
        for i, obj in enumerate(gc.garbage):
            log.info("[%s] (garbage) GC object #%d: type=%s", task_id, i, type(obj).__name__)
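# Note on the memory_debugging checkpoints above: the numbered GC snapshots
# (1)-(5) bracket RunProcessing, RunSignatures, and RunReporting, so comparing
# consecutive object counts in the log narrows down which stage leaks objects.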
def left(request, left_id):
    if enabledconf["mongodb"]:
        left = mongo_find_one("analysis", {"info.id": int(left_id)}, {"target": 1, "info": 1})
    if es_as_db:
        hits = es.search(index=get_analysis_index(), query=get_query_by_info_id(left_id))["hits"]["hits"]
        if hits:
            left = hits[-1]["_source"]
        else:
            left = None
    if not left:
        return render(request, "error.html", {"error": "No analysis found with specified ID"})

    # Select all analyses with the same file hash.
    if enabledconf["mongodb"]:
        records = mongo_find(
            "analysis",
            {"$and": [{"target.file.md5": left["target"]["file"]["md5"]}, {"info.id": {"$ne": int(left_id)}}]},
            {"target": 1, "info": 1},
        )
    if es_as_db:
        records = []
        q = {
            "query": {
                "bool": {
                    "must": [{"match": {"target.file.md5": left["target"]["file"]["md5"]}}],
                    "must_not": [{"match": {"info.id": left_id}}],
                }
            }
        }
        results = es.search(index=get_analysis_index(), body=q)["hits"]["hits"]
        for item in results:
            records.append(item["_source"])

    return render(request, "compare/left.html", {"left": left, "records": records})
print(("Found by db.sample_path_by_hash: {}".format(sys.argv[1]))) print(paths) else: if repconf.mongodb.enabled: results_db = pymongo.MongoClient( repconf.mongodb.host, port=repconf.mongodb.port, username=repconf.mongodb.get("username"), password=repconf.mongodb.get("password"), authSource=repconf.mongodb.get("authsource", "cuckoo"), )[repconf.mongodb.db] tasks = results_db.analysis.find({"dropped.sha256": sys.argv[1]}) elif repconf.elasticsearchdb.enabled: from dev_utils.elasticsearchdb import elastic_handler, get_analysis_index tasks = [d['_source'] for d in elastic_handler.search( index=get_analysis_index(), body={ "query": { "match": { "dropped.sha256": sys.argv[1] } } })['hits']['hits']] else: tasks = [] if tasks: for task in tasks: path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(task["info"]["id"]), "files", sys.argv[1]) if os.path.exists(path): paths = [path] print(("Found by dropped in mongo: {}".format(sys.argv[1])))
def statistics(s_days: int) -> dict:
    date_since = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=s_days)
    date_till = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    details = {
        "signatures": {},
        "processing": {},
        "reporting": {},
        "top_samples": {},
        "detections": {},
    }
    tmp_custom = {}
    tmp_data = {}
    if repconf.mongodb.enabled:
        data = results_db.analysis.find(
            {"statistics": {"$exists": True}, "info.started": {"$gte": date_since.isoformat()}},
            {"statistics": 1, "_id": 0},
        )
    elif repconf.elasticsearchdb.enabled:
        q = {
            "query": {
                "bool": {
                    "must": [
                        {"exists": {"field": "statistics"}},
                        {"range": {"info.started": {"gte": date_since.isoformat()}}},
                    ]
                }
            }
        }
        data = [d["_source"] for d in es.search(index=get_analysis_index(), body=q, _source=["statistics"])["hits"]["hits"]]
    else:
        data = None

    for analysis in data or []:
        for type_entry in analysis.get("statistics", []) or []:
            if type_entry not in tmp_data:
                tmp_data.setdefault(type_entry, {})
            for entry in analysis["statistics"][type_entry]:
                if entry["name"] in analysis.get("custom_statistics", {}):
                    if entry["name"] not in tmp_custom:
                        tmp_custom.setdefault(entry["name"], {})
                        if isinstance(analysis["custom_statistics"][entry["name"]], float):
                            tmp_custom[entry["name"]]["time"] = analysis["custom_statistics"][entry["name"]]
                            tmp_custom[entry["name"]]["successful"] = 0
                        else:
                            tmp_custom[entry["name"]]["time"] = analysis["custom_statistics"][entry["name"]]["time"]
                            tmp_custom[entry["name"]]["successful"] = analysis["custom_statistics"][entry["name"]].get(
                                "extracted", 0
                            )
                        tmp_custom[entry["name"]]["runs"] = 1
                    else:
                        tmp_custom.setdefault(entry["name"], {})
                        if isinstance(analysis["custom_statistics"][entry["name"]], float):
                            tmp_custom[entry["name"]]["time"] = analysis["custom_statistics"][entry["name"]]
                            tmp_custom[entry["name"]]["successful"] += 0
                        else:
                            tmp_custom[entry["name"]]["time"] += analysis["custom_statistics"][entry["name"]]["time"]
                            tmp_custom[entry["name"]]["successful"] += analysis["custom_statistics"][entry["name"]].get(
                                "extracted", 0
                            )
                        tmp_custom[entry["name"]]["runs"] += 1
                if entry["name"] not in tmp_data[type_entry]:
                    tmp_data[type_entry].setdefault(entry["name"], {})
                    tmp_data[type_entry][entry["name"]]["time"] = entry["time"]
                    tmp_data[type_entry][entry["name"]]["runs"] = 1
                else:
                    tmp_data[type_entry][entry["name"]]["time"] += entry["time"]
                    tmp_data[type_entry][entry["name"]]["runs"] += 1

    if not data:
        return details

    for module_name in ["signatures", "processing", "reporting"]:
        if module_name not in tmp_data:
            continue
        # module_data = get_stats_per_category(module_name)
        s = sorted(tmp_data[module_name].items(), key=lambda x: x[1].get("time"), reverse=True)[:20]
        for entry, _ in s:
            times_in_mins = tmp_data[module_name][entry]["time"] / 60
            if not times_in_mins:
                continue
            details[module_name].setdefault(entry, {})
            details[module_name][entry]["total"] = float(f"{round(times_in_mins, 2):.2f}")
            details[module_name][entry]["runs"] = tmp_data[module_name][entry]["runs"]
            details[module_name][entry]["average"] = float(f"{round(times_in_mins / tmp_data[module_name][entry]['runs'], 2):.2f}")
        details[module_name] = OrderedDict(sorted(details[module_name].items(), key=lambda x: x[1]["total"], reverse=True))

    # Custom signature averages.
    for entry in tmp_custom:
        times_in_mins = tmp_custom[entry]["time"] / 60
        if not times_in_mins:
            continue
        tmp_custom[entry]["total"] = float(f"{round(times_in_mins, 2):.2f}")
        tmp_custom[entry]["average"] = float(f"{round(times_in_mins / tmp_custom[entry]['runs'], 2):.2f}")

    details["custom_signatures"] = OrderedDict(sorted(tmp_custom.items(), key=lambda x: x[1].get("total", 0), reverse=True))

    top_samples = {}
    session = db.Session()
    added_tasks = (
        session.query(Task).join(Sample, Task.sample_id == Sample.id).filter(Task.added_on.between(date_since, date_till)).all()
    )
    tasks = (
        session.query(Task)
        .join(Sample, Task.sample_id == Sample.id)
        .filter(Task.completed_on.between(date_since, date_till))
        .all()
    )
    details["total"] = len(tasks)
    details["average"] = f"{round(details['total'] / s_days, 2):.2f}"
    details["tasks"] = {}
    for task in tasks or []:
        day = task.completed_on.strftime("%Y-%m-%d")
        if day not in details["tasks"]:
            details["tasks"].setdefault(day, {})
            details["tasks"][day].setdefault("failed", 0)
            details["tasks"][day].setdefault("reported", 0)
            details["tasks"][day].setdefault("added", 0)
        if day not in top_samples:
            top_samples.setdefault(day, {})
        if task.sample.sha256 not in top_samples[day]:
            top_samples[day].setdefault(task.sample.sha256, 0)
        top_samples[day][task.sample.sha256] += 1
        # details["tasks"][day]["added"] += 1
        if task.status in ("failed_analysis", "failed_reporting", "failed_processing"):
            details["tasks"][day]["failed"] += 1
        elif task.status == "reported":
            details["tasks"][day]["reported"] += 1

    for added_task in added_tasks or []:
        day = added_task.added_on.strftime("%Y-%m-%d")
        if day not in details["tasks"]:
            continue
        details["tasks"][day]["added"] += 1

    details["tasks"] = OrderedDict(
        sorted(details["tasks"].items(), key=lambda x: datetime.strptime(x[0], "%Y-%m-%d"), reverse=True)
    )

    if HAVE_DIST and repconf.distributed.enabled:
        details["distributed_tasks"] = {}
        dist_db = dist_session()
        dist_tasks = dist_db.query(DTask).filter(DTask.clock.between(date_since, date_till)).all()
        id2name = {}
        # Load node names.
        for node in dist_db.query(Node).all() or []:
            id2name.setdefault(node.id, node.name)
        for task in dist_tasks or []:
            day = task.clock.strftime("%Y-%m-%d")
            if day not in details["distributed_tasks"]:
                details["distributed_tasks"].setdefault(day, {})
            if id2name.get(task.node_id) not in details["distributed_tasks"][day]:
                details["distributed_tasks"][day].setdefault(id2name[task.node_id], 0)
            details["distributed_tasks"][day][id2name[task.node_id]] += 1
        dist_db.close()
        details["distributed_tasks"] = OrderedDict(sorted(details["distributed_tasks"].items(), key=lambda x: x[0], reverse=True))

    # Get the top 15 samples per day that were seen more than once.
    for day in top_samples:
        if day not in details["top_samples"]:
            details["top_samples"].setdefault(day, {})
        for sha256 in OrderedDict(sorted(top_samples[day].items(), key=lambda x: x[1], reverse=True)[:15]):
            if top_samples[day][sha256] > 1:
                details["top_samples"][day][sha256] = top_samples[day][sha256]
        details["top_samples"][day] = OrderedDict(sorted(details["top_samples"][day].items(), key=lambda x: x[1], reverse=True))

    details["top_samples"] = OrderedDict(
        sorted(details["top_samples"].items(), key=lambda x: datetime.strptime(x[0], "%Y-%m-%d"), reverse=True)
    )
    details["detections"] = top_detections(date_since=date_since, results_limit=20)
    session.close()
    return details
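# Shape sketch of the dict returned by statistics() (hypothetical values):
#
#     {"signatures": {"antivm_generic": {"total": 1.25, "runs": 40, "average": 0.03}},
#      "processing": {...}, "reporting": {...},
#      "tasks": {"2023-01-02": {"failed": 1, "reported": 97, "added": 100}},
#      "top_samples": {"2023-01-02": {"<sha256>": 5}},
#      "detections": [...], "total": 98, "average": "14.00"}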
def top_detections(date_since: datetime = None, results_limit: int = 20) -> dict:
    """Get detection counts per family, based on:
    https://gist.github.com/clarkenheim/fa0f9e5400412b6a0f9d
    """
    t = int(time.time())

    # Results are cached for 10 minutes.
    if hasattr(top_detections, "cache"):
        ct, data = top_detections.cache
        if t - ct < 600:
            return data

    aggregation_command = [
        {"$match": {"detections": {"$exists": True}}},
        {"$group": {"_id": "$detections", "total": {"$sum": 1}}},
        {"$sort": {"total": -1}},
        {"$addFields": {"family": "$_id"}},
        {"$project": {"_id": 0}},
        {"$limit": results_limit},
    ]
    if date_since:
        aggregation_command[0]["$match"].setdefault("info.started", {"$gte": date_since.isoformat()})

    if repconf.mongodb.enabled:
        data = results_db.analysis.aggregate(aggregation_command)
    elif repconf.elasticsearchdb.enabled:
        q = {
            "query": {"bool": {"must": [{"exists": {"field": "detections"}}]}},
            "size": 0,
            "aggs": {"family": {"terms": {"field": "detections.keyword", "size": results_limit}}},
        }
        if date_since:
            q["query"]["bool"]["must"].append({"range": {"info.started": {"gte": date_since.isoformat()}}})
        res = es.search(index=get_analysis_index(), body=q)
        data = [{"total": r["doc_count"], "family": r["key"]} for r in res["aggregations"]["family"]["buckets"]]
    else:
        data = False

    if data:
        data = list(data)
        # Save to cache.
        top_detections.cache = (t, data)

    return data
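# Shape sketch of the returned data (hypothetical values); both backends
# normalize to a list of {"family", "total"} records:
#
#     [{"family": "AgentTesla", "total": 42}, {"family": "RedLine", "total": 17}]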
def perform_search(term, value, search_limit=False):
    if repconf.mongodb.enabled and repconf.elasticsearchdb.enabled and essearch and not term:
        multi_match_search = {"query": {"multi_match": {"query": value, "fields": ["*"]}}}
        numhits = es.search(index=get_analysis_index(), body=multi_match_search, size=0)["hits"]["total"]
        return [
            d["_source"]
            for d in es.search(index=get_analysis_index(), body=multi_match_search, sort="task_id:desc", size=numhits)[
                "hits"
            ]["hits"]
        ]

    query_val = False
    if term in normalized_lower_terms:
        query_val = value.lower()
    elif term in normalized_int_terms:
        query_val = int(value)
    elif term in ("surisid", "id"):
        try:
            query_val = int(value)
        except Exception:
            pass
    elif term in ("ids", "options", "tags_tasks"):
        try:
            ids = []
            if term == "ids":
                ids = value
            elif term == "tags_tasks":
                ids = [int(v.id) for v in db.list_tasks(tags_tasks_like=value)]
            else:
                ids = [int(v.id) for v in db.list_tasks(options_like=value)]
            if ids:
                if len(ids) > 1:
                    query_val = {"$in": ids}
                else:
                    term = "id"
                    if isinstance(value, list):
                        value = value[0]
                    query_val = int(value)
        except Exception as e:
            print(term, value, e)
    else:
        query_val = {"$regex": value, "$options": "-i"}

    if term not in search_term_map:
        return None

    if not search_limit:
        search_limit = web_cfg.general.get("search_limit", 50)

    if term == "payloads" and len(value) in (32, 40, 64, 128):
        search_term_map[term] = f"CAPE.payloads.{hash_len.get(len(value))}"
    elif term == "configs":
        # Check if the family name is a plain string, maybe?
        search_term_map[term] = f"CAPE.configs.{value}"
        query_val = {"$exists": True}

    if repconf.mongodb.enabled and query_val:
        if isinstance(search_term_map[term], str):
            mongo_search_query = {search_term_map[term]: query_val}
        else:
            mongo_search_query = {"$or": [{search_term: query_val} for search_term in search_term_map[term]]}
        return results_db.analysis.find(mongo_search_query, perform_search_filters).sort([["_id", -1]]).limit(search_limit)

    if es_as_db:
        _source_fields = list(perform_search_filters.keys())[:-1]
        if isinstance(search_term_map[term], str):
            q = {"query": {"match": {search_term_map[term]: value}}}
            return [d["_source"] for d in es.search(index=get_analysis_index(), body=q, _source=_source_fields)["hits"]["hits"]]
        else:
            queries = [{"match": {search_term: value}} for search_term in search_term_map[term]]
            q = {"query": {"bool": {"should": queries, "minimum_should_match": 1}}}
            return [d["_source"] for d in es.search(index=get_analysis_index(), body=q, _source=_source_fields)["hits"]["hits"]]
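# Usage sketch (hypothetical term/value; assumes "md5" is present in
# search_term_map): terms are translated through search_term_map into
# backend-specific queries, e.g. a case-insensitive regex on a mapped field
# for MongoDB versus a match query for Elasticsearch.
#
#     hits = perform_search("md5", "9e107d9d372bb6826bd81d3542a419d6")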