Beispiel #1
0
def hash(request, left_id, right_hash):
    """Render the "compare by hash" page for the analysis ``left_id``.

    The analysis with ``info.id == left_id`` is looked up in the enabled
    reporting backend, the candidate analyses to compare against are
    collected, and ``compare/hash.html`` is rendered. When the analysis
    cannot be found, ``error.html`` is rendered instead.

    Args:
        request: request object handed through to ``render``.
        left_id: ID of the reference analysis.
        right_hash: MD5 to compare against; used by the Elasticsearch query
            and exposed to the template as ``hash``.

    Returns:
        Whatever ``render`` returns.
    """
    # Start from "nothing found" so that running with neither backend
    # enabled yields the error page instead of an UnboundLocalError.
    left = None
    records = []

    if enabledconf["mongodb"]:
        left = results_db.analysis.find_one({"info.id": int(left_id)}, {"target": 1, "info": 1})
    if es_as_db:
        hits = es.search(
            index=get_analysis_index(),
            query=get_query_by_info_id(left_id)
        )["hits"]["hits"]
        # The last hit is used as the analysis document.
        left = hits[-1]["_source"] if hits else None
    if not left:
        return render(request, "error.html", {"error": "No analysis found with specified ID"})

    # Select all analyses with same file hash.
    if enabledconf["mongodb"]:
        # NOTE(review): this branch filters on the MD5 of the left analysis
        # while the Elasticsearch branch filters on right_hash - confirm
        # which of the two is intended.
        records = results_db.analysis.find(
            {"$and": [{"target.file.md5": left["target"]["file"]["md5"]}, {"info.id": {"$ne": int(left_id)}}]},
            {"target": 1, "info": 1},
        )
    if es_as_db:
        q = {'query': {'bool': {
            'must': [{'match': {'target.file.md5': right_hash}}],
            'must_not': [{'match': {'info.id': left_id}}],
        }}}
        results = es.search(index=get_analysis_index(), body=q)["hits"]["hits"]
        records = [item["_source"] for item in results]

    return render(request, "compare/hash.html", {"left": left, "records": records, "hash": right_hash})
Beispiel #2
0
def both(request, left_id, right_id):
    """Render the side-by-side comparison of two analyses.

    Both analyses are loaded from the enabled reporting backend. When either
    of them cannot be found, ``error.html`` is rendered (the same message the
    other compare views use); otherwise the behaviour percentages and the
    summary comparison are computed and ``compare/both.html`` is rendered.

    Args:
        request: request object handed through to ``render``.
        left_id: ID of the first analysis.
        right_id: ID of the second analysis.

    Returns:
        Whatever ``render`` returns.
    """
    left = right = None

    if enabledconf["mongodb"]:
        projection = {"target": 1, "info": 1, "summary": 1}
        left = results_db.analysis.find_one({"info.id": int(left_id)}, projection)
        right = results_db.analysis.find_one({"info.id": int(right_id)}, projection)
    elif es_as_db:
        found = []
        for task_id in (left_id, right_id):
            hits = es.search(
                index=get_analysis_index(),
                query=get_query_by_info_id(task_id),
                _source=['target', 'info']
            )["hits"]["hits"]
            # An empty hit list used to raise IndexError on hits[-1].
            found.append(hits[-1]["_source"] if hits else None)
        left, right = found

    if not left or not right:
        return render(request, "error.html", {"error": "No analysis found with specified ID"})

    # Execute comparison. Reaching this point means one backend is enabled.
    if enabledconf["mongodb"]:
        counts = compare.helper_percentages_mongo(results_db, left_id, right_id)
        summary_compare = compare.helper_summary_mongo(results_db, left_id, right_id)
    else:
        counts = compare.helper_percentages_elastic(es, left_id, right_id)
        summary_compare = compare.helper_summary_elastic(es, left_id, right_id)

    return render(
        request,
        "compare/both.html",
        {
            "left": left,
            "right": right,
            "left_counts": counts[left_id],
            "right_counts": counts[right_id],
            "summary": summary_compare,
        },
    )
Beispiel #3
0
def helper_summary_elastic(es_obj, tid1, tid2):
    """Compare the stored documents of two analyses kept in Elasticsearch.

    Each ID is searched in the analysis index and the ``_source`` of the
    last hit is taken as that analysis' document.

    Args:
        es_obj: Elasticsearch client used for the searches.
        tid1: ID of the first analysis.
        tid2: ID of the second analysis.

    Returns:
        The result of ``get_similar_summary`` for the two documents, or an
        empty dict when either document is missing or empty.
    """
    documents = []
    for task_id in (tid1, tid2):
        hits = es_obj.search(
            index=get_analysis_index(),
            query=get_query_by_info_id(task_id),
        )["hits"]["hits"]
        documents.append(hits[-1]["_source"] if hits else None)

    left_sum, right_sum = documents
    if left_sum and right_sum:
        return get_similar_summary(left_sum, right_sum)
    return {}
Beispiel #4
0
def helper_percentages_elastic(es_obj, tid1, tid2, ignore_categories=("misc",)):
    """Build per-process API-category counts for two analyses in Elasticsearch.

    For each analysis the last search hit is used. Every call chunk
    referenced by a process is fetched from the calls index, run through
    ``behavior_categories_percent`` and summed per category.

    Args:
        es_obj: Elasticsearch client used for the searches.
        tid1: ID of the first analysis.
        tid2: ID of the second analysis.
        ignore_categories: categories left out of the counts. The default is
            an immutable tuple so it cannot be shared and mutated across
            calls; any container supporting ``in`` is accepted.

    Returns:
        The result of ``combine_behavior_percentages`` applied to a
        ``{task_id: {process_id: {category: count}}}`` mapping. An analysis
        that is not found contributes an empty mapping.
    """
    counts = {}

    for tid in (tid1, tid2):
        counts[tid] = {}
        hits = es_obj.search(
            index=get_analysis_index(),
            query=get_query_by_info_id(tid))["hits"]["hits"]
        analysis = hits[-1]["_source"] if hits else None
        if not analysis:
            continue

        # An analysis without a behavior section has no processes to count.
        processes = (analysis.get("behavior") or {}).get("processes") or []
        for pdoc in processes:
            per_process = counts[tid][pdoc["process_id"]] = {}

            for coid in pdoc["calls"]:
                chunk_hits = es_obj.search(
                    index=get_calls_index(),
                    body={"query": {"match": {"_id": coid}}},
                )["hits"]["hits"]
                if not chunk_hits:
                    # A referenced chunk that is missing is skipped rather
                    # than aborting the whole comparison with IndexError.
                    continue
                category_counts = behavior_categories_percent(chunk_hits[-1]["_source"]["calls"])
                for cat, count in category_counts.items():
                    if cat in ignore_categories:
                        continue
                    per_process[cat] = per_process.get(cat, 0) + count

    return combine_behavior_percentages(counts)
Beispiel #5
0
def cuckoo_clean_lower_score(args):
    """Clean up tasks with a malscore less than or equal to ``args.malscore``.

    The IDs of all matching analyses are collected from the enabled
    reporting backend (MongoDB or Elasticsearch) and each one is handed to
    ``delete_data`` through ``resolver_pool``.

    Args:
        args: object with a ``malscore`` attribute; a falsy value makes the
            function log a message and return without doing anything.
    """
    # Init logging.
    # This need to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    if not args.malscore:
        log.info("No malscore argument provided bailing")
        return

    create_structure()
    init_console_logging()
    id_arr = []
    if not is_reporting_db_connected():
        return

    if repconf.mongodb and repconf.mongodb.enabled:
        results_db = connect_to_mongo()[mdb]
        # Only the task ID is needed, so project it instead of pulling every
        # complete analysis document over the wire.
        matches = results_db.analysis.find(
            {"malscore": {"$lte": args.malscore}},
            {"info.id": 1, "_id": 0},
        )
        id_arr = [entry["info"]["id"] for entry in matches]
    elif repconf.elasticsearchdb.enabled:
        id_arr = [d["_source"]["info"]["id"] for d in all_docs(
            index=get_analysis_index(), query={
                "query": {
                    "range": {
                      "malscore": {
                        "lte": args.malscore
                      }
                    }
                }
            }, _source=["info.id"])]
    log.info("number of matching records %s", len(id_arr))
    resolver_pool.map(delete_data, id_arr)
Beispiel #6
0
def static_config_lookup(file_path, sha256=False):
    """Look up an analysis with extracted CAPE configs for a file.

    Args:
        file_path: path of the file; only read when ``sha256`` is not given.
        sha256: SHA-256 hex digest of the file. When falsy it is computed
            from the contents of ``file_path``.

    Returns:
        The ``info`` part of the matching analysis when that analysis has a
        non-empty ``CAPE.configs``; otherwise ``None`` (no backend enabled,
        no matching analysis, or no configs).
    """
    if not sha256:
        # Close the handle deterministically instead of leaking it.
        with open(file_path, "rb") as sample:
            sha256 = hashlib.sha256(sample.read()).hexdigest()

    if repconf.mongodb.enabled:
        document_dict = results_db.analysis.find_one(
            {"target.file.sha256": sha256},
            {"CAPE.configs": 1, "info.id": 1, "_id": 0},
            sort=[("_id", pymongo.DESCENDING)],
        )
    elif repconf.elasticsearchdb.enabled:
        hits = es.search(
            index=get_analysis_index(),
            body={"query": {"match": {"target.file.sha256": sha256}}},
            _source=["CAPE.configs", "info.id"],
            sort={"_id": {"order": "desc"}},
        )["hits"]["hits"]
        # No hit is a normal outcome, not an IndexError.
        document_dict = hits[0]["_source"] if hits else None
    else:
        document_dict = None

    if not document_dict:
        return None

    if document_dict.get("CAPE", {}).get("configs", []):
        return document_dict["info"]
    return None
0
def cuckoo_clean():
    """Clean up cuckoo setup.

    Drops the SQL tables, clears the enabled reporting backend (MongoDB
    database or the Elasticsearch analyses), removes the ``db``, ``log`` and
    ``storage`` directories under ``CUCKOO_ROOT`` and deletes every compiled
    Python file (``*.pyc``) below ``CUCKOO_ROOT``. File-system errors are
    logged as warnings and do not stop the clean-up.
    """
    # Init logging.
    # This need to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    create_structure()
    init_console_logging()

    # Drop all tables.
    db.drop()

    if repconf.mongodb.enabled:
        mongo_drop_database(mdb)

    elif repconf.elasticsearchdb.enabled and not repconf.elasticsearchdb.searchonly:
        analyses = all_docs(index=get_analysis_index(),
                            query={"query": {"match_all": {}}},
                            _source=["info.id"])
        for analysis in analyses or []:
            delete_analysis_and_related_calls(analysis["_source"]["info"]["id"])

    # Paths to clean.
    paths = [
        os.path.join(CUCKOO_ROOT, "db"),
        os.path.join(CUCKOO_ROOT, "log"),
        os.path.join(CUCKOO_ROOT, "storage"),
    ]

    # Delete various directories.
    for path in paths:
        if os.path.isdir(path):
            try:
                shutil.rmtree(path)
            except (IOError, OSError) as e:
                log.warning("Error removing directory %s: %s", path, e)

    # Delete all compiled Python objects ("*.pyc").
    for dirpath, _dirnames, filenames in os.walk(CUCKOO_ROOT):
        for fname in filenames:
            if not fname.endswith(".pyc"):
                continue

            # os.walk() already yields dirpath prefixed with CUCKOO_ROOT;
            # joining CUCKOO_ROOT again would double the prefix whenever it
            # is a relative path.
            path = os.path.join(dirpath, fname)

            try:
                os.unlink(path)
            except (IOError, OSError) as e:
                log.warning("Error removing file %s: %s", path, e)
Beispiel #8
0
def perform_malscore_search(value):
    """Find analyses whose malscore is greater than or equal to ``value``.

    Args:
        value: threshold; converted with ``float`` once a backend is chosen.

    Returns:
        With MongoDB, the cursor of matching analyses sorted by ``_id``
        descending. With Elasticsearch, the list of raw hits. ``None`` when
        neither backend is enabled.
    """
    if repconf.mongodb.enabled:
        mongo_filter = {"malscore": {"$gte": float(value)}}
        cursor = results_db.analysis.find(mongo_filter, perform_search_filters)
        return cursor.sort([["_id", -1]])

    if repconf.elasticsearchdb.enabled:
        es_query = {"query": {"range": {"malscore": {"gte": float(value)}}}}
        # Every key of the shared projection except the last one.
        wanted_fields = list(perform_search_filters)[:-1]
        response = es.search(
            index=get_analysis_index(),
            body=es_query,
            _source=wanted_fields,
        )
        return response["hits"]["hits"]

    return None
Beispiel #9
0
def cuckoo_clean_failed_url_tasks():
    """Clean up URL tasks that produced no HTTP traffic.

    Matching analyses are looked up in the enabled reporting backend
    (MongoDB or Elasticsearch) and each match is handed to ``delete_data``
    through ``resolver_pool``.
    """
    # Init logging.
    # This need to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    create_structure()
    init_console_logging()
    if not is_reporting_db_connected():
        return

    if repconf.mongodb.enabled:
        # Materialise the result: a database cursor supports neither len()
        # nor a meaningful truth test, so the emptiness check below needs a
        # real list.
        rtmp = list(
            mongo_find(
                "analysis",
                {"info.category": "url", "network.http.0": {"$exists": False}},
                {"info.id": 1},
                sort=[("_id", -1)],
            ).limit(100)
        )
    elif repconf.elasticsearchdb.enabled:
        rtmp = [
            d["_source"] for d in all_docs(
                index=get_analysis_index(),
                query={
                    "query": {
                        "bool": {
                            "must": [{"match": {"info.category": "url"}}],
                            # Mirror the MongoDB filter: select tasks WITHOUT
                            # HTTP traffic. The previous query required the
                            # field to exist, which selected the opposite set.
                            "must_not": [{"exists": {"field": "network.http"}}],
                        }
                    }
                },
                _source=["info.id"],
            )
        ]
    else:
        rtmp = []

    if rtmp:
        # NOTE(review): delete_data receives the projected documents, not
        # bare task IDs - confirm it accepts that shape.
        resolver_pool.map(delete_data, rtmp)
Beispiel #10
0
def perform_ttps_search(value):
    """Find analyses tagged with a given TTP identifier.

    Args:
        value: identifier made of the letter ``T`` (any case) followed by
            four digits, e.g. ``"T1055"``.

    Returns:
        With MongoDB, a cursor over ``info.id`` of the matching analyses,
        sorted by ``_id`` descending. With Elasticsearch, the list of raw
        hits. ``None`` when ``value`` is not a valid identifier or no
        backend is enabled.
    """
    is_ttp = len(value) == 5 and value.upper().startswith("T") and value[1:].isdigit()
    if not is_ttp:
        return None

    ttp = value.upper()
    if repconf.mongodb.enabled:
        # "$exists" is the MongoDB operator; the former "$exist" is not a
        # known operator and made the query fail.
        return results_db.analysis.find(
            {f"ttps.{ttp}": {"$exists": True}},
            {"info.id": 1, "_id": 0},
        ).sort([["_id", -1]])
    if repconf.elasticsearchdb.enabled:
        q = {"query": {"match": {"ttps.ttp": ttp}}}
        return es.search(index=get_analysis_index(), body=q)["hits"]["hits"]
    return None
Beispiel #11
0
def remove(task_id):
    """Delete the stored results of a task from the reporting DB and disk.

    All analyses with ``info.id == task_id`` (duplicates included) are
    removed from the enabled reporting backend together with their call
    chunks, then the task's folder under ``storage/analyses`` is deleted.
    Prints ``nothing found`` when the backend holds no such analysis and
    does nothing when no backend is enabled.

    Args:
        task_id: ID of the task, as int or numeric string.
    """
    if repconf.mongodb.enabled:
        analyses = list(
            results_db.analysis.find({"info.id": int(task_id)}, {"_id": 1, "behavior.processes": 1})
        )
    elif repconf.elasticsearchdb.enabled:
        analyses = [
            d["_source"] for d in es.search(
                index=get_analysis_index(),
                query=get_query_by_info_id(task_id),
                # "info.id" is read below, so it has to be requested too;
                # without it the deletion failed with KeyError.
                _source=["info.id", "behavior.processes"])["hits"]["hits"]
        ]
    else:
        return

    if not analyses:
        print("nothing found")
        return

    # Delete dups too.
    for analysis in analyses:
        if repconf.mongodb.enabled:
            # Delete calls, one request per process.
            for proc in analysis.get("behavior", {}).get("processes", []):
                call_ids = [ObjectId(call) for call in proc["calls"]]
                results_db.calls.delete_many({"_id": {"$in": call_ids}})
            # Delete analysis data.
            results_db.analysis.delete_one({"_id": ObjectId(analysis["_id"])})
        else:
            delete_analysis_and_related_calls(analysis["info"]["id"])

    # str() keeps the join working when the ID is passed as an int.
    analyses_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(task_id))
    if os.path.exists(analyses_path):
        shutil.rmtree(analyses_path)
Beispiel #12
0
def _load_report(task_id: int, return_one: bool = False):
    """Load the stored report(s) of a task from the reporting backend.

    Args:
        task_id: ID of the task.
        return_one: when true, return only the most recent report instead
            of all of them.

    Returns:
        MongoDB: with ``return_one`` the newest analysis document, its
        processes' ``calls`` expanded from chunk IDs to the actual calls;
        otherwise the result of ``mongo_find`` for the task.
        Elasticsearch (not search-only): the first hit or the list of hits.
        ``False`` when nothing is found or no backend is usable.
    """
    if repconf.mongodb.enabled:
        if not return_one:
            return mongo_find("analysis", {"info.id": int(task_id)})

        analysis = mongo_find_one("analysis", {"info.id": int(task_id)}, sort=[("_id", -1)])
        if not analysis:
            # No report stored for this task; previously this crashed with
            # AttributeError on None.
            return False

        for proc in analysis.get("behavior", {}).get("processes", []):
            chunk_ids = [ObjectId(call) for call in proc["calls"]]
            # Replace the chunk references by the calls they contain, in
            # chunk insertion order.
            proc["calls"] = []
            for chunk in mongo_find("calls", {"_id": {"$in": chunk_ids}}, sort=[("_id", 1)]) or []:
                proc["calls"] += chunk["calls"]
        return analysis

    if repconf.elasticsearchdb.enabled and not repconf.elasticsearchdb.searchonly:
        try:
            analyses = (
                es.search(
                    index=get_analysis_index(),
                    query=get_query_by_info_id(task_id),
                    sort={"info.id": {"order": "desc"}},
                )
                .get("hits", {})
                .get("hits", [])
            )
            if analyses:
                return analyses[0] if return_one else analyses
        except ESRequestError as e:
            print(e)

    return False
Beispiel #13
0
def cuckoo_clean_sorted_pcap_dump():
    """Remove sorted PCAP dumps from the reporting database and from disk.

    For every analysis that has ``network.sorted_pcap_id`` set, the field is
    removed in the enabled reporting backend (MongoDB or Elasticsearch) and
    ``dump_sorted.pcap`` in the task's analysis folder is deleted. Failures
    are logged and do not stop the clean-up.
    """
    # Init logging.
    # This need to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    create_structure()
    init_console_logging()

    if not is_reporting_db_connected():
        return

    use_mongo = bool(repconf.mongodb and repconf.mongodb.enabled)
    if use_mongo:
        results_db = connect_to_mongo()[mdb]
    elif repconf.elasticsearchdb.enabled:
        es = connect_to_es()
    else:
        return

    while True:
        # Each batch entry is (analysis document, Elasticsearch hit or None).
        if use_mongo:
            cursor = results_db.analysis.find(
                {"network.sorted_pcap_id": {"$exists": True}}, {"info.id": 1}, sort=[("_id", -1)]
            ).limit(100)
            # A cursor has no len(); materialise it before testing it.
            batch = [(doc, None) for doc in cursor]
        else:
            batch = [
                (hit["_source"], hit)
                for hit in all_docs(
                    index=get_analysis_index(),
                    query={"query": {"exists": {"field": "network.sorted_pcap_id"}}},
                    _source=["info.id"],
                )
            ]

        if not batch:
            break

        progressed = False
        for doc, hit in batch:
            task_id = doc.get("info", {}).get("id")
            if not task_id:
                continue
            log.info("%s", task_id)

            try:
                if use_mongo:
                    results_db.analysis.update_one(
                        {"info.id": int(task_id)},
                        {"$unset": {"network.sorted_pcap_id": ""}},
                    )
                else:
                    # Address the document by the hit's own index and _id and
                    # drop the field with a scripted update.
                    es.update(
                        index=hit["_index"],
                        id=hit["_id"],
                        body={
                            "script": {
                                "lang": "painless",
                                "source": "ctx._source.network.remove('sorted_pcap_id')",
                            }
                        },
                    )
                progressed = True
            except Exception:
                log.info("failed to remove sorted pcap from db for id %s", task_id)

            path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(task_id), "dump_sorted.pcap")
            try:
                os.remove(path)
            except Exception as err:
                log.info("failed to remove sorted_pcap from disk %s", err)

        # MongoDB is processed in pages of 100; go on only while a page
        # actually changed something, otherwise the same documents would be
        # fetched forever. NOTE(review): all_docs presumably returns every
        # match at once, so Elasticsearch is done after one pass - confirm.
        if not use_mongo or not progressed:
            break
Beispiel #14
0
def process(target=None, copy_path=None, task=None, report=False, auto=False, capeproc=False, memory_debugging=False):
    """Run processing, signatures and (optionally) reporting for a task.

    Args:
        target: path of the analysed file; deleted after reporting when
            ``auto`` is set and ``cfg.cuckoo.delete_original`` is enabled.
        copy_path: path of the binary copy; deleted after reporting when
            ``auto`` is set and ``cfg.cuckoo.delete_bin_copy`` is enabled.
        task: task object providing ``to_dict()``.
        report: when true, previous report data of the task is removed from
            the reporting backends, the reporting modules are run and the
            task status is set to ``TASK_REPORTED``.
        auto: automatic processing; disables reprocessing mode and enables
            the file clean-up described above.
        capeproc: disables reprocessing mode.
        memory_debugging: log garbage-collector object counts between the
            stages.
    """
    # This is the results container. It's what will be used by all the
    # reporting modules to make it consumable by humans and machines.
    # It will contain all the results generated by every processing
    # module available. Its structure can be observed through the JSON
    # dump in the analysis' reports folder. (If jsondump is enabled.)
    task_dict = task.to_dict() or {}
    task_id = task_dict.get("id") or 0
    results = {"statistics": {"processing": [], "signatures": [], "reporting": []}}

    def _log_gc(stage):
        # Memory-debugging checkpoint: collect, then log the object counts.
        if not memory_debugging:
            return
        gc.collect()
        log.info("[%s] (%s) GC object counts: %d, %d", task_id, stage, len(gc.get_objects()), len(gc.garbage))

    _log_gc(1)
    _log_gc(2)
    RunProcessing(task=task_dict, results=results).run()
    _log_gc(3)

    RunSignatures(task=task_dict, results=results).run()
    _log_gc(4)

    if report:
        if repconf.mongodb.enabled:
            conn, mdata, analyses = _load_mongo_report(task_id)
            if analyses:
                log.debug("Deleting analysis data for Task %s", task_id)
                for analysis in analyses:
                    # "proc" instead of "process" so the loop variable does
                    # not shadow this function's own name.
                    for proc in analysis.get("behavior", {}).get("processes", []):
                        call_ids = [ObjectId(call) for call in proc["calls"]]
                        mdata.calls.delete_many({"_id": {"$in": call_ids}})
                    mdata.analysis.delete_one({"_id": ObjectId(analysis["_id"])})
            conn.close()
            log.debug("Deleted previous MongoDB data for Task %s", task_id)

        if repconf.elasticsearchdb.enabled and not repconf.elasticsearchdb.searchonly:
            try:
                analyses = es.search(
                    index=get_analysis_index(), query=get_query_by_info_id(task_id)
                )["hits"]["hits"]
                # Every hit belongs to this task and the helper is called
                # with the task ID elsewhere in this code base, so pass the
                # task ID rather than the Elasticsearch document _id.
                for _hit in analyses:
                    delete_analysis_and_related_calls(task_id)
            except ESRequestError as e:
                print(e)

        reprocess = False if (auto or capeproc) else report

        RunReporting(task=task.to_dict(), results=results, reprocess=reprocess).run()
        Database().set_status(task_id, TASK_REPORTED)

        if auto:
            if cfg.cuckoo.delete_original and os.path.exists(target):
                os.unlink(target)

            if copy_path is not None and cfg.cuckoo.delete_bin_copy and os.path.exists(copy_path):
                os.unlink(copy_path)

    _log_gc(5)
    if memory_debugging:
        for i, obj in enumerate(gc.garbage):
            log.info("[%s] (garbage) GC object #%d: type=%s", task_id, i, type(obj).__name__)
Beispiel #15
0
def left(request, left_id):
    """Render the page for choosing what to compare analysis ``left_id`` with.

    The analysis with ``info.id == left_id`` is looked up in the enabled
    reporting backend, then every other analysis of the same file (same
    MD5) is collected and ``compare/left.html`` is rendered. When the
    analysis cannot be found, ``error.html`` is rendered instead.

    Args:
        request: request object handed through to ``render``.
        left_id: ID of the reference analysis.

    Returns:
        Whatever ``render`` returns.
    """
    # Start from "nothing found" so that running with neither backend
    # enabled yields the error page instead of an UnboundLocalError.
    left = None
    records = []

    if enabledconf["mongodb"]:
        left = mongo_find_one("analysis", {"info.id": int(left_id)}, {"target": 1, "info": 1})
    if es_as_db:
        hits = es.search(index=get_analysis_index(),
                         query=get_query_by_info_id(left_id))["hits"]["hits"]
        # The last hit is used as the analysis document.
        left = hits[-1]["_source"] if hits else None
    if not left:
        return render(request, "error.html",
                      {"error": "No analysis found with specified ID"})

    # Select all analyses with same file hash.
    md5 = left["target"]["file"]["md5"]
    if enabledconf["mongodb"]:
        records = mongo_find(
            "analysis",
            {"$and": [{"target.file.md5": md5}, {"info.id": {"$ne": int(left_id)}}]},
            {"target": 1, "info": 1},
        )
    if es_as_db:
        q = {
            "query": {
                "bool": {
                    "must": [{"match": {"target.file.md5": md5}}],
                    "must_not": [{"match": {"info.id": left_id}}],
                }
            }
        }
        results = es.search(index=get_analysis_index(), body=q)["hits"]["hits"]
        records = [item["_source"] for item in results]

    return render(request, "compare/left.html", {
        "left": left,
        "records": records
    })
Beispiel #16
0
            print(("Found by db.sample_path_by_hash: {}".format(sys.argv[1])))
            print(paths)
    else:
        if repconf.mongodb.enabled:
            results_db = pymongo.MongoClient(
                repconf.mongodb.host,
                port=repconf.mongodb.port,
                username=repconf.mongodb.get("username"),
                password=repconf.mongodb.get("password"),
                authSource=repconf.mongodb.get("authsource", "cuckoo"),
            )[repconf.mongodb.db]
            tasks = results_db.analysis.find({"dropped.sha256": sys.argv[1]})
        elif repconf.elasticsearchdb.enabled:
            from dev_utils.elasticsearchdb import elastic_handler, get_analysis_index
            tasks = [d['_source'] for d in elastic_handler.search(
                index=get_analysis_index(), body={
                    "query": {
                        "match": {
                            "dropped.sha256": sys.argv[1]
                        }
                    }
                })['hits']['hits']]
        else:
            tasks = []

        if tasks:
            for task in tasks:
                path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(task["info"]["id"]), "files", sys.argv[1])
                if os.path.exists(path):
                    paths = [path]
                    print(("Found by dropped in mongo: {}".format(sys.argv[1])))
Beispiel #17
0
def statistics(s_days: int) -> dict:
    """Collect usage statistics for the last ``s_days`` days.

    Module timings are read from the ``statistics`` (and
    ``custom_statistics``) part of the analyses stored in the enabled
    reporting backend; task counts come from the SQL database and, when
    enabled, from the distributed database.

    Args:
        s_days: number of days to look back, counted from today's midnight.

    Returns:
        A dict with the keys ``signatures``, ``processing``, ``reporting``
        (per-module total/average minutes and runs, 20 slowest modules),
        ``top_samples`` and ``detections``. When report data was found it
        also holds ``custom_signatures``, ``total``, ``average``, ``tasks``
        and, with distributed mode, ``distributed_tasks``.
    """
    # Both bounds are aligned to midnight; reading the clock once keeps them
    # consistent with each other.
    today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    date_since = today - timedelta(days=s_days)
    date_till = today

    details = {
        "signatures": {},
        "processing": {},
        "reporting": {},
        "top_samples": {},
        "detections": {},
    }

    tmp_custom = {}
    tmp_data = {}
    # "custom_statistics" is read below, so it must be part of what is
    # fetched; with only "statistics" projected that code never ran.
    if repconf.mongodb.enabled:
        data = results_db.analysis.find(
            {
                "statistics": {"$exists": True},
                "info.started": {"$gte": date_since.isoformat()},
            },
            {"statistics": 1, "custom_statistics": 1, "_id": 0},
        )
    elif repconf.elasticsearchdb.enabled:
        q = {
            "query": {
                "bool": {
                    "must": [
                        {"exists": {"field": "statistics"}},
                        {"range": {"info.started": {"gte": date_since.isoformat()}}},
                    ]
                }
            }
        }
        data = [
            d["_source"]
            for d in es.search(
                index=get_analysis_index(),
                body=q,
                _source=["statistics", "custom_statistics"],
            )["hits"]["hits"]
        ]
    else:
        data = None

    for analysis in data or []:
        custom_stats = analysis.get("custom_statistics") or {}
        for type_entry in analysis.get("statistics", []) or []:
            per_type = tmp_data.setdefault(type_entry, {})
            for entry in analysis["statistics"][type_entry]:
                name = entry["name"]
                if name in custom_stats:
                    raw = custom_stats[name]
                    # A custom statistic is either a bare duration or a dict
                    # with "time" and an optional "extracted" counter.
                    if isinstance(raw, float):
                        spent, extracted = raw, 0
                    else:
                        spent, extracted = raw["time"], raw.get("extracted", 0)
                    bucket = tmp_custom.setdefault(name, {"time": 0, "successful": 0, "runs": 0})
                    # Accumulate in both cases; the bare-duration case used
                    # to overwrite the time collected so far.
                    bucket["time"] += spent
                    bucket["successful"] += extracted
                    bucket["runs"] += 1

                module = per_type.setdefault(name, {"time": 0, "runs": 0})
                module["time"] += entry["time"]
                module["runs"] += 1

    if not data:
        return details

    for module_name in ["signatures", "processing", "reporting"]:
        if module_name not in tmp_data:
            continue
        # module_data = get_stats_per_category(module_name)
        slowest = sorted(tmp_data[module_name].items(),
                         key=lambda x: x[1].get("time"),
                         reverse=True)[:20]

        for name, module in slowest:
            times_in_mins = module["time"] / 60
            if not times_in_mins:
                continue
            details[module_name][name] = {
                "total": float(f"{round(times_in_mins, 2):.2f}"),
                "runs": module["runs"],
                "average": float(f"{round(times_in_mins / module['runs'], 2):.2f}"),
            }
        details[module_name] = OrderedDict(
            sorted(details[module_name].items(),
                   key=lambda x: x[1]["total"],
                   reverse=True))

    # custom average
    for bucket in tmp_custom.values():
        times_in_mins = bucket["time"] / 60
        if not times_in_mins:
            continue
        bucket["total"] = float(f"{round(times_in_mins, 2):.2f}")
        bucket["average"] = float(f"{round(times_in_mins / bucket['runs'], 2):.2f}")

    # Entries skipped above have no "total"; rank them as 0 so the sort key
    # stays numeric (a string default breaks the comparison).
    details["custom_signatures"] = OrderedDict(
        sorted(tmp_custom.items(),
               key=lambda x: x[1].get("total", 0),
               reverse=True))

    top_samples = {}
    session = db.Session()
    try:
        added_tasks = (session.query(Task).join(
            Sample, Task.sample_id == Sample.id).filter(
                Task.added_on.between(date_since, date_till)).all())
        tasks = (session.query(Task).join(
            Sample, Task.sample_id == Sample.id).filter(
                Task.completed_on.between(date_since, date_till)).all())
        details["total"] = len(tasks)
        # A zero-day window has no meaningful daily average; report 0
        # instead of dividing by zero.
        per_day_average = details["total"] / s_days if s_days else 0
        details["average"] = f"{round(per_day_average, 2):.2f}"
        details["tasks"] = {}
        for task in tasks or []:
            day = task.completed_on.strftime("%Y-%m-%d")
            day_stats = details["tasks"].setdefault(
                day, {"failed": 0, "reported": 0, "added": 0})
            day_samples = top_samples.setdefault(day, {})
            sha256 = task.sample.sha256
            day_samples[sha256] = day_samples.get(sha256, 0) + 1
            # details["tasks"][day]["added"] += 1
            if task.status in ("failed_analysis", "failed_reporting",
                               "failed_processing"):
                day_stats["failed"] += 1
            elif task.status == "reported":
                day_stats["reported"] += 1

        # Added tasks are only counted for days that also completed tasks.
        for added_task in added_tasks or []:
            day = added_task.added_on.strftime("%Y-%m-%d")
            if day not in details["tasks"]:
                continue
            details["tasks"][day]["added"] += 1

        details["tasks"] = OrderedDict(
            sorted(details["tasks"].items(),
                   key=lambda x: datetime.strptime(x[0], "%Y-%m-%d"),
                   reverse=True))

        if HAVE_DIST and repconf.distributed.enabled:
            details["distributed_tasks"] = {}
            dist_db = dist_session()
            try:
                dist_tasks = dist_db.query(DTask).filter(
                    DTask.clock.between(date_since, date_till)).all()
                id2name = {}
                # load node names
                for node in dist_db.query(Node).all() or []:
                    id2name.setdefault(node.id, node.name)

                for task in dist_tasks or []:
                    day = task.clock.strftime("%Y-%m-%d")
                    per_node = details["distributed_tasks"].setdefault(day, {})
                    node_name = id2name[task.node_id]
                    per_node[node_name] = per_node.get(node_name, 0) + 1
            finally:
                dist_db.close()

            details["distributed_tasks"] = OrderedDict(
                sorted(details["distributed_tasks"].items(),
                       key=lambda x: x[0],
                       reverse=True))

        # Get top15 of samples per day and seen more than once
        for day, samples in top_samples.items():
            top15 = sorted(samples.items(), key=lambda x: x[1], reverse=True)[:15]
            details["top_samples"][day] = OrderedDict(
                (sha256, count) for sha256, count in top15 if count > 1)
        details["top_samples"] = OrderedDict(
            sorted(details["top_samples"].items(),
                   key=lambda x: datetime.strptime(x[0], "%Y-%m-%d"),
                   reverse=True))

        details["detections"] = top_detections(date_since=date_since,
                                               results_limit=20)
    finally:
        # Release the SQL session even when one of the queries fails.
        session.close()

    return details
Beispiel #18
0
def top_detections(date_since: datetime = False,
                   results_limit: int = 20) -> list:
    """Return the most frequent detection names together with their counts.

    Based on: https://gist.github.com/clarkenheim/fa0f9e5400412b6a0f9d

    The result is cached on the function object for 10 minutes.
    NOTE: the cache is not keyed by the arguments, so a call made inside
    that window receives the previously computed result even when
    ``date_since`` or ``results_limit`` differ from the call that filled it.

    Args:
        date_since: when truthy, only analyses whose ``info.started`` is
            greater than or equal to ``date_since.isoformat()`` are counted.
        results_limit: maximum number of entries to return.

    Returns:
        A list of ``{"total": <count>, "family": <detection>}`` dicts, or
        ``False`` when neither MongoDB nor Elasticsearch is enabled in
        ``repconf``.
    """
    now = int(time.time())

    # Serve the cached result while it is less than 10 minutes old.
    cached = getattr(top_detections, "cache", None)
    if cached is not None:
        cached_at, cached_data = cached
        if now - cached_at < 600:
            return cached_data

    since_iso = date_since.isoformat() if date_since else None

    if repconf.mongodb.enabled:
        match_stage = {"detections": {"$exists": True}}
        if since_iso is not None:
            match_stage["info.started"] = {"$gte": since_iso}

        aggregation_command = [
            {"$match": match_stage},
            # Count documents per distinct "detections" value.
            {"$group": {"_id": "$detections", "total": {"$sum": 1}}},
            {"$sort": {"total": -1}},
            # Expose the grouping key as "family" and drop the raw "_id".
            {"$addFields": {"family": "$_id"}},
            {"$project": {"_id": 0}},
            {"$limit": results_limit},
        ]
        # Materialise the cursor so the cached value can be reused.
        data = list(results_db.analysis.aggregate(aggregation_command))
    elif repconf.elasticsearchdb.enabled:
        must_clauses = [{"exists": {"field": "detections"}}]
        if since_iso is not None:
            must_clauses.append({"range": {"info.started": {"gte": since_iso}}})

        q = {
            "query": {"bool": {"must": must_clauses}},
            # Only the aggregation buckets are needed, not the hits.
            "size": 0,
            "aggs": {
                "family": {
                    "terms": {
                        "field": "detections.keyword",
                        "size": results_limit,
                    }
                }
            },
        }
        res = es.search(index=get_analysis_index(), body=q)
        data = [
            {"total": bucket["doc_count"], "family": bucket["key"]}
            for bucket in res["aggregations"]["family"]["buckets"]
        ]
    else:
        data = False

    # save to cache
    top_detections.cache = (now, data)

    return data
Beispiel #19
0
def perform_search(term, value, search_limit=False):
    """Search stored analyses for ``value`` in the field(s) mapped to ``term``.

    Args:
        term: search keyword. Unless the Elasticsearch full-text path is
            taken (see below), it must be a key of ``search_term_map``.
        value: the value to look for. For ``term == "ids"`` this is the
            collection of task ids itself.
        search_limit: maximum number of MongoDB results. When falsy, the
            ``search_limit`` option of the web config (default 50) is used.

    Returns:
        * a list of ``_source`` documents when Elasticsearch answers,
        * the cursor returned by ``results_db.analysis.find(...)`` when
          MongoDB answers,
        * ``None`` when ``term`` is not in ``search_term_map`` or no backend
          handled the query.
    """
    if repconf.mongodb.enabled and repconf.elasticsearchdb.enabled and essearch and not term:
        # No term given: full-text search over every field. A first query
        # with size=0 fetches the hit count, which sizes the second query.
        multi_match_search = {
            "query": {
                "multi_match": {
                    "query": value,
                    "fields": ["*"],
                }
            }
        }
        numhits = es.search(index=get_analysis_index(),
                            body=multi_match_search,
                            size=0)["hits"]["total"]
        return [
            d["_source"] for d in es.search(index=get_analysis_index(),
                                            body=multi_match_search,
                                            sort="task_id:desc",
                                            size=numhits)["hits"]["hits"]
        ]

    # Build the MongoDB-side value; it stays falsy when nothing usable
    # could be derived, which skips the MongoDB query further down.
    query_val = False
    if term in normalized_lower_terms:
        query_val = value.lower()
    elif term in normalized_int_terms:
        query_val = int(value)
    elif term in ("surisid", "id"):
        try:
            query_val = int(value)
        except (TypeError, ValueError):
            # Non-numeric input: leave query_val falsy.
            pass
    elif term in ("ids", "options", "tags_tasks"):
        try:
            if term == "ids":
                ids = value
            elif term == "tags_tasks":
                ids = [int(v.id) for v in db.list_tasks(tags_tasks_like=value)]
            else:
                ids = [int(v.id) for v in db.list_tasks(options_like=value)]
            if ids:
                if len(ids) > 1:
                    query_val = {"$in": ids}
                else:
                    # Exactly one task: search by its id. Take the id from
                    # ``ids`` rather than from ``value``, which for
                    # options/tags_tasks still holds the search string.
                    term = "id"
                    value = ids[0]
                    query_val = int(value)
        except Exception as e:
            # Best effort: report the failure and fall through with a
            # falsy query_val.
            print(term, value, e)
    else:
        query_val = {"$regex": value, "$options": "-i"}

    if term not in search_term_map:
        return None

    if not search_limit:
        search_limit = web_cfg.general.get("search_limit", 50)

    # Resolve the field(s) to query into a local so the shared
    # search_term_map is never modified by a single request.
    search_field = search_term_map[term]
    if term == "payloads" and len(value) in (32, 40, 64, 128):
        # The value length selects the hash-specific payload field via hash_len.
        search_field = f"CAPE.payloads.{hash_len.get(len(value))}"
    elif term == "configs":
        # check if family name is string only maybe?
        search_field = f"CAPE.configs.{value}"
        query_val = {"$exists": True}

    if repconf.mongodb.enabled and query_val:
        if isinstance(search_field, str):
            mongo_search_query = {search_field: query_val}
        else:
            mongo_search_query = {
                "$or": [{field: query_val} for field in search_field]
            }
        return (results_db.analysis.find(
            mongo_search_query, perform_search_filters).sort(
                [["_id", -1]]).limit(int(search_limit)))

    if es_as_db:
        # Every projection key except the last one is requested from
        # Elasticsearch. NOTE(review): presumably the last key is a
        # MongoDB-only entry -- confirm against perform_search_filters.
        _source_fields = list(perform_search_filters.keys())[:-1]
        if isinstance(search_field, str):
            q = {"query": {"match": {search_field: value}}}
        else:
            q = {
                "query": {
                    "bool": {
                        "should": [{"match": {field: value}}
                                   for field in search_field],
                        "minimum_should_match": 1,
                    }
                }
            }
        return [
            d["_source"]
            for d in es.search(index=get_analysis_index(),
                               body=q,
                               _source=_source_fields)["hits"]["hits"]
        ]

    return None