def assert_query(self, yara_path: Path, results_path: Path) -> None:
    # Compare the combined ursadb query (or the parse error message)
    # against the expected output stored next to the rule file.
    expected_data = results_path.read_text()
    try:
        rules = parse_yara(yara_path.read_text())
        self.assertEqual(expected_data, combine_rules(rules).query + "\n")
    except Exception as e:
        self.assertEqual(expected_data, str(e) + "\n")


def write_rules_to_file(data, result_txt):
    # Write the combined query for `data` (or the parse error message)
    # to `result_txt`; handy for (re)generating expected test outputs.
    try:
        rules = parse_yara(data)
        with open(result_txt, "w") as fp:
            fp.write(combine_rules(rules).query + "\n")
    except Exception as e:
        with open(result_txt, "w") as fp:
            fp.write(str(e) + "\n")
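
These helpers appear to come from a test suite for the yara-to-ursadb query translation. A minimal sketch of how assert_query might be wired into a unittest.TestCase; the class name and fixture paths below are hypothetical:

import unittest
from pathlib import Path

class QueryPlanTest(unittest.TestCase):
    # assert_query from the snippet above would live here as a method.

    def test_example_rule(self) -> None:
        base = Path("testdata")  # hypothetical fixture directory
        self.assert_query(base / "example.yar", base / "example.txt")

if __name__ == "__main__":
    unittest.main()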
Example #3
    def __search_task(self, job_id: JobId) -> None:
        """Do ursadb query for yara belonging to the provided job.
        If successful, create a new yara tasks to do further processing
        of the results.
        """
        logging.info("Parsing...")

        job = self.db.get_job(job_id)
        if job.status == "cancelled":
            logging.info("Job was cancelled, returning...")
            return

        if job.status == "new":
            # First search request - find datasets to query
            logging.info("New job, generate subtasks...")
            result = self.ursa.topology()
            if "error" in result:
                # Check for errors before touching result["result"],
                # which is absent in error responses.
                raise RuntimeError(result["error"])

            if not result["result"]["datasets"]:
                logging.info("No datasets found. Finish the job and return...")
                self.db.agent_finish_job(job_id)
                return

            self.db.init_job_datasets(
                self.group_id,
                job_id,
                list(result["result"]["datasets"].keys()),
            )

        logging.info("Get next dataset to query...")
        dataset = self.db.get_next_search_dataset(self.group_id, job_id)
        if dataset is None:
            logging.info("Nothing to query, returning...")
            return

        rules = parse_yara(job.raw_yara)
        parsed = combine_rules(rules)

        logging.info("Querying backend...")
        result = self.ursa.query(parsed.query, job.taints, dataset)
        if "error" in result:
            raise RuntimeError(result["error"])

        file_count = result["file_count"]
        iterator = result["iterator"]
        logging.info(f"Iterator {iterator} contains {file_count} files")

        total_files = self.db.update_job_files(job_id, file_count)
        if job.files_limit and total_files > job.files_limit:
            raise RuntimeError(
                f"Too many candidates after prefiltering (limit: {job.files_limit}). "
                "Try a more specific query."
            )

        self.db.agent_start_job(self.group_id, job_id, iterator)
        self.db.agent_continue_search(self.group_id, job_id)
        self.db.dataset_query_done(job_id)
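
For reference, the shape of the self.ursa.query(...) response consumed above, inferred only from the fields this snippet reads; the concrete values are made up:

# Illustrative success response: an opaque iterator handle plus a count
# of candidate files reachable through it.
result = {
    "file_count": 1337,
    "iterator": "abcdef123456",
}
# Error responses carry an "error" key instead:
result = {"error": "query syntax error"}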
Example #4
def query(
    data: QueryRequestSchema = Body(...),
) -> Union[QueryResponseSchema, List[ParseResponseSchema]]:
    """
    Starts a new search. Response will contain a new job ID that can be used
    to check the job status and download matched files.
    """
    try:
        rules = parse_yara(data.raw_yara)
    except Exception as e:
        raise HTTPException(
            status_code=400, detail=f"Yara rule parsing failed: {e}"
        )

    if not rules:
        raise HTTPException(status_code=400, detail="No rule was specified.")

    if data.method == RequestQueryMethod.parse:
        return [
            ParseResponseSchema(
                rule_name=rule.name,
                rule_author=rule.author,
                is_global=rule.is_global,
                is_private=rule.is_private,
                parsed=rule.parse().query,
            )
            for rule in rules
        ]

    active_agents = db.get_active_agents()

    for agent, agent_spec in active_agents.items():
        missing = set(data.required_plugins).difference(
            agent_spec.active_plugins
        )
        if missing:
            raise HTTPException(
                status_code=409,
                detail=f"Agent {agent} doesn't support "
                f"required plugins: {', '.join(missing)}",
            )

    if not data.taints:
        data.taints = []

    job = db.create_search_task(
        rules[-1].name,
        rules[-1].author,
        data.raw_yara,
        data.priority,
        data.files_limit or 0,
        data.reference or "",
        data.taints,
        list(active_agents.keys()),
    )
    return QueryResponseSchema(query_hash=job.hash)
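
A minimal client sketch for this endpoint. The host, port, and /api/query route are assumptions about the deployment, and the priority value is illustrative:

import requests

API = "http://localhost:5000/api/query"  # hypothetical base URL and route

rule = """
rule example
{
    strings:
        $s = "suspicious string"
    condition:
        $s
}
"""

resp = requests.post(API, json={
    "raw_yara": rule,
    "method": "query",     # "parse" instead returns per-rule parse info
    "priority": "medium",  # illustrative priority value
})
resp.raise_for_status()
print(resp.json())  # e.g. {"query_hash": "..."}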
Example #5
    def assert_query(self, data, expected_file_txt):
        # Read the expected output once, then compare it against either
        # the combined query or the parse error message.
        with open(testdir + expected_file_txt, "rb") as exp:
            expected_data = exp.read().decode("utf-8")
        try:
            rules = parse_yara(data)
            print(combine_rules(rules).query)
            self.assertEqual(expected_data, combine_rules(rules).query + "\n")
        except Exception as e:
            self.assertEqual(expected_data, str(e) + "\n")
Example #6
def execute_search(job_hash: str) -> None:
    logging.info("Parsing...")

    job = redis.hgetall("job:" + job_hash)
    yara_rule = job["raw_yara"]

    redis.hmset("job:" + job_hash, {
        "status": "parsing",
        "timestamp": time.time()
    })

    rules = parse_yara(yara_rule)
    parsed = combine_rules(rules)

    redis.hmset("job:" + job_hash, {
        "status": "querying",
        "timestamp": time.time()
    })

    logging.info("Querying backend...")
    taint = job.get("taint", None)
    result = db.query(parsed.query, taint)
    if "error" in result:
        raise RuntimeError(result["error"])

    files = [f for f in result["files"] if f.strip()]

    logging.info("Database responded with {} files".format(len(files)))

    redis.hmset(
        "job:" + job_hash,
        {
            "status": "processing",
            "files_processed": 0,
            "total_files": len(files),
        },
    )

    if files:
        pipe = redis.pipeline()
        queue_name = get_queue_name(job["priority"])
        for file in files:
            if not config.SKIP_YARA:
                pipe.rpush(queue_name, "{}:{}".format(job_hash, file))
            else:
                pipe.rpush("queue-metadata", "{}:{}".format(job_hash, file))

        pipe.execute()
        logging.info("Done uploading yara jobs.")
    else:
        redis.hset("job:{}".format(job_hash), "status", "done")
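
The entries queued above are "job_hash:file" strings. A hedged sketch of the consumer on the other end, assuming the same redis client and a queue_name already in scope; the actual yara-matching step is elided:

# Consume entries queued by execute_search above.
while True:
    _, entry = redis.blpop(queue_name)         # blocks until work arrives
    job_hash, file_path = entry.split(":", 1)  # split on the first colon only
    # ... run yara against file_path and store any match ...
    redis.hincrby("job:" + job_hash, "files_processed", 1)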
Example #7
def query(
    data: QueryRequestSchema = Body(...),
) -> Union[QueryResponseSchema, List[ParseResponseSchema]]:
    try:
        rules = parse_yara(data.raw_yara)
    except Exception as e:
        raise HTTPException(status_code=400,
                            detail=f"Yara rule parsing failed: {e}")

    if not rules:
        raise HTTPException(status_code=400, detail="No rule was specified.")

    if data.method == RequestQueryMethod.parse:
        return [
            ParseResponseSchema(
                rule_name=rule.name,
                rule_author=rule.author,
                is_global=rule.is_global,
                is_private=rule.is_private,
                parsed=rule.parse().query,
            ) for rule in rules
        ]

    job_hash = "".join(random.SystemRandom().choice(string.ascii_uppercase +
                                                    string.digits)
                       for _ in range(12))

    job_obj = {
        "status": "new",
        "rule_name": rules[-1].name,
        "rule_author": rules[-1].author,
        "raw_yara": data.raw_yara,
        "submitted": int(time.time()),
        "priority": data.priority,
    }

    if data.taint is not None:
        job_obj["taint"] = data.taint

    redis.hmset("job:" + job_hash, job_obj)
    redis.rpush("queue-search", job_hash)

    return QueryResponseSchema(query_hash=job_hash)
Example #8
def execute_search(job_hash: str) -> None:
    logging.info("Parsing...")
    job_id = "job:" + job_hash

    job = redis.hgetall(job_id)
    yara_rule = job["raw_yara"]

    redis.hmset(job_id, {"status": "parsing", "timestamp": time.time()})

    rules = parse_yara(yara_rule)
    parsed = combine_rules(rules)

    redis.hmset(job_id, {"status": "querying", "timestamp": time.time()})

    logging.info("Querying backend...")
    taint = job.get("taint", None)
    result = db.query(parsed.query, taint)
    if "error" in result:
        raise RuntimeError(result["error"])

    file_count = result["file_count"]
    iterator = result["iterator"]
    logging.info(f"Iterator contains {file_count} files")

    redis.hmset(
        job_id,
        {
            "status": "processing",
            "iterator": iterator,
            "files_processed": 0,
            "total_files": file_count,
        },
    )

    if file_count > 0:
        list_name = get_list_name(job["priority"])
        redis.lpush(list_name, job_hash)
    else:
        redis.hset(job_id, "status", "done")
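
The iterator stored above is an ursadb-side handle: downstream workers drain it in batches rather than receiving a full file list up front. A hedged sketch of that pattern, assuming a db.pop(iterator, count) helper (a hypothetical name for ursadb's iterator-pop operation, with an illustrative response shape):

BATCH_SIZE = 500  # illustrative batch size

# Drain candidate files from the ursadb iterator in batches.
while True:
    out = db.pop(iterator, BATCH_SIZE)  # hypothetical helper and response
    if not out["files"]:
        break  # iterator exhausted
    for file_path in out["files"]:
        pass  # ... schedule yara matching for file_path ...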
Example #9
def query() -> Any:
    req = request.get_json()
    raw_yara = req["raw_yara"]

    try:
        rules = parse_yara(raw_yara)
    except Exception as e:
        return jsonify({"error": f"Yara rule parsing failed: {e}"}), 400

    if not rules:
        return jsonify({"error": "No rule was specified."}), 400

    if req["method"] == "parse":
        return jsonify([{
            "rule_name": rule.name,
            "rule_author": rule.author,
            "is_global": rule.is_global,
            "is_private": rule.is_private,
            "parsed": rule.parse().query,
        } for rule in rules])

    job_hash = "".join(random.SystemRandom().choice(string.ascii_uppercase +
                                                    string.digits)
                       for _ in range(12))

    job_obj = {
        "status": "new",
        "rule_name": rules[-1].name,
        "rule_author": rules[-1].author,
        "raw_yara": raw_yara,
        "submitted": int(time.time()),
        "priority": req["priority"],
    }

    redis.hmset("job:" + job_hash, job_obj)
    redis.rpush("queue-search", job_hash)

    return jsonify({"query_hash": job_hash})
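
When called with "method": "parse", this handler returns one entry per rule instead of a job hash. An illustrative response, written as the Python structure passed to jsonify; the field values are invented:

# Example payload returned for "method": "parse" (values are made up):
[
    {
        "rule_name": "example",
        "rule_author": "analyst",
        "is_global": False,
        "is_private": False,
        "parsed": "ursadb query text for the rule",
    }
]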