Example #1
0
    def __init__(
        self,
        uri: str = Config.get("neo4j", "host"),
        username: str = Config.get("neo4j", "username"),
        password: str = Config.get("neo4j", "password"),
        clear_database: bool = False,
        *args,
        **kwargs,
    ):
        """Open a Neo4j driver and optionally delete everything stored in it.

        Parameters
        ----------
        uri : str
            Address passed to ``GraphDatabase.driver``. Defaults to the
            ``host`` option of the ``neo4j`` config section (the default is
            read once, when the function is defined).
        username : str
            User name half of the ``auth`` tuple given to the driver.
        password : str
            Password half of the ``auth`` tuple given to the driver.
        clear_database : bool
            When true, runs ``MATCH (n) DETACH DELETE n`` in a write
            transaction after the driver and parent class are set up.
        *args, **kwargs
            Forwarded unchanged to the parent class initializer.
        """

        logger.info(f"Connecting to neo4j server at {uri}")

        credentials = (username, password)
        self.neo4j = GraphDatabase.driver(uri, auth=credentials)

        super().__init__(*args, **kwargs)
        logger.info("Initialized Neo4j Backend")

        self.batch_size = int(Config.get("neo4j", "batch_size"))
        self.uri = uri

        # Nothing more to do unless the caller asked for a clean slate.
        if not clear_database:
            return

        def _delete_everything(tx):
            # Unit of work executed inside the write transaction.
            return tx.run("MATCH (n) DETACH DELETE n")

        logger.info("Wiping database")
        with self.neo4j.session() as session:
            session.write_transaction(_delete_everything)
Example #2
0
    def setup_session(self):  # pragma: no cover
        """Create a Splunk client from the ``splunk`` config section.

        Reads ``host``, ``username``, ``password`` and ``port`` (falling back
        to 8089 when ``port`` is not configured) and passes them, together
        with ``sharing="global"`` and a fresh ``handler()`` instance, to
        ``splunklib.client.connect``.

        Returns
        -------
        The object returned by ``splunklib.client.connect``.
        """
        # Imported lazily so splunklib is only needed when this is called.
        import splunklib.client as splunk_client

        def splunk_option(key, **lookup_kwargs):
            # Every setting used here lives in the "splunk" section.
            return Config.get("splunk", key, **lookup_kwargs)

        connection_settings = dict(
            host=splunk_option("host"),
            username=splunk_option("username"),
            password=splunk_option("password"),
            port=int(splunk_option("port", fallback=8089)),
        )

        logger.info(f"Creating Splunk client for host={connection_settings['host']}")

        return splunk_client.connect(
            sharing="global",
            **connection_settings,
            handler=handler(),
        )
Example #3
0
    def _get_key(self) -> str:
        """Gets the graphistry API key from the enviroment variables or config.

        Returns
        -------
        str
            The graphistry API key.
        """

        if "GRAPHISTRY_API_KEY" in os.environ:
            return os.environ["GRAPHISTRY_API_KEY"]
        else:
            return Config.get("graphistry", "api_key")
Example #4
0
    def __init__(
        self,
        host: str = Config.get("dgraph", "host"),
        batch_size: int = int(Config.get("dgraph", "batch_size")),
        wipe_db: bool = False,
        *args,
        **kwargs,
    ):
        """Connect to a Dgraph server and optionally drop all existing data.

        Parameters
        ----------
        host : str
            Address passed to ``pydgraph.DgraphClientStub``. Defaults to the
            ``host`` option of the ``dgraph`` config section.
        batch_size : int
            Value stored on ``self.batch_size``. Defaults to the
            ``batch_size`` option of the ``dgraph`` config section.
        wipe_db : bool
            When true, issues a ``drop_all`` alter operation after the client
            and parent class are set up.
        *args, **kwargs
            Forwarded unchanged to the parent class initializer.
        """

        logger.info(f"Connecting to Dgraph server at {host}")

        client_stub = pydgraph.DgraphClientStub(host)

        self.dgraph = pydgraph.DgraphClient(client_stub)

        super().__init__(*args, **kwargs)

        if wipe_db:
            logger.info("Wiping existing database due to wipe_db=True")
            self.dgraph.alter(pydgraph.Operation(drop_all=True))

        # Use the caller-supplied (or config-derived) value. This used to be
        # hard-coded to 1000, which silently ignored the ``batch_size``
        # argument.
        self.batch_size = batch_size
        logger.info("Initialized Dgraph Backend")
Example #5
0
    def __init__(self, file_hash: str, sandbox_name: str = None):
        """Download VirusTotal metadata and one behaviour report for a file.

        Parameters
        ----------
        file_hash : str
            Hash interpolated into the VirusTotal v3 ``files`` URLs.
        sandbox_name : str, optional
            Preferred sandbox. The first report whose ``sandbox_name``
            attribute equals this value is used; when it is not given, or no
            report matches, the first report in the response is used.

        Raises
        ------
        RuntimeError
            If no ``api_key`` is configured in the ``virustotal`` section.
        """
        api_key = Config.get("virustotal", "api_key")

        if not api_key:
            missing_key_message = (
                "BEAGLE__VIRUSTOTAL__API_KEY not found in enviroment variables or beagle.config object"
            )
            logger.critical(missing_key_message)
            raise RuntimeError(missing_key_message)

        logger.info(f"Grabbing metadata and sandbox reports for {file_hash}")

        auth_headers = {"x-apikey": api_key}
        file_url = f"https://www.virustotal.com/api/v3/files/{file_hash}"

        self.hash_metadata = requests.get(file_url, headers=auth_headers).json()
        behaviour_reports = requests.get(f"{file_url}/behaviours", headers=auth_headers).json()

        reports = behaviour_reports["data"]

        # Pick the requested sandbox when it is available, else the first one.
        if not sandbox_name:
            chosen_report = reports[0]
            logger.info(
                f"No sandbox specified, using {chosen_report['attributes']['sandbox_name']}"
            )
        else:
            available_sandboxes = [entry["attributes"]["sandbox_name"] for entry in reports]
            logger.info(f"Sample has reports from {','.join(available_sandboxes)}")

            if sandbox_name in available_sandboxes:
                logger.info(f"Requested sandbox {sandbox_name} availble, using it.")
                # Names and reports share positions, so the index of the first
                # matching name is the index of the first matching report.
                chosen_report = reports[available_sandboxes.index(sandbox_name)]
            else:
                logger.info(f"Requested sandbox {sandbox_name} not found, using first sandbox.")
                chosen_report = reports[0]

        # Set up same way as GenericVTSandbox
        self.behaviour_report = chosen_report["attributes"]
Example #6
0
    def _setup_session(self):  # pragma: no cover
        """Create an Elasticsearch client from the ``elasticsearch`` config section.

        ``host``, ``scheme`` and ``port`` (9200 when not configured) are
        always passed. ``http_auth`` is added only when both ``username`` and
        ``password`` are set to truthy values.

        Returns
        -------
        Elasticsearch
            A client built from those settings.
        """
        # Imported lazily so elasticsearch is only needed when this is called.
        from elasticsearch import Elasticsearch

        def es_option(key, **lookup_kwargs):
            # Every setting used here lives in the "elasticsearch" section.
            return Config.get("elasticsearch", key, **lookup_kwargs)

        connection_settings = dict(
            host=es_option("host"),
            scheme=es_option("scheme"),
            port=int(es_option("port", fallback=9200)),
        )

        # Credentials are optional; attach them only when both halves exist.
        if es_option("username") and es_option("password"):
            connection_settings["http_auth"] = (es_option("username"), es_option("password"))

        logger.info(f"Creating Elasticsearch client for host={connection_settings['host']}")
        return Elasticsearch(**connection_settings)
Example #7
0
    def events(self) -> Generator[dict, None, None]:
        """Yield the ``_source`` of every hit, paging with the scroll API.

        The search combines a ``query_string`` clause built from
        ``self.query`` with an ``@timestamp`` range filter running from
        ``"now" + self.earliest`` to ``self.latest``. The page size comes
        from the ``scroll_size`` option of the ``elasticsearch`` config
        section (100 when not configured).

        Yields
        ------
        dict
            The hit's ``_source`` dict, mutated to also carry the hit's
            ``_id`` under the ``"_id"`` key.
        """
        must_clause = {"query_string": {"query": self.query}}
        time_window = {"gte": "now" + self.earliest, "lte": self.latest}
        search_body = {
            "query": {
                "bool": {
                    "must": must_clause,
                    "filter": [{"range": {"@timestamp": time_window}}],
                }
            }
        }

        # Number of hits to return per page
        page_size = int(Config.get("elasticsearch", "scroll_size", fallback=100))
        page = self.client.search(index=self.index, body=search_body, scroll="2m", size=page_size)

        while True:
            # Each response carries the scroll ID to use for the next request.
            scroll_id = page["_scroll_id"]
            hits = page["hits"]["hits"]

            # An empty page means the scroll is exhausted.
            if not hits:
                return

            for hit in hits:
                document = hit["_source"]
                document["_id"] = hit["_id"]
                yield document

            page = self.client.scroll(scroll_id=scroll_id, scroll="2m")
Example #8
0
import sys

from loguru import logger

from beagle.config import Config

# Replace the handler with id 0 (the sink loguru installs by default) with a
# single colorized stdout sink whose level comes from the "general" config.
logger.remove(0)
logger.add(
    sys.stdout,
    colorize=True,
    level=Config.get("general", "log_level").upper(),
    # Fields: timestamp | module.function:line | level | message
    format=" | ".join(
        (
            "<green>{time:YYYY-MM-DDTHH:mm:ss}</green>",
            "<red>{name}.{function}:{line}</red>",
            "<cyan>{level}</cyan>",
            "<level>{message}</level>",
        )
    ),
)