# Example #1
0
def _add_to_exiting_graph(
    existing_backend: Backend,
    datasource_cls: Type[DataSource],
    transformer_cls: Type[Transformer],
    params: Dict[str, Any],
    is_external: bool,
) -> Tuple[dict, bool]:
    """Run a datasource/transformer pair and add the resulting nodes to a backend.

    Parameters
    ----------
    existing_backend : Backend
        Backend that receives the generated nodes through ``add_nodes``.
    datasource_cls : Type[DataSource]
        Datasource class to instantiate with ``params``.
    transformer_cls : Type[Transformer]
        Transformer class handed to ``datasource.to_transformer``.
    params : Dict[str, Any]
        Datasource parameters. When ``is_external`` is False the values are
        open temporary files: their ``.name`` is passed to the datasource and
        they are closed before this function returns.
    is_external : bool
        True when ``params`` holds plain values rather than temporary files.

    Returns
    -------
    Tuple[dict, bool]
        ``({"graph": ..., "backend": existing_backend}, True)`` on success, or
        ``({"message": <reason>}, False)`` when generation fails or the backend
        is empty afterwards.
    """
    try:
        # Datasources take file paths, not file objects, so hand over the
        # temporary file names unless the parameters are plain values.
        datasource_params = (
            params
            if is_external
            else {param_name: handle.name for param_name, handle in params.items()}
        )
        datasource = datasource_cls(**datasource_params)  # type: ignore
        transformer = datasource.to_transformer(transformer_cls)

        # Create the nodes and add them to the existing backend.
        nodes = transformer.run()
        G = existing_backend.add_nodes(nodes)

    except Exception as e:
        logger.critical(f"Failure to generate graph {e}")
        import traceback

        logger.debug(f"{traceback.format_exc()}")

        if not is_external:
            # Best-effort cleanup. The cleanup error gets its own name so it
            # cannot replace (or unbind) the generation error reported below.
            try:
                for _tempfile in params.values():
                    _tempfile.close()
            except Exception as cleanup_error:
                logger.critical(
                    f"Failure to clean up temporary files after error {cleanup_error}"
                )

        # Always stop here: falling through would reach the success path with
        # ``G`` unbound.
        return {"message": str(e)}, False

    logger.info("Cleaning up tempfiles")

    if not is_external:
        # Clean up temporary files
        for _tempfile in params.values():
            _tempfile.close()

    logger.info("Finished generating graph")

    # Check if we even had a graph.
    if existing_backend.is_empty():
        return {"message": "Graph generation resulted in 0 nodes."}, False

    return {"graph": G, "backend": existing_backend}, True
# Example #2
0
def new():
    """Generate a new graph using the supplied DataSource, Transformer, and the parameters
    passed to the DataSource.

    At minimum, the user must supply the following form parameters:
        1. datasource
        2. transformer
        3. comment

    The ``backend`` form parameter is optional and defaults to ``NetworkX``.

    Outside of that, the user must supply at **minimum** the parameters marked by
    the datasource as required.
        * Use the /api/datasources endpoint to see which ones these are.
        * Programmatically, these are any parameters without a default value.

    Failure to supply either the minimum three or the required parameters for that datasource
    returns a 400 status code with the missing parameters in the 'message' field.
    An unknown datasource, transformer, or backend also returns a 400 status code.

    If any part of the graph creation yields an error, a 500 HTTP code is returned with the
    python exception as a string in the 'message' field. A graph with 0 nodes returns a 400.

    If the graph is successfully created with the NetworkX backend, the user is returned a
    dictionary with the ID of the graph and the URI path to viewing it in the
    *beagle web interface*. For any other backend, the result of ``graph.graph()`` is
    returned under the 'resp' key.

    For example:

    >>> {
        id: 1,
        self: /fireeye_hx/1
    }

    Returns
    -------
    dict
        {id: integer, self: string}
    """

    # Verify we have the basic parameters.
    missing_params = [
        param
        for param in ["datasource", "transformer", "comment"]
        if param not in request.form
    ]

    if len(missing_params) > 0:
        logger.debug(f"Request to /new missing parameters: {missing_params}")
        return make_response(
            jsonify({"message": f"Missing parameters {missing_params}"}), 400)

    # Get the requested datasource, transformer, and backend names.
    requested_datasource = request.form["datasource"]
    requested_transformer = request.form["transformer"]
    requested_backend = request.form.get("backend", "NetworkX")

    datasource_schema = next(
        filter(lambda entry: entry["id"] == requested_datasource,
               SCHEMA["datasources"]), None)

    if datasource_schema is None:
        logger.debug(
            f"User requested a non-existent data source {requested_datasource}"
        )
        return make_response(
            jsonify({
                "message":
                f"Requested datasource '{requested_datasource}' is invalid, " +
                "please use /api/datasources to find a list of valid datasources"
            }),
            400,
        )

    logger.info(
        f"Recieved upload request for datasource=<{requested_datasource}>, " +
        f"transformer=<{requested_transformer}>, backend=<{requested_backend}>"
    )

    datasource_cls = DATASOURCES[requested_datasource]

    # An unknown transformer or backend is a client error, so answer with a 400
    # (like an unknown datasource) instead of letting the KeyError escape.
    try:
        transformer_cls = TRANSFORMERS[requested_transformer]
    except KeyError:
        logger.debug(
            f"User requested a non-existent transformer {requested_transformer}"
        )
        return make_response(
            jsonify({
                "message":
                f"Requested transformer '{requested_transformer}' is invalid"
            }),
            400,
        )

    try:
        backend_class = BACKENDS[requested_backend]
    except KeyError:
        logger.debug(
            f"User requested a non-existent backend {requested_backend}")
        return make_response(
            jsonify({
                "message":
                f"Requested backend '{requested_backend}' is invalid"
            }),
            400,
        )

    required_parameters = datasource_schema["params"]

    # If this class extends the ExternalDataSource class, we know that the parameters
    # represent strings, and not files.
    is_external = issubclass(datasource_cls, ExternalDataSource)

    # Make sure the user provided all required parameters for the datasource.
    # External datasources read them from the form, all others from uploaded files.
    supplied = request.form if is_external else request.files
    datasource_missing_params = []
    for param in required_parameters:
        # Skip optional parameters
        if param["required"] is False:
            continue
        if param["name"] not in supplied:
            datasource_missing_params.append(param["name"])

    if len(datasource_missing_params) > 0:
        logger.debug(
            f"Missing datasource {'form' if is_external else 'files'} params {datasource_missing_params}"
        )
        return make_response(
            jsonify({
                "message":
                f"Missing datasource {'form' if is_external else 'files'} params {datasource_missing_params}"
            }),
            400,
        )

    logger.info("Transforming data to a graph.")

    logger.debug("Setting up parameters")
    params = {}

    if is_external:
        # External parameters are in the form
        for param in datasource_schema["params"]:
            if param["name"] in request.form:
                params[param["name"]] = request.form[param["name"]]

        logger.info(f"ExternalDataSource params received {params}")

    else:
        for param in datasource_schema["params"]:
            # Save the files, keep track of which parameter they represent
            if param["name"] in request.files:
                params[param["name"]] = tempfile.NamedTemporaryFile()
                request.files[param["name"]].save(params[param["name"]].name)
                params[param["name"]].seek(0)

        logger.info(f"Saved uploaded files {params}")

    logger.debug("Set up parameters")

    try:
        # Create the datasource
        datasource = datasource_cls(
            # Give file paths instead of file-like objects when not external source.
            **({
                param_name: handle.name
                for param_name, handle in params.items()
            } if not is_external else params))
        transformer = datasource.to_transformer(transformer_cls)
        graph = backend_class(metadata=datasource.metadata(),
                              nodes=transformer.run(),
                              consolidate_edges=True)
        # Make the graph
        G = graph.graph()

    except Exception as e:
        logger.critical(f"Failure to generate graph {e}")
        import traceback

        logger.debug(f"{traceback.format_exc()}")

        if not is_external:
            # Clean up temporary files. The cleanup error gets its own name:
            # rebinding ``e`` here would unbind it when this clause ends and
            # break the error response built below.
            try:
                for _tempfile in params.values():
                    _tempfile.close()
            except Exception as cleanup_error:
                logger.critical(
                    f"Failure to clean up temporary files after error {cleanup_error}")

        response = make_response(jsonify({"message": str(e)}), 500)
        response.headers.add("Access-Control-Allow-Origin", "*")
        return response

    logger.info("Cleaning up tempfiles")

    if not is_external:
        # Clean up temporary files
        for _tempfile in params.values():
            _tempfile.close()

    logger.info("Finished generating graph")

    # Check if we even had a graph.
    if graph.is_empty():
        return make_response(
            jsonify({"message": "Graph generation resulted in 0 nodes. "}),
            400)

    # If the backend is NetworkX, save the graph.
    # Otherwise, return whatever the backend produced.
    if backend_class.__name__ == "NetworkX":

        # Take the SHA256 of the contents of the graph.
        contents_hash = hashlib.sha256(
            json.dumps(graph.to_json(),
                       sort_keys=True).encode("utf-8")).hexdigest()

        # See if we have previously generated this *exact* graph.
        existing = Graph.query.filter_by(meta=graph.metadata,
                                         sha256=contents_hash).first()

        if existing:
            logger.info(f"Graph previously generated with id {existing.id}")
            response = jsonify({
                "id": existing.id,
                "self": f"/{existing.category}/{existing.id}"
            })
            response.headers.add("Access-Control-Allow-Origin", "*")
            return response

        dest_folder = datasource_cls.category.replace(" ", "_").lower()
        # Set up the storage directory.
        dest_path = f"{Config.get('storage', 'dir')}/{dest_folder}/{contents_hash}.json"
        os.makedirs(f"{Config.get('storage', 'dir')}/{dest_folder}",
                    exist_ok=True)

        db_entry = Graph(
            sha256=contents_hash,
            meta=graph.metadata,
            comment=request.form.get("comment", None),
            category=dest_folder,  # Categories use the lower name!
            file_path=f"{contents_hash}.json",
        )

        db.session.add(db_entry)
        db.session.commit()

        logger.info(f"Added graph to database with id={db_entry.id}")

        # Use a context manager so the file is flushed and closed right away.
        with open(dest_path, "w") as graph_file:
            json.dump(graph.to_json(), graph_file)

        logger.info(f"Saved graph to {dest_path}")

        response = jsonify({
            "id": db_entry.id,
            "self": f"/{dest_folder}/{db_entry.id}"
        })
    else:
        logger.debug(G)
        response = jsonify({"resp": G})

    response.headers.add("Access-Control-Allow-Origin", "*")
    return response