Ejemplo n.º 1
0
def set_process_schema(client: DgraphClient, engagement: bool = False):
    """Apply the process-node predicate schema through ``client``.

    The schema text is assembled from three pieces: the base process
    predicates, an optional ``risks`` edge that is only included when
    ``engagement`` is true, and the ``process_guid`` predicate that the
    original author marked as unstable. The result is sent in a single
    ``alter`` operation.
    """
    base_predicates = """node_key: string @upsert @index(hash) .
    process_id: int @index(int) .
    created_timestamp: int @index(int) .
    asset_id: string @index(hash) .
    terminate_time: int @index(int) .
    image_name: string @index(exact, hash, trigram, fulltext) .
    process_name: string @index(exact, hash, trigram, fulltext) .
    arguments: string  @index(fulltext)  @index(trigram) .
    bin_file: uid @reverse .
    children: [uid] @reverse .
    created_files: [uid] @reverse .
    deleted_files: [uid] @reverse .
    read_files: [uid] @reverse .
    wrote_files: [uid] @reverse .
    created_connections: [uid] @reverse .
    bound_connections: [uid] @reverse .
    """

    # The risks edge is only declared when the engagement flag is set.
    risk_predicates = "\nrisks: [uid] @reverse ." if engagement else ""

    # unstable
    unstable_predicates = """
        process_guid: string @index(exact, hash, trigram, fulltext) .
    """

    full_schema = "".join(
        (base_predicates, risk_predicates, unstable_predicates))
    client.alter(pydgraph.Operation(schema=full_schema))
Ejemplo n.º 2
0
def set_file_schema(client: DgraphClient, engagement: bool = False) -> None:
    """Apply the file-node predicate schema through ``client``.

    Args:
        client: Dgraph client the ``alter`` operation is sent through.
        engagement: When true, the ``risks`` edge is declared as well.

    The ``risks`` edge is declared as a list (``[uid]``), matching the
    declaration in ``set_process_schema``. Dgraph predicates are shared by
    every node in a cluster, so declaring the same predicate once as
    ``uid`` and once as ``[uid]`` makes the effective type depend on which
    schema function ran last, and a single-valued edge can only hold one
    risk per node.
    """
    schema = """
    node_key: string @upsert @index(hash) .
    file_name: string @index(exact, hash, trigram, fulltext) .
    asset_id: string @index(exact, hash, trigram, fulltext) .
    file_path: string @index(exact, hash, trigram, fulltext) .
    file_extension: string @index(exact, hash, trigram, fulltext) .
    file_mime_type: string @index(exact, hash, trigram, fulltext) .
    file_size: int @index(int) .
    file_version: string @index(exact, hash, trigram, fulltext) .
    file_description: string @index(exact, hash, trigram, fulltext) .
    file_product: string @index(exact, hash, trigram, fulltext) .
    file_company: string @index(exact, hash, trigram, fulltext) .
    file_directory: string @index(exact, hash, trigram, fulltext) .
    file_inode: int @index(int) .
    file_hard_links: string @index(exact, hash, trigram, fulltext) .
    signed: bool @index(bool) .
    signed_status: string @index(exact, hash, trigram, fulltext) .
    md5_hash: string @index(exact, hash, trigram, fulltext) .
    sha1_hash: string @index(exact, hash, trigram, fulltext) .
    sha256_hash: string @index(exact, hash, trigram, fulltext) .
    """
    if engagement:
        # Keep this declaration identical to the one for process nodes.
        schema += "\n"
        schema += "risks: [uid] @reverse ."

    op = pydgraph.Operation(schema=schema)
    client.alter(op)
Ejemplo n.º 3
0
def create_process_schema(eg_client: DgraphClient):
    """Apply a four-predicate process schema through ``eg_client``."""
    predicates = (
        'node_key: string @index(hash) .',
        'engagement_key: string @index(hash) .',
        'children: uid @reverse .',
        'pid: int @index(int) .',
    )
    # Every predicate line, including the last one, ends with a newline.
    schema_text = ''.join(f'{line}\n' for line in predicates)

    eg_client.alter(pydgraph.Operation(schema=schema_text))
Ejemplo n.º 4
0
    def __init__(self, id: str):
        """Store the engagement key and build the clients and node copier.

        Two Dgraph clients are created against fixed addresses, one for
        the engagement graph cluster and one for the master graph cluster,
        and a ``NodeCopier`` is constructed from ``id`` and both clients.
        """
        self.engagement_key = id

        engagement_stub = DgraphClientStub(
            "alpha0.engagementgraphcluster.grapl:9080")
        self.eg_client: DgraphClient = DgraphClient(engagement_stub)

        master_stub = DgraphClientStub(
            "alpha0.mastergraphcluster.grapl:9080")
        self.mg_client: DgraphClient = DgraphClient(master_stub)

        # NodeCopier receives the master-graph client before the
        # engagement-graph client.
        self.node_copier: NodeCopier = NodeCopier(
            id,
            self.mg_client,
            self.eg_client,
        )
Ejemplo n.º 5
0
def copy_node(
    src_client: DgraphClient,
    dst_client: DgraphClient,
    node_key: str,
    init_node: Optional[Dict[str, Any]] = None,
) -> Any:
    """Copy the node identified by ``node_key`` from one cluster to another.

    The node is read from ``src_client`` with all of its predicates, merged
    with ``init_node`` (whose entries win on key collisions) and handed to
    ``upsert`` against ``dst_client``.

    Args:
        src_client: Client the node is read from.
        dst_client: Client the node is upserted into.
        node_key: Value of the ``node_key`` predicate to look up.
        init_node: Optional extra/overriding properties for the copy.

    Returns:
        Whatever ``upsert`` returns for the copied node.

    Raises:
        Exception: If no node matches ``node_key`` or the matching node
            carries no ``dgraph.type``.
    """
    overrides = init_node or {}

    # node_key is passed as a query variable rather than formatted into
    # the query text, so quotes in the key cannot alter the query.
    query = """
        query q0($a: string)
        {
            q0(func: eq(node_key, $a)) {
                    uid,
                    dgraph.type,
                    expand(_all_),
            }
        }
        """
    txn = src_client.txn(read_only=True)

    try:
        res = json.loads(
            txn.query(query, variables={"$a": str(node_key)}).json
        )["q0"]
    finally:
        txn.discard()

    if not res:
        raise Exception("ERROR: Can not find res")

    if not res[0].get("dgraph.type"):
        raise Exception("ERROR: Can not find res dgraph.type")

    raw_to_copy = {**res[0], **overrides}

    return upsert(dst_client, raw_to_copy)
Ejemplo n.º 6
0
def _upsert(client: DgraphClient, node_dict: Dict[str, Property]) -> str:
    """Create or update the node whose ``node_key`` matches ``node_dict``.

    Args:
        client: Dgraph client used for the read-write transaction.
        node_dict: Properties to write; must contain ``node_key``. Any
            ``uid`` entry is ignored. The dict itself is not modified.

    Returns:
        The uid of the existing node when one matched, otherwise the uid
        Dgraph assigned to the newly created node.

    Raises:
        KeyError: If ``node_dict`` has no ``node_key`` entry.
    """
    # Work on a copy so the caller's dict is not rewritten with the
    # placeholder uid.
    mutation = dict(node_dict)
    mutation['uid'] = '_:blank-0'
    node_key = mutation['node_key']

    # Only the uid of a match is used, so nothing else is requested, and
    # the key is passed as a query variable instead of being formatted
    # into the query text.
    query = """
        query q0($a: string)
        {
            q0(func: eq(node_key, $a)) {
                    uid
            }
        }
        """
    txn = client.txn(read_only=False)

    try:
        res = json.loads(
            txn.query(query, variables={'$a': str(node_key)}).json)['q0']

        existing_uid = res[0]['uid'] if res else None
        if existing_uid is not None:
            mutation['uid'] = existing_uid

        m_res = txn.mutate(set_obj=mutation, commit_now=True)

        if existing_uid is None:
            return str(m_res.uids['blank-0'])
        return str(existing_uid)

    finally:
        txn.discard()
Ejemplo n.º 7
0
def upsert(client: DgraphClient, node_dict: Dict[str, Any]) -> Any:
    """Create or update the node whose ``node_key`` matches ``node_dict``.

    Args:
        client: Dgraph client used for the read-write transaction.
        node_dict: Properties to write; must contain ``node_key``. Any
            ``uid`` entry is ignored. The dict itself is not modified.

    Returns:
        The uid of the matching node if one already existed, otherwise the
        uid Dgraph assigned to the new node.

    Raises:
        KeyError: If ``node_dict`` has no ``node_key`` entry.
    """
    # Copy first: the placeholder uid must not leak into the caller's dict.
    mutation = dict(node_dict)
    mutation["uid"] = "_:blank-0"
    node_key = mutation["node_key"]

    # node_key travels as a query variable, not as formatted query text.
    query = """
        query q0($a: string)
        {
            q0(func: eq(node_key, $a))  @cascade {
                    uid,
                    dgraph.type,
            }
        }
        """
    txn = client.txn(read_only=False)

    try:
        res = json.loads(
            txn.query(query, variables={"$a": str(node_key)}).json
        )["q0"]

        existing_uid = None
        if res:
            existing_uid = res[0]["uid"]
            # The stored uid and dgraph.type take precedence over the input.
            mutation = {**mutation, **res[0]}

        mut_res = txn.mutate(set_obj=mutation, commit_now=True)

        # The placeholder "_:blank-0" is truthy, so it must not be used to
        # decide whether a real uid is already known; only a uid that came
        # back from the lookup counts.
        if existing_uid:
            return existing_uid
        return mut_res.uids["blank-0"]

    finally:
        txn.discard()
Ejemplo n.º 8
0
    def test_process_with_created_files(self) -> None:
        """A process linked to a file via ``created_files`` is queryable.

        Upserts a process and a file, connects them with a
        ``created_files`` edge, then checks that a ``ProcessQuery``
        constrained on both nodes returns the process with exactly that
        one created file.
        """
        process_key = "763ddbda-8812-4a07-acfe-83402b92379d"
        file_key = "575f103e-1a11-4650-9f1b-5b72e44dfec3"
        file_path = "/folder/file.txt"

        # Given: a process with a pid 100 & process_name word.exe,
        local_client = DgraphClient(DgraphClientStub("localhost:9080"))

        created_timestamp = int(time.time())

        parent_process: Dict[str, Property] = {
            "process_id": 100,
            "process_name": "word.exe",
            "created_timestamp": created_timestamp,
        }
        parent_process_view = upsert(
            local_client, "Process", ProcessView, process_key, parent_process
        )

        file_props: Dict[str, Property] = {
            "file_path": file_path,
            "created_timestamp": created_timestamp + 1000,
        }
        created_file_view = upsert(
            local_client, "File", FileView, file_key, file_props
        )

        create_edge(
            local_client,
            parent_process_view.uid,
            "created_files",
            created_file_view.uid,
        )

        # When: the process is queried together with its created file,
        queried_process = (
            ProcessQuery()
            .with_node_key(eq=process_key)
            .with_process_id(eq=100)
            .with_process_name(contains="word")
            .with_created_timestamp(eq=created_timestamp)
            .with_created_files(
                FileQuery()
                .with_node_key(eq=file_key)
                .with_file_path(eq=file_path)
            )
            .query_first(local_client)
        )

        # Then: the process and its single created file come back.
        assert queried_process
        assert queried_process.process_id == 100

        assert len(queried_process.created_files) == 1
        queried_file = queried_process.created_files[0]
        assert queried_file.file_path == file_path
Ejemplo n.º 9
0
    def _get_children(self,
                      client: DgraphClient,
                      eg=True) -> Optional[List[Process]]:
        """Fetch the nodes reachable over this node's ``children`` edge.

        When ``eg`` is true the root lookup is additionally filtered on
        this object's ``engagement_key``. Returns ``None`` when the query
        yields no root result; otherwise each child dict is converted with
        ``self.from_dict``.
        """
        q_filter = ''
        if eg:
            q_filter = '@filter(eq(engagement_key, "{}"))'.format(
                self.engagement_key)

        query_template = """{{
                q0(func: eq(node_key, "{}"))
                {}
                {{
                    children {{
                        uid,
                        node_key,
                        image_name,
                        image_path,
                    }}
                    
                }}
            }}"""
        response = client.query(
            query_template.format(self.node_key, q_filter))
        proc_res = json.loads(response.json)['q0']

        if not proc_res:
            return None

        child_dicts = proc_res[0]['children']
        return [self.from_dict(child_dict) for child_dict in child_dicts]
Ejemplo n.º 10
0
    def _get_parent(self, client: DgraphClient, eg=True) -> Optional[Process]:
        """Fetch the first node that has this node as one of its children.

        The reverse ``~children`` edge of this node is queried. When ``eg``
        is true the root lookup is additionally filtered on this object's
        ``engagement_key``. Returns ``None`` when the query yields no root
        result; otherwise a ``Process`` is built from the first entry.
        """
        q_filter = ''
        if eg:
            q_filter = '@filter(eq(engagement_key, "{}"))'.format(
                self.engagement_key)

        query_template = """{{
                q0(func: eq(node_key, "{}"))
                {}
                {{
                    ~children {{
                        uid,
                        node_key,
                        image_name,
                        image_path,
                    }}
                    
                }}
            }}"""
        response = client.query(
            query_template.format(self.node_key, q_filter))
        proc_res = json.loads(response.json)['q0']

        if not proc_res:
            return None

        # Only the first reverse-edge entry is used.
        parent = proc_res[0]['~children'][0]
        return Process(
            parent['node_key'],
            parent['uid'],
            self.engagement_key,
            parent.get('image_name', None),
            parent.get('image_path', None),
            self.engagement,
        )
Ejemplo n.º 11
0
def upsert(client: DgraphClient, node_dict: Dict[str, Any]) -> str:
    """Create or update the node whose ``node_key`` matches ``node_dict``.

    Args:
        client: Dgraph client used for the read-write transaction.
        node_dict: Properties to write; must contain ``node_key``. Any
            ``uid`` entry is ignored. The dict itself is not modified.

    Returns:
        The uid of the matching node if one already existed, otherwise the
        uid Dgraph assigned to the new node.

    Raises:
        KeyError: If ``node_dict`` has no ``node_key`` entry.
    """
    # Copy first: the placeholder uid must not leak into the caller's dict.
    mutation = dict(node_dict)
    mutation['uid'] = '_:blank-0'
    node_key = mutation['node_key']
    print(f"INFO: Upserting node: {mutation}")

    # node_key travels as a query variable, not as formatted query text.
    query = """
        query q0($a: string)
        {
            q0(func: eq(node_key, $a)) {
                    uid,
                    dgraph.type,
            }
        }
        """
    txn = client.txn(read_only=False)

    try:
        res = json.loads(
            txn.query(query, variables={'$a': str(node_key)}).json)['q0']

        existing_uid = None
        if res:
            existing_uid = res[0]['uid']
            # The stored uid and dgraph.type take precedence over the input.
            mutation = {**mutation, **res[0]}

        mut_res = txn.mutate(set_obj=mutation, commit_now=True)

        # The placeholder '_:blank-0' is truthy, so only a uid that came
        # back from the lookup may short-circuit the assigned-uid lookup.
        if existing_uid:
            return existing_uid
        return mut_res.uids["blank-0"]

    finally:
        txn.discard()
Ejemplo n.º 12
0
def raw_node_from_uid(dgraph_client: DgraphClient,
                      uid: str) -> Optional[Dict[str, Any]]:
    """Load a single node by uid as a raw dict.

    The ``dgraph.type`` value is requested under the alias ``node_type``.
    When present, it is replaced by its first element; when absent, a
    warning is printed. Returns ``None`` if the query result is empty.
    """
    query = f"""
        {{
            res(func: uid("{uid}"), first: 1) {{
                uid,
                expand(_all_),
                node_type: dgraph.type
            }}
        }}
        """

    txn = dgraph_client.txn(read_only=True, best_effort=False)
    try:
        res = json.loads(txn.query(query).json)["res"]
    finally:
        txn.discard()

    if not res:
        return None

    # The result may be a list of nodes or a single node object; both
    # shapes are normalised the same way.
    node = res[0] if isinstance(res, list) else res

    if node.get("node_type"):
        node["node_type"] = node["node_type"][0]
    else:
        print(f"WARN: node_type missing from {uid} {res}")

    return cast(Dict[str, Any], node)
Ejemplo n.º 13
0
    def get_count(
        self, dgraph_client: DgraphClient, first: Optional[int] = None,
    ) -> int:
        """Run this query in counting mode and return the count.

        The query is generated with ``count=True`` and executed in a
        read-only transaction. An empty result yields ``0``; a list result
        is read from its first element; a dict result is read directly.

        Raises:
            TypeError: If the non-empty result is neither a list nor a dict.
        """
        count_query = generate_query(
            query_name="res",
            binding_modifier="res",
            root=self,
            contains_node_key=None,
            first=first,
            count=True,
        )

        txn = dgraph_client.txn(read_only=True)
        try:
            raw_count = json.loads(txn.query(count_query).json)["res"]
        finally:
            txn.discard()

        if not raw_count:
            return 0

        if isinstance(raw_count, list):
            holder = raw_count[0]
        elif isinstance(raw_count, dict):
            holder = raw_count
        else:
            raise TypeError("raw_count is not list or dict")

        return int(holder.get("count", 0))
Ejemplo n.º 14
0
def edge_in_lens(dg_client: DgraphClient, node_uid: str, edge_name: str,
                 lens_name: str) -> List[Dict[str, Any]]:
    """Query the neighbours of a node over ``edge_name`` scoped to a lens.

    ``edge_name`` is formatted into the query text as the edge to follow,
    while ``node_uid`` and ``lens_name`` are passed as query variables.
    The query uses ``@cascade`` and filters the neighbours' ``~scope``
    edge on ``lens_name``. Returns the ``q0`` part of the response.
    """
    query = f"""
        query q0($node_uid: string, $lens_name: string)
        {{
            q0(func: uid($node_uid)) @cascade {{
                {edge_name} {{
                    uid,
                    node_key,
                    node_type: dgraph.type,
                    ~scope @filter(eq(lens, $lens_name)) {{
                        uid,
                        node_type: dgraph.type,
                    }}
                }}
            }}
        }}
    """
    query_vars = {"$node_uid": node_uid, "$lens_name": lens_name}

    txn = dg_client.txn(read_only=True)
    try:
        response = txn.query(query, variables=query_vars)
        payload = json.loads(response.json)
    finally:
        txn.discard()

    return payload["q0"]
Ejemplo n.º 15
0
    def query(
        self,
        dgraph_client: DgraphClient,
        contains_node_key: Optional[str] = None,
        first: Optional[int] = 1000,
    ) -> List["NV"]:
        """Execute this query and return the matching views.

        A falsy ``first`` falls back to 1000, and passing
        ``contains_node_key`` forces the limit to 1. Any failure while
        running or decoding the query is re-raised as an ``Exception``
        carrying the generated query string and the original error.
        Returns an empty list when nothing matched.
        """
        limit = 1 if contains_node_key else (first or 1000)

        query_str = generate_query(
            query_name="res",
            binding_modifier="res",
            root=self,
            contains_node_key=contains_node_key,
            first=limit,
        )

        txn = dgraph_client.txn(read_only=True)
        try:
            response = txn.query(query_str)
            raw_views = json.loads(response.json)["res"]
        except Exception as e:
            raise Exception(query_str, e)
        finally:
            txn.discard()

        if not raw_views:
            return []

        views = []
        for raw_view in raw_views:
            views.append(self.view_type.from_dict(dgraph_client, raw_view))
        return views
Ejemplo n.º 16
0
    def test_process_query_view_parity(self, process_props: ProcessProps) -> None:
        """A queried process exposes the same values it was created with.

        Creates (or fetches) a process from ``process_props``, queries it
        back by node key, and compares every property getter on the
        queried view against the input properties. String properties are
        passed through ``escape_dgraph_str`` before comparison.
        """
        local_client = DgraphClient(DgraphClientStub("localhost:9080"))

        created_proc = get_or_create_process(self, local_client, process_props,)

        queried_proc = (
            ProcessQuery()
            .with_node_key(eq=created_proc.node_key)
            .query_first(local_client)
        )

        assert queried_proc

        assert process_props["node_key"] == queried_proc.node_key
        assert "Process" == queried_proc.get_node_type()
        assert process_props["process_id"] == queried_proc.get_process_id()
        assert process_props["arguments"] == escape_dgraph_str(
            queried_proc.get_arguments()
        )
        assert (
            process_props["created_timestamp"] == queried_proc.get_created_timestamp()
        )
        # Identity comparison is the correct check for None; ``==`` would
        # defer to a custom __eq__ on whatever get_asset() returns.
        assert queried_proc.get_asset() is None
        assert process_props["terminate_time"] == queried_proc.get_terminate_time()
        assert process_props["image_name"] == escape_dgraph_str(
            queried_proc.get_image_name()
        )
        assert process_props["process_name"] == escape_dgraph_str(
            queried_proc.get_process_name()
        )
Ejemplo n.º 17
0
def get_lens_risks(dg_client: DgraphClient, lens: str) -> List[Dict[str, Any]]:
    """Return the scope nodes of a lens together with their risks.

    Args:
        dg_client: Dgraph client used for the read-only query.
        lens: Value of the ``lens`` predicate to look up.

    Returns:
        The ``scope`` entries of the first matching lens, each with its
        nested ``risks``. An empty list is returned when no lens matches
        or when the matching lens has no ``scope`` entries in the
        response.
    """
    query = """
        query q0($a: string)
        {
            q0(func: eq(lens, $a)) {
                uid,
                node_type: dgraph.type,
                node_key,
                lens,
                score,
                scope {
                    uid,
                    node_key,
                    node_type: dgraph.type
                    risks {
                        uid,
                        node_key,
                        analyzer_name,
                        node_type: dgraph.type,
                        risk_score
                    }
                }
            }
      }"""

    txn = dg_client.txn(read_only=True)

    try:
        variables = {"$a": lens}
        res = json.loads(txn.query(query, variables=variables).json)
        lenses = res["q0"]
        if not lenses:
            return []
        # A lens without any scope edge comes back without a "scope" key;
        # treat that as an empty scope instead of raising KeyError.
        return lenses[0].get("scope", [])
    finally:
        txn.discard()
Ejemplo n.º 18
0
def expand_node_forward(dgraph_client: DgraphClient, node_key: str) -> Optional[Dict[str, Any]]:
    """Fetch a node and its directly connected neighbours, fully expanded.

    Args:
        dgraph_client: Dgraph client used for the read-only query.
        node_key: Value of the ``node_key`` predicate to look up.

    Returns:
        The first matching node as a dict, with ``dgraph.type`` exposed
        under the alias ``node_type`` on both the node and its expanded
        neighbours, or ``None`` when no node matches ``node_key``.
    """
    query = """
        query res($node_key: string)
        {
        
            res(func: eq(node_key, $node_key))
            {
                uid,
                expand(_all_) {
                    uid,
                    expand(_all_),
                    node_type: dgraph.type
                }
                node_type: dgraph.type
            }
      
        }
    """

    txn = dgraph_client.txn(read_only=True)
    variables = {"$node_key": node_key}
    try:
        res = json.loads(txn.query(query, variables=variables).json)
    finally:
        txn.discard()

    # The signature promises Optional: an empty result means "not found"
    # and must not surface as an IndexError.
    matches = res['res']
    if not matches:
        return None
    return matches[0]
Ejemplo n.º 19
0
def copy_node(mgclient: DgraphClient,
              egclient: DgraphClient,
              node_key: str,
              init_node: Optional[Dict[str, Any]] = None) -> Any:
    """Copy the node identified by ``node_key`` from one cluster to another.

    The node is read through ``mgclient`` with all of its predicates,
    merged with ``init_node`` (whose entries win on key collisions) and
    handed to ``upsert`` against ``egclient``.

    Args:
        mgclient: Client the node is read from.
        egclient: Client the node is upserted into.
        node_key: Value of the ``node_key`` predicate to look up.
        init_node: Optional extra/overriding properties for the copy.

    Returns:
        Whatever ``upsert`` returns for the copied node. The function was
        previously annotated ``-> None`` although it always returned this
        value.

    Raises:
        Exception: If no node matches ``node_key``.
    """
    overrides = init_node or {}

    # node_key is passed as a query variable rather than formatted into
    # the query text, so quotes in the key cannot alter the query.
    query = """
        query q0($a: string)
        {
            q0(func: eq(node_key, $a)) {
                    uid,
                    expand(_all_),
                    dgraph.type
            }
        }
        """
    txn = mgclient.txn(read_only=True)

    try:
        res = json.loads(
            txn.query(query, variables={'$a': str(node_key)}).json)['q0']
    finally:
        txn.discard()

    if not res:
        raise Exception("ERROR: Can not find res")

    print(f"Copy query result: {res}")

    raw_to_copy = {**res[0], **overrides}

    return upsert(egclient, raw_to_copy)
Ejemplo n.º 20
0
def should_throttle(
        analyzer_name: str,
        root_node_key: str,
        dgraph_client: DgraphClient
) -> bool:
    """Report whether a matching risk already exists for a root node.

    The query looks up the node with ``root_node_key`` and, using
    ``@cascade``, only returns it when it is reachable through ``~scope``
    from a node that has a ``risks`` entry whose ``analyzer_name`` equals
    ``analyzer_name``.

    Args:
        analyzer_name: Analyzer name to match on the risk nodes.
        root_node_key: ``node_key`` of the node being checked.
        dgraph_client: Dgraph client used for the read-only query.

    Returns:
        ``True`` when the query returns at least one result, else ``False``.
    """
    query = """
            query q0($a: string, $n: string)
            {
              q0(func: eq(node_key, $n), first: 1) @cascade
              {
                uid,
                ~scope {
                    uid,
                    risks @filter(eq(analyzer_name, $a)) {
                        uid
                    }
                }
              }
            }
            """

    variables = {
        '$a': analyzer_name,
        '$n': root_node_key
    }

    # Discard the transaction explicitly; it was previously left open.
    txn = dgraph_client.txn(read_only=True)
    try:
        res = json.loads(txn.query(query, variables=variables).json)
    finally:
        txn.discard()

    return bool(res['q0'])
Ejemplo n.º 21
0
    def upsert(client: DgraphClient, node_key: str, props: Dict[str, str]):
        """Create or update the node identified by ``node_key``.

        Args:
            client: Dgraph client used for the read-write transaction.
            node_key: Value of the ``node_key`` predicate to look up.
            props: Properties for the node. For an existing node the
                stored values win over ``props`` on key collisions.

        Returns:
            The uid of the existing node when one matched, otherwise the
            uid Dgraph assigned to the newly created node.
        """
        query = """
            query q0($a: string)
            {
              q0(func: eq(node_key, $a))
              {
                uid,
                expand(_all_)
              }
            }
            """

        txn = client.txn(read_only=False)

        try:
            res = json.loads(txn.query(query, variables={'$a': node_key}).json)
            matches = res['q0']

            if matches:
                # TODO: Merge lists of properties together
                node = {**props, **matches[0]}
                # An update declares no blank node, so the uid has to come
                # from the lookup rather than from the mutation response.
                existing_uid = node.get('uid')
            else:
                # Name the blank node so its assigned uid can be read back,
                # and store node_key so later lookups can find the node.
                # Entries in props still take precedence.
                node = {'uid': '_:blank-0', 'node_key': node_key, **props}
                existing_uid = None

            mut_res = txn.mutate(set_obj=node, commit_now=True)
            uid = existing_uid or mut_res.uids['blank-0']
        finally:
            txn.discard()

        return uid
Ejemplo n.º 22
0
    def test_process_query_view_miss(self, process_props: ProcessProps) -> None:
        """A query negating every property of a process must not match it.

        Creates (or fetches) a process, checks that all compared
        properties are populated, then queries by its node key while
        requiring each property to differ from the stored value.
        """
        local_client = DgraphClient(DgraphClientStub("localhost:9080"))

        created_proc = get_or_create_process(self, local_client, process_props)

        # Every property negated below has to be present on the created view.
        assert created_proc.process_id is not None
        assert created_proc.arguments is not None
        assert created_proc.created_timestamp is not None
        assert created_proc.terminate_time is not None
        assert created_proc.image_name is not None
        assert created_proc.process_name is not None

        miss_query = ProcessQuery().with_node_key(eq=created_proc.node_key)
        miss_query = miss_query.with_process_id(eq=Not(created_proc.process_id))
        miss_query = miss_query.with_arguments(eq=Not(created_proc.arguments))
        miss_query = miss_query.with_created_timestamp(
            eq=Not(created_proc.created_timestamp)
        )
        miss_query = miss_query.with_terminate_time(
            eq=Not(created_proc.terminate_time)
        )
        miss_query = miss_query.with_image_name(eq=Not(created_proc.image_name))
        miss_query = miss_query.with_process_name(
            eq=Not(created_proc.process_name)
        )

        queried_proc = miss_query.query_first(local_client)

        assert not queried_proc
Ejemplo n.º 23
0
def get_uid(client: DgraphClient, node_key: str) -> str:
    """Look up the uid of the node whose ``node_key`` equals ``node_key``.

    Raises:
        Exception: If the response holds an empty list of matches.
    """
    txn = client.txn(read_only=True)
    try:
        query = """
            query res($a: string)
            {
              res(func: eq(node_key, $a), first: 1) @cascade
               {
                 uid,
               }
             }"""
        response = txn.query(query, variables={"$a": node_key})
        res = json.loads(response.json)
        matches = res["res"]

        # A non-list result is treated as a single node object.
        if not isinstance(matches, list):
            return str(matches["uid"])

        if not matches:
            raise Exception(
                f"get_uid failed for node_key: {node_key} {res}")

        return str(matches[0]["uid"])

    finally:
        txn.discard()
Ejemplo n.º 24
0
def _upsert(client: DgraphClient, node_dict: Dict[str, Property]) -> str:
    """Create or update the node whose ``node_key`` matches ``node_dict``.

    Args:
        client: Dgraph client used for the read-write transaction.
        node_dict: Properties to write; must contain ``node_key``. Any
            ``uid`` entry is ignored. The dict itself is not modified.

    Returns:
        The uid of the existing node when one matched, otherwise the uid
        Dgraph assigned to the newly created node.

    Raises:
        KeyError: If ``node_dict`` has no ``node_key`` entry.
    """
    # Work on a copy so the caller's dict is not rewritten with the
    # placeholder uid.
    mutation = dict(node_dict)
    mutation["uid"] = "_:blank-0"
    node_key = mutation["node_key"]

    # Only the uid of a match is used, so nothing else is requested, and
    # the key is passed as a query variable instead of being formatted
    # into the query text.
    query = """
        query q0($a: string)
        {
            q0(func: eq(node_key, $a))
            {
                    uid
            }
        }
        """
    txn = client.txn(read_only=False)

    try:
        res = json.loads(
            txn.query(query, variables={"$a": str(node_key)}).json
        )["q0"]

        existing_uid = res[0]["uid"] if res else None
        if existing_uid is not None:
            mutation["uid"] = existing_uid

        m_res = txn.mutate(set_obj=mutation, commit_now=True)

        if existing_uid is None:
            return str(m_res.uids["blank-0"])
        return str(existing_uid)

    finally:
        txn.discard()
Ejemplo n.º 25
0
def get_lens_scope(dg_client: DgraphClient, lens: str) -> Dict[str, Any]:
    """Return the first lens node matching ``lens`` with its expanded scope.

    The lens is looked up by its ``lens`` predicate and its ``scope``
    entries are returned with all predicates expanded. An empty dict is
    returned when no lens matches.
    """
    query = """
        query q0($a: string)
        {
            q0(func: eq(lens, $a)) {
                uid,
                node_type: dgraph.type,
                node_key,
                lens,
                score,
                scope {
                    uid,
                    expand(_all_),
                    node_type: dgraph.type,
                }
            }
      }"""
    query_vars = {"$a": lens}

    txn = dg_client.txn(read_only=True)
    try:
        response = txn.query(query, variables=query_vars)
        payload = json.loads(response.json)
        lenses = payload["q0"]
        return lenses[0] if lenses else {}
    finally:
        txn.discard()
Ejemplo n.º 26
0
def attach_risk(client: DgraphClient, node: Dict[str, Any], analyzer_name: str, risk_score: int):
    """Attach a risk to a node unless that analyzer already attached one.

    Args:
        client: Dgraph client used for the read-write transaction.
        node: Node dict; only its ``node_key`` entry is read.
        analyzer_name: Stored on the risk and used to detect an existing
            risk from the same analyzer.
        risk_score: Score stored on the new risk.

    Raises:
        KeyError: If ``node`` has no ``node_key`` entry.
        IndexError: If no node with that ``node_key`` exists.
    """
    query = """
            query q0($a: string, $b: string)
            {
            
              n as var(func: eq(node_key, $a), first: 1) {
                uid
              }
            
              q0(func: uid(n), first: 1)
              {
                uid,
                risks @filter(
                    eq(analyzer_name, $b)
                )
                {
                    uid
                }
              }
            }
            """

    variables = {
        '$a': node['node_key'],
        '$b': analyzer_name
    }

    # A single transaction serves both the lookup and the mutation. A
    # second one used to be opened here, leaving the first undiscarded.
    txn = client.txn(read_only=False)
    try:
        res = json.loads(txn.query(query, variables=variables).json)
        matches = res['q0']

        if not matches:
            # Same exception type the unguarded index access produced,
            # but with a message that says what went wrong.
            raise IndexError(
                f"attach_risk: no node found for node_key {node['node_key']}")

        if matches[0].get('risks'):
            # This analyzer has already attached a risk to the node.
            return

        mutation = {
            "uid": matches[0]['uid'],
            "risks": {
                'analyzer_name': analyzer_name,
                'risk_score': risk_score
            }
        }

        print(f"mutation: {mutation}")

        txn.mutate(set_obj=mutation, commit_now=True)
    finally:
        txn.discard()
Ejemplo n.º 27
0
    def test_with_wrote_files(self) -> None:
        """A process linked to a file via ``wrote_files`` is queryable.

        Upserts a process and a file, connects them with a ``wrote_files``
        edge, then checks that a ``ProcessQuery`` constrained on both
        nodes returns the process with exactly that one written file.
        """
        process_key = (
            "test_with_wrote_files-8f0761fb-2ffe-4d4b-ab38-68e5489f56dc"
        )
        file_key = "test_with_wrote_files-2325c49a-95b4-423f-96d0-99539fe03833"
        file_path = "/folder/file.txt"

        # Given: a process with a pid 100 & process_name word.exe,
        local_client = DgraphClient(DgraphClientStub("localhost:9080"))

        created_timestamp = int(time.time())

        parent_process: Dict[str, Property] = {
            "process_id": 100,
            "process_name": "word.exe",
            "created_timestamp": created_timestamp,
        }
        parent_process_view = upsert(
            local_client, "Process", ProcessView, process_key, parent_process
        )

        wrote_file: Dict[str, Property] = {
            "file_path": file_path,
            "created_timestamp": created_timestamp + 1000,
        }
        wrote_file_view = upsert(
            local_client, "File", FileView, file_key, wrote_file
        )

        create_edge(
            local_client,
            parent_process_view.uid,
            "wrote_files",
            wrote_file_view.uid,
        )

        # When: the process is queried together with the file it wrote,
        queried_process = (
            ProcessQuery()
            .with_node_key(eq=process_key)
            .with_process_id(eq=100)
            .with_process_name(contains="word")
            .with_created_timestamp(eq=created_timestamp)
            .with_wrote_files(
                FileQuery()
                .with_node_key(eq=file_key)
                .with_file_path(eq=file_path)
            )
            .query_first(local_client)
        )

        # Then: both nodes come back with the expected properties.
        assert queried_process
        assert queried_process.node_key == process_key
        assert queried_process.process_id == 100
        assert queried_process.process_name == "word.exe"

        assert len(queried_process.wrote_files) == 1
        assert queried_process.wrote_files[0].node_key == file_key
        assert queried_process.wrote_files[0].file_path == file_path
Ejemplo n.º 28
0
def analyzer(graph: Subgraph, sender: Connection):
    """Run ``_analyzer`` against a fresh client, reporting failure as None.

    Any exception raised while creating the client or running
    ``_analyzer`` is printed, and ``None`` is sent over ``sender``.
    """
    try:
        print('analyzing')
        stub = DgraphClientStub('db.mastergraph:9080')
        _analyzer(DgraphClient(stub), graph, sender)
    except Exception as e:
        print(f'analyzer failed: {e}')
        sender.send(None)
Ejemplo n.º 29
0
    def test_with_bin_file(self) -> None:
        """A process linked to a file via ``bin_file`` is queryable.

        Upserts a process and a file, connects them with a ``bin_file``
        edge, then checks that a ``ProcessQuery`` constrained on both
        nodes returns the process together with that file.
        """
        process_key = "635952af-87f3-4a2a-a65d-3f1859db9525"
        file_key = "9f16e0c9-33c0-4d18-9878-ef686373570b"

        # Given: a process with a pid 100 & process_name word.exe,
        local_client = DgraphClient(DgraphClientStub("localhost:9080"))

        created_timestamp = int(time.time())

        parent_process = {
            "process_id": 100,
            "process_name": "word.exe",
            "created_timestamp": created_timestamp,
        }  # type: Dict[str, Property]

        parent_process_view = upsert(
            local_client,
            "Process",
            ProcessView,
            process_key,
            parent_process,
        )

        bin_file = {
            "file_path": "/folder/file.txt",
            "created_timestamp": created_timestamp + 1000,
        }  # type: Dict[str, Property]

        bin_file_view = upsert(
            local_client,
            "File",
            FileView,
            file_key,
            bin_file,
        )

        create_edge(
            local_client,
            parent_process_view.uid,
            "bin_file",
            bin_file_view.uid,
        )

        queried_process = (
            ProcessQuery()
            .with_node_key(eq=process_key)
            .with_process_id(eq=100)
            .with_process_name(contains="word")
            .with_created_timestamp(eq=created_timestamp)
            .with_bin_file(
                FileQuery()
                .with_node_key(eq=file_key)
                .with_file_path(eq="/folder/file.txt")
            )
            .query_first(local_client)
        )

        assert queried_process
        # This used to assert a bare string literal, which is always true;
        # the node key of the result is what has to be checked.
        assert queried_process.node_key == process_key
        assert queried_process.process_id == 100
        assert queried_process.process_name == "word.exe"
        assert queried_process.created_timestamp == created_timestamp

        # A separate name keeps the property dict above and the queried
        # view from sharing one variable with two different types.
        queried_bin_file = queried_process.bin_file
        assert queried_bin_file.node_key == file_key

        assert queried_bin_file.file_path == "/folder/file.txt"
Ejemplo n.º 30
0
def lambda_handler(events: Any, context: Any) -> None:
    """Run the analyzer named in each incoming record against its subgraph.

    For every entry in ``events["Records"]`` the handler loads a JSON
    message via ``parse_s3_event``, downloads the analyzer file named by
    the message's ``key``, rebuilds the subgraph from the message's
    ``subgraph`` field, and runs ``execute_file`` in a child process. It
    then reads results from a pipe until an ``ExecutionComplete`` arrives.
    Each ``ExecutionHit`` is emitted and recorded in the message and hit
    caches.

    Environment variables read: ``MG_ALPHAS`` (comma-separated host names)
    and ``BUCKET_PREFIX``.

    Raises:
        AssertionError: If the child process reports ``ExecutionFailed``.
    """
    # Log the raw invocation payload.
    print("handling")
    print(events)
    print(context)

    # One client stub per comma-separated host in MG_ALPHAS, all on port 9080.
    alpha_names = os.environ["MG_ALPHAS"].split(",")

    client_stubs = [DgraphClientStub("{}:9080".format(name)) for name in alpha_names]
    client = DgraphClient(*client_stubs)

    s3 = get_s3_client()

    for event in events["Records"]:
        if not IS_LOCAL:
            # Outside local mode the record wraps a JSON 'body' whose first
            # 'Records' entry is the event that gets parsed.
            event = json.loads(event['body'])['Records'][0]
        data = parse_s3_event(s3, event)

        message = json.loads(data)

        print(f'Executing Analyzer: {message["key"]}')
        analyzer = download_s3_file(s3, f"{os.environ['BUCKET_PREFIX']}-analyzers-bucket", message["key"])
        # The analyzer name is the second-to-last "/"-separated segment of the key.
        analyzer_name = message["key"].split("/")[-2]

        subgraph = SubgraphView.from_proto(client, bytes(message["subgraph"]))

        # TODO: Validate signature of S3 file
        print(f'event {event}')
        # One-way pipe: the child writes to tx, this process reads from rx.
        rx, tx = Pipe(duplex=False)  # type: Tuple[Connection, Connection]
        p = Process(target=execute_file, args=(analyzer_name, analyzer, subgraph, tx, ''))

        p.start()
        # Number of 5-second polls that returned no data (for logging only).
        t = 0

        # NOTE(review): this loop only ends on ExecutionComplete or a failed
        # assertion; if the child exits without sending either, polling
        # appears to continue indefinitely. Confirm a timeout is enforced
        # elsewhere.
        while True:
            p_res = rx.poll(timeout=5)
            if not p_res:
                t += 1
                print(f"Polled {analyzer_name} for {t * 5} seconds without result")
                continue
            result = rx.recv()  # type: Optional[Any]

            if isinstance(result, ExecutionComplete):
                print("execution complete")
                break

            # emit any hits to an S3 bucket
            if isinstance(result, ExecutionHit):
                print(f"emitting event for {analyzer_name} {result.analyzer_name} {result.root_node_key}")
                emit_event(s3, result)
                # NOTE(review): update_msg_cache receives `analyzer` (the
                # downloaded file) while update_hit_cache receives
                # `analyzer_name`. Confirm this difference is intended.
                update_msg_cache(analyzer, result.root_node_key, message['key'])
                update_hit_cache(analyzer_name, result.root_node_key)

            # NOTE(review): assert statements are removed under `python -O`,
            # and on failure the child process is not joined.
            assert not isinstance(
                result, ExecutionFailed
            ), f"Analyzer {analyzer_name} failed."

        p.join()