Example 1
    def process_filter(self, data, allowed, manifest_info):
        filtered_by_type = []
        filtered_by_id = []

        # Apply the match[type] filter if the request supplied one and the
        # endpoint allows filtering on type.
        match_type = self.filter_args.get("match[type]")
        if match_type and "type" in allowed:
            filtered_by_type = self.filter_by_type(data, match_type)

        # Likewise for match[id].
        match_id = self.filter_args.get("match[id]")
        if match_id and "id" in allowed:
            filtered_by_id = self.filter_by_id(data, match_id)

        results = []

        # If both filters were applied, keep only the objects that matched
        # both; if only one was applied, keep its matches; otherwise keep
        # everything.
        if filtered_by_type and filtered_by_id:
            for type_match in filtered_by_type:
                for id_match in filtered_by_id:
                    if type_match == id_match:
                        results.append(type_match)
        elif match_type:
            results.extend(filtered_by_type)
        elif match_id:
            results.extend(filtered_by_id)
        else:
            results = data

        # Apply the match[version] filter ("last" is the TAXII default),
        # partitioning by id so each object's versions are filtered together.
        match_version = self.filter_args.get("match[version]")
        if "version" in allowed:
            if not match_version:
                match_version = "last"
            if data and self.is_manifest_entry(data[0]):
                results = self.filter_manifest_entries_by_version(results, match_version)
            else:
                new_results = []
                for bucket in BasicFilter._equivalence_partition_by_id(results):
                    new_results.extend(self.filter_by_version(bucket, match_version))
                results = new_results

        # Drop anything whose manifest date_added is not after the
        # added_after bound.
        added_after_date = self.filter_args.get("added_after")
        if added_after_date:
            added_after_timestamp = common.convert_to_stix_datetime(added_after_date)
            new_results = []
            for obj in results:
                info = None
                for item in manifest_info:
                    if item["id"] == obj["id"]:
                        info = item
                        break
                if info:
                    added_date_timestamp = common.convert_to_stix_datetime(info["date_added"])
                    if added_date_timestamp > added_after_timestamp:
                        new_results.append(obj)
            return new_results
        return results
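
Examples 1 and 2 both call a BasicFilter._equivalence_partition_by_id helper that is not shown here. A minimal sketch of what such a helper could look like, assuming it only needs to group results that share an id so version filtering runs per object (the project's actual implementation may differ):

def _equivalence_partition_by_id(results):
    # Bucket objects by "id" so filter_by_version() can pick first/last/
    # specific versions within each object's own history.
    buckets = {}
    for obj in results:
        buckets.setdefault(obj["id"], []).append(obj)
    return list(buckets.values())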
Example 2
    def process_filter(self, data, allowed, manifest_info):
        results = list(data.find(self.full_query))
        if results and self.filter_args:
            if "version" in allowed:
                match_version = self.filter_args.get("match[version]")
                if not match_version:
                    match_version = "last"
                if self.is_manifest_entry(results[0]):
                    results = self.filter_manifest_entries_by_version(results, match_version)
                else:
                    new_results = []
                    for bucket in BasicFilter._equivalence_partition_by_id(results):
                        new_results.extend(self.filter_by_version(bucket, match_version))
                    results = new_results
            added_after_date = self.filter_args.get("added_after")
            if added_after_date:
                added_after_timestamp = common.convert_to_stix_datetime(added_after_date)
                new_results = []
                for obj in results:
                    # Look up this object's manifest entry in the current
                    # collection.
                    info = manifest_info["mongodb_collection"].find_one({
                        "id": obj["id"],
                        "_collection_id": manifest_info["_collection_id"],
                    })
                    if info:
                        added_date_timestamp = common.convert_to_stix_datetime(info["date_added"])
                        if added_date_timestamp > added_after_timestamp:
                            new_results.append(obj)
                return new_results
        return results
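
The added_after loop above issues one find_one() per result. For illustration only, a batched alternative that fetches all relevant manifest entries with a single $in query (a sketch, not the project's code):

ids = [obj["id"] for obj in results]
manifest_by_id = {
    m["id"]: m
    for m in manifest_info["mongodb_collection"].find({
        "id": {"$in": ids},
        "_collection_id": manifest_info["_collection_id"],
    })
}
new_results = [
    obj for obj in results
    if obj["id"] in manifest_by_id
    and common.convert_to_stix_datetime(
        manifest_by_id[obj["id"]]["date_added"]) > added_after_timestamp
]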
Example 3
def reset_db():
    client = connect_to_client()
    client.drop_database("discovery_database")
    db = build_new_mongo_databases_and_collection(client)

    db["discovery_information"].insert_one({
        "title": "Some TAXII Server",
        "description": "This TAXII Server contains a listing of",
        "contact": "string containing contact information",
        "api_roots": []
    })
    client.drop_database("trustgroup1")
    api_root_db = add_api_root(
        client,
        url="http://localhost:5000/trustgroup1/",
        title="Malware Research Group",
        description="A trust group setup for malware researchers",
        max_content_length=9765625,
        default=True)
    api_root_db["status"].insert_many([{
        "id":
        "2d086da7-4bdc-4f91-900e-d77486753710",
        "status":
        "pending",
        "request_timestamp":
        "2016-11-02T12:34:34.12345Z",
        "total_count":
        4,
        "success_count":
        1,
        "successes": ["indicator--a932fcc6-e032-176c-126f-cb970a5a1ade"],
        "failure_count":
        1,
        "failures": [{
            "id": "malware--664fa29d-bf65-4f28-a667-bdb76f29ec98",
            "message": "Unable to process object"
        }],
        "pending_count":
        2,
        "pendings": [
            "indicator--252c7c11-daf2-42bd-843b-be65edca9f61",
            "relationship--045585ad-a22f-4333-af33-bfd503a683b5"
        ]
    }, {
        "id": "2d086da7-4bdc-4f91-900e-f4566be4b780",
        "status": "pending",
        "request_timestamp": "2016-11-02T12:34:34.12345Z",
        "total_objects": 2,
        "success_count": 0,
        "successes": [],
        "failure_count": 0,
        "failures": [],
        "pending_count": 0,
        "pendings": []
    }])

    api_root_db["manifests"].insert_many([{
        "id":
        "indicator--a932fcc6-e032-176c-126f-cb970a5a1ade",
        "date_added":
        convert_to_stix_datetime("2016-11-01T03:04:05Z"),
        "versions": ["2014-05-08T09:00:00.000Z"],
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"],
        '_collection_id':
        '91a7b528-80eb-42ed-a74d-c6fbd5a26116',
        '_type':
        'indicator'
    }, {
        "id":
        "malware--fdd60b30-b67c-11e3-b0b9-f01faf20d111",
        "date_added":
        convert_to_stix_datetime("2017-01-27T13:49:53.997Z"),
        "versions": ["2017-01-27T13:49:53.997Z"],
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"],
        '_collection_id':
        '91a7b528-80eb-42ed-a74d-c6fbd5a26116',
        '_type':
        'malware'
    }, {
        "id":
        "relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463",
        "date_added":
        convert_to_stix_datetime("2014-05-08T09:00:00.000Z"),
        "versions": ["2014-05-08T09:00:00.000Z"],
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"],
        '_collection_id':
        '91a7b528-80eb-42ed-a74d-c6fbd5a26116',
        '_type':
        'relationship'
    }, {
        "date_added":
        convert_to_stix_datetime("2017-01-20T00:00:00.000Z"),
        "_collection_id":
        "91a7b528-80eb-42ed-a74d-c6fbd5a26116",
        "id":
        "marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da",
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"],
        "versions": ["2017-01-20T00:00:00.000Z"],
        "_type":
        "marking-definition"
    }, {
        "id":
        "indicator--d81f86b9-975b-bc0b-775e-810c5ad45a4f",
        "date_added":
        convert_to_stix_datetime("2016-12-27T13:49:53Z"),
        "versions": ["2016-11-03T12:30:59.000Z", "2017-01-27T13:49:53.935Z"],
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"],
        "_collection_id":
        "52892447-4d7e-4f70-b94d-d7f22742ff63",
        '_type':
        'indicator'
    }, {
        "id":
        "indicator--b81f86b9-975b-bb0b-775e-810c5bd45b4f",
        "date_added":
        convert_to_stix_datetime("2016-11-03T12:30:59.000Z"),
        "versions": ["2016-11-03T12:30:59.000Z"],
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"],
        "_collection_id":
        "91a7b528-80eb-42ed-a74d-c6fbd5a26116",
        '_type':
        'indicator'
    }])

    api_root_db["collections"].insert_one({
        "id":
        "91a7b528-80eb-42ed-a74d-c6fbd5a26116",
        "title":
        "High Value Indicator Collection",
        "can_read":
        True,
        "can_write":
        True,
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"]
    })

    api_root_db["collections"].insert_one({
        "id":
        "472c94ae-3113-4e3e-a4dd-a9f4ac7471d4",
        "title":
        "Empty test Collection",
        "description":
        "This data collection is for testing querying across collections",
        "can_read":
        True,
        "can_write":
        True,
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"]
    })

    api_root_db["collections"].insert_one({
        "id":
        "52892447-4d7e-4f70-b94d-d7f22742ff63",
        "title":
        "Indicators from the past 24-hours",
        "description":
        "This data collection is for collecting current IOCs",
        "can_read":
        True,
        "can_write":
        False,
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"]
    })

    api_root_db["collections"].insert_one({
        "id":
        "64993447-4d7e-4f70-b94d-d7f33742ee63",
        "title":
        "Secret Indicators",
        "description":
        "Non accessible",
        "can_read":
        False,
        "can_write":
        False,
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"]
    })

    api_root_db["objects"].insert_many([{
        "created":
        "2016-11-03T12:30:59.000Z",
        "id":
        "indicator--d81f86b9-975b-bc0b-775e-810c5ad45a4f",
        "labels": ["url-watchlist"],
        "modified":
        "2017-01-27T13:49:53.935Z",
        "name":
        "Malicious site hosting downloader",
        "pattern":
        "[url:value = 'http://x4z9arb.cn/4712']",
        "type":
        "indicator",
        "valid_from":
        "2016-11-03T12:30:59.000Z",
        "_collection_id":
        "52892447-4d7e-4f70-b94d-d7f22742ff63"
    }, {
        "created":
        "2016-11-03T12:30:59.000Z",
        "description":
        "Accessing this url will infect your machine with malware.",
        "id":
        "indicator--d81f86b9-975b-bc0b-775e-810c5ad45a4f",
        "labels": ["url-watchlist"],
        "modified":
        "2016-11-03T12:30:59.000Z",
        "name":
        "Malicious site hosting downloader",
        "pattern":
        "[url:value = 'http://x4z9arb.cn/4712']",
        "type":
        "indicator",
        "valid_from":
        "2017-01-27T13:49:53.935382Z",
        "_collection_id":
        "52892447-4d7e-4f70-b94d-d7f22742ff63"
    }, {
        "created":
        "2017-01-27T13:49:53.997Z",
        "description":
        "Poison Ivy",
        "id":
        "malware--fdd60b30-b67c-11e3-b0b9-f01faf20d111",
        "labels": ["remote-access-trojan"],
        "modified":
        "2017-01-27T13:49:53.997Z",
        "name":
        "Poison Ivy",
        "type":
        "malware",
        "_collection_id":
        "91a7b528-80eb-42ed-a74d-c6fbd5a26116"
    }, {
        "created":
        "2014-05-08T09:00:00.000Z",
        "id":
        "indicator--a932fcc6-e032-176c-126f-cb970a5a1ade",
        "labels": ["file-hash-watchlist"],
        "modified":
        "2014-05-08T09:00:00.000Z",
        "name":
        "File hash for Poison Ivy variant",
        "pattern":
        "[file:hashes.'SHA-256' = 'ef537f25c895bfa782526529a9b63d97aa631564d5d789c2b765448c8635fb6c']",
        "type":
        "indicator",
        "valid_from":
        "2014-05-08T09:00:00.000000Z",
        "_collection_id":
        "91a7b528-80eb-42ed-a74d-c6fbd5a26116"
    }, {
        "created":
        "2014-05-08T09:00:00.000Z",
        "id":
        "relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463",
        "modified":
        "2014-05-08T09:00:00.000Z",
        "relationship_type":
        "indicates",
        "source_ref":
        "indicator--a932fcc6-e032-176c-126f-cb970a5a1ade",
        "target_ref":
        "malware--fdd60b30-b67c-11e3-b0b9-f01faf20d111",
        "type":
        "relationship",
        "_collection_id":
        "91a7b528-80eb-42ed-a74d-c6fbd5a26116"
    }, {
        "type": "marking-definition",
        "_collection_id": '91a7b528-80eb-42ed-a74d-c6fbd5a26116',
        "id": "marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da",
        "created": "2017-01-20T00:00:00.000Z",
        "definition_type": "tlp",
        "definition": {
            "tlp": "green"
        }
    }, {
        "created":
        "2016-11-03T12:30:59.000Z",
        "description":
        "Accessing this url will infect your machine with malware.",
        "id":
        "indicator--b81f86b9-975b-bb0b-775e-810c5bd45b4f",
        "labels": ["url-watchlist"],
        "modified":
        "2016-11-03T12:30:59.000Z",
        "name":
        "Malicious site hosting downloader",
        "pattern":
        "[url:value = 'http://z4z10farb.cn/4712']",
        "type":
        "indicator",
        "valid_from":
        "2017-01-27T13:49:53.935382Z",
        "_collection_id":
        "91a7b528-80eb-42ed-a74d-c6fbd5a26116"
    }])

    client.drop_database("api2")
    api_root_db = add_api_root(client,
                               url="http://localhost:5000/api2/",
                               title="STIX 2.0 Indicator Collections",
                               description="A repo for general STIX data.",
                               max_content_length=9765625)

    dateIdx = IndexModel([("date_added", ASCENDING)])
    idIdx = IndexModel([("id", ASCENDING)])
    colIdIdx = IndexModel([("_collection_id", ASCENDING)])
    colIdDateIdx = IndexModel([("_collection_id", ASCENDING),
                               ('date_added', ASCENDING)])
    typeIdx = IndexModel([("_type", ASCENDING)])
    api_root_db["manifests"].create_indexes(
        [dateIdx, idIdx, colIdIdx, colIdDateIdx, typeIdx])
    api_root_db["objects"].create_indexes([idIdx])
Example 4
    def process_filter(self, data, allowed, manifest_info):
        match_filter = {'$match': self.full_query}
        pipeline = [match_filter]

        # create added_after filter
        added_after_date = self.filter_args.get("added_after")
        if added_after_date:
            added_after_timestamp = common.convert_to_stix_datetime(
                added_after_date)
            date_filter = {
                '$match': {
                    'date_added': {
                        '$gt': added_after_timestamp
                    }
                }
            }
            pipeline.append(date_filter)

        # Marking-definitions need special handling because they are not
        # versioned like SDOs.
        if self.filter_contains_marking_definition(pipeline):
            # When querying marking-definitions from the objects collection,
            # the match criteria must use "type" rather than "_type".
            if data.name == "objects" and "_type" in pipeline[0]["$match"]:
                pipeline[0]["$match"]["type"] = pipeline[0]["$match"].pop("_type")
            cursor = data.aggregate(pipeline)
            results = list(cursor)

            return results

        # create version filter
        if "version" in allowed:
            match_version = self.filter_args.get("match[version]")
            if not match_version:
                match_version = "last"
            if "all" not in match_version:
                actual_dates = [
                    x for x in match_version.split(",")
                    if (x != "first" and x != "last")
                ]
                # If specific dates have been selected, then we add these to the $match criteria
                # created from the self.full_query at the beginning of this method. The reason we need
                # to do this is because the $indexOfArray function below will return -1 if the date
                # doesn't exist in the versions array. -1 will be interrpreted by $arrayElemAt as the
                # final element in the array and we will return the wrong result. i.e. not only will the
                # version dates be incorrect, but we shouldn't have returned a result at all.
                # if actual_dates:
                if len(actual_dates) > 0:
                    pipeline.insert(1, {
                        '$match': {
                            'versions': {
                                '$all': [",".join(actual_dates)]
                            }
                        }
                    })

                # The versions array in the mongodb document is ordered newest to oldest, so the 'last'
                # (most recent date) is in first position in the list and the oldest 'first' is in
                # the last position (equal to index -1 for $arrayElemAt)
                version_selector = []
                if "last" in match_version:
                    version_selector.append({'$arrayElemAt': ["$versions", 0]})
                if "first" in match_version:
                    version_selector.append(
                        {'$arrayElemAt': ["$versions", -1]})
                for d in actual_dates:
                    version_selector.append({
                        '$arrayElemAt': [
                            "$versions",
                            {'$indexOfArray': ["$versions", d]}
                        ]
                    })
                version_filter = {
                    '$project': {
                        'id': 1,
                        'date_added': 1,
                        'versions': version_selector,
                        'media_types': 1
                    }
                }
                pipeline.append(version_filter)

        # Use pymongo's public Collection.name property rather than the
        # name-mangled private attribute.
        if data.name == "manifests":
            cursor = data.aggregate(pipeline)
            results = list(cursor)
        else:
            # Join the filtered manifest(s) to the objects collection
            join_objects = {
                '$lookup': {
                    'from': "objects",
                    'localField': "id",
                    'foreignField': "id",
                    'as': "obj"
                }
            }
            pipeline.append(join_objects)
            # Copy the filtered version list to the embedded object document
            project_objects = {
                '$project': {
                    'obj.versions': '$versions',
                    'obj.id': 1,
                    'obj.modified': 1,
                    'obj.created': 1,
                    'obj.labels': 1,
                    'obj.name': 1,
                    'obj.pattern': 1,
                    'obj.type': 1,
                    'obj.valid_from': 1,
                    'obj.created_by_ref': 1,
                    'obj.object_marking_refs': 1
                }
            }
            pipeline.append(project_objects)
            # denormalise the embedded objects and replace the document root
            pipeline.append({'$unwind': '$obj'})
            pipeline.append({'$replaceRoot': {'newRoot': "$obj"}})
            # Redact the result set removing objects where the modified date is not in
            # the versions array
            redact_objects = {
                '$redact': {
                    '$cond': {
                        'if': {
                            '$setIsSubset': [["$modified"], "$versions"]
                        },
                        'then': "$$KEEP",
                        'else': "$$PRUNE"
                    }
                }
            }
            pipeline.append(redact_objects)
            # Project the final results
            project_results = {'$project': {'versions': 0}}
            pipeline.append(project_results)
            cursor = manifest_info["mongodb_collection"].aggregate(pipeline)
            results = list(cursor)

        return results
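
For illustration, with full_query matching a single collection and match[version]=first,last, the code above assembles a pipeline like the following for the manifests collection (values are examples only):

pipeline = [
    {'$match': {'_collection_id': '91a7b528-80eb-42ed-a74d-c6fbd5a26116'}},
    {'$project': {
        'id': 1,
        'date_added': 1,
        'versions': [
            {'$arrayElemAt': ["$versions", 0]},   # "last": newest date is element 0
            {'$arrayElemAt': ["$versions", -1]},  # "first": oldest date is the final element
        ],
        'media_types': 1,
    }},
]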
Example 5
    def process_filter(self, data, allowed, manifest_info):
        match_filter = {"$match": self.full_query}
        pipeline = [match_filter]

        # create added_after filter
        added_after_date = self.filter_args.get("added_after")
        if added_after_date:
            added_after_timestamp = convert_to_stix_datetime(added_after_date)
            date_filter = {
                "$match": {
                    "date_added": {
                        "$gt": added_after_timestamp
                    }
                }
            }
            pipeline.append(date_filter)

        # Marking-definitions need special handling because they are not
        # versioned like SDOs.
        if self.filter_contains_marking_definition(pipeline):
            # When querying marking-definitions from the objects collection,
            # the match criteria must use "type" rather than "_type".
            if data.name == "objects" and "_type" in pipeline[0]["$match"]:
                pipeline[0]["$match"]["type"] = pipeline[0]["$match"].pop("_type")

            # Calculate total number of matching documents
            if data.name == "objects":
                count = self.get_result_count(
                    pipeline, manifest_info["mongodb_collection"])
            else:
                count = self.get_result_count(pipeline, data)

            self.add_pagination_operations(pipeline)

            cursor = data.aggregate(pipeline)
            results = list(cursor)

            return count, results

        # create version filter
        if "version" in allowed:
            match_version = self.filter_args.get("match[version]")
            if not match_version:
                match_version = "last"
            if "all" not in match_version:
                actual_dates = [
                    x for x in match_version.split(",")
                    if (x != "first" and x != "last")
                ]
                # If specific dates have been selected, then we add these to the $match criteria
                # created from the self.full_query at the beginning of this method. The reason we need
                # to do this is because the $indexOfArray function below will return -1 if the date
                # doesn't exist in the versions array. -1 will be interpreted by $arrayElemAt as the
                # final element in the array and we will return the wrong result. i.e. not only will the
                # version dates be incorrect, but we shouldn't have returned a result at all.
                # if actual_dates:
                if len(actual_dates) > 0:
                    pipeline.insert(1, {
                        "$match": {
                            "versions": {
                                "$all": [",".join(actual_dates)]
                            }
                        }
                    })

                # The versions array in the mongodb document is ordered newest to oldest, so the 'last'
                # (most recent date) is in first position in the list and the oldest 'first' is in
                # the last position (equal to index -1 for $arrayElemAt)
                version_selector = []
                if "last" in match_version:
                    version_selector.append({"$arrayElemAt": ["$versions", 0]})
                if "first" in match_version:
                    version_selector.append(
                        {"$arrayElemAt": ["$versions", -1]})
                for d in actual_dates:
                    version_selector.append({
                        "$arrayElemAt": [
                            "$versions",
                            {"$indexOfArray": ["$versions", d]}
                        ]
                    })
                version_filter = {
                    "$addFields": {
                        "versions": version_selector,
                    },
                }
                pipeline.append(version_filter)

        if data.name == "manifests":
            count = self.get_result_count(pipeline, data)
            self.add_pagination_operations(pipeline)

            cursor = data.aggregate(pipeline)
            results = list(cursor)
        else:
            results = []
            # Get the count of matching documents - need to unwind the versions selected to get accurate count.
            count_pipeline = list(pipeline)
            count_pipeline.append({"$unwind": "$versions"})
            count = self.get_result_count(count_pipeline,
                                          manifest_info["mongodb_collection"])

            # only bother doing the rest of the query if the start index is less than the total number of results.
            if self.start_index < count:
                # Join the filtered manifest(s) to the objects collection
                join_objects = {
                    "$lookup": {
                        "from": "objects",
                        "localField": "id",
                        "foreignField": "id",
                        "as": "obj",
                    },
                }
                pipeline.append(join_objects)
                # Copy the filtered version list to the embedded object document
                add_versions = {
                    "$addFields": {
                        "obj.versions": "$versions"
                    },
                }
                pipeline.append(add_versions)
                # denormalize the embedded objects and replace the document root
                pipeline.append({"$unwind": "$obj"})
                pipeline.append({"$replaceRoot": {"newRoot": "$obj"}})
                # Redact the result set removing objects where the modified date is not in
                # the versions array and the object isn't in the correct collection.
                # The collection filter is required because the join between manifests and objects
                # does not include collection_id
                col_id = self.full_query["_collection_id"]
                redact_objects = {
                    "$redact": {
                        "$cond": {
                            "if": {
                                "$and": [
                                    {"$eq": ["$_collection_id", col_id]},
                                    {"$or": [
                                        {"$eq": ["$type", "marking-definition"]},
                                        {"$setIsSubset": [["$modified"], "$versions"]},
                                    ]},
                                ],
                            },
                            "then": "$$KEEP",
                            "else": "$$PRUNE",
                        },
                    },
                }
                pipeline.append(redact_objects)
                # Project the final results
                project_results = {
                    "$project": {
                        "versions": 0,
                    },
                }
                pipeline.append(project_results)
                self.add_pagination_operations(pipeline)

                cursor = manifest_info["mongodb_collection"].aggregate(
                    pipeline)
                results = list(cursor)

        return count, results
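
Example 5 also relies on two helpers not shown here, add_pagination_operations and get_result_count. Minimal sketches consistent with how they are called above, assuming the filter carries start_index and page_size attributes (hypothetical; the real implementations may differ):

    def add_pagination_operations(self, pipeline):
        # Apply the requested result slice in the database with $skip/$limit.
        pipeline.append({"$skip": self.start_index})
        pipeline.append({"$limit": self.page_size})  # page_size is an assumed attribute

    def get_result_count(self, pipeline, data):
        # Re-run the pipeline with a terminal $count stage so MongoDB reports
        # the number of matching documents without materializing them.
        count_pipeline = list(pipeline)
        count_pipeline.append({"$count": "total"})
        count_result = list(data.aggregate(count_pipeline))
        return count_result[0]["total"] if count_result else 0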