def process_filter(self, data, allowed, manifest_info):
    filtered_by_type = []
    filtered_by_id = []

    match_type = self.filter_args.get("match[type]")
    if match_type and "type" in allowed:
        filtered_by_type = self.filter_by_type(data, match_type)

    match_id = self.filter_args.get("match[id]")
    if match_id and "id" in allowed:
        filtered_by_id = self.filter_by_id(data, match_id)

    # Intersect the type and id matches when both filters were supplied;
    # otherwise take whichever filter ran, or everything if neither did
    results = []
    if filtered_by_type and filtered_by_id:
        for type_match in filtered_by_type:
            for id_match in filtered_by_id:
                if type_match == id_match:
                    results.append(type_match)
    elif match_type:
        if filtered_by_type:
            results.extend(filtered_by_type)
    elif match_id:
        if filtered_by_id:
            results.extend(filtered_by_id)
    else:
        results = data

    match_version = self.filter_args.get("match[version]")
    if "version" in allowed:
        if not match_version:
            match_version = "last"
        # guard against empty input before peeking at data[0]
        if data and self.is_manifest_entry(data[0]):
            results = self.filter_manifest_entries_by_version(results, match_version)
        else:
            new_results = []
            for bucket in BasicFilter._equivalence_partition_by_id(results):
                new_results.extend(self.filter_by_version(bucket, match_version))
            results = new_results

    added_after_date = self.filter_args.get("added_after")
    if added_after_date:
        added_after_timestamp = common.convert_to_stix_datetime(added_after_date)
        new_results = []
        for obj in results:
            # find the manifest entry for this object to read its date_added
            info = None
            for item in manifest_info:
                if item["id"] == obj["id"]:
                    info = item
                    break
            if info:
                added_date_timestamp = common.convert_to_stix_datetime(info["date_added"])
                if added_date_timestamp > added_after_timestamp:
                    new_results.append(obj)
        return new_results
    else:
        return results
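The version handling above delegates to _equivalence_partition_by_id and filter_by_version, which are not shown here. A minimal standalone sketch of the same idea for the match[version]=last case, with hypothetical helper names:

from collections import defaultdict

def partition_by_id(objects):
    # group revisions of the same STIX object together
    buckets = defaultdict(list)
    for obj in objects:
        buckets[obj["id"]].append(obj)
    return list(buckets.values())

def pick_last_version(bucket):
    # STIX timestamps of equal precision sort lexicographically,
    # so max() on the string picks the newest revision
    return max(bucket, key=lambda obj: obj["modified"])

revisions = [
    {"id": "indicator--d81f86b9-975b-bc0b-775e-810c5ad45a4f", "modified": "2016-11-03T12:30:59.000Z"},
    {"id": "indicator--d81f86b9-975b-bc0b-775e-810c5ad45a4f", "modified": "2017-01-27T13:49:53.935Z"},
]
print([pick_last_version(b) for b in partition_by_id(revisions)])
# only the 2017-01-27 revision survives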
def process_filter(self, data, allowed, manifest_info):
    results = list(data.find(self.full_query))
    if results and self.filter_args:
        if "version" in allowed:
            match_version = self.filter_args.get("match[version]")
            if not match_version:
                match_version = "last"
            if self.is_manifest_entry(results[0]):
                results = self.filter_manifest_entries_by_version(
                    results, match_version)
            else:
                new_results = []
                for bucket in BasicFilter._equivalence_partition_by_id(results):
                    new_results.extend(
                        self.filter_by_version(bucket, match_version))
                results = new_results

        added_after_date = self.filter_args.get("added_after")
        if added_after_date:
            added_after_timestamp = common.convert_to_stix_datetime(
                added_after_date)
            new_results = []
            for obj in results:
                info = manifest_info["mongodb_collection"].find_one({
                    "id": obj["id"],
                    "_collection_id": manifest_info["_collection_id"]
                })
                if info:
                    added_date_timestamp = common.convert_to_stix_datetime(
                        info["date_added"])
                    if added_date_timestamp > added_after_timestamp:
                        new_results.append(obj)
            return new_results
        else:
            return results
    return results
def reset_db():
    client = connect_to_client()
    client.drop_database("discovery_database")
    db = build_new_mongo_databases_and_collection(client)
    db["discovery_information"].insert_one({
        "title": "Some TAXII Server",
        "description": "This TAXII Server contains a listing of",
        "contact": "string containing contact information",
        "api_roots": []
    })

    client.drop_database("trustgroup1")
    api_root_db = add_api_root(
        client,
        url="http://localhost:5000/trustgroup1/",
        title="Malware Research Group",
        description="A trust group setup for malware researchers",
        max_content_length=9765625,
        default=True)

    api_root_db["status"].insert_many([{
        "id": "2d086da7-4bdc-4f91-900e-d77486753710",
        "status": "pending",
        "request_timestamp": "2016-11-02T12:34:34.12345Z",
        "total_count": 4,
        "success_count": 1,
        "successes": ["indicator--a932fcc6-e032-176c-126f-cb970a5a1ade"],
        "failure_count": 1,
        "failures": [{
            "id": "malware--664fa29d-bf65-4f28-a667-bdb76f29ec98",
            "message": "Unable to process object"
        }],
        "pending_count": 2,
        "pendings": [
            "indicator--252c7c11-daf2-42bd-843b-be65edca9f61",
            "relationship--045585ad-a22f-4333-af33-bfd503a683b5"
        ]
    }, {
        "id": "2d086da7-4bdc-4f91-900e-f4566be4b780",
        "status": "pending",
        "request_timestamp": "2016-11-02T12:34:34.12345Z",
        "total_count": 2,
        "success_count": 0,
        "successes": [],
        "failure_count": 0,
        "failures": [],
        "pending_count": 0,
        "pendings": []
    }])

    # Note: each versions list is ordered newest to oldest; the aggregation
    # pipeline's match[version] handling relies on this ordering.
    api_root_db["manifests"].insert_many([{
        "id": "indicator--a932fcc6-e032-176c-126f-cb970a5a1ade",
        "date_added": convert_to_stix_datetime("2016-11-01T03:04:05Z"),
        "versions": ["2014-05-08T09:00:00.000Z"],
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"],
        "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116",
        "_type": "indicator"
    }, {
        "id": "malware--fdd60b30-b67c-11e3-b0b9-f01faf20d111",
        "date_added": convert_to_stix_datetime("2017-01-27T13:49:53.997Z"),
        "versions": ["2017-01-27T13:49:53.997Z"],
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"],
        "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116",
        "_type": "malware"
    }, {
        "id": "relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463",
        "date_added": convert_to_stix_datetime("2014-05-08T09:00:00.000Z"),
        "versions": ["2014-05-08T09:00:00.000Z"],
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"],
        "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116",
        "_type": "relationship"
    }, {
        "id": "marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da",
        "date_added": convert_to_stix_datetime("2017-01-20T00:00:00.000Z"),
        "versions": ["2017-01-20T00:00:00.000Z"],
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"],
        "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116",
        "_type": "marking-definition"
    }, {
        "id": "indicator--d81f86b9-975b-bc0b-775e-810c5ad45a4f",
        "date_added": convert_to_stix_datetime("2016-12-27T13:49:53Z"),
        "versions": ["2017-01-27T13:49:53.935Z", "2016-11-03T12:30:59.000Z"],
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"],
        "_collection_id": "52892447-4d7e-4f70-b94d-d7f22742ff63",
        "_type": "indicator"
    }, {
        "id": "indicator--b81f86b9-975b-bb0b-775e-810c5bd45b4f",
        "date_added": convert_to_stix_datetime("2016-11-03T12:30:59.000Z"),
        "versions": ["2016-11-03T12:30:59.000Z"],
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"],
        "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116",
        "_type": "indicator"
    }])

    api_root_db["collections"].insert_one({
        "id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116",
        "title": "High Value Indicator Collection",
        "can_read": True,
        "can_write": True,
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"]
    })
    api_root_db["collections"].insert_one({
        "id": "472c94ae-3113-4e3e-a4dd-a9f4ac7471d4",
        "title": "Empty test Collection",
        "description": "This data collection is for testing querying across collections",
        "can_read": True,
        "can_write": True,
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"]
    })
    api_root_db["collections"].insert_one({
        "id": "52892447-4d7e-4f70-b94d-d7f22742ff63",
        "title": "Indicators from the past 24-hours",
        "description": "This data collection is for collecting current IOCs",
        "can_read": True,
        "can_write": False,
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"]
    })
    api_root_db["collections"].insert_one({
        "id": "64993447-4d7e-4f70-b94d-d7f33742ee63",
        "title": "Secret Indicators",
        "description": "Non accessible",
        "can_read": False,
        "can_write": False,
        "media_types": ["application/vnd.oasis.stix+json; version=2.0"]
    })

    api_root_db["objects"].insert_many([{
        "created": "2016-11-03T12:30:59.000Z",
        "id": "indicator--d81f86b9-975b-bc0b-775e-810c5ad45a4f",
        "labels": ["url-watchlist"],
        "modified": "2017-01-27T13:49:53.935Z",
        "name": "Malicious site hosting downloader",
        "pattern": "[url:value = 'http://x4z9arb.cn/4712']",
        "type": "indicator",
        "valid_from": "2016-11-03T12:30:59.000Z",
        "_collection_id": "52892447-4d7e-4f70-b94d-d7f22742ff63"
    }, {
        "created": "2016-11-03T12:30:59.000Z",
        "description": "Accessing this url will infect your machine with malware.",
        "id": "indicator--d81f86b9-975b-bc0b-775e-810c5ad45a4f",
        "labels": ["url-watchlist"],
        "modified": "2016-11-03T12:30:59.000Z",
        "name": "Malicious site hosting downloader",
        "pattern": "[url:value = 'http://x4z9arb.cn/4712']",
        "type": "indicator",
        "valid_from": "2017-01-27T13:49:53.935382Z",
        "_collection_id": "52892447-4d7e-4f70-b94d-d7f22742ff63"
    }, {
        "created": "2017-01-27T13:49:53.997Z",
        "description": "Poison Ivy",
        "id": "malware--fdd60b30-b67c-11e3-b0b9-f01faf20d111",
        "labels": ["remote-access-trojan"],
        "modified": "2017-01-27T13:49:53.997Z",
        "name": "Poison Ivy",
        "type": "malware",
        "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116"
    }, {
        "created": "2014-05-08T09:00:00.000Z",
        "id": "indicator--a932fcc6-e032-176c-126f-cb970a5a1ade",
        "labels": ["file-hash-watchlist"],
        "modified": "2014-05-08T09:00:00.000Z",
        "name": "File hash for Poison Ivy variant",
        "pattern": "[file:hashes.'SHA-256' = 'ef537f25c895bfa782526529a9b63d97aa631564d5d789c2b765448c8635fb6c']",
        "type": "indicator",
        "valid_from": "2014-05-08T09:00:00.000000Z",
        "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116"
    }, {
        "created": "2014-05-08T09:00:00.000Z",
        "id": "relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463",
        "modified": "2014-05-08T09:00:00.000Z",
        "relationship_type": "indicates",
        "source_ref": "indicator--a932fcc6-e032-176c-126f-cb970a5a1ade",
        "target_ref": "malware--fdd60b30-b67c-11e3-b0b9-f01faf20d111",
        "type": "relationship",
        "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116"
    }, {
        "created": "2017-01-20T00:00:00.000Z",
        "definition": {"tlp": "green"},
        "definition_type": "tlp",
        "id": "marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da",
        "type": "marking-definition",
        "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116"
    }, {
        "created": "2016-11-03T12:30:59.000Z",
        "description": "Accessing this url will infect your machine with malware.",
        "id": "indicator--b81f86b9-975b-bb0b-775e-810c5bd45b4f",
        "labels": ["url-watchlist"],
        "modified": "2016-11-03T12:30:59.000Z",
        "name": "Malicious site hosting downloader",
        "pattern": "[url:value = 'http://z4z10farb.cn/4712']",
        "type": "indicator",
        "valid_from": "2017-01-27T13:49:53.935382Z",
        "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116"
    }])

    client.drop_database("api2")
    api_root_db = add_api_root(
        client,
        url="http://localhost:5000/api2/",
        title="STIX 2.0 Indicator Collections",
        description="A repo for general STIX data.",
        max_content_length=9765625)

    # Indexes that support the filter pipeline: lookups by date_added, id,
    # _collection_id (alone and combined with date_added), and _type
    dateIdx = IndexModel([("date_added", ASCENDING)])
    idIdx = IndexModel([("id", ASCENDING)])
    colIdIdx = IndexModel([("_collection_id", ASCENDING)])
    colIdDateIdx = IndexModel([("_collection_id", ASCENDING), ("date_added", ASCENDING)])
    typeIdx = IndexModel([("_type", ASCENDING)])
    api_root_db["manifests"].create_indexes(
        [dateIdx, idIdx, colIdIdx, colIdDateIdx, typeIdx])
    api_root_db["objects"].create_indexes([idIdx])
def process_filter(self, data, allowed, manifest_info):
    match_filter = {"$match": self.full_query}
    pipeline = [match_filter]

    # create added_after filter
    added_after_date = self.filter_args.get("added_after")
    if added_after_date:
        added_after_timestamp = common.convert_to_stix_datetime(added_after_date)
        date_filter = {"$match": {"date_added": {"$gt": added_after_timestamp}}}
        pipeline.append(date_filter)

    # need to handle marking-definitions differently as they are not versioned like SDOs
    if self.filter_contains_marking_definition(pipeline):
        # If we are finding marking-definitions from the objects collection,
        # we need to change the match criteria from "_type" to "type"
        if data.name == "objects" and "_type" in pipeline[0]["$match"]:
            pipeline[0]["$match"]["type"] = pipeline[0]["$match"].pop("_type")
        cursor = data.aggregate(pipeline)
        results = list(cursor)
        return results

    # create version filter
    if "version" in allowed:
        match_version = self.filter_args.get("match[version]")
        if not match_version:
            match_version = "last"
        if "all" not in match_version:
            actual_dates = [x for x in match_version.split(",") if x not in ("first", "last")]
            # If specific dates have been selected, add these to the $match criteria
            # created from self.full_query at the beginning of this method. We need to
            # do this because the $indexOfArray function below returns -1 if the date
            # doesn't exist in the versions array; -1 is interpreted by $arrayElemAt as
            # the final element in the array and we would return the wrong result, i.e.
            # not only would the version dates be incorrect, but we shouldn't have
            # returned a result at all.
            if actual_dates:
                pipeline.insert(1, {"$match": {"versions": {"$all": actual_dates}}})

            # The versions array in the mongodb document is ordered newest to oldest,
            # so 'last' (the most recent date) is in the first position of the list and
            # the oldest, 'first', is in the last position (index -1 for $arrayElemAt)
            version_selector = []
            if "last" in match_version:
                version_selector.append({"$arrayElemAt": ["$versions", 0]})
            if "first" in match_version:
                version_selector.append({"$arrayElemAt": ["$versions", -1]})
            for d in actual_dates:
                version_selector.append(
                    {"$arrayElemAt": ["$versions", {"$indexOfArray": ["$versions", d]}]})
            version_filter = {
                "$project": {
                    "id": 1,
                    "date_added": 1,
                    "versions": version_selector,
                    "media_types": 1
                }
            }
            pipeline.append(version_filter)

    if data.name == "manifests":
        cursor = data.aggregate(pipeline)
        results = list(cursor)
    else:
        # Join the filtered manifest(s) to the objects collection
        join_objects = {
            "$lookup": {
                "from": "objects",
                "localField": "id",
                "foreignField": "id",
                "as": "obj"
            }
        }
        pipeline.append(join_objects)

        # Copy the filtered version list to the embedded object document
        project_objects = {
            "$project": {
                "obj.versions": "$versions",
                "obj.id": 1,
                "obj.modified": 1,
                "obj.created": 1,
                "obj.labels": 1,
                "obj.name": 1,
                "obj.pattern": 1,
                "obj.type": 1,
                "obj.valid_from": 1,
                "obj.created_by_ref": 1,
                "obj.object_marking_refs": 1
            }
        }
        pipeline.append(project_objects)

        # denormalise the embedded objects and replace the document root
        pipeline.append({"$unwind": "$obj"})
        pipeline.append({"$replaceRoot": {"newRoot": "$obj"}})

        # Redact the result set, removing objects whose modified date is not
        # in the versions array
        redact_objects = {
            "$redact": {
                "$cond": {
                    "if": {"$setIsSubset": [["$modified"], "$versions"]},
                    "then": "$$KEEP",
                    "else": "$$PRUNE"
                }
            }
        }
        pipeline.append(redact_objects)

        # Project the final results
        project_results = {"$project": {"versions": 0}}
        pipeline.append(project_results)

        cursor = manifest_info["mongodb_collection"].aggregate(pipeline)
        results = list(cursor)

    return results
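The join-and-redact portion of the pipeline can be tried in isolation. This sketch mirrors the stage shapes above against made-up documents in a throwaway database (pipeline_demo and indicator--x are invented names), using $addFields to copy the versions list as the next revision does:

from pymongo import MongoClient

db = MongoClient()["pipeline_demo"]
db["manifests"].drop()
db["objects"].drop()
db["manifests"].insert_one({"id": "indicator--x", "versions": ["2017-01-27T13:49:53.935Z"]})
db["objects"].insert_many([
    {"id": "indicator--x", "modified": "2016-11-03T12:30:59.000Z"},
    {"id": "indicator--x", "modified": "2017-01-27T13:49:53.935Z"},
])

pipeline = [
    # join each manifest entry to every object revision sharing its id
    {"$lookup": {"from": "objects", "localField": "id", "foreignField": "id", "as": "obj"}},
    # copy the selected versions onto each embedded revision
    {"$addFields": {"obj.versions": "$versions"}},
    # flatten the embedded array and promote each revision to the root
    {"$unwind": "$obj"},
    {"$replaceRoot": {"newRoot": "$obj"}},
    # keep only revisions whose modified date was selected into versions
    {"$redact": {"$cond": {
        "if": {"$setIsSubset": [["$modified"], "$versions"]},
        "then": "$$KEEP",
        "else": "$$PRUNE",
    }}},
    {"$project": {"versions": 0, "_id": 0}},
]
print(list(db["manifests"].aggregate(pipeline)))
# [{'id': 'indicator--x', 'modified': '2017-01-27T13:49:53.935Z'}]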
def process_filter(self, data, allowed, manifest_info):
    match_filter = {"$match": self.full_query}
    pipeline = [match_filter]

    # create added_after filter
    added_after_date = self.filter_args.get("added_after")
    if added_after_date:
        added_after_timestamp = convert_to_stix_datetime(added_after_date)
        date_filter = {"$match": {"date_added": {"$gt": added_after_timestamp}}}
        pipeline.append(date_filter)

    # need to handle marking-definitions differently as they are not versioned like SDOs
    if self.filter_contains_marking_definition(pipeline):
        # If we are finding marking-definitions from the objects collection,
        # we need to change the match criteria from "_type" to "type"
        if data.name == "objects" and "_type" in pipeline[0]["$match"]:
            pipeline[0]["$match"]["type"] = pipeline[0]["$match"].pop("_type")

        # Calculate the total number of matching documents
        if data.name == "objects":
            count = self.get_result_count(pipeline, manifest_info["mongodb_collection"])
        else:
            count = self.get_result_count(pipeline, data)

        self.add_pagination_operations(pipeline)
        cursor = data.aggregate(pipeline)
        results = list(cursor)
        return count, results

    # create version filter
    if "version" in allowed:
        match_version = self.filter_args.get("match[version]")
        if not match_version:
            match_version = "last"
        if "all" not in match_version:
            actual_dates = [x for x in match_version.split(",") if x not in ("first", "last")]
            # If specific dates have been selected, add these to the $match criteria
            # created from self.full_query at the beginning of this method. We need to
            # do this because the $indexOfArray function below returns -1 if the date
            # doesn't exist in the versions array; -1 is interpreted by $arrayElemAt as
            # the final element in the array and we would return the wrong result, i.e.
            # not only would the version dates be incorrect, but we shouldn't have
            # returned a result at all.
            if actual_dates:
                pipeline.insert(1, {"$match": {"versions": {"$all": actual_dates}}})

            # The versions array in the mongodb document is ordered newest to oldest,
            # so 'last' (the most recent date) is in the first position of the list and
            # the oldest, 'first', is in the last position (index -1 for $arrayElemAt)
            version_selector = []
            if "last" in match_version:
                version_selector.append({"$arrayElemAt": ["$versions", 0]})
            if "first" in match_version:
                version_selector.append({"$arrayElemAt": ["$versions", -1]})
            for d in actual_dates:
                version_selector.append(
                    {"$arrayElemAt": ["$versions", {"$indexOfArray": ["$versions", d]}]})
            version_filter = {"$addFields": {"versions": version_selector}}
            pipeline.append(version_filter)

    if data.name == "manifests":
        count = self.get_result_count(pipeline, data)
        self.add_pagination_operations(pipeline)
        cursor = data.aggregate(pipeline)
        results = list(cursor)
    else:
        results = []

        # Get the count of matching documents; we need to unwind the selected
        # versions to get an accurate count
        count_pipeline = list(pipeline)
        count_pipeline.append({"$unwind": "$versions"})
        count = self.get_result_count(count_pipeline, manifest_info["mongodb_collection"])

        # only bother doing the rest of the query if the start index is less
        # than the total number of results
        if self.start_index < count:
            # Join the filtered manifest(s) to the objects collection
            join_objects = {
                "$lookup": {
                    "from": "objects",
                    "localField": "id",
                    "foreignField": "id",
                    "as": "obj",
                },
            }
            pipeline.append(join_objects)

            # Copy the filtered version list to the embedded object document
            add_versions = {"$addFields": {"obj.versions": "$versions"}}
            pipeline.append(add_versions)

            # denormalize the embedded objects and replace the document root
            pipeline.append({"$unwind": "$obj"})
            pipeline.append({"$replaceRoot": {"newRoot": "$obj"}})

            # Redact the result set, removing objects whose modified date is not in
            # the versions array or that are not in the correct collection. The
            # collection filter is required because the join between manifests and
            # objects does not include the collection id
            col_id = self.full_query["_collection_id"]
            redact_objects = {
                "$redact": {
                    "$cond": {
                        "if": {
                            "$and": [
                                {"$eq": ["$_collection_id", col_id]},
                                {"$or": [
                                    {"$eq": ["$type", "marking-definition"]},
                                    {"$setIsSubset": [["$modified"], "$versions"]},
                                ]},
                            ],
                        },
                        "then": "$$KEEP",
                        "else": "$$PRUNE",
                    },
                },
            }
            pipeline.append(redact_objects)

            # Project the final results
            project_results = {"$project": {"versions": 0}}
            pipeline.append(project_results)

            self.add_pagination_operations(pipeline)
            cursor = manifest_info["mongodb_collection"].aggregate(pipeline)
            results = list(cursor)

    return count, results
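add_pagination_operations and get_result_count are referenced above but not shown. A plausible sketch of these methods on the filter class, assuming self.start_index and self.end_index hold the requested result slice; this is an illustration, not necessarily the actual implementation:

def add_pagination_operations(self, pipeline):
    # $skip must come before $limit; both act on the already-filtered stream
    pipeline.append({"$skip": self.start_index})
    pipeline.append({"$limit": (self.end_index - self.start_index) + 1})

def get_result_count(self, pipeline, data):
    # run the same pipeline with a terminal $count instead of fetching documents
    count_pipeline = list(pipeline)
    count_pipeline.append({"$count": "total"})
    result = list(data.aggregate(count_pipeline))
    return result[0]["total"] if result else 0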