def query(self, query=None, _composite_filters=None):
    """Search the in-memory data for STIX objects matching the complete query.

    The complete query is made of the caller's filters, the filters attached
    to this source (``self.filters``) and any filters handed down from a
    CompositeDataSource through ``_composite_filters``.

    Args:
        query (list): list of filters to search on; ``None`` means the
            caller supplies no filters.
        _composite_filters (FilterSet): collection of filters passed from
            the CompositeDataSource, not user supplied

    Returns:
        (list): the values of ``self._data`` that pass every filter. Objects
            are returned in the same form (python dictionary or STIX object)
            in which they were added to memory.

    """
    complete_query = FilterSet() if query is None else FilterSet(query)

    # Fold in the source-level filters first, then the composite-level ones.
    for extra_filters in (self.filters, _composite_filters):
        if extra_filters:
            complete_query.add(extra_filters)

    # Apply STIX common property filters.
    return list(apply_common_filters(self._data.values(), complete_query))
def query(self, query=None, version=None, _composite_filters=None):
    """Search and retrieve STIX objects based on the complete query.

    A "complete query" includes the filters from the query, the filters
    attached to this source (``self.filters``), and any filters passed from
    a CompositeDataSource (i.e. _composite_filters).

    Args:
        query (list): list of filters to search on
        version (str): If present, it forces the parser to use the version
            provided. Otherwise, the library will make the best effort based
            on checking the "spec_version" property.
        _composite_filters (FilterSet): collection of filters passed from
            the CompositeDataSource, not user supplied

    Returns:
        (list): list of STIX objects that match the supplied query. The
            objects returned by ``self.collection.get_objects`` are
            de-duplicated, filtered locally, passed through ``parse`` and
            then returned.

    Raises:
        DataSourceError: if the TAXII request fails with HTTP status 404.
        HTTPError: re-raised unchanged for any other HTTP error status.

    """
    query = FilterSet(query)

    # combine all query filters
    if self.filters:
        query.add(self.filters)
    if _composite_filters:
        query.add(_composite_filters)

    # parse taxii query params (that can be applied remotely)
    taxii_filters = self._parse_taxii_filters(query)

    # taxii2client requires query params as keywords
    taxii_filters_dict = {f.property: f.value for f in taxii_filters}

    # query TAXII collection; only the network call sits inside the try
    try:
        all_data = self.collection.get_objects(**taxii_filters_dict)['objects']
    except HTTPError as e:
        if e.response.status_code == 404:
            raise DataSourceError(
                "The requested STIX objects for the TAXII Collection resource defined in"
                " the supplied TAXII Collection object are either not found or access is"
                " denied. Received error: ", e,
            )
        # Any other HTTP failure must propagate: swallowing it would leave
        # `all_data` unbound and crash further down with an unrelated error.
        raise

    # deduplicate data (before filtering as reduces wasted filtering)
    all_data = deduplicate(all_data)

    # apply local (CompositeDataSource, TAXIICollectionSource and query) filters;
    # the remotely-applied ones are removed first
    query.remove(taxii_filters)
    all_data = list(apply_common_filters(all_data, query))

    # parse python STIX objects from the STIX object dicts
    return [
        parse(stix_obj_dict, allow_custom=self.allow_custom, version=version)
        for stix_obj_dict in all_data
    ]
def query(self, query=None, version=None, _composite_filters=None):
    """Search and retrieve STIX objects based on the complete query.

    The complete query combines the caller's filters, the filters attached
    to this source (``self.filters``) and any filters handed down from a
    CompositeDataSource through ``_composite_filters``.

    Args:
        query (list): list of filters to search on
        _composite_filters (FilterSet): collection of filters passed from
            the CompositeDataSource, not user supplied
        version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
            None, use latest version.

    Returns:
        (list): list of STIX objects that match the supplied query. The
            objects returned by ``self.collection.get_objects`` are
            de-duplicated, filtered locally, passed through ``parse`` and
            then returned. Any HTTPError raised while fetching yields an
            empty list.

    """
    query = FilterSet(query)

    # Fold in the source-level filters first, then the composite-level ones.
    for extra_filters in (self.filters, _composite_filters):
        if extra_filters:
            query.add(extra_filters)

    # Filters that can be applied remotely become keyword arguments of the
    # taxii2client request.
    remote_filters = self._parse_taxii_filters(query)
    request_params = {flt.property: flt.value for flt in remote_filters}

    try:
        fetched = self.collection.get_objects(**request_params)["objects"]

        # De-duplicate before filtering so fewer objects need to be checked.
        unique = deduplicate(fetched)

        # Only the filters not already applied remotely are applied locally.
        query.remove(remote_filters)
        matches = list(apply_common_filters(unique, query))
    except HTTPError:
        # Best effort: resources not found / access denied gives no results.
        matches = []

    # Parse python STIX objects from the STIX object dicts.
    return [
        parse(raw_obj, allow_custom=self.allow_custom, version=version)
        for raw_obj in matches
    ]
def query(self, query=None, _composite_filters=None):
    """Search the in-memory data for STIX objects matching the complete query.

    The complete query combines the caller's filters, the filters attached
    to this source (``self.filters``) and any filters handed down from a
    CompositeDataSource through ``_composite_filters``.

    Args:
        query (list): list of filters to search on
        _composite_filters (FilterSet): collection of filters passed from
            the CompositeDataSource, not user supplied

    Returns:
        (list): list of STIX objects that match the supplied query.

    """
    query = FilterSet(query)

    # Fold in the source-level filters first, then the composite-level ones.
    for extra_filters in (self.filters, _composite_filters):
        if extra_filters:
            query.add(extra_filters)

    def _expand(entry):
        # An _ObjectFamily contributes every stored version; any other
        # entry contributes just itself.
        if isinstance(entry, _ObjectFamily):
            return entry.all_versions.values()
        return [entry]

    candidates = itertools.chain.from_iterable(
        _expand(entry) for entry in self._data.values()
    )

    # Apply STIX common property filters.
    return list(apply_common_filters(candidates, query))
def query(self, query=None, version=None, _composite_filters=None):
    """Search the STIX directory for objects matching the complete query.

    The complete query combines the caller's filters, the filters attached
    to this source (``self.filters``) and any filters handed down from a
    CompositeDataSource through ``_composite_filters``.

    Args:
        query (list): list of filters to search on
        _composite_filters (FilterSet): collection of filters passed from
            the CompositeDataSource, not user supplied
        version (str): If present, it forces the parser to use the version
            provided. Otherwise, the library will make the best effort based
            on checking the "spec_version" property.

    Returns:
        (list): list of STIX objects that match the supplied query, gathered
            from every matching type directory under ``self._stix_dir``.

    """
    query = FilterSet(query)

    # Fold in the source-level filters first, then the composite-level ones.
    for extra_filters in (self.filters, _composite_filters):
        if extra_filters:
            query.add(extra_filters)

    auth_types, auth_ids = _find_search_optimizations(query)
    matching_dirs = _get_matching_dir_entries(
        self._stix_dir, auth_types, stat.S_ISDIR,
    )

    found = []
    for type_dir in matching_dirs:
        type_path = os.path.join(self._stix_dir, type_dir)

        # Both search helpers take the same arguments; pick the one that
        # matches the layout of this type directory.
        if _is_versioned_type_dir(type_path, type_dir):
            search = _search_versioned
        else:
            search = _search_unversioned

        found.extend(
            search(
                query, type_path, auth_ids,
                self.allow_custom, version,
                self.encoding,
            )
        )

    return found
def related_to(self, obj, relationship_type=None, source_only=False, target_only=False, filters=None):
    """Retrieve STIX Objects that have a Relationship involving the given
    STIX object.

    Only one of `source_only` and `target_only` may be `True`.

    Args:
        obj (STIX object OR dict OR str): The STIX object (or its ID) whose
            related objects will be looked up.
        relationship_type (str): Only retrieve objects related by this
            Relationships type. If None, all related objects will be
            returned, regardless of type.
        source_only (bool): Only examine Relationships for which this
            object is the source_ref. Default: False.
        target_only (bool): Only examine Relationships for which this
            object is the target_ref. Default: False.
        filters (list): list of additional filters the related objects must
            match.

    Returns:
        list: The STIX objects related to the given STIX object.

    """
    rels = self.relationships(obj, relationship_type, source_only, target_only)

    try:
        obj_id = obj['id']
    except TypeError:
        # `obj` cannot be indexed by key: assume it is an ID string
        obj_id = obj

    # Every endpoint of every relationship, minus the object itself
    related_ids = set()
    for rel in rels:
        related_ids.add(rel.source_ref)
        related_ids.add(rel.target_ref)
    related_ids.discard(obj_id)

    # One query per related ID: the caller's filters plus an ID match
    base_filters = FilterSet(filters)
    results = []
    for related_id in related_ids:
        id_query = list(base_filters)
        id_query.append(Filter('id', '=', related_id))
        results.extend(self.query(id_query))

    return results
def get(self, stix_id, version=None, _composite_filters=None):
    """Retrieve a single STIX object by ID from ``self.collection``.

    Args:
        stix_id (str): The STIX ID of the STIX object to be retrieved.
        version (str): If present, it forces the parser to use the version
            provided. Otherwise, the library will make the best effort based
            on checking the "spec_version" property.
        _composite_filters (FilterSet): collection of filters passed from
            the parent CompositeDataSource, not user supplied

    Returns:
        (STIX object): the parsed STIX object with the supplied ID, or None
            when the request returns HTTP 404, nothing passes the filters,
            or the returned object carries a different ID.

    Raises:
        DataSourceError: if the request fails with an HTTP status other
            than 404.

    """
    # Only source-level and composite-level filters apply here; nothing is
    # sent to the TAXII endpoint as a filter because the object is
    # requested directly by ID.
    query = FilterSet()
    for extra_filters in (self.filters, _composite_filters):
        if extra_filters:
            query.add(extra_filters)

    try:
        response_objs = self.collection.get_object(stix_id)['objects']
        candidates = list(apply_common_filters(response_objs, query))
    except HTTPError as e:
        if e.response.status_code != 404:
            raise DataSourceError("TAXII Collection resource returned error", e)
        # resource not found or access denied: treat as "no such object"
        candidates = []

    if not candidates:
        return None

    parsed = parse(candidates[0], allow_custom=self.allow_custom, version=version)

    # check - was added to handle erroneous TAXII servers
    if parsed['id'] != stix_id:
        return None
    return parsed
def get(self, stix_id, _composite_filters=None):
    """Retrieve a STIX object by STIX ID from all attached data sources.

    Federated retrieve: ``get`` is called on every entry of
    ``self.data_sources`` and the results are consolidated. The filters
    attached to this composite (plus any inherited ones) are passed down to
    each data source, which is responsible for applying them.

    Args:
        stix_id (str): the id of the STIX object to retrieve.
        _composite_filters (FilterSet): a collection of filters passed from
            a CompositeDataSource (i.e. if this CompositeDataSource is
            attached to another parent CompositeDataSource), not user
            supplied.

    Returns:
        stix_obj: the retrieved object with the greatest 'modified' value,
            or None if no data source returned anything.

    Raises:
        AttributeError: if no data sources are attached.

    """
    if not self.has_data_sources():
        raise AttributeError("CompositeDataSource has no data sources")

    all_filters = FilterSet()
    all_filters.add(self.filters)
    if _composite_filters:
        all_filters.add(_composite_filters)

    # Ask every configured data source and keep only the truthy answers
    answers = (
        ds.get(stix_id=stix_id, _composite_filters=all_filters)
        for ds in self.data_sources
    )
    found = [answer for answer in answers if answer]

    if not found:
        return None

    # remove duplicate versions, then reduce to the most recent one
    candidates = deduplicate(found)
    newest_first = sorted(candidates, key=lambda k: k['modified'], reverse=True)
    return newest_first[0]
def get(self, stix_id, version=None, _composite_filters=None):
    """Retrieve a single STIX object by ID from ``self.collection``.

    Args:
        stix_id (str): The STIX ID of the STIX object to be retrieved.
        _composite_filters (FilterSet): collection of filters passed from
            the parent CompositeDataSource, not user supplied
        version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
            None, use latest version.

    Returns:
        (STIX object): the parsed STIX object with the supplied ID, or None
            when the request returns HTTP 404, nothing passes the filters,
            or the returned object carries a different ID.

    Raises:
        DataSourceError: if the request fails with an HTTP status other
            than 404.

    """
    # Only source-level and composite-level filters apply here; nothing is
    # sent to the TAXII endpoint as a filter because the object is
    # requested directly by ID.
    query = FilterSet()
    for extra_filters in (self.filters, _composite_filters):
        if extra_filters:
            query.add(extra_filters)

    try:
        response_objs = self.collection.get_object(stix_id)["objects"]
        candidates = list(apply_common_filters(response_objs, query))
    except HTTPError as e:
        if e.response.status_code != 404:
            raise DataSourceError("TAXII Collection resource returned error", e)
        # resource not found or access denied: treat as "no such object"
        candidates = []

    if not candidates:
        return None

    parsed = parse(candidates[0], allow_custom=self.allow_custom, version=version)

    # check - was added to handle erroneous TAXII servers
    if parsed.id != stix_id:
        return None
    return parsed
def get(self, stix_id, _composite_filters=None):
    """Retrieve STIX object by STIX ID

    Federated retrieve method: ``get`` is called on every entry of
    ``self.data_sources`` and the results are consolidated into a single
    object.

    A composite data source will pass its attached filters to
    each configured data source, pushing filtering to them to handle.

    Args:
        stix_id (str): the id of the STIX object to retrieve.
        _composite_filters (FilterSet): a collection of filters passed from
            a CompositeDataSource (i.e. if this CompositeDataSource is
            attached to another parent CompositeDataSource), not user
            supplied.

    Returns:
        stix_obj: The STIX object to be returned, or None if no data source
            returned anything. The version of a candidate is its "modified"
            value, falling back to "created"; the candidate with the
            greatest version wins, and a candidate with no version at all
            replaces whatever was selected before it.

    Raises:
        AttributeError: if no data sources are attached.

    """
    if not self.has_data_sources():
        raise AttributeError("CompositeDataSource has no data sources")

    all_data = []
    all_filters = FilterSet()

    all_filters.add(self.filters)

    if _composite_filters:
        all_filters.add(_composite_filters)

    # for every configured Data Source, call its retrieve handler
    for ds in self.data_sources:
        data = ds.get(stix_id=stix_id, _composite_filters=all_filters)

        if data:
            all_data.append(data)

    # Search for latest version
    stix_obj = latest_ver = None
    for obj in all_data:
        ver = obj.get("modified") or obj.get("created")

        # `latest_ver` is None when the currently selected object carries no
        # version; it must be checked before ordering, because comparing a
        # version against None raises TypeError on Python 3.
        if (
            stix_obj is None
            or ver is None
            or latest_ver is None
            or ver > latest_ver
        ):
            stix_obj = obj
            latest_ver = ver

    return stix_obj
def query(self, query=None, version=None, _composite_filters=None):
    """Search the STIX directory for objects matching the complete query.

    The complete query combines the caller's filters, the filters attached
    to this source (``self.filters``) and any filters handed down from a
    CompositeDataSource through ``_composite_filters``.

    Args:
        query (list): list of filters to search on
        _composite_filters (FilterSet): collection of filters passed from
            the CompositeDataSource, not user supplied
        version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
            None, use latest version.

    Returns:
        (list): list of STIX objects that match the supplied query, gathered
            from every matching type directory under ``self._stix_dir``.

    """
    query = FilterSet(query)

    # Fold in the source-level filters first, then the composite-level ones.
    for extra_filters in (self.filters, _composite_filters):
        if extra_filters:
            query.add(extra_filters)

    auth_types, auth_ids = _find_search_optimizations(query)
    matching_dirs = _get_matching_dir_entries(
        self._stix_dir, auth_types, stat.S_ISDIR,
    )

    found = []
    for type_dir in matching_dirs:
        type_path = os.path.join(self._stix_dir, type_dir)

        # The "marking-definition" directory has its own search helper;
        # every other type directory goes through the versioned search.
        if type_dir == "marking-definition":
            search = _search_markings
        else:
            search = _search_versioned

        found.extend(
            search(query, type_path, auth_ids, self.allow_custom, version)
        )

    return found
def query(self, query=None, _composite_filters=None):
    """Retrieve STIX objects that match a query.

    The query is federated to every entry of ``self.data_sources`` and the
    combined results are de-duplicated.

    Args:
        query (list): list of filters to search on.
        _composite_filters (FilterSet): a collection of filters passed from
            a CompositeDataSource (i.e. if this CompositeDataSource is
            attached to a parent CompositeDataSource), not user supplied.

    Returns:
        list: The STIX objects to be returned.

    Raises:
        AttributeError: if no data sources are attached.

    """
    if not self.has_data_sources():
        raise AttributeError("CompositeDataSource has no data sources")

    # The query itself is passed through untouched (de-duplicating it is
    # left to the individual data sources); a falsy query becomes an empty
    # list.
    query = query or []

    all_filters = FilterSet()
    all_filters.add(self.filters)
    if _composite_filters:
        all_filters.add(_composite_filters)

    # Federate the query, handing the composite filters to each data source
    merged = []
    for ds in self.data_sources:
        merged.extend(ds.query(query=query, _composite_filters=all_filters))

    # remove exact duplicates (where duplicates are STIX 2.0
    # objects with the same 'id' and 'modified' values)
    if merged:
        merged = deduplicate(merged)

    return merged
def all_versions(self, stix_id, _composite_filters=None):
    """Retrieve all versions of a STIX object by STIX ID.

    Federated retrieve: ``all_versions`` is called on every entry of
    ``self.data_sources`` and the combined results are de-duplicated. The
    filters attached to this composite (plus any inherited ones) are passed
    down to each data source, which is responsible for applying them.

    Args:
        stix_id (str): id of the STIX objects to retrieve.
        _composite_filters (FilterSet): a collection of filters passed from
            a CompositeDataSource (i.e. if this CompositeDataSource is
            attached to a parent CompositeDataSource), not user supplied.

    Returns:
        list: The STIX objects that have the specified id.

    Raises:
        AttributeError: if no data sources are attached.

    """
    if not self.has_data_sources():
        raise AttributeError("CompositeDataSource has no data sources")

    all_filters = FilterSet()
    all_filters.add(self.filters)
    if _composite_filters:
        all_filters.add(_composite_filters)

    # Collect the versions known to every configured data source
    merged = []
    for ds in self.data_sources:
        merged.extend(
            ds.all_versions(stix_id=stix_id, _composite_filters=all_filters)
        )

    # remove exact duplicates (where duplicates are STIX 2.0 objects
    # with the same 'id' and 'modified' values)
    if merged:
        merged = deduplicate(merged)

    return merged
def query(self, query=None, version=None, _composite_filters=None):
    """Search and retrieve STIX objects based on the complete query.

    The complete query combines the caller's filters, the filters attached
    to this source (``self.filters``) and any filters handed down from a
    CompositeDataSource through ``_composite_filters``.

    Args:
        query (list): list of filters to search on
        version (str): If present, it forces the parser to use the version
            provided. Otherwise, the library will make the best effort based
            on checking the "spec_version" property.
        _composite_filters (FilterSet): collection of filters passed from
            the CompositeDataSource, not user supplied

    Returns:
        (list): list of STIX objects that match the supplied query. The
            objects are fetched page by page from ``self.collection``,
            de-duplicated, filtered locally, passed through ``parse`` and
            then returned.

    Raises:
        DataSourceError: if a request fails with HTTP status 404.
        HTTPError: re-raised for any HTTP error status other than 404 and
            416.

    """
    query = FilterSet(query)

    # Fold in the source-level filters first, then the composite-level ones.
    for extra_filters in (self.filters, _composite_filters):
        if extra_filters:
            query.add(extra_filters)

    # Filters that can be applied remotely become keyword arguments of the
    # taxii2client request.
    taxii_filters = self._parse_taxii_filters(query)
    taxii_filters_dict = {flt.property: flt.value for flt in taxii_filters}

    # Pick the pager that matches the type of the collection
    if isinstance(self.collection, tcv21.Collection):
        paged_request = tcv21.as_pages
    else:
        paged_request = tcv20.as_pages

    all_data = []
    try:
        pages = paged_request(
            self.collection.get_objects,
            per_request=self.items_per_page,
            **taxii_filters_dict
        )
        for resource in pages:
            all_data.extend(resource.get("objects", []))
    except HTTPError as e:
        if e.response.status_code == 404:
            raise DataSourceError(
                "The requested STIX objects for the TAXII Collection resource defined in"
                " the supplied TAXII Collection object are either not found or access is"
                " denied. Received error: ", e,
            )

        # TAXII 2.0 paging can result in a 416 (Range Not Satisfiable) if
        # the server isn't sending Content-Range headers, so the pager just
        # goes until it runs out of pages.  So 416 can't be treated as a
        # real error, just an end-of-pages condition.  For other codes,
        # propagate the exception.
        elif e.response.status_code != 416:
            raise

    # deduplicate data (before filtering as reduces wasted filtering)
    unique = deduplicate(all_data)

    # Only the filters not already applied remotely are applied locally
    query.remove(taxii_filters)
    matches = list(apply_common_filters(unique, query))

    # parse python STIX objects from the STIX object dicts
    return [
        parse(raw_obj, allow_custom=self.allow_custom, version=version)
        for raw_obj in matches
    ]
def __init__(self):
    """Initialise the data source.

    Runs the parent initialiser, then sets ``self.id`` to the value
    returned by ``make_id()`` and ``self.filters`` to a new ``FilterSet``
    created with no arguments.
    """
    super(DataSource, self).__init__()
    self.id = make_id()
    self.filters = FilterSet()
def query(self, query=None, version=None, _composite_filters=None):
    """Search and retrieve STIX objects based on the complete query.

    A "complete query" includes the filters from the query, the filters
    attached to this source (``self.filters``), and any filters passed from
    a CompositeDataSource (i.e. _composite_filters).

    Args:
        query (list): list of filters to search on
        _composite_filters (FilterSet): collection of filters passed from
            the CompositeDataSource, not user supplied
        version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
            None, use latest version.

    Returns:
        (list): list of STIX objects that match the supplied query. The
            STIX objects are loaded from their json files, parsed into
            python STIX objects and then returned.

    Raises:
        TypeError: if a ".json" file that has to be examined cannot be
            parsed as JSON, or lacks the "type"/"id" keys (or "objects" for
            a bundle).

    """
    all_data = []
    query = FilterSet(query)

    # combine all query filters
    if self.filters:
        query.add(self.filters)
    if _composite_filters:
        query.add(_composite_filters)

    # extract any filters that are for "type" or "id" , as we can then do
    # filtering before reading in the STIX objects. A STIX 'type' filter
    # can reduce the query to a single sub-directory. A STIX 'id' filter
    # allows for the fast checking of the file names versus loading it.
    file_filters = self._parse_file_filters(query)

    # establish which subdirectories can be avoided in query
    # by decluding as many as possible. A filter with "type" as the property
    # means that certain STIX object types can be ruled out, and thus
    # the corresponding subdirectories as well
    include_paths = []
    declude_paths = set()
    type_filters = [flt for flt in file_filters if flt.property == "type"]
    if type_filters:
        for type_filter in type_filters:
            # The path is only built for "=" and "!=", so values belonging
            # to other operators are never handed to os.path.join.
            if type_filter.op == "=":
                include_paths.append(
                    os.path.join(self._stix_dir, type_filter.value))
            elif type_filter.op == "!=":
                # Stored as an absolute path because the directory listing
                # below is compared in absolute form; without this the
                # exclusion would never match when self._stix_dir is
                # relative.
                declude_paths.add(os.path.abspath(
                    os.path.join(self._stix_dir, type_filter.value)))
    else:
        # have to walk entire STIX directory
        include_paths.append(self._stix_dir)

    # if a user specifies a "type" filter like "type = <stix-object_type>",
    # the filter is reducing the search space to single stix object types
    # (and thus single directories). This makes such a filter more powerful
    # than "type != <stix-object_type>" bc the latter is subtracting
    # only one type of stix object type (and thus only one directory),
    # As such the former type of filters are given preference over the latter;
    # i.e. if both exist in a query, that latter type will be ignored
    if not include_paths:
        # user has specified types that are not wanted (i.e. "!=")
        # so query will look in all STIX directories that are not
        # the specified type. Compile correct dir paths
        for entry in os.listdir(self._stix_dir):
            entry_path = os.path.abspath(os.path.join(self._stix_dir, entry))
            if entry_path not in declude_paths:
                include_paths.append(entry_path)

    # grab stix object ID as well - if present in filters, as
    # may forgo the loading of STIX content into memory
    id_ = next(
        (flt.value for flt in file_filters
         if flt.property == "id" and flt.op == "="),
        None,
    )

    # now iterate through all STIX objs
    for path in include_paths:
        for root, _dirs, files in os.walk(path):
            for file_ in files:
                if not file_.endswith(".json"):
                    # skip non '.json' files as more likely to be random non-STIX files
                    continue

                if id_ and id_ != file_.split(".")[0]:
                    # file name rules this object out without loading it
                    continue

                # have to load into memory regardless to evaluate other filters
                file_path = os.path.join(root, file_)
                try:
                    # `with` closes the handle as soon as the file is read
                    with open(file_path) as json_file:
                        stix_obj = json.load(json_file)

                    if stix_obj["type"] == "bundle":
                        stix_obj = stix_obj["objects"][0]

                    # naive STIX type checking
                    stix_obj["type"]
                    stix_obj["id"]

                except (ValueError, KeyError):  # likely not a JSON file
                    raise TypeError(
                        "STIX JSON object at '{0}' could either not be parsed to "
                        "JSON or was not valid STIX JSON".format(file_path))

                # check against other filters, add if match
                all_data.extend(apply_common_filters([stix_obj], query))

    all_data = deduplicate(all_data)

    # parse python STIX objects from the STIX object dicts
    return [
        parse(stix_obj_dict, allow_custom=self.allow_custom, version=version)
        for stix_obj_dict in all_data
    ]