Esempio n. 1
    def relationships(self, *args, **kwargs):
        """Retrieve Relationships involving the given STIX object.

        Only one of `source_only` and `target_only` may be `True`.

        Federated relationships retrieve method - iterates through all
        DataSources defined in "data_sources" and forwards the positional
        and keyword arguments unchanged to each one.

        Args:
            obj (STIX object OR dict OR str): The STIX object (or its ID) whose
                relationships will be looked up.
            relationship_type (str): Only retrieve Relationships of this type.
                If None, all relationships will be returned, regardless of type.
            source_only (bool): Only retrieve Relationships for which this
                object is the source_ref. Default: False.
            target_only (bool): Only retrieve Relationships for which this
                object is the target_ref. Default: False.

        Returns:
            list: The Relationship objects involving the given STIX object.

        Raises:
            AttributeError: If `has_data_sources()` reports no data sources.

        """
        if not self.has_data_sources():
            raise AttributeError("CompositeDataSource has no data sources")

        results = []
        for ds in self.data_sources:
            results.extend(ds.relationships(*args, **kwargs))

        # remove exact duplicates (where duplicates are STIX 2.0
        # objects with the same 'id' and 'modified' values)
        if results:
            results = deduplicate(results)

        return results
Esempio n. 2
    def run(self, work_id: str, state: Mapping[str, Any]) -> Mapping[str, Any]:
        """Run the importation of the article.

        Builds a STIX2 report and bundle from ``self.article`` and its
        indicators.

        Args:
            work_id: Identifier stored on ``self.work_id`` for this run.
            state: Current state; returned unchanged when the article yields
                no indicators.

        Returns:
            ``state`` when no indicators remain after deduplication,
            otherwise the result of ``self._create_state(created)``.
        """
        self.work_id = work_id
        created = parser.parse(self.article["createdDate"])
        # RiskIQ API does not always provide the `publishedDate`.
        # If it does not exist, take the value of the `createdDate` instead.
        # NOTE(review): the conditional expression below is incomplete in this
        # source - the value used when `publishedDate` is present and the
        # closing parenthesis are missing. Restore before use.
        published = (
            if self.article["publishedDate"] is not None
            else created

        # NOTE(review): the `itertools.chain(` call below is incomplete in this
        # source - the per-indicator expression and the closing brackets are
        # missing. Restore before use.
        indicators = itertools.chain(
                for indicator in self.article["indicators"]

        indicators = utils.deduplicate(list(indicators))
        # Return the initial state if we don't have any indicators.
        if not indicators:
            self.helper.log_info("No indicator in article, report will not be created.")
            return state

        self.helper.log_debug(f"Number of indicators: {len(indicators)}")

        # The report defaults to `TLP_WHITE` and is raised to `TLP_AMBER` as
        # soon as any indicator's first marking reference is `TLP_AMBER`.
        report_tlp = TLP_WHITE
        if TLP_AMBER in [i["object_marking_refs"][0] for i in indicators]:
            report_tlp = TLP_AMBER

        # NOTE(review): the `Report(` call below is incomplete in this source -
        # most keyword arguments, the wrapper around the external-reference
        # dict and the closing parenthesis are missing. Restore before use.
        report = Report(
            name=self.article.get("title", "RiskIQ Threat Report"),
                    "source_name": "riskiq",
                    "url": self.article["link"],
                    "external_id": self.article["guid"],
        self.helper.log_debug(f"[RiskIQ] Report = {report}")

        # NOTE(review): `bundle` is built and a "sending" message is logged,
        # but no call that actually sends the bundle is visible here - confirm
        # whether a statement was lost.
        bundle = Bundle(objects=indicators + [report,], allow_custom=True)
        self.helper.log_info("[RiskIQ] Sending report STIX2 bundle")

        return self._create_state(created)
Esempio n. 3
    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this data source, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from
                the CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned.

        Raises:
            DataSourceError: if the TAXII server answers with HTTP 404.
            HTTPError: any other HTTP failure is propagated unchanged.

        """
        query = FilterSet(query)

        # combine all query filters
        # NOTE(review): the bodies of these two branches were missing from the
        # source; reconstructed as FilterSet.add() calls - confirm.
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # parse taxii query params (that can be applied remotely)
        taxii_filters = self._parse_taxii_filters(query)

        # taxii2client requires query params as keywords
        # NOTE(review): the key expression was missing from the source;
        # reconstructed as the filter's `property` - confirm.
        taxii_filters_dict = {f.property: f.value for f in taxii_filters}

        # query TAXII collection
        try:
            all_data = self.collection.get_objects(**taxii_filters_dict)['objects']
        except HTTPError as e:
            # resources not found or access denied on the TAXII server
            if e.response.status_code == 404:
                raise DataSourceError(
                    "The requested STIX objects for the TAXII Collection resource defined in"
                    " the supplied TAXII Collection object are either not found or access is"
                    " denied. Received error: ", e,
                )
            # Any other HTTP failure would otherwise leave `all_data` unbound
            # and surface later as an unrelated UnboundLocalError.
            raise

        # deduplicate data (before filtering as reduces wasted filtering)
        all_data = deduplicate(all_data)

        # apply local (CompositeDataSource, TAXIICollectionSource and query) filters
        all_data = list(apply_common_filters(all_data, query))

        # parse python STIX objects from the STIX object dicts
        stix_objs = [
            parse(stix_obj_dict, allow_custom=self.allow_custom, version=version)
            for stix_obj_dict in all_data
        ]

        return stix_objs
Esempio n. 4
    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this data source, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            _composite_filters (FilterSet): collection of filters passed from the
                CompositeDataSource, not user supplied
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned. Empty when
                the TAXII request fails with an HTTPError.

        """
        query = FilterSet(query)

        # combine all query filters
        # NOTE(review): the bodies of these two branches were missing from the
        # source; reconstructed as FilterSet.add() calls - confirm.
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # parse taxii query params (that can be applied remotely)
        taxii_filters = self._parse_taxii_filters(query)

        # taxii2client requires query params as keywords
        # NOTE(review): the key expression was missing from the source;
        # reconstructed as the filter's `property` - confirm.
        taxii_filters_dict = {f.property: f.value for f in taxii_filters}

        # query TAXII collection
        try:
            all_data = self.collection.get_objects(**taxii_filters_dict)['objects']

            # deduplicate data (before filtering as reduces wasted filtering)
            all_data = deduplicate(all_data)

            # apply local (CompositeDataSource, TAXIICollectionSource and query) filters
            all_data = list(apply_common_filters(all_data, query))

        except HTTPError:
            # if resources not found or access is denied from TAXII server, return empty list
            all_data = []

        # parse python STIX objects from the STIX object dicts
        stix_objs = [
            parse(stix_obj_dict, allow_custom=self.allow_custom, version=version)
            for stix_obj_dict in all_data
        ]

        return stix_objs
Esempio n. 5
    def get(self, stix_id, _composite_filters=None):
        """Retrieve STIX object by STIX ID

        Federated retrieve method, iterates through all DataSources
        defined in the "data_sources" parameter. Each data source has a
        specific API retrieve-like function and associated parameters. This
        function does a federated retrieval and consolidation of the data
        returned from all the STIX data sources.

        A composite data source will pass its attached filters to
        each configured data source, pushing filtering to them to handle.

        Args:
            stix_id (str): the id of the STIX object to retrieve.
            _composite_filters (FilterSet): a collection of filters passed from a
                CompositeDataSource (i.e. if this CompositeDataSource is attached
                to another parent CompositeDataSource), not user supplied.

        Returns:
            stix_obj: The STIX object to be returned (the one with the greatest
                'modified' value), or None if no data source returned anything.

        Raises:
            AttributeError: if no data sources are attached.

        """
        if not self.has_data_sources():
            raise AttributeError("CompositeDataSource has no data sources")

        all_data = []
        all_filters = FilterSet()

        # NOTE(review): a statement was missing here in the source;
        # reconstructed from the docstring ("will pass its attached filters
        # to each configured data source") - confirm.
        all_filters.add(self.filters)

        if _composite_filters:
            all_filters.add(_composite_filters)

        # for every configured Data Source, call its retrieve handler
        for ds in self.data_sources:
            data = ds.get(stix_id=stix_id, _composite_filters=all_filters)
            if data:
                all_data.append(data)

        if not all_data:
            return None

        # remove duplicate versions
        all_data = deduplicate(all_data)

        # reduce to most recent version
        stix_obj = max(all_data, key=lambda k: k['modified'])

        return stix_obj
Esempio n. 6
def test_deduplicate():
    """Check that deduplicate() keeps one object per (id, modified) pair."""
    deduped = deduplicate(STIX_OBJS1)

    # STIX_OBJS1 holds two distinct ids, one of which appears with two
    # different modified timestamps, so three objects must survive.
    assert len(deduped) == 3

    seen_ids = []
    seen_modified = []
    for entry in deduped:
        seen_ids.append(entry['id'])
        seen_modified.append(entry['modified'])

    for expected_id in (
        "indicator--d81f86b8-975b-bc0b-775e-810c5ad45a4f",
        "indicator--d81f86b9-975b-bc0b-775e-810c5ad45a4f",
    ):
        assert expected_id in seen_ids

    for expected_modified in (
        "2017-01-27T13:49:53.935Z",
        "2017-01-27T13:49:53.936Z",
    ):
        assert expected_modified in seen_modified
Esempio n. 7
    def query(self, query=None, _composite_filters=None):
        """Retrieve STIX objects that match a query.

        Federate the query to all DataSources attached to the
        Composite Data Source.

        Args:
            query (list): list of filters to search on.
            _composite_filters (list): a list of filters passed from a
                CompositeDataSource (i.e. if this CompositeDataSource is
                attached to a parent CompositeDataSource), not user supplied.

        Returns:
            all_data (list): list of STIX objects to be returned

        Raises:
            AttributeError: if no data sources are attached.

        """
        if not self.has_data_sources():
            raise AttributeError('CompositeDataSource has no data sources')

        if not query:
            # don't mess with the query (i.e. convert to a set, as that's done
            # within the specific DataSources that are called)
            query = []

        all_data = []

        all_filters = set()
        # NOTE(review): reconstructed - the source lost a statement around
        # here; assumes this object exposes its own `filters` collection, as
        # the sibling data-source methods do. Confirm.
        all_filters.update(self.filters)

        if _composite_filters:
            all_filters.update(_composite_filters)

        # federate query to all attached data sources,
        # pass composite filters to id
        for ds in self.data_sources:
            all_data.extend(ds.query(query=query, _composite_filters=all_filters))

        # remove exact duplicates (where duplicates are STIX 2.0
        # objects with the same 'id' and 'modified' values)
        if all_data:
            all_data = deduplicate(all_data)

        return all_data
Esempio n. 8
    def all_versions(self, stix_id, _composite_filters=None):
        """Retrieve all versions of a STIX object by STIX ID.

        Federated all_versions retrieve method - iterates through all
        DataSources defined in "data_sources".

        A composite data source will pass its attached filters to
        each configured data source, pushing filtering to them to handle.

        Args:
            stix_id (str): id of the STIX objects to retrieve.
            _composite_filters (FilterSet): a collection of filters passed from a
                CompositeDataSource (i.e. if this CompositeDataSource is
                attached to a parent CompositeDataSource), not user supplied.

        Returns:
            list: The STIX objects that have the specified id.

        Raises:
            AttributeError: if no data sources are attached.

        """
        if not self.has_data_sources():
            raise AttributeError("CompositeDataSource has no data sources")

        all_data = []
        all_filters = FilterSet()

        # NOTE(review): a statement was missing here in the source;
        # reconstructed from the docstring ("will pass its attached filters
        # to each configured data source") - confirm.
        all_filters.add(self.filters)

        if _composite_filters:
            all_filters.add(_composite_filters)

        # retrieve STIX objects from all configured data sources
        for ds in self.data_sources:
            data = ds.all_versions(stix_id=stix_id, _composite_filters=all_filters)
            all_data.extend(data)

        # remove exact duplicates (where duplicates are STIX 2.0 objects
        # with the same 'id' and 'modified' values)
        if all_data:
            all_data = deduplicate(all_data)

        return all_data
Esempio n. 9
    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this data source, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from
                the CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned.

        Raises:
            DataSourceError: if the TAXII server answers with HTTP 404.
            HTTPError: any other HTTP failure except 416 is propagated.

        """
        query = FilterSet(query)

        # combine all query filters
        # NOTE(review): the bodies of these two branches were missing from the
        # source; reconstructed as FilterSet.add() calls - confirm.
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # parse taxii query params (that can be applied remotely)
        taxii_filters = self._parse_taxii_filters(query)

        # taxii2client requires query params as keywords
        # NOTE(review): the key expression was missing from the source;
        # reconstructed as the filter's `property` - confirm.
        taxii_filters_dict = {f.property: f.value for f in taxii_filters}

        # query TAXII collection
        all_data = []
        paged_request = tcv21.as_pages if isinstance(
            self.collection, tcv21.Collection) else tcv20.as_pages
        try:
            # NOTE(review): the remaining arguments of this call were missing
            # from the source. Only the filter keywords are passed here; a
            # page-size argument may have been lost - confirm.
            for resource in paged_request(self.collection.get_objects,
                                          **taxii_filters_dict):
                all_data.extend(resource.get("objects", []))
        except HTTPError as e:
            # resources not found or access denied on the TAXII server
            if e.response.status_code == 404:
                raise DataSourceError(
                    "The requested STIX objects for the TAXII Collection resource defined in"
                    " the supplied TAXII Collection object are either not found or access is"
                    " denied. Received error: ", e,
                )

            # TAXII 2.0 paging can result in a 416 (Range Not Satisfiable) if
            # the server isn't sending Content-Range headers, so the pager just
            # goes until it runs out of pages.  So 416 can't be treated as a
            # real error, just an end-of-pages condition.  For other codes,
            # propagate the exception.
            elif e.response.status_code != 416:
                raise

        # deduplicate data (before filtering as reduces wasted filtering)
        all_data = deduplicate(all_data)

        # apply local (CompositeDataSource, TAXIICollectionSource and query) filters
        all_data = list(apply_common_filters(all_data, query))

        # parse python STIX objects from the STIX object dicts
        stix_objs = [
            parse(stix_obj_dict, allow_custom=self.allow_custom, version=version)
            for stix_obj_dict in all_data
        ]

        return stix_objs
Esempio n. 10
    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this FileSystemSource, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            _composite_filters (FilterSet): collection of filters passed from the
                CompositeDataSource, not user supplied
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are loaded from their json files,
                parsed into a python STIX objects and then returned.

        Raises:
            TypeError: if a candidate '.json' file cannot be parsed as JSON or
                lacks the keys expected of a STIX object.

        """
        all_data = []

        query = FilterSet(query)

        # combine all query filters
        # NOTE(review): the bodies of these two branches were missing from the
        # source; reconstructed as FilterSet.add() calls - confirm.
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # extract any filters that are for "type" or "id" , as we can then do
        # filtering before reading in the STIX objects. A STIX 'type' filter
        # can reduce the query to a single sub-directory. A STIX 'id' filter
        # allows for the fast checking of the file names versus loading it.
        file_filters = self._parse_file_filters(query)

        # establish which subdirectories can be avoided in query
        # by decluding as many as possible. A filter with "type" as the property
        # means that certain STIX object types can be ruled out, and thus
        # the corresponding subdirectories as well
        include_paths = []
        declude_paths = []
        type_filters = [f for f in file_filters if f.property == "type"]
        if type_filters:
            for type_filter in type_filters:
                type_dir = os.path.join(self._stix_dir, type_filter.value)
                if type_filter.op == "=":
                    include_paths.append(type_dir)
                elif type_filter.op == "!=":
                    # stored absolute because it is compared against absolute
                    # directory paths below
                    declude_paths.append(os.path.abspath(type_dir))
        else:
            # have to walk entire STIX directory
            include_paths.append(self._stix_dir)

        # if a user specifies a "type" filter like "type = <stix-object_type>",
        # the filter is reducing the search space to single stix object types
        # (and thus single directories). This makes such a filter more powerful
        # than "type != <stix-object_type>" bc the latter is subtracting
        # only one type of stix object type (and thus only one directory),
        # As such the former type of filters are given preference over the latter;
        # i.e. if both exist in a query, that latter type will be ignored

        if not include_paths:
            # user has specified types that are not wanted (i.e. "!=")
            # so query will look in all STIX directories that are not
            # the specified type. Compile correct dir paths
            for entry in os.listdir(self._stix_dir):
                candidate = os.path.abspath(os.path.join(self._stix_dir, entry))
                if candidate not in declude_paths:
                    include_paths.append(candidate)

        # grab stix object ID as well - if present in filters, as
        # may forgo the loading of STIX content into memory
        id_ = next(
            (f.value for f in file_filters if f.property == "id" and f.op == "="),
            None,
        )

        # now iterate through all STIX objs
        for path in include_paths:
            for root, _dirs, files in os.walk(path):
                for file_ in files:
                    if not file_.endswith(".json"):
                        # skip non '.json' files as more likely to be random non-STIX files
                        continue

                    if id_ and id_ != file_.split(".")[0]:
                        continue

                    # have to load into memory regardless to evaluate other filters
                    file_path = os.path.join(root, file_)
                    try:
                        # context manager so the handle is closed even when
                        # parsing fails
                        with open(file_path) as stix_file:
                            stix_obj = json.load(stix_file)

                        if stix_obj["type"] == "bundle":
                            stix_obj = stix_obj["objects"][0]

                        # naive STIX type checking
                        # NOTE(review): the check statements were missing from
                        # the source; reconstructed as key lookups that raise
                        # KeyError when absent - confirm the intended keys.
                        stix_obj["type"]
                        stix_obj["id"]

                    except (ValueError,
                            KeyError):  # likely not a JSON file
                        raise TypeError(
                            "STIX JSON object at '{0}' could either not be parsed to "
                            "JSON or was not valid STIX JSON".format(file_path))

                    # check against other filters, add if match
                    all_data.extend(apply_common_filters([stix_obj], query))

        all_data = deduplicate(all_data)

        # parse python STIX objects from the STIX object dicts
        stix_objs = [
            parse(stix_obj_dict, allow_custom=self.allow_custom, version=version)
            for stix_obj_dict in all_data
        ]

        return stix_objs
Esempio n. 11
    # NOTE(review): the parameter list was truncated in the source; it is
    # reconstructed from the docstring's parameter order and stated defaults.
    def query(self, query=None, _composite_filters=None, allow_custom=False, version=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this FileSystemSource, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            _composite_filters (set): set of filters passed from the
                CompositeDataSource, not user supplied
            allow_custom (bool): whether to retrieve custom objects/properties
                or not. Default: False.
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are loaded from their json files,
                parsed into a python STIX objects and then returned.

        """
        all_data = []

        if query is None:
            query = set()
        else:
            if not isinstance(query, list):
                # make sure dont make set from a Filter object,
                # need to make a set from a list of Filter objects (even if just one Filter)
                query = [query]
            query = set(query)

        # combine all query filters
        # NOTE(review): the bodies of these two branches were missing from the
        # source; reconstructed as set.update() since `query` is a set here.
        if self.filters:
            query.update(self.filters)
        if _composite_filters:
            query.update(_composite_filters)

        # extract any filters that are for "type" or "id" , as we can then do
        # filtering before reading in the STIX objects. A STIX 'type' filter
        # can reduce the query to a single sub-directory. A STIX 'id' filter
        # allows for the fast checking of the file names versus loading it.
        file_filters = self._parse_file_filters(query)

        # establish which subdirectories can be avoided in query
        # by decluding as many as possible. A filter with "type" as the property
        # means that certain STIX object types can be ruled out, and thus
        # the corresponding subdirectories as well
        include_paths = []
        declude_paths = []
        type_filters = [f for f in file_filters if f.property == "type"]
        if type_filters:
            for type_filter in type_filters:
                type_dir = os.path.join(self._stix_dir, type_filter.value)
                if type_filter.op == "=":
                    include_paths.append(type_dir)
                elif type_filter.op == "!=":
                    # stored absolute because it is compared against absolute
                    # directory paths below
                    declude_paths.append(os.path.abspath(type_dir))
        else:
            # have to walk entire STIX directory
            include_paths.append(self._stix_dir)

        # if a user specifies a "type" filter like "type = <stix-object_type>",
        # the filter is reducing the search space to single stix object types
        # (and thus single directories). This makes such a filter more powerful
        # than "type != <stix-object_type>" bc the latter is subtracting
        # only one type of stix object type (and thus only one directory),
        # As such the former type of filters are given preference over the latter;
        # i.e. if both exist in a query, that latter type will be ignored

        if not include_paths:
            # user has specified types that are not wanted (i.e. "!=")
            # so query will look in all STIX directories that are not
            # the specified type. Compile correct dir paths
            for entry in os.listdir(self._stix_dir):
                candidate = os.path.abspath(os.path.join(self._stix_dir, entry))
                if candidate not in declude_paths:
                    include_paths.append(candidate)

        # grab stix object ID as well - if present in filters, as
        # may forgo the loading of STIX content into memory
        id_ = next(
            (f.value for f in file_filters if f.property == "id" and f.op == "="),
            None,
        )

        # now iterate through all STIX objs
        for path in include_paths:
            for root, _dirs, files in os.walk(path):
                for file_ in files:
                    if id_ and id_ != file_.split(".")[0]:
                        continue

                    # have to load into memory regardless to evaluate other filters
                    # (context manager so the file handle is always closed)
                    with open(os.path.join(root, file_)) as stix_file:
                        stix_obj = json.load(stix_file)
                    if stix_obj.get('type', '') == 'bundle':
                        stix_obj = stix_obj['objects'][0]
                    # check against other filters, add if match
                    all_data.extend(apply_common_filters([stix_obj], query))

        all_data = deduplicate(all_data)

        # parse python STIX objects from the STIX object dicts
        stix_objs = [
            parse(stix_obj_dict, allow_custom=allow_custom, version=version)
            for stix_obj_dict in all_data
        ]

        return stix_objs
Esempio n. 12
    # NOTE(review): the parameter list was truncated in the source; it is
    # reconstructed from the docstring's parameter order and stated defaults.
    def query(self, query=None, _composite_filters=None, allow_custom=False, version=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this data source, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            _composite_filters (set): set of filters passed from the
                CompositeDataSource, not user supplied
            allow_custom (bool): whether to retrieve custom objects/properties
                or not. Default: False.
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned.

        """
        if query is None:
            query = set()
        else:
            if not isinstance(query, list):
                # make sure dont make set from a Filter object,
                # need to make a set from a list of Filter objects (even if just one Filter)
                query = [query]
            query = set(query)

        # combine all query filters
        # NOTE(review): the bodies of these two branches were missing from the
        # source; reconstructed as set.update() since `query` is a set here.
        if self.filters:
            query.update(self.filters)
        if _composite_filters:
            query.update(_composite_filters)

        # separate taxii query terms (can be done remotely)
        taxii_filters = self._parse_taxii_filters(query)

        # query TAXII collection
        # NOTE(review): the arguments of this call were missing from the
        # source; assumes the collection accepts the parsed filters via a
        # `filters` keyword and returns a dict with an "objects" list - confirm
        # against the taxii2client version in use.
        all_data = self.collection.get_objects(filters=taxii_filters)["objects"]

        # deduplicate data (before filtering as reduces wasted filtering)
        all_data = deduplicate(all_data)

        # apply local (CompositeDataSource, TAXIICollectionSource and query filters)
        all_data = list(apply_common_filters(all_data, query))

        # parse python STIX objects from the STIX object dicts
        stix_objs = [
            parse(stix_obj_dict, allow_custom=allow_custom, version=version)
            for stix_obj_dict in all_data
        ]

        return stix_objs