예제 #1
0
    def relationships(self, *args, **kwargs):
        """Collect the Relationships of a STIX object from every data source.

        This is the federated variant of the lookup: all positional and
        keyword arguments are forwarded untouched to the ``relationships``
        method of each entry in ``data_sources`` and the answers are merged
        into a single list.

        At most one of `source_only` and `target_only` may be `True`.

        Args:
            obj (STIX object OR dict OR str): The STIX object (or its ID) whose
                relationships will be looked up.
            relationship_type (str): Only retrieve Relationships of this type.
                If None, all relationships will be returned, regardless of type.
            source_only (bool): Only retrieve Relationships for which this
                object is the source_ref. Default: False.
            target_only (bool): Only retrieve Relationships for which this
                object is the target_ref. Default: False.

        Returns:
            list: The Relationship objects involving the given STIX object.

        Raises:
            AttributeError: If no data sources are attached.

        """
        if not self.has_data_sources():
            raise AttributeError("CompositeDataSource has no data sources")

        # ask each source in turn and flatten the answers in source order
        merged = [
            relationship
            for source in self.data_sources
            for relationship in source.relationships(*args, **kwargs)
        ]

        # exact duplicates (same 'id' and 'modified' values) are collapsed;
        # an empty result is handed back as-is without calling deduplicate
        return deduplicate(merged) if merged else merged
예제 #2
0
    def run(self, work_id: str, state: Mapping[str, Any]) -> Mapping[str, Any]:
        """Import the article as a STIX2 report and send it as a bundle.

        Args:
            work_id: Identifier stored on ``self.work_id`` for this run.
            state: Current state; returned unchanged when the article yields
                no indicators.

        Returns:
            ``state`` when there is nothing to import, otherwise the value of
            ``self._create_state`` for the article's creation date.
        """
        self.work_id = work_id
        created = parser.parse(self.article["createdDate"])

        # The API does not always provide a `publishedDate`; when it is None
        # the creation date is used instead.
        raw_published = self.article["publishedDate"]
        if raw_published is None:
            published = created
        else:
            published = parser.parse(raw_published)

        # Process every raw indicator first, then flatten and deduplicate.
        processed = [
            self._process_indicator(raw_indicator)
            for raw_indicator in self.article["indicators"]
        ]
        stix_indicators = utils.deduplicate(
            list(itertools.chain.from_iterable(processed))
        )

        # Without indicators no report is created and the state is untouched.
        if not stix_indicators:
            self.helper.log_info("No indicator in article, report will not be created.")
            return state

        self.helper.log_debug(f"Number of indicators: {len(stix_indicators)}")

        # The report is TLP_WHITE unless the first marking of at least one
        # indicator is TLP_AMBER.
        first_markings = [
            indicator["object_marking_refs"][0] for indicator in stix_indicators
        ]
        report_tlp = TLP_AMBER if TLP_AMBER in first_markings else TLP_WHITE

        report = Report(
            type="report",
            name=self.article.get("title", "RiskIQ Threat Report"),
            description=self.article["summary"],
            report_types=["threat-report"],
            created_by_ref=self.author,
            created=created,
            published=published,
            lang="en",
            labels=self.article["tags"],
            object_refs=stix_indicators,
            object_marking_refs=report_tlp,
            external_references=[
                {
                    "source_name": "riskiq",
                    "url": self.article["link"],
                    "external_id": self.article["guid"],
                }
            ],
            allow_custom=True,
        )
        self.helper.log_debug(f"[RiskIQ] Report = {report}")

        # The bundle carries the indicators, the report and its author.
        bundle = Bundle(
            objects=stix_indicators + [report, self.author],
            allow_custom=True,
        )
        self.helper.log_info("[RiskIQ] Sending report STIX2 bundle")
        self._send_bundle(bundle)

        return self._create_state(created)
예제 #3
0
    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this data source, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from
                the CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned.

        Raises:
            DataSourceError: if the TAXII server answers the request with
                an HTTP 404 status.
            HTTPError: for any other HTTP error raised by the request.

        """
        query = FilterSet(query)

        # combine all query filters
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # parse taxii query params (that can be applied remotely)
        taxii_filters = self._parse_taxii_filters(query)

        # taxii2client requires query params as keywords
        taxii_filters_dict = dict((f.property, f.value) for f in taxii_filters)

        # query TAXII collection; only the remote call sits inside the try so
        # that the handler deals with request failures and nothing else
        try:
            all_data = self.collection.get_objects(**taxii_filters_dict)['objects']
        except HTTPError as e:
            # resources not found or access denied on the TAXII server
            if e.response.status_code == 404:
                raise DataSourceError(
                    "The requested STIX objects for the TAXII Collection resource defined in"
                    " the supplied TAXII Collection object are either not found or access is"
                    " denied. Received error: ", e,
                )

            # Any other status used to fall through with `all_data` unbound,
            # which surfaced later as an UnboundLocalError and hid the real
            # failure. Propagate the original HTTP error instead.
            raise

        # deduplicate data (before filtering as reduces wasted filtering)
        all_data = deduplicate(all_data)

        # apply local (CompositeDataSource, TAXIICollectionSource and query) filters;
        # the filters already applied remotely are dropped first
        query.remove(taxii_filters)
        all_data = list(apply_common_filters(all_data, query))

        # parse python STIX objects from the STIX object dicts
        stix_objs = [parse(stix_obj_dict, allow_custom=self.allow_custom, version=version) for stix_obj_dict in all_data]

        return stix_objs
예제 #4
0
    def query(self, query=None, version=None, _composite_filters=None):
        """Fetch the STIX objects of the TAXII collection matching a query.

        The effective query is the union of the caller's filters, the
        filters attached to this data source and the filters handed down by
        a CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            _composite_filters (FilterSet): collection of filters passed from the
                CompositeDataSource, not user supplied
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned. An
                HTTPError during retrieval results in an empty list.

        """
        complete_query = FilterSet(query)

        # fold in the source-level and composite-level filters
        if self.filters:
            complete_query.add(self.filters)
        if _composite_filters:
            complete_query.add(_composite_filters)

        # filters the TAXII server can evaluate itself, expressed as the
        # keyword arguments taxii2client expects
        remote_filters = self._parse_taxii_filters(complete_query)
        request_params = {f.property: f.value for f in remote_filters}

        try:
            response = self.collection.get_objects(**request_params)

            # deduplicate first so the local filters run on fewer objects
            matches = deduplicate(response["objects"])

            # only filters the server did not handle are applied locally
            complete_query.remove(remote_filters)
            matches = list(apply_common_filters(matches, complete_query))
        except HTTPError:
            # resources not found or access denied: treated as "no results"
            matches = []

        # turn the STIX dicts into python STIX objects
        return [
            parse(raw_obj, allow_custom=self.allow_custom, version=version)
            for raw_obj in matches
        ]
예제 #5
0
    def get(self, stix_id, _composite_filters=None):
        """Return the most recent version of a STIX object, by STIX ID.

        Federated retrieval: every data source in ``data_sources`` is asked
        for the object through its own ``get`` method, and the answers are
        consolidated into a single object.

        The filters attached to this composite (plus any received from a
        parent composite) are passed down to each data source, which is
        responsible for applying them.

        Args:
            stix_id (str): the id of the STIX object to retrieve.
            _composite_filters (FilterSet): a collection of filters passed from a
                CompositeDataSource (i.e. if this CompositeDataSource is attached
                to another parent CompositeDataSource), not user supplied.

        Returns:
            stix_obj: The STIX object with the greatest 'modified' value, or
                None when no data source returned anything.

        Raises:
            AttributeError: If no data sources are attached.

        """
        if not self.has_data_sources():
            raise AttributeError("CompositeDataSource has no data sources")

        combined_filters = FilterSet()
        combined_filters.add(self.filters)
        if _composite_filters:
            combined_filters.add(_composite_filters)

        # keep only the truthy answers of the attached data sources
        candidates = []
        for source in self.data_sources:
            found = source.get(stix_id=stix_id, _composite_filters=combined_filters)
            if found:
                candidates.append(found)

        if not candidates:
            return None

        # drop duplicate versions, then order newest first
        candidates = deduplicate(candidates)
        newest_first = sorted(
            candidates,
            key=lambda obj: obj['modified'],
            reverse=True,
        )
        return newest_first[0]
예제 #6
0
def test_deduplicate():
    """Check that deduplicate keeps exactly the expected objects of STIX_OBJS1."""
    deduped = deduplicate(STIX_OBJS1)

    # Three objects are expected to remain: two distinct ids, one of which
    # appears with two different modified times.
    assert len(deduped) == 3

    seen_ids = [item['id'] for item in deduped]
    seen_modified = [item['modified'] for item in deduped]

    for expected_id in (
        "indicator--d81f86b8-975b-bc0b-775e-810c5ad45a4f",
        "indicator--d81f86b9-975b-bc0b-775e-810c5ad45a4f",
    ):
        assert expected_id in seen_ids

    for expected_modified in (
        "2017-01-27T13:49:53.935Z",
        "2017-01-27T13:49:53.936Z",
    ):
        assert expected_modified in seen_modified
예제 #7
0
    def query(self, query=None, _composite_filters=None):
        """Run a query against every attached data source and merge the results.

        The query is federated to all DataSources attached to the
        Composite Data Source.

        Args:
            query (list): list of filters to search on.
            _composite_filters (list): a list of filters passed from a
                CompositeDataSource (i.e. if this CompositeDataSource is
                attached to a parent CompositeDataSource), not user supplied.

        Returns:
            all_data (list): list of STIX objects to be returned

        Raises:
            AttributeError: If no data sources are attached.

        """
        if not self.has_data_sources():
            raise AttributeError('CompositeDataSource has no data sources')

        # A falsy query becomes an empty list; it is otherwise passed on
        # untouched because each DataSource normalises it itself.
        query = query or []

        # filters of this composite plus those inherited from a parent
        inherited_filters = set(self.filters)
        if _composite_filters:
            inherited_filters.update(_composite_filters)

        # ask each data source in turn, handing the composite filters down
        merged = [
            stix_obj
            for source in self.data_sources
            for stix_obj in source.query(query=query,
                                         _composite_filters=inherited_filters)
        ]

        # exact duplicates (same 'id' and 'modified' values) are collapsed;
        # an empty result is returned as-is without calling deduplicate
        return deduplicate(merged) if merged else merged
예제 #8
0
    def all_versions(self, stix_id, _composite_filters=None):
        """Gather every version of a STIX object from all data sources.

        Federated all_versions retrieval: each entry of ``data_sources`` is
        queried through its own ``all_versions`` method.

        The filters attached to this composite (plus any received from a
        parent composite) are passed down to each data source, which is
        responsible for applying them.

        Args:
            stix_id (str): id of the STIX objects to retrieve.
            _composite_filters (FilterSet): a collection of filters passed from a
                CompositeDataSource (i.e. if this CompositeDataSource is
                attached to a parent CompositeDataSource), not user supplied.

        Returns:
            list: The STIX objects that have the specified id.

        Raises:
            AttributeError: If no data sources are attached.

        """
        if not self.has_data_sources():
            raise AttributeError("CompositeDataSource has no data sources")

        combined_filters = FilterSet()
        combined_filters.add(self.filters)
        if _composite_filters:
            combined_filters.add(_composite_filters)

        # flatten the versions reported by each configured data source
        versions = [
            stix_obj
            for source in self.data_sources
            for stix_obj in source.all_versions(stix_id=stix_id,
                                                _composite_filters=combined_filters)
        ]

        # exact duplicates (same 'id' and 'modified' values) are collapsed;
        # an empty result is returned as-is without calling deduplicate
        return deduplicate(versions) if versions else versions
예제 #9
0
    def query(self, query=None, version=None, _composite_filters=None):
        """Fetch, page by page, the STIX objects of the collection matching a query.

        The effective query is the union of the caller's filters, the
        filters attached to this data source and the filters handed down by
        a CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from
                the CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned.

        Raises:
            DataSourceError: if the server answers with an HTTP 404 status.
            HTTPError: for any other HTTP error status except 416.

        """
        complete_query = FilterSet(query)

        # fold in the source-level and composite-level filters
        if self.filters:
            complete_query.add(self.filters)
        if _composite_filters:
            complete_query.add(_composite_filters)

        # filters the TAXII server can evaluate itself, expressed as the
        # keyword arguments taxii2client expects
        remote_filters = self._parse_taxii_filters(complete_query)
        request_params = {f.property: f.value for f in remote_filters}

        collected = []

        # choose the pager that matches the collection's client class
        if isinstance(self.collection, tcv21.Collection):
            paged_request = tcv21.as_pages
        else:
            paged_request = tcv20.as_pages

        try:
            pages = paged_request(self.collection.get_objects,
                                  per_request=self.items_per_page,
                                  **request_params)
            for page in pages:
                collected.extend(page.get("objects", []))
        except HTTPError as e:
            status = e.response.status_code

            # resources not found or access denied on the TAXII server
            if status == 404:
                raise DataSourceError(
                    "The requested STIX objects for the TAXII Collection resource defined in"
                    " the supplied TAXII Collection object are either not found or access is"
                    " denied. Received error: ",
                    e,
                )

            # TAXII 2.0 paging can end with a 416 (Range Not Satisfiable)
            # when the server sends no Content-Range headers: the pager
            # simply runs out of pages. That is an end-of-pages signal, not
            # an error; every other status is propagated.
            if status != 416:
                raise

        # deduplicate first so the local filters run on fewer objects
        collected = deduplicate(collected)

        # only filters the server did not handle are applied locally
        complete_query.remove(remote_filters)
        collected = list(apply_common_filters(collected, complete_query))

        # turn the STIX dicts into python STIX objects
        return [
            parse(raw_obj, allow_custom=self.allow_custom, version=version)
            for raw_obj in collected
        ]
예제 #10
0
    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this FileSystemSource, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.
            _composite_filters (FilterSet): collection of filters passed from the
                CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are loaded from their json files,
                parsed into a python STIX objects and then returned.

        Raises:
            TypeError: if a candidate ".json" file cannot be decoded as JSON
                or lacks one of the keys read from it ("type", "id", or
                "objects" for a bundle).

        """

        all_data = []

        query = FilterSet(query)

        # combine all query filters
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # extract any filters that are for "type" or "id" , as we can then do
        # filtering before reading in the STIX objects. A STIX 'type' filter
        # can reduce the query to a single sub-directory. A STIX 'id' filter
        # allows for the fast checking of the file names versus loading it.
        file_filters = self._parse_file_filters(query)

        # establish which subdirectories can be avoided in query
        # by decluding as many as possible. A filter with "type" as the property
        # means that certain STIX object types can be ruled out, and thus
        # the corresponding subdirectories as well
        include_paths = []
        declude_paths = []
        type_filters = [f for f in file_filters if f.property == "type"]
        if type_filters:
            for type_filter in type_filters:
                # the path is only built for the two supported operators
                if type_filter.op == "=":
                    include_paths.append(
                        os.path.join(self._stix_dir, type_filter.value))
                elif type_filter.op == "!=":
                    declude_paths.append(
                        os.path.join(self._stix_dir, type_filter.value))
        else:
            # have to walk entire STIX directory
            include_paths.append(self._stix_dir)

        # if a user specifies a "type" filter like "type = <stix-object_type>",
        # the filter is reducing the search space to single stix object types
        # (and thus single directories). This makes such a filter more powerful
        # than "type != <stix-object_type>" because the latter is subtracting
        # only one type of stix object type (and thus only one directory).
        # As such the former type of filters are given preference over the latter;
        # i.e. if both exist in a query, that latter type will be ignored

        if not include_paths:
            # user has specified types that are not wanted (i.e. "!=")
            # so query will look in all STIX directories that are not
            # the specified type. Compile correct dir paths
            for entry in os.listdir(self._stix_dir):
                entry_path = os.path.abspath(
                    os.path.join(self._stix_dir, entry))
                if entry_path not in declude_paths:
                    include_paths.append(entry_path)

        # grab stix object ID as well - if present in filters, as
        # may forgo the loading of STIX content into memory
        id_ = None
        for id_filter in file_filters:
            if id_filter.property == "id" and id_filter.op == "=":
                id_ = id_filter.value
                break

        # now iterate through all STIX objs
        for path in include_paths:
            for root, _dirs, files in os.walk(path):
                for file_ in files:
                    if not file_.endswith(".json"):
                        # skip non '.json' files as more likely to be random non-STIX files
                        continue

                    # with an "id = ..." filter, only the file whose name
                    # (up to the first ".") equals that id is loaded
                    if id_ and id_ != file_.split(".")[0]:
                        continue

                    file_path = os.path.join(root, file_)

                    # have to load into memory regardless to evaluate other filters
                    try:
                        # the context manager closes the handle right away
                        # instead of leaving one open per file scanned
                        with open(file_path) as stix_file:
                            stix_obj = json.load(stix_file)

                        if stix_obj["type"] == "bundle":
                            stix_obj = stix_obj["objects"][0]

                        # naive STIX type checking
                        stix_obj["type"]
                        stix_obj["id"]

                    except (ValueError,
                            KeyError):  # likely not a JSON file
                        raise TypeError(
                            "STIX JSON object at '{0}' could either not be parsed to "
                            "JSON or was not valid STIX JSON".format(file_path))

                    # check against other filters, add if match
                    all_data.extend(apply_common_filters([stix_obj],
                                                         query))

        all_data = deduplicate(all_data)

        # parse python STIX objects from the STIX object dicts
        stix_objs = [
            parse(stix_obj_dict,
                  allow_custom=self.allow_custom,
                  version=version) for stix_obj_dict in all_data
        ]

        return stix_objs
예제 #11
0
파일: filesystem.py 프로젝트: iskitsas/MISP
    def query(self,
              query=None,
              allow_custom=False,
              version=None,
              _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this FileSystemSource, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on; a single filter
                object that is not wrapped in a list is accepted as well
            allow_custom (bool): whether to retrieve custom objects/properties
                or not. Default: False.
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.
            _composite_filters (set): set of filters passed from the
                CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are loaded from their json files,
                parsed into a python STIX objects and then returned.

        """
        all_data = []

        if query is None:
            query = set()
        elif isinstance(query, list):
            query = set(query)
        else:
            # a lone Filter object must be wrapped, not iterated, when
            # building the set
            query = set([query])

        # combine all query filters
        if self.filters:
            query.update(self.filters)
        if _composite_filters:
            query.update(_composite_filters)

        # extract any filters that are for "type" or "id" , as we can then do
        # filtering before reading in the STIX objects. A STIX 'type' filter
        # can reduce the query to a single sub-directory. A STIX 'id' filter
        # allows for the fast checking of the file names versus loading it.
        file_filters = self._parse_file_filters(query)

        # establish which subdirectories can be avoided in query
        # by decluding as many as possible. A filter with "type" as the property
        # means that certain STIX object types can be ruled out, and thus
        # the corresponding subdirectories as well
        include_paths = []
        declude_paths = []
        type_filters = [f for f in file_filters if f.property == "type"]
        if type_filters:
            for type_filter in type_filters:
                # the path is only built for the two supported operators
                if type_filter.op == "=":
                    include_paths.append(
                        os.path.join(self._stix_dir, type_filter.value))
                elif type_filter.op == "!=":
                    declude_paths.append(
                        os.path.join(self._stix_dir, type_filter.value))
        else:
            # have to walk entire STIX directory
            include_paths.append(self._stix_dir)

        # if a user specifies a "type" filter like "type = <stix-object_type>",
        # the filter is reducing the search space to single stix object types
        # (and thus single directories). This makes such a filter more powerful
        # than "type != <stix-object_type>" because the latter is subtracting
        # only one type of stix object type (and thus only one directory).
        # As such the former type of filters are given preference over the latter;
        # i.e. if both exist in a query, that latter type will be ignored

        if not include_paths:
            # user has specified types that are not wanted (i.e. "!=")
            # so query will look in all STIX directories that are not
            # the specified type. Compile correct dir paths
            for entry in os.listdir(self._stix_dir):
                entry_path = os.path.abspath(
                    os.path.join(self._stix_dir, entry))
                if entry_path not in declude_paths:
                    include_paths.append(entry_path)

        # grab stix object ID as well - if present in filters, as
        # may forgo the loading of STIX content into memory
        id_ = None
        for id_filter in file_filters:
            if id_filter.property == "id" and id_filter.op == "=":
                id_ = id_filter.value
                break

        # now iterate through all STIX objs
        for path in include_paths:
            for root, _dirs, files in os.walk(path):
                for file_ in files:
                    # with an "id = ..." filter, only the file whose name
                    # (up to the first ".") equals that id is loaded
                    if id_ and id_ != file_.split(".")[0]:
                        continue

                    # have to load into memory regardless to evaluate other filters;
                    # the context manager closes the handle right away
                    # instead of leaving one open per file scanned
                    with open(os.path.join(root, file_)) as stix_file:
                        stix_obj = json.load(stix_file)

                    # a bundle file is represented by its first object
                    if stix_obj.get('type', '') == 'bundle':
                        stix_obj = stix_obj['objects'][0]

                    # check against other filters, add if match
                    all_data.extend(apply_common_filters([stix_obj],
                                                         query))

        all_data = deduplicate(all_data)

        # parse python STIX objects from the STIX object dicts
        stix_objs = [
            parse(stix_obj_dict, allow_custom=allow_custom, version=version)
            for stix_obj_dict in all_data
        ]

        return stix_objs
예제 #12
0
    def query(self,
              query=None,
              allow_custom=False,
              version=None,
              _composite_filters=None):
        """Fetch the STIX objects of the TAXII collection matching a query.

        The effective query is the union of the caller's filters, the
        filters attached to this data source and the filters handed down by
        a CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on; a single filter
                object that is not wrapped in a list is accepted as well
            _composite_filters (set): set of filters passed from the
                CompositeDataSource, not user supplied
            allow_custom (bool): whether to retrieve custom objects/properties
                or not. Default: False.
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned.

        """
        # Normalise the caller's query into a set of filters. A lone Filter
        # object must be wrapped rather than iterated.
        if query is None:
            criteria = set()
        elif isinstance(query, list):
            criteria = set(query)
        else:
            criteria = {query}

        # fold in the source-level and composite-level filters
        if self.filters:
            criteria.update(self.filters)
        if _composite_filters:
            criteria.update(_composite_filters)

        # the subset of filters the TAXII server can evaluate itself
        remote_filters = self._parse_taxii_filters(criteria)

        response = self.collection.get_objects(filters=remote_filters)

        # deduplicate first so the local filters run on fewer objects
        candidates = deduplicate(response["objects"])

        # the complete set of filters is applied again locally
        candidates = list(apply_common_filters(candidates, criteria))

        # turn the STIX dicts into python STIX objects
        return [
            parse(raw_obj, allow_custom=allow_custom, version=version)
            for raw_obj in candidates
        ]