Beispiel #1
0
def test_filters6(stix_objs2, real_stix_objs2):
    """Filter on ``name``, a property that is not one of the common ones.

    The same filter is applied to both fixtures; each run is expected to
    yield three matches, the first being the fixture's first object.
    """
    name_filter = Filter("name", "=", "Malicious site hosting downloader")

    # Fixture whose items are read by subscript.
    matches = list(apply_common_filters(stix_objs2, [name_filter]))
    assert matches[0]['id'] == stix_objs2[0]['id']
    assert len(matches) == 3

    # Fixture whose items are read by attribute.
    matches = list(apply_common_filters(real_stix_objs2, [name_filter]))
    assert matches[0].id == real_stix_objs2[0].id
    assert len(matches) == 3
def test_filters6():
    """Filter the module-level object lists on the non-common ``name`` property.

    Both STIX_OBJS2 and REAL_STIX_OBJS2 are expected to produce three
    matches, led by their respective first object.
    """
    wanted_name = "Malicious site hosting downloader"

    # Items of STIX_OBJS2 are read by subscript.
    found = list(
        apply_common_filters(STIX_OBJS2, [Filter("name", "=", wanted_name)]),
    )
    assert found[0]['id'] == STIX_OBJS2[0]['id']
    assert len(found) == 3

    # Items of REAL_STIX_OBJS2 are read by attribute.
    found = list(
        apply_common_filters(REAL_STIX_OBJS2, [Filter("name", "=", wanted_name)]),
    )
    assert found[0].id == REAL_STIX_OBJS2[0].id
    assert len(found) == 3
Beispiel #3
0
def test_apply_common_filters0():
    """Apply only ``filters[0]`` and verify which objects survive.

    Original note for this filter: "Return any object whose type is not
    relationship". Objects 0, 1 and 3 of each source list must be among the
    four results.
    """
    first_filter_only = [filters[0]]
    expected_positions = (0, 1, 3)

    # Source list whose items are read by subscript.
    matched_ids = [
        obj['id'] for obj in apply_common_filters(stix_objs, first_filter_only)
    ]
    for pos in expected_positions:
        assert stix_objs[pos]['id'] in matched_ids
    assert len(matched_ids) == 4

    # Source list whose items are read by attribute.
    matched_ids = [
        obj.id for obj in apply_common_filters(real_stix_objs, first_filter_only)
    ]
    for pos in expected_positions:
        assert real_stix_objs[pos].id in matched_ids
    assert len(matched_ids) == 4
def test_filters3():
    """Match objects whose ``modified`` is <= 2017-01-27T13:49:53.935Z.

    Each source list is expected to yield two matches, the first being the
    list's second object.
    """
    cutoff = "2017-01-27T13:49:53.935Z"

    # Filter value supplied as a timestamp string.
    string_filter = Filter("modified", "<=", cutoff)
    found = list(apply_common_filters(STIX_OBJS2, [string_filter]))
    assert found[0]['id'] == STIX_OBJS2[1]['id']
    assert len(found) == 2

    # Same cutoff, passed through parse_into_datetime() first.
    datetime_filter = Filter("modified", "<=", parse_into_datetime(cutoff))
    found = list(apply_common_filters(REAL_STIX_OBJS2, [datetime_filter]))
    assert found[0].id == REAL_STIX_OBJS2[1].id
    assert len(found) == 2
Beispiel #5
0
def test_filters1(stix_objs2, real_stix_objs2):
    """Match objects whose ``modified`` is > 2017-01-28T13:49:53.935Z.

    Exactly one object is expected from each fixture: its first element.
    """
    threshold = "2017-01-28T13:49:53.935Z"

    # Filter value supplied as a timestamp string.
    after_str = Filter("modified", ">", threshold)
    hits = list(apply_common_filters(stix_objs2, [after_str]))
    assert hits[0]['id'] == stix_objs2[0]['id']
    assert len(hits) == 1

    # Same threshold, passed through parse_into_datetime() first.
    after_dt = Filter("modified", ">", parse_into_datetime(threshold))
    hits = list(apply_common_filters(real_stix_objs2, [after_dt]))
    assert hits[0].id == real_stix_objs2[0].id
    assert len(hits) == 1
def test_filters1():
    """Match module-level objects modified after 2017-01-28T13:49:53.935Z.

    STIX_OBJS2 and REAL_STIX_OBJS2 must each yield a single match, namely
    their first element.
    """
    timestamp = "2017-01-28T13:49:53.935Z"

    # Filter value supplied as a timestamp string.
    selected = list(
        apply_common_filters(STIX_OBJS2, [Filter("modified", ">", timestamp)]),
    )
    assert selected[0]['id'] == STIX_OBJS2[0]['id']
    assert len(selected) == 1

    # Filter value produced by parse_into_datetime().
    parsed_timestamp_filter = Filter(
        "modified", ">", parse_into_datetime(timestamp),
    )
    selected = list(
        apply_common_filters(REAL_STIX_OBJS2, [parsed_timestamp_filter]),
    )
    assert selected[0].id == REAL_STIX_OBJS2[0].id
    assert len(selected) == 1
Beispiel #7
0
def test_filters3(stix_objs2, real_stix_objs2):
    """Match objects whose ``modified`` is <= 2017-01-27T13:49:53.935Z.

    Each fixture is expected to yield two matches, the first being the
    fixture's second object.
    """
    cutoff = "2017-01-27T13:49:53.935Z"

    # Filter value supplied as a timestamp string.
    matches = list(
        apply_common_filters(stix_objs2, [Filter("modified", "<=", cutoff)]),
    )
    assert matches[0]['id'] == stix_objs2[1]['id']
    assert len(matches) == 2

    # Same cutoff, passed through parse_into_datetime() first.
    dt_filter = Filter("modified", "<=", parse_into_datetime(cutoff))
    matches = list(apply_common_filters(real_stix_objs2, [dt_filter]))
    assert matches[0].id == real_stix_objs2[1].id
    assert len(matches) == 2
Beispiel #8
0
    def all_versions(self, stix_id, _composite_filters=None):
        """Return every stored version of the object with the given STIX ID.

        Args:
            stix_id (str): The STIX ID of the STIX 2 object to retrieve.
            _composite_filters (FilterSet): collection of filters passed from
                the parent CompositeDataSource, not user supplied

        Returns:
            (list): the stored objects for ``stix_id`` that pass both
                ``_composite_filters`` and ``self.filters``; empty when
                nothing is stored under that ID.

        """
        if is_marking(stix_id):
            # For marking IDs the mapped value is used as the single candidate.
            marking = self._data.get(stix_id)
            candidates = [marking] if marking else None
        else:
            # Otherwise the mapped value exposes its versions via all_versions.
            family = self._data.get(stix_id)
            candidates = family.all_versions.values() if family else None

        if not candidates:
            return []

        combined_filters = list(
            itertools.chain(_composite_filters or [], self.filters),
        )
        return list(apply_common_filters(candidates, combined_filters))
Beispiel #9
0
    def query(self, query=None, _composite_filters=None):
        """Return the stored objects that satisfy the complete query.

        The complete query is the union of the caller's ``query`` filters,
        the filters attached to this source (``self.filters``) and any
        ``_composite_filters`` handed down by a CompositeDataSource.

        Args:
            query (list): list of filters to search on
            _composite_filters (FilterSet): collection of filters passed from the
                CompositeDataSource, not user supplied

        Returns:
            (list): the values of ``self._data`` that pass every filter,
                returned as stored (no conversion is performed here).

        """
        combined = FilterSet(query)

        # Fold in the source-level filters first, then the composite ones.
        for extra_filters in (self.filters, _composite_filters):
            if extra_filters:
                combined.add(extra_filters)

        return list(apply_common_filters(self._data.values(), combined))
Beispiel #10
0
    def query(self, query=None, _composite_filters=None):
        """Return the stored objects that satisfy the complete query.

        The complete query is the union of the caller's ``query`` filters,
        the filters attached to this source (``self.filters``) and any
        ``_composite_filters`` handed down by a CompositeDataSource.

        Args:
            query (list): list of filters to search on
            _composite_filters (FilterSet): collection of filters passed from
                the CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that match the supplied query.

        """
        combined = FilterSet(query)

        # Fold in the source-level filters first, then the composite ones.
        for extra_filters in (self.filters, _composite_filters):
            if extra_filters:
                combined.add(extra_filters)

        def _iter_stored_objects():
            # An _ObjectFamily contributes each of its versions; any other
            # stored value is yielded as-is.
            for value in self._data.values():
                if isinstance(value, _ObjectFamily):
                    for versioned in value.all_versions.values():
                        yield versioned
                else:
                    yield value

        return list(apply_common_filters(_iter_stored_objects(), combined))
Beispiel #11
0
def _check_object_from_file(query, filepath, allow_custom, version):
    """
    Load one STIX object from a JSON file and test it against the filters.

    If the parsed content is a bundle, only its first contained object is
    checked.

    :param query: Iterable of filters
    :param filepath: Path to file to read
    :param allow_custom: Whether to allow custom properties as well unknown
        custom objects.
    :param version: Which STIX2 version to use. (e.g. "2.0", "2.1"). If None,
        use latest version.
    :return: The (parsed) STIX object, if the object passes the filters.  If
        not, None is returned.
    :raises TypeError: If the file had invalid JSON
    :raises IOError: If there are problems opening/reading the file
    :raises stix2.exceptions.STIXError: If there were problems creating a STIX
        object from the JSON
    """
    try:
        with open(filepath, "r") as json_file:
            raw_content = json.load(json_file)
    except ValueError:  # json.load() could not decode the file
        message = (
            "STIX JSON object at '{0}' could either not be parsed "
            "to JSON or was not valid STIX JSON"
        )
        raise TypeError(message.format(filepath))

    candidate = parse(raw_content, allow_custom, version)

    # A bundle is unwrapped to its first object before filtering.
    if candidate["type"] == "bundle":
        candidate = candidate["objects"][0]

    # next() yields the object if it passes the filters, else the default.
    return next(apply_common_filters([candidate], query), None)
Beispiel #12
0
    def get(self, stix_id, _composite_filters=None):
        """Look up a single STIX object by ID in the in-memory dict.

        For non-marking IDs the ``latest_version`` of the stored entry is
        used.

        Args:
            stix_id (str): The STIX ID of the STIX object to be retrieved.
            _composite_filters (FilterSet): collection of filters passed from the parent
                CompositeDataSource, not user supplied

        Returns:
            (STIX object): the object stored under ``stix_id`` if it passes
                ``_composite_filters`` and ``self.filters``; otherwise None.

        """
        if is_marking(stix_id):
            candidate = self._data.get(stix_id)
        else:
            family = self._data.get(stix_id)
            candidate = family.latest_version if family else None

        # Nothing usable stored: hand back the falsy lookup result unchanged.
        if not candidate:
            return candidate

        combined_filters = list(
            itertools.chain(_composite_filters or [], self.filters),
        )
        return next(apply_common_filters([candidate], combined_filters), None)
Beispiel #13
0
    def all_versions(self, stix_id, _composite_filters=None):
        """Return every stored version of the object with the given STIX ID.

        Args:
            stix_id (str): The STIX ID of the STIX 2 object to retrieve.
            _composite_filters (FilterSet): collection of filters passed from
                the parent CompositeDataSource, not user supplied

        Returns:
            (list): the stored objects for ``stix_id`` that pass both
                ``_composite_filters`` and ``self.filters``; empty when
                nothing is stored under that ID.

        """
        mapped_value = self._data.get(stix_id)
        if not mapped_value:
            return []

        # An _ObjectFamily holds several versions; anything else stands alone.
        if isinstance(mapped_value, _ObjectFamily):
            versions = mapped_value.all_versions.values()
        else:
            versions = [mapped_value]

        combined_filters = list(
            itertools.chain(_composite_filters or [], self.filters),
        )
        return list(apply_common_filters(versions, combined_filters))
Beispiel #14
0
    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this source (``self.filters``), and any filters passed
        from a CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from
                the CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned.

        Raises:
            DataSourceError: If the collection request fails with HTTP 404.
            HTTPError: If the collection request fails with any other HTTP
                status; the original error is propagated unchanged.

        """
        query = FilterSet(query)

        # combine all query filters
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # parse taxii query params (that can be applied remotely)
        taxii_filters = self._parse_taxii_filters(query)

        # taxii2client requires query params as keywords
        taxii_filters_dict = dict((f.property, f.value) for f in taxii_filters)

        # query TAXII collection
        try:
            all_data = self.collection.get_objects(**taxii_filters_dict)['objects']

            # deduplicate data (before filtering as reduces wasted filtering)
            all_data = deduplicate(all_data)

            # apply local (CompositeDataSource, TAXIICollectionSource and query) filters
            query.remove(taxii_filters)
            all_data = list(apply_common_filters(all_data, query))

        except HTTPError as e:
            # resources not found or access denied by the TAXII server
            if e.response.status_code == 404:
                raise DataSourceError(
                    "The requested STIX objects for the TAXII Collection resource defined in"
                    " the supplied TAXII Collection object are either not found or access is"
                    " denied. Received error: ", e,
                )

            # Any other HTTP failure used to be swallowed here, leaving
            # all_data unbound and surfacing as an UnboundLocalError below.
            # Propagate the real error instead.
            raise

        # parse python STIX objects from the STIX object dicts
        stix_objs = [parse(stix_obj_dict, allow_custom=self.allow_custom, version=version) for stix_obj_dict in all_data]

        return stix_objs
def test_filters5():
    """Exclude one indicator by ID using the ``!=`` operator.

    With indicator--d81f86b8-975b-bc0b-775e-810c5ad45a4f filtered out, each
    module-level list is expected to yield a single match: its first element.
    """
    excluded_id = "indicator--d81f86b8-975b-bc0b-775e-810c5ad45a4f"

    # Items of STIX_OBJS2 are read by subscript.
    remaining = list(
        apply_common_filters(STIX_OBJS2, [Filter("id", "!=", excluded_id)]),
    )
    assert remaining[0]['id'] == STIX_OBJS2[0]['id']
    assert len(remaining) == 1

    # Items of REAL_STIX_OBJS2 are read by attribute.
    remaining = list(
        apply_common_filters(REAL_STIX_OBJS2, [Filter("id", "!=", excluded_id)]),
    )
    assert remaining[0].id == REAL_STIX_OBJS2[0].id
    assert len(remaining) == 1
def test_filters5(stix_objs2, real_stix_objs2):
    """Exclude one indicator by ID using the ``!=`` operator.

    With indicator--00000000-0000-4000-8000-000000000002 filtered out, each
    fixture is expected to yield a single match: its first element.
    """
    not_this_id = Filter(
        "id", "!=", "indicator--00000000-0000-4000-8000-000000000002",
    )

    # Fixture whose items are read by subscript.
    survivors = list(apply_common_filters(stix_objs2, [not_this_id]))
    assert survivors[0]['id'] == stix_objs2[0]['id']
    assert len(survivors) == 1

    # Fixture whose items are read by attribute.
    survivors = list(apply_common_filters(real_stix_objs2, [not_this_id]))
    assert survivors[0].id == real_stix_objs2[0].id
    assert len(survivors) == 1
Beispiel #17
0
def test_filters7(stix_objs2, real_stix_objs2):
    """Filter on a property nested deep inside an embedded object.

    An observed-data object is appended to each fixture (index 3 in the
    combined lists) and must be the only match for a filter on the PDF
    extension's ``version``.
    """
    pdf_extension = {
        "version": "1.7",
        "document_info_dict": {
            "Title": "Sample document",
            "Author": "Adobe Systems Incorporated",
            "Creator": "Adobe FrameMaker 5.5.3 for Power Macintosh",
            "Producer": "Acrobat Distiller 3.01 for Power Macintosh",
            "CreationDate": "20070412090123-02",
        },
        "pdfid0": "DFCE52BD827ECF765649852119D",
        "pdfid1": "57A1E0F9ED2AE523E313C",
    }
    file_object = {
        "type": "file",
        "hashes": {
            "SHA-256": "35a01331e9ad96f751278b891b6ea09699806faedfa237d40513d92ad1b7100f",
        },
        "extensions": {"pdf-ext": pdf_extension},
    }
    obsvd_data_obj = {
        "type": "observed-data",
        "spec_version": "2.1",
        "id": OBSERVED_DATA_ID,
        "created_by_ref": "identity--f431f809-377b-45e0-aa1c-6a4751cae5ff",
        "created": "2016-04-06T19:58:16.000Z",
        "modified": "2016-04-06T19:58:16.000Z",
        "first_observed": "2015-12-21T19:00:00Z",
        "last_observed": "2015-12-21T19:00:00Z",
        "number_observed": 50,
        "objects": {"0": file_object},
    }

    # Append the raw dict to one list and its parsed form to the other.
    dict_objects = list(stix_objs2)
    dict_objects.append(obsvd_data_obj)
    parsed_objects = list(real_stix_objs2)
    parsed_objects.append(parse(obsvd_data_obj))

    embedded_path = "objects.0.extensions.pdf-ext.version"

    hits = list(
        apply_common_filters(dict_objects, [Filter(embedded_path, ">", "1.2")]),
    )
    assert hits[0]['id'] == dict_objects[3]['id']
    assert len(hits) == 1

    hits = list(
        apply_common_filters(parsed_objects, [Filter(embedded_path, ">", "1.2")]),
    )
    assert hits[0].id == parsed_objects[3].id
    assert len(hits) == 1
Beispiel #18
0
    def query(self, query=None, version=None, _composite_filters=None):
        """Search the TAXII collection and return the matching STIX objects.

        The complete query is the union of the caller's ``query`` filters,
        the filters attached to this source (``self.filters``) and any
        ``_composite_filters`` handed down by a CompositeDataSource. Filters
        selected by ``_parse_taxii_filters`` are sent with the request; the
        rest are applied locally to the response.

        Args:
            query (list): list of filters to search on
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.
            _composite_filters (FilterSet): collection of filters passed from the
                CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned. An
                HTTPError during retrieval results in an empty list.

        """
        combined = FilterSet(query)

        # Fold in the source-level filters first, then the composite ones.
        for extra_filters in (self.filters, _composite_filters):
            if extra_filters:
                combined.add(extra_filters)

        # Filters that can be applied remotely, as keyword arguments for
        # the taxii2client call.
        remote_filters = self._parse_taxii_filters(combined)
        request_kwargs = {f.property: f.value for f in remote_filters}

        try:
            fetched = self.collection.get_objects(**request_kwargs)["objects"]

            # Deduplicate before filtering to avoid wasted filter work.
            fetched = deduplicate(fetched)

            # Only the filters not already sent to the server remain.
            combined.remove(remote_filters)
            matching_dicts = list(apply_common_filters(fetched, combined))
        except HTTPError:
            # Best effort: any HTTP failure is treated as "no results".
            matching_dicts = []

        # Turn the surviving dicts into python STIX objects.
        return [
            parse(obj_dict, allow_custom=self.allow_custom, version=version)
            for obj_dict in matching_dicts
        ]
def test_datetime_filter_behavior():
    """Check datetime comparisons for every filter/object combination.

    A Filter built from a datetime object is expected to hold its value as
    a string (asserted below). The test then runs a datetime-built filter
    and a string-built filter against both source lists and expects the
    same single vulnerability each time, with the matched object's
    ``created`` attribute still a STIXdatetime where that is checked.
    """
    expected_id = "vulnerability--ee916c28-c7a4-4d0d-ad56-a8d357f89fef"

    filter_with_dt_obj = Filter(
        "created",
        "=",
        parse_into_datetime("2016-02-14T00:00:00.000Z", "millisecond"),
    )
    filter_with_str = Filter("created", "=", "2016-02-14T00:00:00.000Z")

    # The datetime given to Filter must have been converted to str.
    assert isinstance(filter_with_dt_obj.value, str)

    # First the datetime-built filter, then the string-built one.
    for created_filter in (filter_with_dt_obj, filter_with_str):
        # Against stix_objs.
        matched = list(apply_common_filters(stix_objs, [created_filter]))
        assert len(matched) == 1
        assert matched[0]["id"] == expected_id

        # Against real_stix_objs.
        matched = list(apply_common_filters(real_stix_objs, [created_filter]))
        assert len(matched) == 1
        assert matched[0]["id"] == expected_id
        # Filtering must not have altered the original object.
        assert isinstance(matched[0].created, STIXdatetime)
Beispiel #20
0
    def get(self, stix_id, version=None, _composite_filters=None):
        """Fetch one STIX object by ID from the TAXII collection.

        The object is requested directly by ID, so no filters are sent to
        the TAXII endpoint; ``self.filters`` and ``_composite_filters`` are
        applied locally to the response.

        Args:
            stix_id (str): The STIX ID of the STIX object to be retrieved.
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from
                the parent CompositeDataSource, not user supplied

        Returns:
            (STIX object): the parsed object with the supplied STIX ID, or
                None when the server answers 404, when nothing passes the
                filters, or when the first returned object carries a
                different ID.

        Raises:
            DataSourceError: If the request fails with an HTTP status other
                than 404.

        """
        local_filters = FilterSet()
        for extra_filters in (self.filters, _composite_filters):
            if extra_filters:
                local_filters.add(extra_filters)

        try:
            fetched = self.collection.get_object(stix_id)['objects']
            matching = list(apply_common_filters(fetched, local_filters))
        except HTTPError as e:
            if e.response.status_code != 404:
                raise DataSourceError(
                    "TAXII Collection resource returned error", e)
            # Resource not found or access denied by the TAXII server.
            matching = []

        if not matching:
            return None

        parsed = parse(
            matching[0], allow_custom=self.allow_custom, version=version,
        )

        # Guard against erroneous TAXII servers returning another object.
        if parsed['id'] != stix_id:
            return None
        return parsed
Beispiel #21
0
    def get(self, stix_id, version=None, _composite_filters=None):
        """Fetch one STIX object by ID from the TAXII collection.

        The object is requested directly by ID, so no filters are sent to
        the TAXII endpoint; ``self.filters`` and ``_composite_filters`` are
        applied locally to the response.

        Args:
            stix_id (str): The STIX ID of the STIX object to be retrieved.
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.
            _composite_filters (FilterSet): collection of filters passed from the parent
                CompositeDataSource, not user supplied

        Returns:
            (STIX object): the parsed object with the supplied STIX ID, or
                None when the server answers 404, when nothing passes the
                filters, or when the first returned object carries a
                different ID.

        Raises:
            DataSourceError: If the request fails with an HTTP status other
                than 404.

        """
        local_filters = FilterSet()
        for extra_filters in (self.filters, _composite_filters):
            if extra_filters:
                local_filters.add(extra_filters)

        try:
            fetched = self.collection.get_object(stix_id)["objects"]
            matching = list(apply_common_filters(fetched, local_filters))
        except HTTPError as e:
            if e.response.status_code != 404:
                raise DataSourceError(
                    "TAXII Collection resource returned error", e)
            # Resource not found or access denied by the TAXII server.
            matching = []

        if not matching:
            return None

        parsed = parse(
            matching[0], allow_custom=self.allow_custom, version=version,
        )

        # Guard against erroneous TAXII servers returning another object.
        if parsed.id != stix_id:
            return None
        return parsed
def _check_object_from_file(query, filepath, allow_custom, version, encoding):
    """
    Load one STIX object from a JSON file and test it against a set of
    filters.

    If the file holds a bundle, only the first object of the bundle is
    considered.

    Args:
        query: Iterable of filters
        filepath (str): Path to file to read
        allow_custom (bool): Whether to allow custom properties as well
            unknown custom objects.
        version (str): If present, it forces the parser to use the version
            provided. Otherwise, the library will make the best effort based
            on checking the "spec_version" property.
        encoding (str): The encoding to use when reading a file from the
            filesystem.

    Returns:
        The (parsed) STIX object if it passes the filters, otherwise None.

    Raises:
        TypeError: If the file had invalid JSON
        IOError: If there are problems opening/reading the file
        stix2.exceptions.STIXError: If there were problems creating a STIX
            object from the JSON

    """
    try:
        with io.open(filepath, "r", encoding=encoding) as stream:
            raw = json.load(stream)
    except ValueError:  # content is not JSON
        message = (
            "STIX JSON object at '{0}' could either not be parsed "
            "to JSON or was not valid STIX JSON"
        ).format(filepath)
        raise TypeError(message)

    candidate = parse(raw, allow_custom, version)

    # a bundle is represented by its first contained object
    if candidate["type"] == "bundle":
        candidate = candidate["objects"][0]

    # hand back the object only if it survives the filters
    for match in apply_common_filters([candidate], query):
        return match
    return None
Beispiel #23
0
    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this data source, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters). Filters that the
        TAXII endpoint can evaluate are sent with the request; the rest are
        applied locally to the returned objects.

        Args:
            query (list): list of filters to search on
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from
                the CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that match the supplied query.
                The objects are received from TAXII as dicts, parsed into
                python STIX objects and then returned.

        Raises:
            DataSourceError: If the TAXII server responds with a 404.
            HTTPError: Any other HTTP error except 416 is propagated.

        """
        query = FilterSet(query)

        # fold in the filters of this source and of the composite parent
        for extra in (self.filters, _composite_filters):
            if extra:
                query.add(extra)

        # filters that can be evaluated remotely; taxii2client expects them
        # as keyword arguments
        taxii_filters = self._parse_taxii_filters(query)
        request_kwargs = {flt.property: flt.value for flt in taxii_filters}

        # choose the pager matching the collection's TAXII version
        if isinstance(self.collection, tcv21.Collection):
            pager = tcv21.as_pages
        else:
            pager = tcv20.as_pages

        fetched = []
        try:
            pages = pager(self.collection.get_objects,
                          per_request=self.items_per_page,
                          **request_kwargs)
            for page in pages:
                fetched.extend(page.get("objects", []))
        except HTTPError as err:
            status = err.response.status_code

            # resources not found or access denied: report as a data source
            # error
            if status == 404:
                raise DataSourceError(
                    "The requested STIX objects for the TAXII Collection resource defined in"
                    " the supplied TAXII Collection object are either not found or access is"
                    " denied. Received error: ",
                    err,
                )

            # TAXII 2.0 paging can result in a 416 (Range Not Satisfiable) if
            # the server isn't sending Content-Range headers, so the pager
            # just goes until it runs out of pages. A 416 is therefore an
            # end-of-pages condition, not an error; anything else propagates.
            if status != 416:
                raise

        # drop duplicates first so that fewer objects need filtering
        fetched = deduplicate(fetched)

        # the remote filters were already applied; run only the rest locally
        query.remove(taxii_filters)
        remaining = list(apply_common_filters(fetched, query))

        # turn the surviving dicts into python STIX objects
        results = []
        for obj_dict in remaining:
            results.append(
                parse(obj_dict,
                      allow_custom=self.allow_custom,
                      version=version))

        return results
Beispiel #24
0
    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this FileSystemSource, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.
            _composite_filters (FilterSet): collection of filters passed from the
                CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are loaded from their json files,
                parsed into a python STIX objects and then returned.

        Raises:
            TypeError: If a visited '.json' file cannot be parsed as JSON,
                lacks a "type" or "id" key, or is a bundle without objects.

        """
        all_data = []

        query = FilterSet(query)

        # combine all query filters
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # extract any filters that are for "type" or "id", as we can then do
        # filtering before reading in the STIX objects. A STIX 'type' filter
        # can reduce the query to a single sub-directory. A STIX 'id' filter
        # allows for the fast checking of the file names versus loading it.
        file_filters = self._parse_file_filters(query)

        # establish which subdirectories can be avoided in the query. A
        # filter on "type" rules certain STIX object types in or out, and
        # thus the corresponding subdirectories as well.
        include_paths = []
        declude_paths = []
        type_filters = [flt for flt in file_filters if flt.property == "type"]
        if type_filters:
            for type_filter in type_filters:
                # the path is only built for "=" / "!=" so that filters with
                # other operators never have their value joined into a path
                if type_filter.op == "=":
                    include_paths.append(
                        os.path.join(self._stix_dir, type_filter.value))
                elif type_filter.op == "!=":
                    declude_paths.append(
                        os.path.join(self._stix_dir, type_filter.value))
        else:
            # have to walk entire STIX directory
            include_paths.append(self._stix_dir)

        # A "type = X" filter narrows the search to a single directory,
        # whereas "type != X" only removes one. The former is therefore
        # given preference: if both kinds exist in a query, the "!=" ones
        # are ignored for directory selection.
        if not include_paths:
            # only unwanted types (or other operators) were given, so look
            # in every STIX directory that has not been excluded
            for entry in os.listdir(self._stix_dir):
                entry_path = os.path.abspath(
                    os.path.join(self._stix_dir, entry))
                if entry_path not in declude_paths:
                    include_paths.append(entry_path)

        # grab the STIX object ID as well - if present in the filters it lets
        # us skip loading files whose name does not match
        id_ = None
        for id_filter in file_filters:
            if id_filter.property == "id" and id_filter.op == "=":
                id_ = id_filter.value
                break

        # now iterate through all STIX objs
        for path in include_paths:
            for root, _dirs, files in os.walk(path):
                for file_ in files:
                    if not file_.endswith(".json"):
                        # skip non '.json' files as more likely to be random
                        # non-STIX files
                        continue

                    if id_ and id_ != file_.split(".")[0]:
                        # file name does not match the requested ID
                        continue

                    # have to load into memory regardless to evaluate the
                    # other filters
                    file_path = os.path.join(root, file_)
                    try:
                        # context manager guarantees the handle is closed
                        with open(file_path) as json_file:
                            stix_obj = json.load(json_file)

                        if stix_obj["type"] == "bundle":
                            stix_obj = stix_obj["objects"][0]

                        # naive STIX type checking: both keys must exist
                        stix_obj["type"]
                        stix_obj["id"]

                    except (ValueError, KeyError, IndexError):
                        # not JSON, missing keys, or an empty bundle
                        raise TypeError(
                            "STIX JSON object at '{0}' could either not be parsed to "
                            "JSON or was not valid STIX JSON".format(
                                file_path))

                    # check against other filters, add if match
                    all_data.extend(apply_common_filters([stix_obj], query))

        all_data = deduplicate(all_data)

        # parse python STIX objects from the STIX object dicts
        stix_objs = [
            parse(stix_obj_dict,
                  allow_custom=self.allow_custom,
                  version=version) for stix_obj_dict in all_data
        ]

        return stix_objs