Ejemplo n.º 1
0
    def setUp(self):
        """Prepare an `Anonymizer` mock plus a proxy for its real methods."""
        anonymizer_mock = MagicMock(__class__=Anonymizer)
        self.mock = anonymizer_mock
        self.meth = MethodProxy(Anonymizer, anonymizer_mock)

        anonymizer_mock.data_spec = N6DataSpec()
        auth_api_mock = anonymizer_mock.auth_api
        auth_api_mock.get_anonymized_source_mapping.return_value = {
            'forward_mapping': self.forward_source_mapping,
        }
        auth_api_mock.get_dip_anonymization_disabled_source_ids.return_value = frozenset()
Ejemplo n.º 2
0
 def get_concrete_view_class_kwargs(cls, view_class, request):
     """Extend the base view-class kwargs with the search-events specifics."""
     kwargs = super(TestRestAPIViewBase_prepare_params,
                    cls).get_concrete_view_class_kwargs(view_class, request)
     kwargs['resource_id'] = '/search/events'
     kwargs['data_spec'] = N6DataSpec()
     kwargs['data_backend_api_method'] = 'search_events'
     kwargs['renderers'] = 'json'
     return kwargs
Ejemplo n.º 3
0
            [time_max_until] = cleaned_param_dict.get(
                'time.max', cleaned_param_dict.get('time.until', [None]))
            assert isinstance(time_max_until, datetime.datetime)
            time_min = min(time_min, time_max_until - default_delta)
        redir_params['time_min'] = time_min
        redir_template_parts.append('time.min={time_min}')

        return self._get_redirect_url(redir_template_parts, redir_params)

    def _get_redirect_url(self, template_parts, params):
        template = '&'.join(template_parts)
        query_str = template.format(**params)
        return "{}?{}".format(self.request.path_url, query_str)


# Module-level data-spec singletons shared by the REST API views.
n6_data_spec = N6DataSpec()
n6_inside_data_spec = N6InsideDataSpec()

# Renderer labels accepted by the stream-like API resources:
# plain data renderers plus Snort/Suricata rule renderers.
STREAM_RENDERERS = [
    'json',
    'csv',
    'sjson',
    'snort-dns',
    'snort-http',
    'snort-ip',
    'snort-ip-bl',
    'suricata-dns',
    'suricata-http',
    'suricata-ip',
    'suricata-ip-bl',
]
Ejemplo n.º 4
0
    def setUp(self):
        """
        Prepare an `Anonymizer` mock whose auth API returns a fixed
        mapping of source ids to per-subsource stream-API access infos.
        """
        self.event_type = 'bl-update'

        # Each predicate is expected to be called with a
        # RecordFacadeForPredicates instance (asserted here, so a wrong
        # argument type fails the test from inside the predicate).
        def YES_predicate(record):
            self.assertIsInstance(record, RecordFacadeForPredicates)
            return True

        def NO_predicate(record):
            self.assertIsInstance(record, RecordFacadeForPredicates)
            return False

        self.mock = MagicMock(__class__=Anonymizer)
        self.meth = MethodProxy(Anonymizer, self.mock)

        self.mock.data_spec = N6DataSpec()
        # Fixture mapping:
        #   source id -> subsource -> (predicate,
        #                              resource key -> set of org ids)
        # (also kept on `self` as `s_to_s_to_saai` for use in test methods)
        self.mock.auth_api.get_source_ids_to_subs_to_stream_api_access_infos.return_value = \
            self.s_to_s_to_saai = {
                'src.empty': {},
                'src.some-1': {
                    sen.something_1: (
                        YES_predicate,
                        {
                            'inside': set(),
                            'threats': set(),
                            'search': set(),
                        }
                    ),
                    sen.something_2: (
                        YES_predicate,
                        {
                            'inside': {'o4'},
                            'threats': set(),
                            'search': {'o1', 'o2', 'o3', 'o4', 'o5', 'o6'},
                        }
                    ),
                    sen.something_3: (
                        NO_predicate,
                        {
                            'inside': {'o2'},
                            'threats': {'o3'},
                            'search': set(),
                        }
                    ),
                    sen.something_4: (
                        NO_predicate,
                        {
                            'inside': {'o1', 'o3', 'o9'},
                            'threats': {'o3', 'o5', 'o6'},
                            'search': {'o3', 'o4', 'o5', 'o6'},
                        }
                    ),
                },
                'src.some-2': {
                    sen.something_5: (
                        YES_predicate,
                        {
                            'inside': {'o1', 'o3', 'o9'},
                            'threats': {'o3', 'o5', 'o6'},
                            'search': {'o3', 'o4', 'o5', 'o6'},
                        }
                    ),
                    sen.something_6: (
                        YES_predicate,
                        {
                            'inside': {'o2'},
                            'threats': {'o2'},
                            'search': set(),
                        }
                    ),
                    sen.something_7: (
                        YES_predicate,
                        {
                            'inside': set(),
                            'threats': {'o8'},
                            'search': set(),
                        }
                    ),
                    sen.something_8: (
                        YES_predicate,
                        {
                            'inside': set(),
                            'threats': set(),
                            'search': set(),
                        }
                    ),
                    sen.something_9: (
                        NO_predicate,
                        {
                            'inside': {'o1', 'o5', 'o4', 'o9'},
                            'threats': {'o3', 'o4', 'o5', 'o9'},
                            'search': {'o1', 'o2', 'o3', 'o4'},
                        }
                    ),
                },
            }
Ejemplo n.º 5
0
class RecordDict(collections.MutableMapping):
    """
    Record dict class for non-blacklist events.

    A mutable mapping whose keys are restricted to the union of
    `required_keys` and `optional_keys`, and whose values are validated
    and normalized on assignment by per-key *adjuster* methods
    (`adjust_<key>`).  Also usable as a one-shot guarding context
    manager (see __enter__/__exit__).
    """

    # name prefixes for the adjuster methods and the dynamically
    # generated appender methods (see __getattr__)
    _ADJUSTER_PREFIX = 'adjust_'
    _APPENDER_PREFIX = 'append_'

    data_spec = N6DataSpec()

    required_keys = frozenset(data_spec.result_field_specs('required'))
    optional_keys = frozenset(data_spec.result_field_specs('optional')) | {
        # note: the 'type' item is somewhat related to
        # <parser class>.event_type but *not* to <collector class>.type (!)
        'type',  ## <- FIXME???: shouldn't it be required? (not optional?)
        'enriched',  # (its values are added by enricher)

        # internal keys
        # (items whose keys start with '_' are neither recorded
        #  into database nor used for id computation)
        '_do_not_resolve_fqdn_to_ip',  # flag for enricher
        '_parsed_old',

        # internal keys of aggregated items
        '_group',
        '_first_time',

        # internal keys of blacklist items
        ## FIXME?: shouldn't they be required
        ## (not optional) for BLRecordDict???
        '_bl-series-no',
        '_bl-series-total',
        '_bl-series-id',
        '_bl-time',
        '_bl-current-time',
    }

    # for the following keys, if the given value is invalid,
    # AdjusterError is not propagated; instead the value is just
    # not stored (and a warning is logged)
    without_adjuster_error = frozenset({
        'fqdn',
        'name',
        'url',
        'url_pattern',
    })

    #
    # Instantiation-related methods

    @classmethod
    def from_json(cls, json_string, **kwargs):
        """Alternate constructor: build an instance from a JSON string."""
        return cls(json.loads(json_string), **kwargs)

    def __init__(self,
                 iterable_or_mapping=(),
                 log_nonstandard_names=False,
                 context_manager_error_callback=None):
        """
        Args:
            iterable_or_mapping:
                Initial content (a mapping or an iterable of key/value
                pairs); each item goes through the adjuster machinery.
            log_nonstandard_names:
                If true, non-standard `name` values are logged
                (see _check_and_handle_nonstandard_name()).
            context_manager_error_callback:
                Optional callable invoked from __exit__() when an
                exception escapes the `with` block.
        """
        self._dict = {}
        self._settable_keys = (self.required_keys | self.optional_keys)

        # to catch some kinds of bugs early...
        duplicated = self.required_keys & self.optional_keys
        if duplicated:
            raise ValueError('{} has keys declared both '
                             'as required and optional: {}'.format(
                                 self.__class__.__name__,
                                 ', '.join(sorted(duplicated))))

        # every settable key must have a corresponding adjuster method
        # (it may be explicitly set to None to pass values unchanged)
        missing_adjusters = [
            key for key in self._settable_keys
            if not hasattr(self, self._adjuster_name(key))
        ]
        if missing_adjusters:
            raise TypeError('{!r} has no adjusters for keys: {}'.format(
                self, ', '.join(sorted(missing_adjusters))))

        self.log_nonstandard_names = log_nonstandard_names

        # context-manager (__enter__/__exit__) -related stuff
        self.context_manager_error_callback = context_manager_error_callback
        self.used_as_context_manager = False

        self.update(iterable_or_mapping)

    @classmethod
    def _adjuster_name(cls, key):
        """Map a record key to its adjuster method name ('-' is dropped)."""
        return cls._ADJUSTER_PREFIX + key.replace('-', '')

    #
    # Output-related methods

    def get_ready_dict(self):
        """
        Return a deep copy of the content, verifying first that all
        required keys are present (raises ValueError otherwise).
        """
        current_keys = set(self._dict)
        assert self._settable_keys >= current_keys
        missing_keys = self.required_keys - current_keys
        if missing_keys:
            raise ValueError('missing keys: ' +
                             ', '.join(sorted(missing_keys)))
        ready_dict = copy.deepcopy(self._dict)
        ######## provide the legacy item
        ######## (needed by old version of RecordDict, in not-yet-updated components)
        used_custom_keys = self.data_spec.custom_field_keys.intersection(
            ready_dict)
        if used_custom_keys:
            ready_dict['__preserved_custom_keys__'] = sorted(used_custom_keys)
        ######## ^^^ (to be removed later)
        return ready_dict

    def get_ready_json(self):
        """Serialize get_ready_dict() to JSON."""
        # NOTE: changed from json.dumps to bson's dumps
        ### XXX: why? bson.json_utils.dumps() pre-converts some values, but is it necessary???
        return dumps(self.get_ready_dict())

    def iter_db_items(self):
        """
        Yield one or more database-item dicts built from this record:
        one per `address` list element (each with that element's
        `ip`/`cc`/`asn` merged in), or a single item if `address` is
        absent/empty.  Custom keys are gathered under 'custom'.
        """
        # to be cloned later (see below)
        item_prototype = {
            key: value
            for key, value in self.get_ready_dict().iteritems()
            if not key.startswith('_')
        }  # no internal keys

        # pop actual custom items and place them in the "custom" field
        all_custom_keys = self.data_spec.custom_field_keys
        custom_items = {
            key: item_prototype.pop(key)
            for key in all_custom_keys if key in item_prototype
        }
        if custom_items:
            item_prototype['custom'] = custom_items

        # depending on "address" provide one or more database items (dicts)
        address_list = item_prototype.pop('address',
                                          None)  # NOTE: deleting `address`
        if address_list:
            # the `address` list was present and not empty
            # -> db item for each list item (each db item containing
            # `ip`[/`cc`/`asn`] of the list item + the whole `address`)
            item_prototype['address'] = address_list  # restore
            all_addr_keys_are_legal = {'ip', 'cc', 'asn'}.issuperset
            for addr in address_list:
                assert 'ip' in addr and all_addr_keys_are_legal(addr)
                # cloning the prototype dict...
                db_item = item_prototype.copy()
                # ...and updating the copy with particular address data
                db_item.update(addr)
                yield db_item
        else:
            # the `address` list was *empty* or *not* present
            # -> only one db item *without* `address`, `ip` etc.
            yield item_prototype

    __repr__ = attr_repr('_dict')

    #
    # MutableMapping interface implementation

    def __iter__(self):
        return iter(self._dict)

    def __len__(self):
        return len(self._dict)

    def __getitem__(self, key):
        return self._dict[key]

    def __delitem__(self, key):
        del self._dict[key]

    def __setitem__(self, key, value):
        """Store `value` under `key` after passing it through its adjuster."""
        ######## silently ignore the legacy item
        if key == '__preserved_custom_keys__': return
        ######## ^^^ (to be removed later)
        try:
            self._dict[key] = self._get_adjusted_value(key, value)
        except AdjusterError as exc:
            # for the keys listed in `without_adjuster_error` an invalid
            # value is just dropped with a warning instead of raising
            if key in self.without_adjuster_error:
                LOGGER.warning('Invalid value not stored (%s)', exc)
            else:
                raise

    def _get_adjusted_value(self, key, value):
        """
        Run `value` through the adjuster for `key`; raise AdjusterError
        on adjuster failure (unless the underlying exception carries a
        true `propagate_it_anyway` attribute -- then it is re-raised).
        """
        if key not in self._settable_keys:
            raise RuntimeError('for {!r}, key {!r} is illegal'.format(
                self, key))

        adjuster_method_name = self._adjuster_name(key)
        try:
            adjuster = getattr(self, adjuster_method_name)
        except AttributeError:
            raise RuntimeError('{!r} has no adjuster for key {!r}'.format(
                self, key))
        if adjuster is None:
            # adjuster explicitly set to None -> passing value unchanged
            return value

        try:
            return adjuster(value)
        except Exception as exc:
            if getattr(exc, 'propagate_it_anyway', False):
                raise
            adjuster_error_msg = ('{!r}.{}({value!r}) raised '
                                  '{exc.__class__.__name__}: {exc}'.format(
                                      self,
                                      adjuster_method_name,
                                      value=value,
                                      exc=exc))
            raise AdjusterError(adjuster_error_msg)

    # reimplementation only for speed
    def __contains__(self, key):
        return key in self._dict

    # reimplementation with slightly different interface
    # and some additional guarantees
    def update(self, iterable_or_mapping=()):
        """Set all items from a mapping or an iterable of (key, value) pairs."""
        iterator = (iterable_or_mapping.iteritems() if isinstance(
            iterable_or_mapping, collections.Mapping) else
                    iter(iterable_or_mapping))
        setitem = self.__setitem__
        # updating in a deterministic order: sorted by key (in particular,
        # 'category' is set *before* 'name' -- see adjust_name())
        sorted_items = sorted(iterator)
        for key, value in sorted_items:
            setitem(key, value)

    # record dicts are always deep-copied (to avoid hard-to-find bugs)
    def copy(self):
        return copy.deepcopy(self)

    __copy__ = copy

    #
    # Context manager interface

    def __enter__(self):
        self.used_as_context_manager = True
        return self

    def __exit__(self, exc_type, exc, tb):
        """
        On error, delegate to `context_manager_error_callback` (if any);
        the callback's result decides whether the exception is
        suppressed.  An instance may be used this way only once --
        the callback attribute is deleted afterwards.
        """
        try:
            error_callback = self.context_manager_error_callback
        except AttributeError:
            raise TypeError('a record dict instance cannot be used '
                            'as a guarding context manager more than once')
        try:
            if exc_type is not None and error_callback is not None:
                if exc is None:
                    # exception instance not provided -> instantiate it
                    exc = exc_type()
                return error_callback(exc)
        finally:
            del self.context_manager_error_callback

    #
    # Adjusters

    adjust_id = make_adjuster_using_data_spec('id')
    adjust_rid = make_adjuster_using_data_spec('rid')
    adjust_source = make_adjuster_using_data_spec('source')
    adjust_origin = make_adjuster_using_data_spec('origin')
    adjust_restriction = make_adjuster_using_data_spec('restriction')
    adjust_confidence = make_adjuster_using_data_spec('confidence')
    adjust_category = make_adjuster_using_data_spec('category')
    adjust_md5 = make_adjuster_using_data_spec('md5')
    adjust_sha1 = make_adjuster_using_data_spec('sha1')
    adjust_proto = make_adjuster_using_data_spec('proto')
    adjust_sport = make_adjuster_using_data_spec('sport')
    adjust_dport = make_adjuster_using_data_spec('dport')
    adjust_count = make_adjuster_using_data_spec('count')

    adjust_time = chained(
        make_adjuster_using_data_spec('time'),  # will return datetime
        make_adjuster_applying_callable(str))  # will transform it to str

    adjust_modified = chained(
        make_adjuster_using_data_spec('modified'),  # will return datetime
        make_adjuster_applying_callable(str))  # will transform it to str

    adjust_address = chained(
        make_multiadjuster(make_dict_adjuster(ip=ipv4_preadjuster)),
        applied_for_nonfalse(make_adjuster_using_data_spec('address')))

    adjust_dip = chained(ipv4_preadjuster,
                         make_adjuster_using_data_spec('dip'))

    adjust_url = chained(
        url_preadjuster, make_adjuster_using_data_spec('url',
                                                       on_too_long=trim))

    adjust_fqdn = make_adjuster_using_data_spec('fqdn',
                                                on_too_long=trim_domain)

    adjust_client = chained(
        make_multiadjuster(),
        applied_for_nonfalse(make_adjuster_using_data_spec('client')))

    adjust_until = chained(
        make_adjuster_using_data_spec('until'),  # will return datetime
        make_adjuster_applying_callable(str))  # will transform it to str

    adjust_expires = chained(
        make_adjuster_using_data_spec('expires'),  # will return datetime
        make_adjuster_applying_callable(str))  # will transform it to str

    adjust_target = make_adjuster_using_data_spec('target', on_too_long=trim)

    adjust_type = make_adjuster_using_data_spec('_type')

    # generic internal field adjusters
    adjust__do_not_resolve_fqdn_to_ip = ensure_isinstance(bool)
    adjust__parsed_old = rd_adjuster

    # hi-freq-only internal field adjusters
    adjust__group = unicode_adjuster
    adjust__first_time = chained(
        make_adjuster_using_data_spec('_first_time'),  # will return datetime
        make_adjuster_applying_callable(str))  # will transform it to str

    # bl-only non-internal field adjusters
    adjust_status = make_adjuster_using_data_spec('status')
    adjust_replaces = make_adjuster_using_data_spec('replaces')

    # bl-only internal field adjusters
    adjust__blseriesno = make_adjuster_using_data_spec('_blseriesno')
    adjust__blseriestotal = make_adjuster_using_data_spec('_blseriestotal')
    adjust__blseriesid = make_adjuster_using_data_spec('_blseriesid')
    adjust__bltime = chained(
        make_adjuster_using_data_spec('_bltime'),  # will return datetime
        make_adjuster_applying_callable(str))  # will transform it to str
    adjust__blcurrenttime = chained(
        make_adjuster_using_data_spec('_blcurrenttime'),
        make_adjuster_applying_callable(str))

    # special custom field adjuster
    # (see the comment in the code of n6.utils.enrich.Enricher.enrich())
    adjust_enriched = make_adjuster_using_data_spec('enriched')

    # custom field adjusters
    adjust_adip = make_adjuster_using_data_spec('adip')

    adjust_additional_data = make_adjuster_using_data_spec('additional_data',
                                                           on_too_long=trim)

    adjust_alternative_fqdns = chained(
        make_multiadjuster(),
        make_adjuster_using_data_spec('alternative_fqdns',
                                      on_too_long=trim_domain_seq))

    adjust_description = make_adjuster_using_data_spec('description',
                                                       on_too_long=trim)

    adjust_ip_network = make_adjuster_using_data_spec('ip_network')

    adjust_min_amplification = make_adjuster_using_data_spec(
        'min_amplification', on_too_long=trim)

    adjust_request = make_adjuster_using_data_spec('request', on_too_long=trim)

    adjust_user_agent = make_adjuster_using_data_spec('user_agent',
                                                      on_too_long=trim)

    adjust_sender = make_adjuster_using_data_spec('sender', on_too_long=trim)

    adjust_botid = make_adjuster_using_data_spec('botid', on_too_long=trim)

    adjust_method = make_adjuster_using_data_spec('method', on_too_long=trim)

    adjust_channel = make_adjuster_using_data_spec('channel', on_too_long=trim)

    adjust_first_seen = make_adjuster_using_data_spec('first_seen',
                                                      on_too_long=trim)

    adjust_referer = make_adjuster_using_data_spec('referer', on_too_long=trim)

    adjust_proxy_type = make_adjuster_using_data_spec('proxy_type',
                                                      on_too_long=trim)

    adjust_dns_version = make_adjuster_using_data_spec('dns_version',
                                                       on_too_long=trim)

    adjust_internal_ip = make_adjuster_using_data_spec('internal_ip',
                                                       on_too_long=trim)

    adjust_ipmi_version = make_adjuster_using_data_spec('ipmi_version',
                                                        on_too_long=trim)

    adjust_mac_address = make_adjuster_using_data_spec('mac_address',
                                                       on_too_long=trim)

    adjust_sysdesc = make_adjuster_using_data_spec('sysdesc', on_too_long=trim)

    adjust_version = make_adjuster_using_data_spec('version', on_too_long=trim)

    adjust_dataset = make_adjuster_using_data_spec('dataset', on_too_long=trim)

    adjust_header = make_adjuster_using_data_spec('header', on_too_long=trim)

    adjust_detected_since = make_adjuster_using_data_spec('detected_since',
                                                          on_too_long=trim)

    adjust_handshake = make_adjuster_using_data_spec('handshake',
                                                     on_too_long=trim)

    adjust_cert_length = make_adjuster_using_data_spec('cert_length',
                                                       on_too_long=trim)

    adjust_subject_common_name = make_adjuster_using_data_spec(
        'subject_common_name', on_too_long=trim)

    adjust_visible_databases = make_adjuster_using_data_spec(
        'visible_databases', on_too_long=trim)

    adjust_url_pattern = make_adjuster_using_data_spec('url_pattern')

    adjust_urls_matched = make_adjuster_using_data_spec('urls_matched')

    adjust_username = make_adjuster_using_data_spec('username')

    adjust_email = make_adjuster_using_data_spec('email')

    adjust_facebook_id = make_adjuster_using_data_spec('facebook_id')

    adjust_iban = make_adjuster_using_data_spec('iban')

    adjust_injects = make_adjuster_using_data_spec('injects')

    adjust_phone = make_adjuster_using_data_spec('phone')

    adjust_registrar = make_adjuster_using_data_spec('registrar',
                                                     on_too_long=trim)

    adjust_x509fp_sha1 = make_adjuster_using_data_spec('x509fp_sha1')

    adjust_x509issuer = make_adjuster_using_data_spec('x509issuer')

    adjust_x509subject = make_adjuster_using_data_spec('x509subject')

    adjust_action = make_adjuster_using_data_spec('action')

    # The attribute and related methods are left for the backward
    # compatibility with older data from the MISP sources.
    adjust_misp_eventdid = make_adjuster_using_data_spec('misp_eventdid')

    adjust_misp_attr_uuid = make_adjuster_using_data_spec('misp_attr_uuid')

    adjust_misp_event_uuid = make_adjuster_using_data_spec('misp_event_uuid')

    adjust_product = make_adjuster_using_data_spec('product')

    # custom field used for cooperation with IntelMQ
    adjust_intelmq = make_adjuster_using_data_spec('intelmq')

    adjust_tags = chained(
        make_multiadjuster(),
        make_adjuster_using_data_spec('tags', on_too_long=trim_seq))

    # the `name` adjuster is a bit more complex...
    @preceded_by(unicode_adjuster)
    def adjust_name(self, value):
        """
        Adjust the `name` value; requires `category` to be set already
        (that is why update() processes items in sorted-by-key order).
        """
        category = self.get('category')
        if category is None:
            exc = RuntimeError('cannot set "name" when "category" is not set')
            exc.propagate_it_anyway = True  # let the programmer know it!
            raise exc
        if not value:
            raise ValueError('empty value')
        if category in CATEGORY_TO_NORMALIZED_NAME:
            value = self._get_normalized_name(value, category)
            value = self._adjust_name_according_to_data_spec(value)
            self._check_and_handle_nonstandard_name(value, category)
        else:
            value = self._adjust_name_according_to_data_spec(value)
        return value

    _adjust_name_according_to_data_spec = make_adjuster_using_data_spec(
        'name', on_too_long=trim)

    def _get_normalized_name(self, value, category):
        """Lowercase `value` and map it through the first matching regex rule."""
        value = value.lower()
        first_char = value[0]
        # normalization rules are bucketed by the value's first character,
        # with an 'ELSE' bucket as the fallback
        normalization = NAME_NORMALIZATION.get(first_char,
                                               NAME_NORMALIZATION['ELSE'])
        for regex, normalized_value in normalization:
            if regex.search(value):
                value = normalized_value
                break
        return value

    def _check_and_handle_nonstandard_name(self, value, category):
        """Log `value` if it is not one of the category's standard names."""
        if self.log_nonstandard_names:
            category_std_names = self._get_category_std_names(category)
            if value not in category_std_names:
                self._log_nonstandard_name(value, category)

    def _get_category_std_names(self, category_key):
        """Resolve the standard-name set for a category (following aliases)."""
        while True:
            category_std_names = CATEGORY_TO_NORMALIZED_NAME[category_key]
            if not isinstance(category_std_names, basestring):
                return category_std_names
            # a string value is an alias to another category key
            category_key = category_std_names

    # private class attribute: a cache of already logged non-standard names
    # -- used in _log_nonstandard_name() to avoid cluttering the logs
    _already_logged_nonstandard_names = LimitedDict(maxlen=10000)

    def _log_nonstandard_name(
            self,
            value,
            category,
            _already_logged=_already_logged_nonstandard_names):
        """Log a non-standard name once per (category, value) pair."""
        if (category, value) not in _already_logged:
            category_sublogger = NONSTANDARD_NAMES_LOGGER.getChild(category)
            category_sublogger.warning(ascii_str(value))
            _already_logged[(category, value)] = None

    #
    # Appenders for multiple-adjusted attributes

    # Providing methods: append_<key> -- for example:
    # * append_address(<singular value>)
    def __getattr__(self, name):
        """Synthesize an `append_<key>` method for any multiadjusted key."""
        if name.startswith(self._APPENDER_PREFIX):
            key = name[len(self._APPENDER_PREFIX):]
            adjuster_method_name = self._adjuster_name(key)
            adjuster = getattr(self, adjuster_method_name, None)
            if self._is_multiadjuster(adjuster):

                def appender(singular_value):
                    # append to the current sequence (or start a new one)
                    # and re-assign, so the adjuster re-validates it
                    value_seq = list(self.get(key, []))
                    value_seq.append(singular_value)
                    self[key] = value_seq

                return appender
        raise AttributeError('{.__class__.__name__!r} object has '
                             'no attribute {!r}'.format(self, name))

    @staticmethod
    def _is_multiadjuster(adjuster):
        """Whether the adjuster was built (also) by make_multiadjuster()."""
        factory_names = getattr(adjuster, '_factory_names', frozenset())
        return ('make_multiadjuster' in factory_names)
Ejemplo n.º 6
0
class n6NormalizedData(Base):
    """
    SQLAlchemy declarative model of the `event` table, with columns
    generated from N6DataSpec and classmethod query builders used by
    the data backend API.
    """

    __tablename__ = 'event'

    # columns generated from the data spec; (id, time, ip) form the
    # composite primary key
    _n6columns = dict(N6DataSpec().generate_sqlalchemy_columns(
        id=dict(primary_key=True),
        time=dict(primary_key=True),
        ip=dict(primary_key=True, autoincrement=False)))

    locals().update(_n6columns)  # hack, but a simple one, and it works :)

    clients = relationship(
        n6ClientToEvent,
        # XXX: is it necessary anymore?
        primaryjoin=and_(
            #_n6columns['time'] == n6ClientToEvent.time, ### redundant join condition
            _n6columns['id'] == n6ClientToEvent.id),
        #foreign_keys=[n6ClientToEvent.time, n6ClientToEvent.id],
        # XXX: is it necessary anymore?
        foreign_keys=[n6ClientToEvent.id],
        backref="events")

    def __init__(self, **kwargs):
        """
        Populate the model's columns from **kwargs (normalizing time
        fields to UTC datetimes); unknown leftover kwargs -- except
        'client' and 'type' -- are only logged as a warning.
        """
        if kwargs.get('ip') is None:
            # adding the "no IP" placeholder ('0.0.0.0') which should be
            # transformed into 0 in the database (because `ip` cannot be
            # NULL in our SQL db; and apparently, for unknown reason,  # XXX: <- check whether that's true...
            # IPAddress.process_bind_param() is not called by the
            # SQLAlchemy machinery if the value of `ip` is just None)
            kwargs['ip'] = IPAddress.NONE_STR
        kwargs['time'] = parse_iso_datetime_to_utc(kwargs["time"])
        kwargs['expires'] = (parse_iso_datetime_to_utc(kwargs.get("expires"))
                             if kwargs.get("expires") is not None else None)
        kwargs['modified'] = (parse_iso_datetime_to_utc(kwargs.get("modified"))
                              if kwargs.get("modified") is not None else None)
        for name in self._n6columns:
            setattr(self, name, kwargs.pop(name, None))
        ### XXX: the 'until' field is not converted here to utc datetime!
        ### (see ticket #3113)

        kwargs.pop('client', None)  # here we just ignore this arg if present
        kwargs.pop('type', None)  # here we just ignore this arg if present
        if kwargs:
            LOGGER.warning(
                'n6NormalizedData.__init__() got unexpected **kwargs: %a',
                kwargs)

    @classmethod
    def get_column_mapping_attrs(cls):
        """Return the column attributes, sorted by column name."""
        return [getattr(cls, name) for name in sorted(cls._n6columns)]

    @classmethod
    def key_query(cls, key, value):
        """Build an `IN` condition for the given column and value list."""
        return getattr(cls, key).in_(value)

    @classmethod
    def like_query(cls, key, value):
        """Build an OR of `LIKE %val%` conditions for url.sub/fqdn.sub."""
        mapping = {"url.sub": "url", "fqdn.sub": "fqdn"}
        return or_(*[
            getattr(cls, mapping[key]).like(u"%{}%".format(val))
            for val in value
        ])

    @classmethod
    def url_b64_experimental_query(cls, key, value):
        # *EXPERIMENTAL* (likely to be changed or removed in the future
        # without any warning/deprecation/etc.)
        if key != 'url.b64':
            raise AssertionError("key != 'url.b64' (but == {!a})".format(key))
        db_key = 'url'
        # match either the raw values or their provisional search keys
        url_search_keys = list(map(make_provisional_url_search_key, value))
        return or_(
            getattr(cls, db_key).in_(value),
            getattr(cls, db_key).in_(url_search_keys))

    @classmethod
    def ip_net_query(cls, key, value):
        """Build an OR of min-ip/max-ip range conditions for `ip.net` values."""
        if key != 'ip.net':
            # (`assert` not used because of the check in a unit test...)
            raise AssertionError
        queries = []
        for val in value:
            min_ip, max_ip = ip_network_tuple_to_min_max_ip(val)
            queries.append(and_(cls.ip >= min_ip, cls.ip <= max_ip))
        return or_(*queries)

    @classmethod
    def active_bl_query(cls, key, value):
        """
        Build the condition for active.min/active.max/active.until --
        comparing against `expires` when set, otherwise against `time`.
        """
        assert len(value) == 1
        value = value[0]
        if key == "active.min":
            return or_(cls.expires >= value, cls.time >= value)
        elif key == "active.max":
            return or_(and_(cls.expires.isnot(null()), cls.expires <= value),
                       and_(cls.expires.is_(null()), cls.time <= value))
        elif key == "active.until":
            # like active.max, but with a strict (exclusive) bound
            return or_(and_(cls.expires.isnot(null()), cls.expires < value),
                       and_(cls.expires.is_(null()), cls.time < value))
        else:
            raise AssertionError

    @classmethod
    def modified_query(cls, key, value):
        """Build the condition for modified.min/modified.max/modified.until."""
        assert len(value) == 1
        value = value[0]
        if key == "modified.min":
            return cls.modified >= value
        elif key == "modified.max":
            return cls.modified <= value
        elif key == "modified.until":
            return cls.modified < value
        else:
            raise AssertionError

    def to_raw_result_dict(self):
        """Return the raw result dict for this event (incl. client org ids)."""
        client_org_ids = (c.client for c in self.clients)  # noqa
        return make_raw_result_dict(self, client_org_ids)
Ejemplo n.º 7
0
 def __init__(self, **kwargs):
     """Initialize the Anonymizer: its auth API client and its data spec."""
     LOGGER.info("Anonymizer Start")
     super(Anonymizer, self).__init__(**kwargs)
     self.auth_api = AuthAPI()
     self.data_spec = N6DataSpec()
Ejemplo n.º 8
0
class n6NormalizedData(Base):

    """
    The SQLAlchemy ORM model of a normalized n6 event (the `event` table).

    Most columns are not declared by hand: they are generated from
    N6DataSpec (see `_n6columns` below), so the table schema follows
    the data specification.
    """

    __tablename__ = 'event'

    # mapping of {column name: sqlalchemy Column} generated from the
    # data specification; `id`, `time` and `ip` form the composite
    # primary key (with autoincrement explicitly disabled for `ip`)
    _n6columns = dict(N6DataSpec().generate_sqlalchemy_columns(
        id=dict(primary_key=True),
        time=dict(primary_key=True),
        ip=dict(primary_key=True, autoincrement=False)))

    locals().update(_n6columns)  # hack, but a simple one, and it works :)

    # the n6ClientToEvent records related to this event,
    # joined on the event `id` column only
    clients = relationship(
        n6ClientToEvent,
        primaryjoin=and_(
            #_n6columns['time'] == n6ClientToEvent.time, ### redundant join condition
            _n6columns['id'] == n6ClientToEvent.id),
        #foreign_keys=[n6ClientToEvent.time, n6ClientToEvent.id],
        foreign_keys=[n6ClientToEvent.id],
        backref="events")

    def __init__(self, **kwargs):
        """
        Kwargs:
            Values for the event's columns (see `_n6columns`); column
            names missing from **kwargs are set to None.

        `time` (required), `expires` and `modified` (both optional)
        are parsed with parse_iso_datetime_to_utc(); a missing or None
        `ip` is replaced with the "no IP" placeholder (see the comment
        below); `client` and `type` are accepted but ignored; any
        other unexpected kwargs are only reported with a warning.
        """
        if kwargs.get('ip') is None:
            # adding the "no IP" placeholder ('0.0.0.0') which should be
            # transformed into 0 in the database (because `ip` cannot be
            # NULL in our SQL db; and apparently, for unknown reason,
            # IPAddress.process_bind_param() is not called by the
            # SQLAlchemy machinery if the value of `ip` is just None)
            kwargs['ip'] = IPAddress.NONE_STR
        kwargs['time'] = parse_iso_datetime_to_utc(kwargs["time"])
        kwargs['expires'] = (parse_iso_datetime_to_utc(kwargs.get("expires"))
                             if kwargs.get("expires") is not None else None)
        kwargs['modified'] = (parse_iso_datetime_to_utc(kwargs.get("modified"))
                              if kwargs.get("modified") is not None else None)
        for name in self._n6columns:
            setattr(self, name, kwargs.pop(name, None))
        ### XXX: the 'until' field is not converted here to utc datetime!
        ### (see ticket #3113)

        kwargs.pop('client', None)  # here we just ignore this arg if present
        kwargs.pop('type', None)  # here we just ignore this arg if present
        if kwargs:
            LOGGER.warning(
                'n6NormalizedData.__init__() got unexpected **kwargs: %r',
                kwargs)

    @classmethod
    def key_query(cls, key, value):
        """
        Build the SQL condition: the `key` column's value is
        among the given `value` list.
        """
        return getattr(cls, key).in_(value)

    @classmethod
    def like_query(cls, key, value):
        """
        Build the SQL condition for a substring ('url.sub'/'fqdn.sub')
        query parameter: an SQL LIKE '%<val>%' match on the `url`/`fqdn`
        column, OR-ed over all the given values.
        """
        mapping = {"url.sub": "url", "fqdn.sub": "fqdn"}
        return or_(*[
            getattr(cls, mapping[key]).like("%{}%".format(val))
            for val in value
        ])

    @classmethod
    def ip_net_query(cls, key, value):
        """
        Build the SQL condition for the 'ip.net' query parameter:
        each network in `value` becomes an inclusive (min IP, max IP)
        range match on the `ip` column, OR-ed over all networks.
        """
        if key != 'ip.net':
            # (`assert` not used because of the check in a unit test...)
            raise AssertionError
        queries = []
        for val in value:
            min_ip, max_ip = ip_network_tuple_to_min_max_ip(val)
            queries.append(and_(cls.ip >= min_ip, cls.ip <= max_ip))
        return or_(*queries)

    @classmethod
    def active_bl_query(cls, key, value):
        """
        Build the SQL condition for an 'active.min'/'active.max'/
        'active.until' query parameter (`value` must be a one-element
        list).  When `expires` is NULL, the `time` column is used in
        its place for the max/until variants; 'until' is the strict
        (exclusive) counterpart of 'max'.
        """
        assert len(value) == 1
        value = value[0]
        if key == "active.min":
            return or_(cls.expires >= value, cls.time >= value)
        elif key == "active.max":
            return or_(and_(cls.expires.isnot(null()), cls.expires <= value),
                       and_(cls.expires.is_(null()), cls.time <= value))
        elif key == "active.until":
            return or_(and_(cls.expires.isnot(null()), cls.expires < value),
                       and_(cls.expires.is_(null()), cls.time < value))
        else:
            raise AssertionError

    @classmethod
    def modified_query(cls, key, value):
        """
        Build the SQL condition for a 'modified.min'/'modified.max'/
        'modified.until' query parameter (`value` must be a one-element
        list); 'until' is the strict (exclusive) counterpart of 'max'.
        """
        assert len(value) == 1
        value = value[0]
        if key == "modified.min":
            return cls.modified >= value
        elif key == "modified.max":
            return cls.modified <= value
        elif key == "modified.until":
            return cls.modified < value
        else:
            raise AssertionError

    # names of columns whose `type` attribute
    # is IPAddress or its subclass/instance
    # (note: `types.ClassType` covers Python-2 old-style classes)
    _ip_column_names = tuple(
        sorted(name for name, column in _n6columns.iteritems()
               if isinstance(column.type, IPAddress) or (
                   isinstance(column.type, (type, types.ClassType))
                   and issubclass(column.type, IPAddress))))

    # possible "no IP" placeholder values (such that they
    # cause recording `ip` in db as 0) -- excluding None
    _no_ip_placeholders = frozenset([IPAddress.NONE_STR, IPAddress.NONE, -1])

    def to_raw_result_dict(
            self,
            # for faster (local) access:
            _getattr=getattr,
            _ip_column_names=_ip_column_names,
            _no_ip_placeholders=_no_ip_placeholders):
        """
        Return this event as a dict of {column name: value}, skipping
        None values and "no IP" placeholders, with the org ids of the
        related clients added under the 'client' key (if any).
        """

        # make the dict, skipping all None values
        columns = self.__table__.columns
        result_dict = {
            name: value
            for name, value in [(c.name, _getattr(self, c.name))
                                for c in columns] if value is not None
        }

        # get rid of any "no IP" placeholders (note: probably, this is not
        # needed when the instance has been obtained by a DB operation so
        # IPAddress.process_result_value() was used by the SQLAlchemy
        # machinery)
        for ip_col_name in _ip_column_names:
            value = result_dict.get(ip_col_name)
            if value in _no_ip_placeholders:
                del result_dict[ip_col_name]

        # set the 'client' item
        client = [c.client for c in self.clients]
        if client:
            result_dict['client'] = client

        return result_dict
# Ejemplo n.º 9
 def setUp(self):
     self.ds = N6DataSpec()