Example #1
 def validate_bl_headers(self, message):
     if ('_bl-series-id' not in message or '_bl-series-total' not in message
             or '_bl-series-no' not in message or '_bl-time' not in message
             or 'expires' not in message):
         raise n6QueueProcessingException(
             "Invalid message for a black list")
     try:
         parse_iso_datetime_to_utc(message["expires"])
     except ValueError:
         raise n6QueueProcessingException("Invalid expiry date")
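
A minimal standalone sketch of the same required-header check, using only the standard library (the field names are taken from the example; check_bl_message and the use of datetime.fromisoformat are illustrative stand-ins for the n6 helpers):

    import datetime

    REQUIRED_BL_FIELDS = ('_bl-series-id', '_bl-series-total',
                          '_bl-series-no', '_bl-time', 'expires')

    def check_bl_message(message):
        # Reject the message unless every blacklist header is present.
        missing = [f for f in REQUIRED_BL_FIELDS if f not in message]
        if missing:
            raise ValueError('missing blacklist fields: {}'.format(missing))
        # Reject the message if the expiry date does not parse as ISO 8601.
        try:
            datetime.datetime.fromisoformat(message['expires'])
        except ValueError:
            raise ValueError('invalid expiry date: {!r}'.format(message['expires']))

    check_bl_message({'_bl-series-id': 'abc', '_bl-series-total': 3,
                      '_bl-series-no': 1, '_bl-time': '2021-01-01 12:00:00',
                      'expires': '2021-01-08 12:00:00'})  # passes silently
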
Example #2
 def currdb(self, value):
     value_str = str(value)
     if len(value_str) >= 64 or len(value_str) < 1:
         LOGGER.error('too long db name in mongo, max 63 chars, min 1 char: %a', value_str)
         raise n6QueueProcessingException("too long db name in mongo, max 63 chars, min 1 char"
                                          ": {0}".format(value_str))
     for forbidden_char in FORBIDDEN_DB_NAME_CHAR:
         if forbidden_char in value_str:
             LOGGER.error('name of db: %a, contains forbidden_char: %a', value_str,
                          forbidden_char)
             raise n6QueueProcessingException("name of db: {}, "
                                              "contains forbidden_char: {}".format(value_str, forbidden_char))
     self._currdb = value
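
For context, a self-contained version of the same length/character validation; the forbidden-character set below is an assumption based on MongoDB's documented naming restrictions, not necessarily the project's FORBIDDEN_DB_NAME_CHAR:

    # Assumed stand-in for FORBIDDEN_DB_NAME_CHAR (MongoDB forbids
    # these characters in database names, per its documentation).
    FORBIDDEN_DB_NAME_CHARS = set('/\\. "$')

    def validate_db_name(name):
        name_str = str(name)
        # MongoDB database names are limited to 63 characters.
        if not 1 <= len(name_str) <= 63:
            raise ValueError('db name must be 1-63 chars: {!r}'.format(name_str))
        bad = FORBIDDEN_DB_NAME_CHARS.intersection(name_str)
        if bad:
            raise ValueError('db name {!r} contains forbidden chars: {}'
                             .format(name_str, sorted(bad)))
        return name_str

    validate_db_name('n6archive')   # OK
    # validate_db_name('bad.name')  # would raise ValueError
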
Example #3
 def input_callback(self, routing_key, body, properties):
     record_dict = RecordDict.from_json(body)
     with self.setting_error_event_info(record_dict):
         data = dict(record_dict) ## FIXME?: maybe it could be just the record_dict?
         if '_group' not in data:
             raise n6QueueProcessingException("Hi-frequency source missing '_group' field.")
         self.process_event(data)
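
A reduced version of the same guard, runnable on its own (json.loads stands in for RecordDict.from_json, which is n6-specific):

    import json

    def handle_body(body):
        # Parse the message body and require the '_group' field
        # before handing the event over for processing.
        data = json.loads(body)
        if '_group' not in data:
            raise ValueError("Hi-frequency source missing '_group' field.")
        return data

    print(handle_body('{"_group": "g1", "source": "s"}'))
    # {'_group': 'g1', 'source': 's'}
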
Example #4
 def currcoll(self, value):
     if value is None:
         self._currcoll = value
         return
     value_str = str(value)
     m = re.match(first_letter_collection_name, value_str)
     if not m or len(value_str) < 1:
         raise n6QueueProcessingException('Collection names must begin with an underscore '
                                          'or a letter, must not be empty, and must not '
                                          'begin with the "system." prefix '
                                          '(reserved for internal use).')
     for forbidden_char in FORBIDDEN_COLLECTION_NAME_CHAR:
         if forbidden_char in value_str:
             LOGGER.error('name of collection: %a, contains forbidden_char: %a', value_str,
                          forbidden_char)
             raise n6QueueProcessingException("name of collection: {0}, "
                                              "contains forbidden_char: {1}".
                                              format(value_str, forbidden_char))
     self._currcoll = value
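
The first_letter_collection_name pattern itself is not shown in the example; a plausible reconstruction (an assumption, derived only from the rules stated in the error message) could look like:

    import re

    # Hypothetical pattern: the name must start with a letter or an
    # underscore and must not start with the reserved "system." prefix.
    first_letter_collection_name = r'(?!system\.)[A-Za-z_]'

    for name in ('events', '_tmp', 'system.indexes', '1bad'):
        ok = re.match(first_letter_collection_name, name) is not None
        print(name, '->', 'valid' if ok else 'invalid')
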
Example #5
    def process_event(self, data):
        event_time = parse_iso_datetime_to_utc(data['time'])
        event = self.groups.get(data['_group'])
        if self.time is None:
            self.time = event_time
        if event_time + self.time_tolerance < self.time:
            if event is None or event.first > event_time:
                LOGGER.error('Event out of order. Ignoring. Data: %s', data)
                raise n6QueueProcessingException('Event out of order.')
            else:
                LOGGER.info('Event out of order, but not older than group\'s first event, '
                            'so it will be added to existing aggregate group. Data: %s', data)
                event.until = max(event.until, event_time)
                event.count += 1  # XXX: see ticket #6243
                return False

        if event is None:
            if event_time < self.time:
                # unordered event, self.buffer may contain suppressed event
                LOGGER.debug("Unordered event of the '%s' group, '%s' source within time "
                             "tolerance. Check and update buffer.", data['_group'], data['source'])
                buffered_event = self.buffer.get(data['_group'])
                if buffered_event is not None:
                    buffered_event.count += 1  # XXX: see ticket #6243
                    self.buffer[data['_group']] = buffered_event
                    return False
            # Event not seen before - add new event to group
            LOGGER.debug("A new group '%s' for '%s' source began to be aggregated, "
                         "first event is being generated.", data['_group'], data['source'])
            self.groups[data['_group']] = HiFreqEventData(data)  # XXX: see ticket #6243
            self.update_time(parse_iso_datetime_to_utc(data['time']))
            return True

        if (event_time > event.until + datetime.timedelta(hours=AGGREGATE_WAIT) or
                event_time.date() > self.time.date()):
            LOGGER.debug("A suppressed event is generated for the '%s' group of "
                         "'%s' source due to passing of %s hours between events.",
                         data['_group'], data['source'], AGGREGATE_WAIT)
            # 24 hour aggregation or AGGREGATE_WAIT time passed between events in group
            del self.groups[data['_group']]
            self.groups[data['_group']] = HiFreqEventData(data)  # XXX: see ticket #6243
            self.buffer[data['_group']] = event
            self.update_time(parse_iso_datetime_to_utc(data['time']))
            return True

        # Event for existing group and still aggregating
        LOGGER.debug("Event is being aggregated in the '%s' group of the '%s' source.",
                     data['_group'], data['source'])
        event.count += 1  # XXX: see ticket #6243
        if event_time > event.until:
            event.until = event_time
        del self.groups[data['_group']]
        self.groups[data['_group']] = event
        self.update_time(parse_iso_datetime_to_utc(data['time']))
        return False
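
A stripped-down illustration of the ordering rule applied above: an event may arrive up to time_tolerance behind the newest event seen, and anything older is rejected (the class below is a toy model, not the n6 aggregator):

    import datetime

    class ToyAggregator:
        def __init__(self, tolerance_seconds=600):
            self.time = None  # newest event time seen so far
            self.time_tolerance = datetime.timedelta(seconds=tolerance_seconds)

        def accept(self, event_time):
            if self.time is None or event_time > self.time:
                self.time = event_time
                return True
            # Late events are tolerated only within the configured window.
            return event_time + self.time_tolerance >= self.time

    agg = ToyAggregator()
    t0 = datetime.datetime(2021, 1, 1, 12, 0, 0)
    print(agg.accept(t0))                                  # True (first event)
    print(agg.accept(t0 - datetime.timedelta(minutes=5)))  # True (within tolerance)
    print(agg.accept(t0 - datetime.timedelta(hours=2)))    # False (out of order)
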
Example #6
 def _process_input(self, data):
     self.validate_bl_headers(data)
     if not self.state.is_message_valid(data):
         raise n6QueueProcessingException(
             "Invalid message for a series: {}".format(data))
     self.state.update_series(data)
     self.set_timeout(data["source"], data["_bl-series-id"])
     self.process_event(data)
     if self.state.is_series_complete(data["_bl-series-id"]):
         LOGGER.info("Finalizing series: %r", data["_bl-series-id"])
         self.finalize_series(data["_bl-series-id"], data["source"])
Example #7
 def get_event_key(self, data):
     if data.get("url") is not None:
         return data.get("url")
     elif data.get("fqdn") is not None:
         return data.get("fqdn")
     elif data.get("address") is not None:
         ips = tuple(
             sorted([str(addr["ip"]) for addr in data.get("address")]))
         return ips
     else:
         raise n6QueueProcessingException(
             'Unable to determine event key for source: {}. Event '
             'must have at least one of `url`, `fqdn`, '
             '`address`, data: {}'.format(data['source'], data))
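
The key-derivation precedence (url first, then fqdn, then the sorted tuple of IPs) can be shown in isolation; the field layout mirrors the example, everything else is illustrative:

    def event_key(data):
        # Prefer url, then fqdn; fall back to a sorted tuple of IPs,
        # which makes the key independent of address ordering.
        if data.get('url') is not None:
            return data['url']
        if data.get('fqdn') is not None:
            return data['fqdn']
        if data.get('address') is not None:
            return tuple(sorted(str(addr['ip']) for addr in data['address']))
        raise ValueError('event has none of url/fqdn/address: {}'.format(data))

    print(event_key({'url': 'http://example.com/x'}))    # 'http://example.com/x'
    print(event_key({'address': [{'ip': '10.0.0.2'},
                                 {'ip': '10.0.0.1'}]}))  # ('10.0.0.1', '10.0.0.2')
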
Example #8
    def input_callback(self, routing_key, body, properties):
        """
        Channel callback method.

        Args:
            `routing_key` : routing_key from AMQP.
            `body` : message body from AMQP.
            `properties` : properties from AMQP; required for further processing.

        Raises:
            `n6QueueProcessingException`:
                From JsonStream/FileGridfs or when message type is unknown.
            Other exceptions (e.g. pymongo.errors.DuplicateKeyError).
        """
        # Headers are required for further processing.
        if properties.headers is None:
            properties.headers = {}

        # Suspend writing to Mongo if header is set to False
        try:
            writing = properties.headers['write_to_mongo']
        except KeyError:
            writing = True

        LOGGER.debug("Received properties: %a", properties)
        LOGGER.debug("Received headers: %a", properties.headers)
        # set collection name
        self.manager.currcoll = routing_key

        # Add to archive
        if writing:
            type_ = properties.type
            payload = (body.encode('utf-8') if isinstance(body, str)
                       else body)

            if type_ == 'stream':
                s = JsonStream(dbmanager=self.manager, properties=properties)
                s.preparations_data(payload)
            elif type_ == 'file':
                s = FileGridfs(dbmanager=self.manager, properties=properties)
                s.preparations_data(payload)
            elif type_ == 'blacklist':
                s = BlackListCompacter(dbmanager=self.manager, properties=properties)
                s.preparations_data(payload)
                s.start()
            else:
                raise n6QueueProcessingException(
                    "Unknown message type: {0}, source: {1}".format(type_, routing_key))
Example #9
    def process_event(self, data):
        event_time = parse_iso_datetime_to_utc(data['_bl-time'])

        if self.time is None:
            self.time = event_time
        if event_time < self.time:
            LOGGER.error('Event out of order. Ignoring.\nData: %s', data)
            raise n6QueueProcessingException(
                'Event belongs to blacklist'
                ' older than the last one processed.')

        event_key = self.get_event_key(data)
        event = self.blacklist.get(event_key)

        if event is None:
            # new bl event
            new_event = BlackListData(data)
            new_event.flag = data.get("_bl-series-id")
            self.blacklist[event_key] = new_event
            return 'bl-new', new_event.payload
        else:
            # existing
            ips_old = event.ip
            ips_new = ([x["ip"] for x in data["address"]]
                       if data.get("address") is not None else [])
            if self._are_ips_different(ips_old, ips_new):
                data["replaces"] = event.id
                new_event = BlackListData(data)
                new_event.flag = data.get("_bl-series-id")
                self.blacklist[event_key] = new_event
                return "bl-change", new_event.payload
            elif parse_iso_datetime_to_utc(
                    data.get("expires")) != event.expires:
                event.expires = parse_iso_datetime_to_utc(data.get("expires"))
                event.flag = data.get("_bl-series-id")
                event.update_payload({"expires": data.get("expires")})
                self.blacklist[event_key] = event
                return "bl-update", event.payload
            else:
                event.flag = data.get("_bl-series-id")
                self.blacklist[event_key] = event
                return None, event.payload
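
The branches above yield one of three messages ('bl-new', 'bl-change', 'bl-update') or none; the IP comparison that picks the 'bl-change' branch reduces to a set check (a guess at what _are_ips_different might do, since its body is not shown):

    def are_ips_different(ips_old, ips_new):
        # Order is irrelevant: compare the two IP lists as sets.
        return set(ips_old) != set(ips_new)

    print(are_ips_different(['10.0.0.1', '10.0.0.2'],
                            ['10.0.0.2', '10.0.0.1']))  # False (same set)
    print(are_ips_different(['10.0.0.1'], []))          # True
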
Example #10
    def write(self):
        """
        Write data to db as json store.

        Raises:
            `UnicodeDecodeError`: when the collection name or the database name is not allowed.
            `pymongo.errors.AutoReconnect`: on a problem with the connection to Mongo.
            `n6QueueProcessingException`: when any other exception is caught.
        """
        LOGGER.debug('Stream inserting...')
        LOGGER.debug('HEADER: %a', self.headers)
        self.data['data'] = self.raw
        self.data['uploadDate'] = datetime.datetime.utcfromtimestamp(time.time())
        self.data.update(self.headers['meta'])

        # for backup msg
        self.dbm.backup_msg_data = self.data
        self.dbm.backup_msg_headers = self.headers

        try:
            try:
                if self.dbm.currcoll not in self.dbm.indexes_store:
                    self.create_indexes(self.dbm.get_conn_collection())

                self.dbm.get_conn_collection().insert(self.data)
            except pymongo.errors.OperationFailure as exc:
                if exc.code == INSUFFICIENT_DISK_SPACE_CODE:
                    sys.exit(ascii(exc))
                raise
        except pymongo.errors.AutoReconnect as exc:
            LOGGER.error('%a', exc)
            raise
        except UnicodeDecodeError as exc:
            LOGGER.error("collection name or the database name is not allowed: %a, %a, %a",
                         self.dbm.currdb, self.dbm.currcoll, exc)
            raise
        except Exception as exc:
            LOGGER.error('save data in mongodb FAILED, header: %a , exception: %a',
                         self.headers, exc)
            raise n6QueueProcessingException('save data in mongodb FAILED')
        else:
            LOGGER.debug('Insert done.')
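
The nested try in this method isolates one special case: an OperationFailure whose code signals insufficient disk space terminates the process instead of propagating. The same pattern in isolation (the toy exception class and the error-code value are assumptions, standing in for pymongo's):

    import sys

    INSUFFICIENT_DISK_SPACE_CODE = 17035  # assumed value, for illustration

    class OperationFailure(Exception):
        """Toy stand-in for pymongo.errors.OperationFailure."""
        def __init__(self, message, code):
            super().__init__(message)
            self.code = code

    def insert_with_disk_guard(insert):
        try:
            insert()
        except OperationFailure as exc:
            # Out of disk space: stop the process rather than retrying.
            if exc.code == INSUFFICIENT_DISK_SPACE_CODE:
                sys.exit(ascii(exc))
            raise  # any other operation failure propagates normally

    def failing_insert():
        raise OperationFailure('duplicate key', code=11000)

    try:
        insert_with_disk_guard(failing_insert)
    except OperationFailure as exc:
        print('propagated:', exc, exc.code)  # propagated: duplicate key 11000
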
Example #11
    def write(self):
        """
        Write data to db as GridFS store.

        Raises:
            `UnicodeDecodeError`: when the collection name or the database name is not allowed.
            `pymongo.errors.AutoReconnect`: on a problem with the connection to Mongo.
            `n6QueueProcessingException`: when any other exception is caught.
        """
        LOGGER.debug('Binary inserting...')
        LOGGER.debug('HEADER: %a', self.headers)

        # for backup msg
        self.dbm.backup_msg_data = self.data
        self.dbm.backup_msg_headers = self.headers

        try:
            try:
                self.dbm.get_conn_gridfs()
                coll = self.dbm.get_conn_collection().files
                if coll.name not in self.dbm.indexes_store:
                    self.create_indexes(coll)
                self.dbm.put_file_to_db(self.data, **self.headers['meta'])
            except pymongo.errors.OperationFailure as exc:
                if exc.code == INSUFFICIENT_DISK_SPACE_CODE:
                    sys.exit(ascii(exc))
                raise
        except pymongo.errors.AutoReconnect as exc:
            LOGGER.error('%a', exc)
            raise
        except UnicodeDecodeError as exc:
            LOGGER.error("collection name or the database name is not allowed: %a, %a, %a",
                         self.dbm.currdb, self.dbm.currcoll, exc)
            raise
        except Exception as exc:
            LOGGER.error('save data in mongodb FAILED, header: %a , exception: %a',
                         self.headers, exc)
            raise n6QueueProcessingException('save data in mongodb FAILED')
        else:
            LOGGER.debug('Saving data with meta key done.')
Example #12
    def save_file_in_db(self, marker, data):
        """
        Save file in DB.

        Args:
            `marker`: int; 0 - the init file, 1, 2, ..., self.period - the diff files.
            `data`: the file.

        Returns: None

        Raises:
            `pymongo.errors.AutoReconnect`: on a problem with the connection to Mongo.
            `n6QueueProcessingException`: when any other exception is caught.
        """
        # The marker indicates the beginning of a sequence of file patches
        # of length `period`. Overriding these attributes results in a new
        # sequence of differences (diffs); this override is essential!
        self.headers["meta"]["marker"] = marker
        self.headers["meta"]["prev_id"] = self.prev_id
        # for backup_msg
        self.data = data
        self.dbm.backup_msg_data = self.data
        self.dbm.backup_msg_headers = self.headers
        try:
            try:
                self.dbm.put_file_to_db(data, **self.headers["meta"])
            except pymongo.errors.OperationFailure as exc:
                if exc.code == INSUFFICIENT_DISK_SPACE_CODE:
                    sys.exit(ascii(exc))
                raise
        except pymongo.errors.AutoReconnect as exc:
            LOGGER.error('%a', exc)
            raise
        except Exception as exc:
            LOGGER.error('save file in mongodb FAILED, header: %a , exception: %a',
                         self.headers, exc)
            raise n6QueueProcessingException('save file in mongodb FAILED')
        else:
            LOGGER.debug('save file in db marker: %a', marker)
Example #13
    def __init__(self, dbmanager=None, properties=None, **kwargs):
        self.dbm = dbmanager
        self.data = {}
        self.raw = None
        self.content_type = None
        self.headers = {}
        if properties:
            if properties.headers:
                self.headers = properties.headers.copy()
                if "meta" in properties.headers:
                    self.headers['meta'].update(properties.headers['meta'])
                else:
                    # No "meta" key in headers: add an empty one
                    # (other data, e.g. rid, received, contentType,
                    # is added to it below).
                    self.headers["meta"] = {}
                    LOGGER.debug('No "meta" in headers: %a', properties.headers)
            else:
                # Empty headers: add an empty "meta" key (other data,
                # e.g. rid, received, contentType, is added to it below).
                self.headers["meta"] = {}
                LOGGER.debug('Empty headers: %a', properties.headers)

            if properties.type in ('file', 'blacklist'):
                # content_type is required for the 'file' and 'blacklist' types
                try:
                    self.headers['meta'].update({'contentType': properties.content_type})
                except AttributeError as exc:
                    LOGGER.error('No "content_type" in properties: %a', properties.headers)
                    raise
            # always add
            self.headers['meta'].update({'rid': properties.message_id,
                                         'received': self.get_time_created(properties.timestamp)})
        else:
            # Empty properties make further processing impossible.
            raise n6QueueProcessingException("empty properties, cannot process message"
                                             ": {0}".format(properties))