def __connect_pipelines(self):
    """Instantiate and connect the source and destination pipelines.

    Either side is skipped entirely when the corresponding queue
    configuration is empty.
    """
    log = self.logger
    if self.__source_queues:
        log.debug("Loading source pipeline and queue %r.", self.__source_queues)
        self.__source_pipeline = PipelineFactory.create(self.parameters,
                                                        logger=log,
                                                        direction="source",
                                                        queues=self.__source_queues)
        self.__source_pipeline.connect()
        log.debug("Connected to source queue.")
    if self.__destination_queues:
        log.debug("Loading destination pipeline and queues %r.",
                  self.__destination_queues)
        self.__destination_pipeline = PipelineFactory.create(self.parameters,
                                                             logger=log,
                                                             direction="destination",
                                                             queues=self.__destination_queues)
        self.__destination_pipeline.connect()
        log.debug("Connected to destination queues.")
    else:
        log.debug("No destination queues to load.")
    log.info("Pipeline ready.")
def clear_queue(self, queue):
    """
    Clears an existing queue.

    First checks if the queue does exist in the pipeline configuration.

    Returns 'success', 'not-found' or 'error'.
    """
    logger.info("Clearing queue {}".format(queue))
    source_queues = set()
    destination_queues = set()
    # NOTE(review): the attribute is spelled 'pipepline_configuration' in this
    # version of the class — keep the spelling so the lookup still resolves.
    # Bot ids (the dict keys) are not needed here, so iterate values only.
    for value in self.pipepline_configuration.values():
        if 'source-queue' in value:
            source_queues.add(value['source-queue'])
        if 'destination-queues' in value:
            destination_queues.update(value['destination-queues'])
    pipeline = PipelineFactory.create(self.parameters)
    pipeline.set_queues(source_queues, "source")
    pipeline.connect()
    queues = source_queues.union(destination_queues)
    if queue not in queues:
        logger.error("Queue {} does not exist!".format(queue))
        return 'not-found'
    try:
        pipeline.clear_queue(queue)
        logger.info("Successfully cleared queue {}".format(queue))
        return 'success'
    except Exception:
        logger.error("Error while clearing queue {}:\n{}"
                     "".format(queue, traceback.format_exc()))
        return 'error'
def list_queues(self):
    """Count the queued messages of all configured queues.

    Returns a dict mapping bot id to its 'source_queue'
    (name, count) tuple and 'destination_queues' list of
    (name, count) tuples.
    """
    source_queues = set()
    destination_queues = set()
    # NOTE(review): attribute is spelled 'pipepline_configuration' in this
    # version — keep the spelling. Bot ids are unused in this pass, so
    # iterate over the values only (the unused 'key' variable was dropped).
    for value in self.pipepline_configuration.values():
        if 'source-queue' in value:
            source_queues.add(value['source-queue'])
        if 'destination-queues' in value:
            destination_queues.update(value['destination-queues'])
    pipeline = PipelineFactory.create(self.parameters)
    pipeline.set_queues(source_queues, "source")
    pipeline.connect()
    queues = source_queues.union(destination_queues)
    counters = pipeline.count_queued_messages(queues)
    log_list_queues(counters)
    return_dict = dict()
    for bot_id, info in self.pipepline_configuration.items():
        return_dict[bot_id] = dict()
        if 'source-queue' in info:
            return_dict[bot_id]['source_queue'] = (
                info['source-queue'], counters[info['source-queue']])
        if 'destination-queues' in info:
            return_dict[bot_id]['destination_queues'] = list()
            for dest_queue in info['destination-queues']:
                return_dict[bot_id]['destination_queues'].append(
                    (dest_queue, counters[dest_queue]))
    return return_dict
def list_queues(self):
    """Report every bot's queues together with their current message counts."""
    _, _, _, all_queues = self.get_queues()
    pipe = PipelineFactory.create(self.parameters)
    pipe.set_queues(None, "source")
    pipe.connect()
    counts = pipe.count_queued_messages(*all_queues)
    log_list_queues(counts)

    queues_by_bot = {}
    for bot_id, bot_info in self.pipeline_configuration.items():
        entry = {}
        if 'source-queue' in bot_info:
            src = bot_info['source-queue']
            entry['source_queue'] = (src, counts[src])
            entry['internal_queue'] = counts[src + '-internal']
        if 'destination-queues' in bot_info:
            entry['destination_queues'] = [(dst, counts[dst])
                                           for dst in bot_info['destination-queues']]
        queues_by_bot[bot_id] = entry
    return queues_by_bot
def clear_queue(self, queue):
    """
    Clears an existing queue.

    First checks if the queue does exist in the pipeline configuration.

    Returns 'success', 'not-found' or 'error'.
    """
    logger.info("Clearing queue %s", queue)
    queues = set()
    # Collect every queue name known to the pipeline configuration; the bot
    # ids (dict keys) are irrelevant here, so iterate over the values only.
    for value in self.pipeline_configuration.values():
        if 'source-queue' in value:
            queues.add(value['source-queue'])
            queues.add(value['source-queue'] + '-internal')
        if 'destination-queues' in value:
            queues.update(value['destination-queues'])
    pipeline = PipelineFactory.create(self.parameters)
    pipeline.set_queues(None, "source")
    pipeline.connect()
    if queue not in queues:
        logger.error("Queue %s does not exist!", queue)
        return 'not-found'
    try:
        pipeline.clear_queue(queue)
        logger.info("Successfully cleared queue %s.", queue)
        return 'success'
    except Exception:  # pragma: no cover
        logger.exception("Error while clearing queue %s.", queue)
        return 'error'
def clear_queue(self, queue):
    """
    Clears an existing queue.

    First checks if the queue does exist in the pipeline configuration.

    Returns 'success', 'not-found' or 'error'.
    """
    logger.info("Clearing queue {}".format(queue))
    queues = set()
    # Bot ids (the dict keys) are not needed here; iterate values only.
    for value in self.pipeline_configuration.values():
        if 'source-queue' in value:
            queues.add(value['source-queue'])
            queues.add(value['source-queue'] + '-internal')
        if 'destination-queues' in value:
            queues.update(value['destination-queues'])
    pipeline = PipelineFactory.create(self.parameters)
    pipeline.set_queues(queues, "source")
    pipeline.connect()
    if queue not in queues:
        logger.error("Queue {} does not exist!".format(queue))
        return 'not-found'
    try:
        pipeline.clear_queue(queue)
        logger.info("Successfully cleared queue {}".format(queue))
        return 'success'
    except Exception:
        logger.error("Error while clearing queue {}:\n{}"
                     "".format(queue, traceback.format_exc()))
        return 'error'
def list_queues(self):
    """List all queues of all configured bots with their message counts."""
    source_queues, _, _, all_queues = self.get_queues()
    pipe = PipelineFactory.create(self.parameters)
    pipe.set_queues(source_queues, "source")
    pipe.connect()
    counts = pipe.count_queued_messages(*all_queues)
    log_list_queues(counts)

    result = {}
    for bot_id, info in self.pipeline_configuration.items():
        entry = {}
        if 'source-queue' in info:
            src = info['source-queue']
            entry['source_queue'] = (src, counts[src])
            entry['internal_queue'] = counts[src + '-internal']
        if 'destination-queues' in info:
            entry['destination_queues'] = [(dst, counts[dst])
                                           for dst in info['destination-queues']]
        result[bot_id] = entry
    return result
def clear_queue(self, queue):
    """
    Clears an existing queue.

    First checks if the queue does exist in the pipeline configuration.

    Returns a (exit code, status) tuple: (0, 'success'),
    (2, 'not-found') or (1, 'error'). Log output is suppressed unless
    RETURN_TYPE is 'text' (except for unexpected clearing errors).
    """
    if RETURN_TYPE == 'text':
        logger.info("Clearing queue %s.", queue)
    queues = set()
    # Bot ids (the dict keys) are not needed here; iterate values only.
    for value in self.pipeline_configuration.values():
        if 'source-queue' in value:
            queues.add(value['source-queue'])
            queues.add(value['source-queue'] + '-internal')
        if 'destination-queues' in value:
            queues.update(value['destination-queues'])
    pipeline = PipelineFactory.create(self.parameters)
    pipeline.set_queues(None, "source")
    pipeline.connect()
    if queue not in queues:
        if RETURN_TYPE == 'text':
            logger.error("Queue %s does not exist!", queue)
        return 2, 'not-found'
    try:
        pipeline.clear_queue(queue)
        if RETURN_TYPE == 'text':
            logger.info("Successfully cleared queue %s.", queue)
        return 0, 'success'
    except Exception:  # pragma: no cover
        logger.exception("Error while clearing queue %s.", queue)
        return 1, 'error'
def __connect_pipelines(self):
    """Create, configure and connect the source and destination pipelines."""
    debug = self.logger.debug

    debug("Loading source pipeline.")
    self.__source_pipeline = PipelineFactory.create(self.parameters)
    debug("Loading source queue.")
    self.__source_pipeline.set_queues(self.__source_queues, "source")
    debug("Source queue loaded {}.".format(self.__source_queues))
    self.__source_pipeline.connect()
    debug("Connected to source queue.")

    debug("Loading destination pipeline.")
    self.__destination_pipeline = PipelineFactory.create(self.parameters)
    debug("Loading destination queues.")
    self.__destination_pipeline.set_queues(self.__destination_queues,
                                           "destination")
    debug("Destination queues loaded {}.".format(self.__destination_queues))
    self.__destination_pipeline.connect()
    debug("Connected to destination queues.")

    self.logger.info("Pipeline ready.")
def __connect_pipelines(self):
    """Create and connect the source pipeline and, when destination
    queues are configured, the destination pipeline.
    """
    # Lazy %-style logger arguments instead of eager '%' formatting: the
    # queue lists are only formatted when DEBUG logging is enabled.
    self.logger.debug("Loading source pipeline and queue %r.",
                      self.__source_queues)
    self.__source_pipeline = PipelineFactory.create(self.parameters)
    self.__source_pipeline.set_queues(self.__source_queues, "source")
    self.__source_pipeline.connect()
    self.logger.debug("Connected to source queue.")
    if self.__destination_queues:
        self.logger.debug("Loading destination pipeline and queues %r.",
                          self.__destination_queues)
        self.__destination_pipeline = PipelineFactory.create(self.parameters)
        self.__destination_pipeline.set_queues(self.__destination_queues,
                                               "destination")
        self.__destination_pipeline.connect()
        self.logger.debug("Connected to destination queues.")
    else:
        self.logger.debug("No destination queues to load.")
    self.logger.info("Pipeline ready.")
def connect_pipelines(self):
    """Set up and connect both pipelines, logging each step at INFO level."""
    info = self.logger.info

    info("Loading source pipeline.")
    self.source_pipeline = PipelineFactory.create(self.parameters)
    info("Loading source queue.")
    self.source_pipeline.set_queues(self.source_queues, "source")
    info("Source queue loaded {}.".format(self.source_queues))
    self.source_pipeline.connect()
    info("Connected to source queue.")

    info("Loading destination pipeline.")
    self.destination_pipeline = PipelineFactory.create(self.parameters)
    info("Loading destination queues.")
    self.destination_pipeline.set_queues(self.destination_queues, "destination")
    info("Destination queues loaded {}.".format(self.destination_queues))
    self.destination_pipeline.connect()
    info("Connected to destination queues.")

    info("Pipeline ready.")
def __connect_pipelines(self):
    """Create and connect the source pipeline and, when destination
    queues are configured, the destination pipeline.
    """
    self.logger.debug("Loading source pipeline.")
    self.__source_pipeline = PipelineFactory.create(self.parameters)
    # Lazy %-style logger arguments instead of eager '%' formatting: the
    # queue lists are only formatted when DEBUG logging is enabled.
    self.logger.debug("Loading source queue %r.", self.__source_queues)
    self.__source_pipeline.set_queues(self.__source_queues, "source")
    self.__source_pipeline.connect()
    self.logger.debug("Connected to source queue.")
    if self.__destination_queues:
        self.logger.debug("Loading destination pipeline.")
        self.__destination_pipeline = PipelineFactory.create(
            self.parameters)
        self.logger.debug("Loading destination queues %r.",
                          self.__destination_queues)
        self.__destination_pipeline.set_queues(self.__destination_queues,
                                               "destination")
        self.__destination_pipeline.connect()
        self.logger.debug("Connected to destination queues.")
    else:
        self.logger.debug("Not loading destination queues %r.",
                          self.__destination_queues)
    self.logger.info("Pipeline ready.")
def start(self):
    """Run the bot's main loop (Python 2 code).

    Repeatedly (re)connects the pipelines, processes messages and handles
    pipeline or message errors according to the configured error procedure
    ('retry' or 'pass'). Never returns on its own.
    """
    self.source_pipeline = None
    self.destination_pipeline = None
    # State flags driving the retry logic of the loop below.
    starting = True
    error_on_pipeline = True
    error_on_message = False
    self.logger.info('Bot start processing')
    while True:
        try:
            # After a failure (but not on first start), back off before retrying.
            if not starting and (error_on_pipeline or error_on_message):
                self.logger.info('Bot will restart in %s seconds' %
                                 self.parameters.error_retry_delay)
                time.sleep(self.parameters.error_retry_delay)
                self.logger.info('Bot woke up')
                self.logger.info('Trying to start processing again')

            if error_on_message:
                error_on_message = False

            # (Re)build both pipelines on startup or after a pipeline failure.
            if error_on_pipeline:
                self.logger.info("Loading source pipeline")
                self.source_pipeline = PipelineFactory.create(self.parameters)
                self.logger.info("Loading source queue")
                self.source_pipeline.set_queues(self.source_queues, "source")
                self.logger.info("Source queue loaded")
                self.source_pipeline.connect()
                self.logger.info("Connected to source queue")
                self.destination_pipeline = PipelineFactory.create(self.parameters)
                self.logger.info("Loading destination queues")
                self.destination_pipeline.set_queues(self.destination_queues,
                                                     "destination")
                self.logger.info("Destination queues loaded")
                self.destination_pipeline.connect()
                self.logger.info("Connected to destination queues")
                self.logger.info("Pipeline ready")
                error_on_pipeline = False

            if starting:
                self.logger.info("Start processing")
                starting = False

            self.process()
            # Honour the configured rate limit between processed messages.
            self.source_pipeline.sleep(self.parameters.rate_limit)
        except exceptions.PipelineError, ex:
            # Pipeline broke: drop both ends and rebuild on the next iteration.
            error_on_pipeline = True
            self.logger.error('Pipeline failed')
            self.logger.exception("Check the following exception: \n%s" % ex)
            self.source_pipeline = None
            self.destination_pipeline = None
        except Exception, ex:
            self.logger.error("Bot has found a problem")
            self.logger.exception(ex)
            if self.parameters.error_procedure == "retry":
                if self.parameters.error_max_retries <= 0:
                    pass  # FIXME: number of retries is wrong + 1 please!
                elif self.error_retries_counter < self.parameters.error_max_retries:
                    self.error_retries_counter += 1
                elif self.error_retries_counter >= self.parameters.error_max_retries:
                    # Retries exhausted: optionally dump the message, then ack.
                    if self.parameters.error_dump_message:
                        self.dump_message(ex)
                        self.acknowledge_message()
                    else:
                        self.acknowledge_message()
                else:
                    pass
                # when bot acknowledge the message, dont need to wait again
                error_on_message = True
            else:  # error_procedure == "pass"
                if self.parameters.error_dump_message:
                    self.dump_message(ex)
                self.acknowledge_message()
            if self.parameters.error_log_exception:
                self.logger.exception("Check the following exception: \n%s" % ex)
            if self.parameters.error_log_message:
                self.logger.info("Last Correct Message(event): %r" %
                                 self.last_message)  # FIXME: evaluate if its ok
                self.logger.info("Current Message(event): %r" %
                                 self.current_message)  # FIXME: evaluate if its ok
def submit():
    """Handle an upload: parse the uploaded CSV temp file into events and
    send each event to the configured destination pipeline.

    Returns a response reporting the number of successfully processed lines.
    """
    parameters = handle_parameters(request.form)
    temp_file = get_temp_file()
    if not temp_file:
        return create_response('No file')
    destination_pipeline = PipelineFactory.create(PipelineParameters(),
                                                  logger=app.logger,
                                                  direction='destination')
    # With a static queue name we can connect once up front; a formatted
    # (per-event) queue name is resolved and connected per line below.
    if not CONFIG.get('destination_pipeline_queue_formatted', False):
        destination_pipeline.set_queues(CONFIG['destination_pipeline_queue'],
                                        "destination")
        destination_pipeline.connect()
    time_observation = DateTime().generate_datetime_now()
    successful_lines = 0
    with open(temp_file[0], encoding='utf8') as handle:
        reader = csv.reader(handle, delimiter=parameters['delimiter'],
                            quotechar=parameters['quotechar'],
                            skipinitialspace=parameters['skipInitialSpace'],
                            escapechar=parameters['escapechar'],
                            )
        # Skip the header line and any configured number of initial lines.
        # NOTE(review): next() raises StopIteration on short files — assumed
        # the input always has enough lines; confirm with the upload handler.
        if parameters['has_header']:
            next(reader)
        for _ in range(parameters['skipInitialLines']):
            next(reader)
        for lineindex, line in enumerate(reader):
            event = Event()
            try:
                # Map the configured columns onto event fields.
                for columnindex, (column, value) in \
                        enumerate(zip(parameters['columns'], line)):
                    if not column or not value:
                        continue
                    if column.startswith('time.'):
                        # Append the configured timezone when the parsed
                        # timestamp is naive, then re-parse.
                        parsed = dateutil.parser.parse(value)
                        if not parsed.tzinfo:
                            value += parameters['timezone']
                            parsed = dateutil.parser.parse(value)
                        value = parsed.isoformat()
                    if column == 'extra':
                        value = handle_extra(value)
                    event.add(column, value)
                # Constant fields from config, unless already set by a column.
                for key, value in parameters.get('constant_fields',
                                                 {}).items():
                    if key not in event:
                        event.add(key, value)
                # Form fields prefixed with 'custom_' become event fields too.
                for key, value in request.form.items():
                    if not key.startswith('custom_'):
                        continue
                    key = key[7:]
                    if key not in event:
                        event.add(key, value)
                if CONFIG.get('destination_pipeline_queue_formatted', False):
                    # The queue name is a template formatted with the event.
                    queue_name = CONFIG['destination_pipeline_queue'].format(ev=event)
                    destination_pipeline.set_queues(queue_name, "destination")
                    destination_pipeline.connect()
            except Exception:
                # Faulty lines are skipped silently (best-effort import).
                continue
            # Fallback defaults for fields not provided by the data itself.
            if 'classification.type' not in event:
                event.add('classification.type', parameters['classification.type'])
            if 'classification.identifier' not in event:
                event.add('classification.identifier',
                          parameters['classification.identifier'])
            if 'feed.code' not in event:
                event.add('feed.code', parameters['feed.code'])
            if 'time.observation' not in event:
                event.add('time.observation', time_observation, sanitize=False)
            raw_message = MessageFactory.serialize(event)
            destination_pipeline.send(raw_message)
            successful_lines += 1
    return create_response('Successfully processed %s lines.' % successful_lines)
def check(self, no_connections=False):
    """
    Check the IntelMQ installation and configuration for problems.

    Loads and validates the configuration files, the pipeline layout,
    the harmonization definitions and the installed bots.

    Parameters:
        no_connections: if True, skip the connection test against the
            (redis) pipeline broker.

    Returns a tuple (exit code, result): result is a status string in
    text mode, or a dict with 'status' and 'lines' in JSON mode.
    """
    retval = 0
    if RETURN_TYPE == 'json':
        output = []
    if QUIET:
        logger.setLevel('WARNING')

    # Load all configuration files; this doubles as a JSON syntax check.
    files = {DEFAULTS_CONF_FILE: None, PIPELINE_CONF_FILE: None,
             RUNTIME_CONF_FILE: None, BOTS_FILE: None,
             HARMONIZATION_CONF_FILE: None}
    if RETURN_TYPE == 'json':
        output.append(['info', 'Reading configuration files.'])
    else:
        self.logger.info('Reading configuration files.')
    for filename in files:
        try:
            with open(filename) as file_handle:
                files[filename] = json.load(file_handle)
        except (IOError, ValueError) as exc:  # pragma: no cover
            # Typo fix: the message previously read "Coud not load".
            if RETURN_TYPE == 'json':
                output.append(['error', 'Could not load %r: %s.' % (filename, exc)])
            else:
                self.logger.error('Could not load %r: %s.', filename, exc)
            retval = 1
    if retval:
        if RETURN_TYPE == 'json':
            return 1, {'status': 'error', 'lines': output}
        else:
            self.logger.error('Fatal errors occurred.')
            # NOTE(review): other text-mode returns use a status string as the
            # second element; this returns the numeric retval — confirm callers.
            return 1, retval

    if RETURN_TYPE == 'json':
        output.append(['info', 'Checking defaults configuration.'])
    else:
        self.logger.info('Checking defaults configuration.')
    try:
        with open(pkg_resources.resource_filename('intelmq', 'etc/defaults.conf')) as fh:
            defaults = json.load(fh)
    except FileNotFoundError:
        pass
    else:
        # Report keys the shipped defaults have but the local file lacks.
        keys = set(defaults.keys()) - set(files[DEFAULTS_CONF_FILE].keys())
        if keys:
            if RETURN_TYPE == 'json':
                output.append(['error', "Keys missing in your 'defaults.conf' file: %r" % keys])
            else:
                self.logger.error("Keys missing in your 'defaults.conf' file: %r", keys)

    if RETURN_TYPE == 'json':
        output.append(['info', 'Checking runtime configuration.'])
    else:
        self.logger.info('Checking runtime configuration.')
    http_proxy = files[DEFAULTS_CONF_FILE].get('http_proxy')
    https_proxy = files[DEFAULTS_CONF_FILE].get('https_proxy')
    # Either both are given or both are not given
    if (not http_proxy or not https_proxy) and not (http_proxy == https_proxy):
        if RETURN_TYPE == 'json':
            output.append(['warning', 'Incomplete configuration: Both http and https proxies must be set.'])
        else:
            self.logger.warning('Incomplete configuration: Both http and https proxies must be set.')
        retval = 1

    if RETURN_TYPE == 'json':
        output.append(['info', 'Checking runtime and pipeline configuration.'])
    else:
        self.logger.info('Checking runtime and pipeline configuration.')
    all_queues = set()
    for bot_id, bot_config in files[RUNTIME_CONF_FILE].items():
        # Required metadata keys of every bot entry.
        for field in ['description', 'group', 'module', 'name']:
            if field not in bot_config:
                if RETURN_TYPE == 'json':
                    output.append(['warning', 'Bot %r has no %r.' % (bot_id, field)])
                else:
                    self.logger.warning('Bot %r has no %r.', bot_id, field)
                retval = 1
        if 'module' in bot_config and bot_config['module'] == 'bots.collectors.n6.collector_stomp':
            if RETURN_TYPE == 'json':
                output.append(['warning',
                               "The module 'bots.collectors.n6.collector_stomp' is deprecated and will be removed in "
                               "version 2.0. Please use intelmq.bots.collectors."
                               "stomp.collector instead for bot %r." % bot_id])
            else:
                # Lazy logger arguments instead of eager '%' formatting.
                self.logger.warning("The module 'bots.collectors.n6.collector_stomp' is deprecated and will be removed in "
                                    "version 2.0. Please use intelmq.bots.collectors."
                                    "stomp.collector instead for bot %r.", bot_id)
        if 'run_mode' in bot_config and bot_config['run_mode'] not in ['continuous', 'scheduled']:
            message = "Bot %r has invalid `run_mode` %r. Must be 'continuous' or 'scheduled'."
            if RETURN_TYPE == 'json':
                output.append(['warning', message % (bot_id, bot_config['run_mode'])])
            else:
                self.logger.warning(message, bot_id, bot_config['run_mode'])
            retval = 1
        if bot_id not in files[PIPELINE_CONF_FILE]:
            if RETURN_TYPE == 'json':
                output.append(['error', 'Misconfiguration: No pipeline configuration found for %r.'
                               % bot_id])
            else:
                self.logger.error('Misconfiguration: No pipeline configuration found for %r.',
                                  bot_id)
            retval = 1
        else:
            # Bots that emit events need at least one destination queue.
            if ('group' in bot_config and
                    bot_config['group'] in ['Collector', 'Parser', 'Expert']):
                if ('destination-queues' not in files[PIPELINE_CONF_FILE][bot_id] or
                        (not isinstance(files[PIPELINE_CONF_FILE][bot_id]['destination-queues'], list) or
                         len(files[PIPELINE_CONF_FILE][bot_id]['destination-queues']) < 1)):
                    if RETURN_TYPE == 'json':
                        output.append(['error', 'Misconfiguration: No destination queues for %r.' % bot_id])
                    else:
                        self.logger.error('Misconfiguration: No destination queues for %r.', bot_id)
                    retval = 1
                else:
                    all_queues = all_queues.union(files[PIPELINE_CONF_FILE][bot_id]['destination-queues'])
            # Bots that consume events need a (string) source queue.
            if ('group' in bot_config and
                    bot_config['group'] in ['Parser', 'Expert', 'Output']):
                if ('source-queue' not in files[PIPELINE_CONF_FILE][bot_id] or
                        not isinstance(files[PIPELINE_CONF_FILE][bot_id]['source-queue'], str)):
                    if RETURN_TYPE == 'json':
                        output.append(['error', 'Misconfiguration: No source queue for %r.' % bot_id])
                    else:
                        self.logger.error('Misconfiguration: No source queue for %r.', bot_id)
                    retval = 1
                else:
                    all_queues.add(files[PIPELINE_CONF_FILE][bot_id]['source-queue'])
                    all_queues.add(files[PIPELINE_CONF_FILE][bot_id]['source-queue'] + '-internal')
    if not no_connections:
        try:
            pipeline = PipelineFactory.create(self.parameters)
            pipeline.set_queues(None, "source")
            pipeline.connect()
            # Queues existing in the broker but unknown to the configuration.
            orphan_queues = "', '".join({a.decode() for a in pipeline.pipe.keys()} - all_queues)
        except Exception as exc:
            error = utils.error_message_from_exc(exc)
            if RETURN_TYPE == 'json':
                output.append(['error', 'Could not connect to redis pipeline: %s' % error])
            else:
                self.logger.error('Could not connect to redis pipeline: %s', error)
            retval = 1
        else:
            if orphan_queues:
                if RETURN_TYPE == 'json':
                    output.append(['warning', "Orphaned queues found: '%s'."
                                   % orphan_queues])
                else:
                    self.logger.warning("Orphaned queues found: '%s'.", orphan_queues)

    if RETURN_TYPE == 'json':
        output.append(['info', 'Checking harmonization configuration.'])
    else:
        self.logger.info('Checking harmonization configuration.')
    for event_type, event_type_conf in files[HARMONIZATION_CONF_FILE].items():
        for harm_type_name, harm_type in event_type_conf.items():
            if "description" not in harm_type:
                if RETURN_TYPE == 'json':
                    # NOTE(review): level 'warn' differs from 'warning' used
                    # elsewhere in the JSON output — kept for compatibility.
                    output.append(['warn', 'Missing description for type %r.' % harm_type_name])
                else:
                    # logger.warn is a deprecated alias of logger.warning.
                    self.logger.warning('Missing description for type %r.', harm_type_name)
            if "type" not in harm_type:
                if RETURN_TYPE == 'json':
                    output.append(['error', 'Missing type for type %r.' % harm_type_name])
                else:
                    self.logger.error('Missing type for type %r.', harm_type_name)
                retval = 1
                continue
            if "regex" in harm_type:
                try:
                    re.compile(harm_type['regex'])
                except Exception as e:
                    if RETURN_TYPE == 'json':
                        output.append(['error', 'Invalid regex for type %r: %r.' % (harm_type_name, str(e))])
                    else:
                        self.logger.error('Invalid regex for type %r: %r.', harm_type_name, str(e))
                    retval = 1
                    continue
    extra_type = files[HARMONIZATION_CONF_FILE].get('event', {}).get('extra', {}).get('type')
    if extra_type != 'JSONDict':
        if RETURN_TYPE == 'json':
            output.append(['warning', "'extra' field needs to be of type 'JSONDict'."])
        else:
            self.logger.warning("'extra' field needs to be of type 'JSONDict'.")
        retval = 1

    if RETURN_TYPE == 'json':
        output.append(['info', 'Checking for bots.'])
    else:
        self.logger.info('Checking for bots.')
    for bot_id, bot_config in files[RUNTIME_CONF_FILE].items():
        # Every configured bot module must be importable.
        try:
            bot_module = importlib.import_module(bot_config['module'])
        except ImportError as exc:
            if RETURN_TYPE == 'json':
                output.append(['error', 'Incomplete installation: Bot %r not importable: %r.'
                               % (bot_id, exc)])
            else:
                self.logger.error('Incomplete installation: Bot %r not importable: %r.', bot_id, exc)
            retval = 1
            continue
        bot = getattr(bot_module, 'BOT')
        bot_parameters = files[DEFAULTS_CONF_FILE].copy()
        bot_parameters.update(bot_config['parameters'])
        # Let the bot validate its own (merged) parameters.
        bot_check = bot.check(bot_parameters)
        if bot_check:
            if RETURN_TYPE == 'json':
                output.extend(bot_check)
            else:
                for log_line in bot_check:
                    getattr(self.logger, log_line[0])("Bot %r: %s" % (bot_id, log_line[1]))
    for group in files[BOTS_FILE].values():
        for bot_id, bot in group.items():
            # An executable must exist for every bot listed in the BOTS file.
            if subprocess.call(['which', bot['module']], stdout=subprocess.DEVNULL,
                               stderr=subprocess.DEVNULL):
                if RETURN_TYPE == 'json':
                    output.append(['error', 'Incomplete installation: Executable %r for %r not found.' %
                                   (bot['module'], bot_id)])
                else:
                    self.logger.error('Incomplete installation: Executable %r for %r not found.',
                                      bot['module'], bot_id)
                retval = 1

    if RETURN_TYPE == 'json':
        # Bug fix: the exit codes were swapped here (errors returned 0 and
        # success returned 1); now aligned with the text-mode branch below.
        if retval:
            return 1, {'status': 'error', 'lines': output}
        else:
            return 0, {'status': 'success', 'lines': output}
    else:
        if retval:
            self.logger.error('Some issues have been found, please check the above output.')
            return retval, 'error'
        else:
            self.logger.info('No issues found.')
            return retval, 'success'