def run(self):
    """Drive the watcher until this worker is no longer flagged as running.

    Every pass asks ``get_watcher()`` for the watcher and lets it watch with
    the shared connector, settings, threads and abort handle. After each
    pass, whether it succeeded or raised, the running flag is refreshed from
    the abort handle, the connector is closed and the loop sleeps. A failed
    pass is additionally logged as fatal under the current job name.
    """
    abort_handle = self.get_abort()

    connector = ConnectorFactory.create_connector(
        DatasourceBuilder.build('paprika-ds.json'))
    logger = Logger(connector, self)
    job_name = JobRepository(connector).job()['job_name']

    settings = self.get_settings()
    threads = self.get_threads()

    while self.is_running():
        try:
            self.get_watcher().watch(connector, settings, threads,
                                     abort_handle)

            # The abort flag can be raised from the main thread or from
            # another thread, so it is re-read after every pass.
            self.running(not abort_handle.is_aborted())

            connector.close()
            time.sleep(settings['watcher_idle_delay'])
        except:
            # Catch-all on purpose: a failing pass is logged and the loop
            # carries on after the exception delay.
            self.running(not abort_handle.is_aborted())

            failure = Traceback.build()
            logger.fatal(job_name, failure['message'], failure['backtrace'])

            connector.close()
            time.sleep(settings['worker_exception_delay'])
def run(self, stream):
    """Listen to one stream until this worker is aborted or stopped.

    ``stream`` must provide the keys ``url``, ``username`` and ``password``.
    After each listen attempt, whether it returned or raised, the running
    flag is refreshed from the abort handle and, when not aborted, from the
    stop handle. Failures are logged as fatal and followed by the exception
    delay from the settings.
    """
    abort_handle = self.get_abort()
    stop_handle = self.get_stop()
    settings = self.get_settings()

    connector = ConnectorFactory.create_connector(
        DatasourceBuilder.build('paprika-ds.json'))
    logger = Logger(connector, self)
    job_name = JobRepository(connector).job()['job_name']

    logger.trace(job_name, 'worker #' + str(self.get_id()) + " started.")

    while self.is_running():
        try:
            url = stream['url']
            credentials = (stream['username'], stream['password'])

            # While streaming works, execution stays inside this call. That
            # is why a Process is used instead of a Thread: a process can be
            # terminated.
            StreamScraper.listen(connector, job_name, stream, url,
                                 credentials)

            # The abort flag can be raised from the main thread or from
            # another thread.
            aborted = abort_handle.is_aborted()
            self.running(not aborted)

            # The stop flag is set by the agent's WatchWorker thread.
            if not aborted:
                self.running(not stop_handle.is_stopped())

            connector.close()
            logger.trace(job_name,
                         'worker #' + str(self.get_id()) + " executed.")
        except:
            # Catch-all on purpose: log the failure and try again after the
            # exception delay.
            aborted = abort_handle.is_aborted()
            self.running(not aborted)
            if not aborted:
                self.running(not stop_handle.is_stopped())

            failure = Traceback.build()
            logger.fatal(job_name, failure['message'], failure['backtrace'])

            connector.close()
            time.sleep(settings['worker_exception_delay'])
def action(self, connector, message):
    """Start a process: create its first process action and queue it.

    ``message['payload']`` is a JSON document describing the process. The
    process is set to ``PROCESSING``, the first action of its process
    definition is inserted as a ``READY`` process action, the definition's
    action properties are copied onto it, and the new process action is
    enqueued on the process queue for the ProcessAction consumer.

    On any failure the process is set to ``FAILED`` with the message and
    backtrace from ``Traceback.build()`` and a fatal entry is logged.
    """
    try:
        process = json.loads(message['payload'])
        process_repository = ProcessRepository(connector)

        process['state'] = 'PROCESSING'
        process['message'] = ''
        process['backtrace'] = ''
        process_repository.state(process)

        process_definition_repository = ProcessDefinitionRepository(connector)
        process_definition_action_repository = ProcessDefinitionActionRepository(connector)
        process_definition = process_definition_repository.find_by_id(process['pdn_id'])
        process_definition_action = process_definition_action_repository.find_first_by_process_definition(
            process_definition)

        process_action_repository = ProcessActionRepository(connector)
        process_action = process_action_repository.insert({
            'job_name': process['job_name'],
            'pcs_id': process['id'],
            'dan_id': process_definition_action['id'],
            'name': process_definition_action['name'],
            'state': 'READY',
        })

        # Copy the definition's properties onto the freshly created action.
        process_action_property_repository = ProcessActionPropertyRepository(connector)
        process_definition_action_property_repository = ProcessDefinitionActionPropertyRepository(connector)
        process_definition_action_properties = process_definition_action_property_repository.list_by_process_definition_action(
            process_definition_action)
        for process_definition_action_property in process_definition_action_properties:
            process_action_property_repository.set_property(
                process_action,
                process_definition_action_property['name'],
                process_definition_action_property['value'])

        Message.enqueue(connector, process['queue'], process_action, 'message',
                        'paprika.consumers.ProcessAction.ProcessAction')
    except:
        # Catch-all on purpose: whatever went wrong, the process has to be
        # flagged FAILED and the failure logged.
        process = json.loads(message['payload'])

        # The repository is built on the connector, as at every other call
        # site; the original handed it a datasource here instead.
        process_repository = ProcessRepository(connector)

        result = Traceback.build()
        result['id'] = process['id']
        result['state'] = 'FAILED'
        process_repository.state(result)

        logger = Logger(connector, self)
        logger.fatal(process['job_name'], result['message'], result['backtrace'])
def action(self, connector, message):
    """Finish a process by setting its state to ``PROCESSED``.

    ``message['payload']`` is a JSON document describing the process. Its
    message and backtrace fields are cleared together with the state change.
    On any failure the process is set to ``FAILED`` with the message and
    backtrace from ``Traceback.build()`` and a fatal entry is logged.
    """
    try:
        finished = json.loads(message['payload'])
        repository = ProcessRepository(connector)

        finished['state'] = 'PROCESSED'
        finished['message'] = ''
        finished['backtrace'] = ''
        repository.state(finished)
    except:
        # Catch-all on purpose: any failure marks the process FAILED.
        failed = json.loads(message['payload'])
        repository = ProcessRepository(connector)

        failure = Traceback.build()
        failure['id'] = failed['id']
        failure['state'] = 'FAILED'
        repository.state(failure)

        Logger(connector, self).fatal(
            failed['job_name'], failure['message'], failure['backtrace'])
def action(self, connector, message):
    """Execute one process action and queue whatever has to happen next.

    ``message['payload']`` is a JSON document describing the process action.
    The class named by the action's ``action`` property is loaded and
    executed:

    * a truthy result is re-enqueued for the ProcessAction consumer, delayed
      by the action's ``sleep`` property (whole seconds);
    * otherwise the next action of the process definition is inserted as
      ``READY``, given the definition's properties and enqueued; when there
      is no next action the process is enqueued for the ProcessFinish
      consumer. The current action is then set to ``PROCESSED``.

    On any failure both the process action and its process are set to
    ``FAILED``, a fatal entry is logged, and - when the process has an
    ``e_pdn_id`` - an exception process is created, given the failed
    process' properties plus message and backtrace, and executed.
    """
    try:
        process_action = json.loads(message['payload'])
        action_repository = ProcessActionRepository(connector)

        # flag the process action as being worked on
        process_action['state'] = 'PROCESSING'
        process_action['message'] = ''
        process_action['backtrace'] = ''
        action_repository.state(process_action)

        action_properties = ProcessActionPropertyRepository(connector)
        action_class_name = action_properties.get_property(process_action, 'action')
        action = ClassLoader.find(action_class_name)
        payload = action.execute(connector, process_action)

        if payload:
            process_repository = ProcessRepository(connector)
            action_properties = ProcessActionPropertyRepository(connector)
            process = process_repository.find_by_id(payload['pcs_id'])

            sleep = float(action_properties.get_property(process_action, 'sleep'))
            wake_up = datetime.now() + timedelta(seconds=int(sleep))
            Message.enqueue_wait(
                connector, process['queue'],
                wake_up.strftime('%Y-%m-%d %H:%M:%S'), payload, 'message',
                'paprika.consumers.ProcessAction.ProcessAction')
        else:
            process_repository = ProcessRepository(connector)
            process = process_repository.find_by_id(process_action['pcs_id'])

            definition_action_repository = ProcessDefinitionActionRepository(connector)
            next_definition_action = definition_action_repository.find_next_by_process_action(
                process_action, process)

            if next_definition_action:
                next_process_action = action_repository.insert({
                    'job_name': process_action['job_name'],
                    'pcs_id': process_action['pcs_id'],
                    'dan_id': next_definition_action['id'],
                    'name': next_definition_action['name'],
                    'state': 'READY',
                })

                definition_property_repository = ProcessDefinitionActionPropertyRepository(connector)
                definition_properties = definition_property_repository.list_by_process_definition_action(
                    next_definition_action)
                for definition_property in definition_properties:
                    action_properties.set_property(
                        next_process_action,
                        definition_property['name'],
                        definition_property['value'])

                Message.enqueue(
                    connector, process['queue'], next_process_action,
                    'message',
                    'paprika.consumers.ProcessAction.ProcessAction')
            else:
                finished_process = process_repository.find_by_id(process_action['pcs_id'])
                Message.enqueue(
                    connector, process['queue'], finished_process, 'message',
                    'paprika.consumers.ProcessFinish.ProcessFinish')

            # NOTE(review): the source's indentation was lost; this state
            # change is assumed to belong to the no-payload branch - confirm.
            process_action['state'] = 'PROCESSED'
            process_action['message'] = ''
            process_action['backtrace'] = ''
            action_repository.state(process_action)
    except:
        # Catch-all on purpose: any failure is recorded on the action and on
        # its process.

        # set the process_action to failed
        process_action = json.loads(message['payload'])
        action_repository = ProcessActionRepository(connector)
        failure = Traceback.build()
        failure['id'] = process_action['id']
        failure['state'] = 'FAILED'
        action_repository.state(failure)

        # set the process to failed
        process_repository = ProcessRepository(connector)
        process = process_repository.find_by_id(process_action['pcs_id'])
        failure['id'] = process['id']
        failure['state'] = 'FAILED'
        process_repository.state(failure)

        # log a fatal of the process
        Logger(connector, self).fatal(
            process['job_name'], failure['message'], failure['backtrace'])

        # start the exception process if present
        if process['e_pdn_id']:
            exception_process = ProcessService.create_process(
                connector, process['e_pdn_id'], process['job_name'])

            property_repository = ProcessPropertyRepository(connector)
            property_repository.copy(process, exception_process)
            property_repository.set_property(
                exception_process, 'message', failure['message'])
            property_repository.set_property(
                exception_process, 'backtrace', failure['backtrace'])

            ProcessService.execute_process(connector, exception_process)
def run(self, queue):
    """Consume messages from ``queue`` until this worker is aborted or stopped.

    Each pass dequeues one message with this worker's claim. When a message
    is returned, the consumer class named in ``message['consumer']`` is
    loaded, its ``action`` is called and the message is set to
    ``PROCESSED``. When nothing is returned the connector is closed and the
    loop sleeps for the idle delay.

    When a pass raises, the message (if any) is set to ``FAILED`` with the
    message and backtrace from ``Traceback.build()``, the failure is logged
    as fatal, the connector is closed and the loop sleeps for the exception
    delay. After every pass the running flag is refreshed from the abort
    handle and, when not aborted, from the stop handle.
    """
    abort = self.get_abort()
    claim = self.get_claim()
    stop = self.get_stop()
    settings = self.get_settings()

    paprika_ds = DatasourceBuilder.build('paprika-ds.json')
    connector = ConnectorFactory.create_connector(paprika_ds)
    logger = Logger(connector, self)
    job_repository = JobRepository(connector)
    job = job_repository.job()
    job_name = job['job_name']

    while self.is_running():
        # Reset before the try so the handler never sees a message left over
        # from an earlier pass.
        message = None
        try:
            # retrieve the next message
            message_repository = MessageRepository(connector)
            message = message_repository.dequeue(claim, queue)
            if message:
                consumer = ClassLoader.find(message['consumer'])
                consumer.action(connector, message)
                message_repository.state(queue, message, 'PROCESSED')

            # check if we need to abort, can be called from the main thread
            # or other thread
            aborted = abort.is_aborted()
            self.running(not aborted)

            # check if we need to stop, will be set by the agent's
            # WatchWorker thread
            if not aborted:
                stopped = stop.is_stopped()
                self.running(not stopped)

            # no message to process
            if not message:
                connector.close()
                time.sleep(settings['worker_idle_delay'])

            # NOTE(review): the source's indentation was lost; this trace is
            # assumed to run on every pass, as in the sibling loops - confirm.
            logger.trace(job_name, 'worker #' + str(self.get_id()) + " executed.")
        except:
            # Catch-all on purpose: one failing message must not end the
            # worker loop.
            aborted = abort.is_aborted()
            self.running(not aborted)
            if not aborted:
                stopped = stop.is_stopped()
                self.running(not stopped)

            result = Traceback.build()
            if message:
                try:
                    message_repository = MessageRepository(connector)
                    result['id'] = message['id']
                    message_repository.state(queue, result, 'FAILED')
                except:
                    # Log why the FAILED state could not be stored, using the
                    # same (job_name, message, backtrace) form as every other
                    # fatal call. Presumably Traceback.build() reports the
                    # exception currently being handled - verify.
                    persist_failure = Traceback.build()
                    logger.fatal(job_name, 'Failed to persist message failure',
                                 persist_failure['backtrace'])

            logger.fatal(job_name, result['message'], result['backtrace'])

            connector.close()
            time.sleep(settings['worker_exception_delay'])
def run(self, scheduled_event, test_mode=False):
    """Fire a scheduled event whenever it is due, until aborted or stopped.

    Each pass re-reads the event by ``scheduled_event['hashcode']``. When its
    ``expected`` timestamp lies in the past and its ``repetition`` is one of
    ``HOURS``, ``DAYS`` or ``MINUTES``, the next expected time is advanced in
    steps of ``intermission`` units until it is no longer before now. The new
    time is stored, a process is created for the event's process definition,
    an event record is inserted and linked to the process through the
    ``event_id`` property, and the process is executed.

    :param scheduled_event: mapping that provides at least ``hashcode``.
    :param test_mode: when true, the loop ends after a single pass.

    A due event with a non-positive ``intermission`` raises ``ValueError``
    inside the pass; it is logged as fatal like any other failure.
    """
    # Stored repetition unit -> matching ``timedelta`` keyword.
    interval_units = {'HOURS': 'hours', 'DAYS': 'days', 'MINUTES': 'minutes'}

    abort = self.get_abort()
    stop = self.get_stop()

    paprika_ds = DatasourceBuilder.build('paprika-ds.json')
    connector = ConnectorFactory.create_connector(paprika_ds)
    logger = Logger(connector, self)
    job_repository = JobRepository(connector)
    job = job_repository.job()
    job_name = job['job_name']
    settings = self.get_settings()

    while self.is_running():
        try:
            now = datetime.now()

            scheduled_event_repository = ScheduledEventRepository(connector)
            # The parameter is deliberately not rebound: if one lookup yields
            # nothing, the next pass can still query by the same hashcode.
            current_event = scheduled_event_repository.find_by_hashcode(
                scheduled_event['hashcode'])
            job_repository = JobRepository(connector)
            event_repository = EventRepository(connector)

            repetition = current_event['repetition']
            intermission = int(current_event['intermission'])
            expected = datetime.strptime(current_event['expected'],
                                         "%Y-%m-%d %H:%M:%S")

            next_expected = None
            unit = interval_units.get(repetition)
            if expected < now and unit:
                if intermission <= 0:
                    # A zero or negative step would never catch up with
                    # ``now`` and keep this worker spinning in the loop below.
                    raise ValueError(
                        'scheduled event intermission must be positive, got %r'
                        % intermission)
                step = timedelta(**{unit: intermission})
                next_expected = expected + step
                while next_expected < now:
                    next_expected += step

            if next_expected:
                scheduled_event_repository.expected({
                    'id': current_event['id'],
                    'expected': str(next_expected),
                })

                job = job_repository.job()
                event_job_name = job['job_name']

                process = ProcessService.create_process(
                    connector, current_event['pdn_id'], event_job_name,
                    current_event['e_pdn_id'])

                # The event records the expected time that has just fired,
                # not the newly computed one.
                event = event_repository.insert({
                    'state': 'READY',
                    'repetition': repetition,
                    'intermission': intermission,
                    'expected': current_event['expected'],
                    'job_name': event_job_name,
                    'pcs_id': process['id'],
                })

                process_property_repository = ProcessPropertyRepository(connector)
                process_property_repository.set_property(
                    process, 'event_id', event['id'])

                ProcessService.execute_process(connector, process)

            # check if we need to abort, can be called from the main thread
            # or other thread
            aborted = abort.is_aborted()
            self.running(not aborted)

            # check if we need to stop, will be set by the agent's
            # WatchWorker thread
            if not aborted:
                stopped = stop.is_stopped()
                self.running(not stopped)

            # check for test_mode, break the loop
            if test_mode:
                self.running(False)

            connector.close()
            time.sleep(settings['worker_idle_delay'])
            logger.trace(job_name, 'worker #' + str(self.get_id()) + " executed.")
        except:
            # Catch-all on purpose: log the failure and retry after the
            # exception delay.
            aborted = abort.is_aborted()
            self.running(not aborted)
            if not aborted:
                stopped = stop.is_stopped()
                self.running(not stopped)

            result = Traceback.build()
            logger.fatal(job_name, result['message'], result['backtrace'])

            connector.close()
            time.sleep(settings['worker_exception_delay'])
def run(self, location):
    """Scan one location for new stable files until aborted or stopped.

    ``location`` must provide ``url``, ``patterns``, ``recursive`` and
    ``depth``. Each pass configures a VFS client from the
    ``scanner.excluded_extensions`` and ``scanner.stable_check_delay``
    properties, connects and lists the stable files. Every file whose
    hashcode is not registered yet is matched against the location's rules
    (falling back to the failsafe rule), registered, and handed to a newly
    created process that receives the ``file_id``, ``pickup_location``,
    ``path`` and ``payload`` properties before it is executed.

    The client is closed after every pass, also when the pass fails.
    Failures are logged as fatal and followed by the exception delay.
    """
    abort = self.get_abort()
    stop = self.get_stop()

    paprika_ds = DatasourceBuilder.build('paprika-ds.json')
    connector = ConnectorFactory.create_connector(paprika_ds)
    logger = Logger(connector, self)
    job_repository = JobRepository(connector)
    job = job_repository.job()
    job_name = job['job_name']
    settings = self.get_settings()

    while self.is_running():
        try:
            properties = PropertyRepository(connector)
            registry = FileRepository(connector)
            rule_repository = RuleRepository(connector)

            excluded_extensions = properties.get_property(
                'scanner.excluded_extensions')
            stable_check_delay = properties.get_property(
                'scanner.stable_check_delay')

            url = location['url']
            patterns = location['patterns']
            client = VfsFactory.create_client(url)
            client.set_excluded_extensions(excluded_extensions)
            client.set_stable_check_delay(int(stable_check_delay))
            client.set_regular_expressions(patterns)
            path = client.get_path()
            recursive = int(location['recursive'])
            depth = int(location['depth'])

            client.connect()
            try:
                files = client.list_stable(path, recursive=recursive, depth=depth)
                for found_file in files:
                    registered_file = registry.get_by_hashcode(found_file['hashcode'])
                    if registered_file:
                        continue

                    # find the rule; there is no break, so when several
                    # rules match the last one wins
                    found_rule = None
                    rules = rule_repository.find_by_location(location)
                    for rule in rules:
                        if Matcher.match(ReMethod, rule['pattern'], found_file['filename']):
                            found_rule = rule
                    if not found_rule:
                        found_rule = rule_repository.find_failsafe()

                    job = job_repository.job()
                    file_job_name = job['job_name']

                    logger.info(
                        file_job_name,
                        "file: " + found_file['url'] + '/' + found_file['filename'] +
                        " rule: " + found_rule['rule'] +
                        " hascode:" + found_file['hashcode'])

                    process = ProcessService.create_process(
                        connector, found_rule['pdn_id'], file_job_name,
                        found_rule['e_pdn_id'])

                    registered_file = registry.insert({
                        'job_name': file_job_name,
                        'filename': found_file['filename'],
                        'path': found_file['path'],
                        'pattern': found_rule['pattern'],
                        'rle_id': found_rule['id'],
                        'rule': found_rule['rule'],
                        'pickup_location': found_file['url'],
                        'filesize': found_file['size'],
                        'hashcode': found_file['hashcode'],
                        'pcs_id': process['id'],
                        'state': 'READY',
                    })

                    process_property_repository = ProcessPropertyRepository(connector)
                    process_property_repository.set_property(
                        process, 'file_id', registered_file['id'])
                    process_property_repository.set_property(
                        process, 'pickup_location', found_file['url'])
                    process_property_repository.set_property(
                        process, 'path', found_file['path'])
                    process_property_repository.set_property(
                        process, 'payload',
                        json.dumps({
                            'filename': found_file['filename'],
                            'job_name': file_job_name
                        }))

                    ProcessService.execute_process(connector, process)
            finally:
                # Release the connected client even when listing or
                # registering a file fails; the original only closed it on
                # the success path.
                client.close()

            # check if we need to abort, can be called from the main thread
            # or other thread
            aborted = abort.is_aborted()
            self.running(not aborted)

            # check if we need to stop, will be set by the agent's
            # WatchWorker thread
            if not aborted:
                stopped = stop.is_stopped()
                self.running(not stopped)

            connector.close()
            time.sleep(settings['worker_idle_delay'])
            logger.trace(job_name, 'worker #' + str(self.get_id()) + " executed.")
        except:
            # Catch-all on purpose: log the failure and rescan after the
            # exception delay.
            aborted = abort.is_aborted()
            self.running(not aborted)
            if not aborted:
                stopped = stop.is_stopped()
                self.running(not stopped)

            result = Traceback.build()
            logger.fatal(job_name, result['message'], result['backtrace'])

            connector.close()
            time.sleep(settings['worker_exception_delay'])