Ejemplo n.º 1
0
    def run(self):
        """Run the watcher loop until the worker is no longer running.

        Every pass obtains the watcher through ``self.get_watcher()`` and
        calls its ``watch`` with the connector, the settings, the threads and
        the abort handle. Afterwards the running flag is refreshed from the
        abort handle, the connector is closed and the loop sleeps for
        ``settings['watcher_idle_delay']``.

        A pass that raises is logged as fatal under the job name (message and
        backtrace come from ``Traceback.build()``), the connector is closed
        and the loop sleeps for ``settings['worker_exception_delay']`` before
        the running flag is evaluated again.
        """
        abort = self.get_abort()
        paprika_ds = DatasourceBuilder.build('paprika-ds.json')
        connector = ConnectorFactory.create_connector(paprika_ds)

        logger = Logger(connector, self)
        job_repository = JobRepository(connector)
        job = job_repository.job()
        job_name = job['job_name']
        settings = self.get_settings()
        threads = self.get_threads()
        while self.is_running():
            try:
                watcher = self.get_watcher()
                watcher.watch(connector, settings, threads, abort)

                # check if we need to abort, can be called from the main thread or other thread
                self.running(not abort.is_aborted())
                connector.close()
                time.sleep(settings['watcher_idle_delay'])
            except Exception:
                # Exception instead of a bare except: SystemExit and
                # KeyboardInterrupt must be able to end the loop instead of
                # being logged and retried.
                self.running(not abort.is_aborted())

                result = Traceback.build()
                logger.fatal(job_name, result['message'], result['backtrace'])
                connector.close()
                time.sleep(settings['worker_exception_delay'])
Ejemplo n.º 2
0
    def run(self, stream):
        """Listen to a stream until the worker is aborted or stopped.

        Args:
            stream: mapping that provides at least ``'url'``, ``'username'``
                and ``'password'``; it is also handed to
                ``StreamScraper.listen`` unchanged.

        Each pass calls ``StreamScraper.listen`` and, once that returns,
        refreshes the running flag, closes the connector and writes a trace
        entry. A pass that raises is logged as fatal, the connector is closed
        and the loop sleeps for ``settings['worker_exception_delay']``.
        """
        abort = self.get_abort()
        stop = self.get_stop()
        settings = self.get_settings()

        paprika_ds = DatasourceBuilder.build('paprika-ds.json')
        connector = ConnectorFactory.create_connector(paprika_ds)
        logger = Logger(connector, self)

        job_repository = JobRepository(connector)
        job = job_repository.job()
        job_name = job['job_name']
        logger.trace(job_name, 'worker #' + str(self.get_id()) + " started.")

        def refresh_running():
            # check if we need to abort, can be called from the main thread or other thread
            aborted = abort.is_aborted()
            self.running(not aborted)

            # check if we need to stop, will be set by the agent's WatchWorker thread
            if not aborted:
                self.running(not stop.is_stopped())

        while self.is_running():
            try:
                url = stream['url']
                auth = (stream['username'], stream['password'])

                # when streaming works the code stops here.
                # this is the reason why we use a Process instead of a Thread. A process can be terminated.
                StreamScraper.listen(connector, job_name, stream, url, auth)

                refresh_running()
                connector.close()
                logger.trace(job_name, 'worker #' + str(self.get_id()) + " executed.")
            except Exception:
                # Exception instead of a bare except, so that SystemExit and
                # KeyboardInterrupt are not turned into another retry.
                refresh_running()

                result = Traceback.build()
                logger.fatal(job_name, result['message'], result['backtrace'])
                connector.close()
                time.sleep(settings['worker_exception_delay'])
Ejemplo n.º 3
0
    def action(self, connector, message):
        """Start a process and enqueue its first process action.

        Args:
            connector: connection handle passed to every repository used here.
            message: mapping whose ``'payload'`` is a JSON document describing
                the process (``id``, ``pdn_id``, ``job_name`` and ``queue``
                are read from it).

        The process is put in state 'PROCESSING'. The definition action
        returned by ``find_first_by_process_definition`` is inserted as a
        'READY' process action, the properties of that definition action are
        copied onto it, and the new process action is enqueued on
        ``process['queue']`` for ``paprika.consumers.ProcessAction.ProcessAction``.

        When any of this raises, the process is put in state 'FAILED' with the
        message and backtrace from ``Traceback.build()`` and a fatal entry is
        logged; the error itself is not re-raised.
        """
        try:
            process = json.loads(message['payload'])
            process_repository = ProcessRepository(connector)

            process['state'] = 'PROCESSING'
            process['message'] = ''
            process['backtrace'] = ''
            process_repository.state(process)

            process_definition_repository = ProcessDefinitionRepository(connector)
            process_definition_action_repository = ProcessDefinitionActionRepository(connector)
            process_definition = process_definition_repository.find_by_id(process['pdn_id'])
            process_definition_action = process_definition_action_repository.find_first_by_process_definition(process_definition)

            process_action_repository = ProcessActionRepository(connector)
            process_action = {
                'job_name': process['job_name'],
                'pcs_id': process['id'],
                'dan_id': process_definition_action['id'],
                'name': process_definition_action['name'],
                'state': 'READY',
            }
            process_action = process_action_repository.insert(process_action)

            process_action_property_repository = ProcessActionPropertyRepository(connector)
            process_definition_action_property_repository = ProcessDefinitionActionPropertyRepository(connector)
            process_definition_action_properties = process_definition_action_property_repository.list_by_process_definition_action(process_definition_action)
            for definition_property in process_definition_action_properties:
                process_action_property_repository.set_property(process_action, definition_property['name'], definition_property['value'])

            payload = process_action
            Message.enqueue(connector, process['queue'], payload, 'message', 'paprika.consumers.ProcessAction.ProcessAction')
        except Exception:
            # Exception instead of a bare except, so SystemExit and
            # KeyboardInterrupt are not recorded as a failed process.
            process = json.loads(message['payload'])
            # The repository is built from the connector, as it is in the try
            # block above; the previous code passed the datasource description
            # returned by DatasourceBuilder.build() instead.
            # NOTE(review): assumes ProcessRepository only accepts a connector - confirm.
            process_repository = ProcessRepository(connector)
            result = Traceback.build()
            result['id'] = process['id']
            result['state'] = 'FAILED'
            process_repository.state(result)

            logger = Logger(connector, self)
            logger.fatal(process['job_name'], result['message'], result['backtrace'])
Ejemplo n.º 4
0
    def action(self, connector, message):
        """Mark the process described by the message as 'PROCESSED'.

        Args:
            connector: connection handle passed to the repository and logger.
            message: mapping whose ``'payload'`` is a JSON document describing
                the process.

        The process state is set to 'PROCESSED' with an empty message and
        backtrace. When that raises, the process is put in state 'FAILED'
        with the message and backtrace from ``Traceback.build()`` and a fatal
        entry is logged under the process' job name; the error is not
        re-raised.
        """
        try:
            process = json.loads(message['payload'])
            process_repository = ProcessRepository(connector)

            process['state'] = 'PROCESSED'
            process['message'] = ''
            process['backtrace'] = ''
            process_repository.state(process)
        except Exception:
            # Exception instead of a bare except, so SystemExit and
            # KeyboardInterrupt are not recorded as a failed process.
            process = json.loads(message['payload'])
            process_repository = ProcessRepository(connector)
            result = Traceback.build()
            result['id'] = process['id']
            result['state'] = 'FAILED'
            process_repository.state(result)

            logger = Logger(connector, self)
            logger.fatal(process['job_name'], result['message'],
                         result['backtrace'])
Ejemplo n.º 5
0
    def action(self, connector, message):
        """Execute one process action and queue whatever has to follow it.

        Args:
            connector: connection handle passed to repositories and services.
            message: mapping whose ``'payload'`` is a JSON document describing
                the process action (``id``, ``pcs_id`` and ``job_name`` are
                read from it).

        The process action is put in state 'PROCESSING' and the class named
        by its ``'action'`` property is loaded and executed.

        * If ``execute`` returns a truthy payload, that payload is enqueued
          again for the ProcessAction consumer, delayed by the whole number
          of seconds in the ``'sleep'`` property; the state is left as is.
        * Otherwise the definition action returned by
          ``find_next_by_process_action`` is inserted as a new 'READY'
          process action (with the definition's properties copied) and
          enqueued; when there is none, the process is enqueued for the
          ProcessFinish consumer. The current process action is then put in
          state 'PROCESSED'.

        When anything raises, the process action and its process are both put
        in state 'FAILED', a fatal entry is logged and, if the process has an
        ``e_pdn_id``, an exception process is created from it, given the
        process' properties plus the error message and backtrace, and
        executed.
        """
        consumer_process_action = 'paprika.consumers.ProcessAction.ProcessAction'
        try:
            process_action = json.loads(message['payload'])
            process_action_repository = ProcessActionRepository(connector)

            # set the state of the process action
            process_action['state'] = 'PROCESSING'
            process_action['message'] = ''
            process_action['backtrace'] = ''
            process_action_repository.state(process_action)

            # one property repository serves the whole method
            process_action_property_repository = ProcessActionPropertyRepository(
                connector)
            action = ClassLoader.find(
                process_action_property_repository.get_property(
                    process_action, 'action'))
            payload = action.execute(connector, process_action)
            process_repository = ProcessRepository(connector)
            if payload:
                process = process_repository.find_by_id(payload['pcs_id'])
                sleep_seconds = float(
                    process_action_property_repository.get_property(
                        process_action, 'sleep'))
                # fractions of a second are dropped; the timestamp below has
                # a resolution of one second anyway
                wake_at = datetime.now() + timedelta(seconds=int(sleep_seconds))
                Message.enqueue_wait(
                    connector, process['queue'],
                    wake_at.strftime('%Y-%m-%d %H:%M:%S'), payload, 'message',
                    consumer_process_action)
            else:
                process = process_repository.find_by_id(
                    process_action['pcs_id'])
                process_definition_action_repository = ProcessDefinitionActionRepository(
                    connector)
                process_definition_action = process_definition_action_repository.find_next_by_process_action(
                    process_action, process)
                if process_definition_action:
                    next_process_action = {
                        'job_name': process_action['job_name'],
                        'pcs_id': process_action['pcs_id'],
                        'dan_id': process_definition_action['id'],
                        'name': process_definition_action['name'],
                        'state': 'READY',
                    }
                    next_process_action = process_action_repository.insert(
                        next_process_action)

                    process_definition_action_property_repository = ProcessDefinitionActionPropertyRepository(
                        connector)
                    process_definition_action_properties = process_definition_action_property_repository.list_by_process_definition_action(
                        process_definition_action)
                    for definition_property in process_definition_action_properties:
                        process_action_property_repository.set_property(
                            next_process_action,
                            definition_property['name'],
                            definition_property['value'])

                    payload = next_process_action
                    Message.enqueue(
                        connector, process['queue'], payload, 'message',
                        consumer_process_action)
                else:
                    # no further action: hand the process itself to the
                    # finishing consumer
                    payload = process_repository.find_by_id(
                        process_action['pcs_id'])
                    Message.enqueue(
                        connector, process['queue'], payload, 'message',
                        'paprika.consumers.ProcessFinish.ProcessFinish')

                process_action['state'] = 'PROCESSED'
                process_action['message'] = ''
                process_action['backtrace'] = ''
                process_action_repository.state(process_action)
        except Exception:
            # Exception instead of a bare except, so SystemExit and
            # KeyboardInterrupt do not start the failure handling below.

            # set the process_action to failed
            process_action = json.loads(message['payload'])
            process_action_repository = ProcessActionRepository(connector)
            result = Traceback.build()
            result['id'] = process_action['id']
            result['state'] = 'FAILED'
            process_action_repository.state(result)

            # set the process to failed
            process_repository = ProcessRepository(connector)
            process = process_repository.find_by_id(process_action['pcs_id'])
            result['id'] = process['id']
            result['state'] = 'FAILED'
            process_repository.state(result)

            # log a fatal of the process
            logger = Logger(connector, self)
            logger.fatal(process['job_name'], result['message'],
                         result['backtrace'])

            # start the exception process if present
            if process['e_pdn_id']:
                e_process = ProcessService.create_process(
                    connector, process['e_pdn_id'], process['job_name'])

                process_property_repository = ProcessPropertyRepository(
                    connector)
                process_property_repository.copy(process, e_process)
                process_property_repository.set_property(
                    e_process, 'message', result['message'])
                process_property_repository.set_property(
                    e_process, 'backtrace', result['backtrace'])

                ProcessService.execute_process(connector, e_process)
Ejemplo n.º 6
0
    def run(self, queue):
        """Consume messages from a queue until the worker is aborted or stopped.

        Args:
            queue: queue identifier handed to ``MessageRepository.dequeue``
                and ``MessageRepository.state``.

        Each pass dequeues one message using the worker's claim. When there is
        a message, the consumer class named in ``message['consumer']`` is
        loaded, its ``action`` is called and the message is put in state
        'PROCESSED'. When there is none, the connector is closed and the loop
        sleeps for ``settings['worker_idle_delay']``.

        A pass that raises puts the current message (if any) in state
        'FAILED', logs the error as fatal, closes the connector and sleeps
        for ``settings['worker_exception_delay']``.
        """
        abort = self.get_abort()
        claim = self.get_claim()
        stop = self.get_stop()
        settings = self.get_settings()

        paprika_ds = DatasourceBuilder.build('paprika-ds.json')
        connector = ConnectorFactory.create_connector(paprika_ds)

        logger = Logger(connector, self)

        job_repository = JobRepository(connector)
        job = job_repository.job()
        job_name = job['job_name']

        def refresh_running():
            # check if we need to abort, can be called from the main thread or other thread
            aborted = abort.is_aborted()
            self.running(not aborted)

            # check if we need to stop, will be set by the agent's WatchWorker thread
            if not aborted:
                self.running(not stop.is_stopped())

        message = None
        while self.is_running():
            try:
                # forget the message of the previous pass, so a failure
                # before dequeue is not attributed to it
                message = None

                # retrieve the next message
                message_repository = MessageRepository(connector)
                message = message_repository.dequeue(claim, queue)

                if message:
                    consumer = ClassLoader.find(message['consumer'])
                    consumer.action(connector, message)

                    message_repository.state(queue, message, 'PROCESSED')

                refresh_running()

                # no message to process
                if not message:
                    connector.close()
                    time.sleep(settings['worker_idle_delay'])

                logger.trace(job_name,
                             'worker #' + str(self.get_id()) + " executed.")
            except Exception:
                # Exception instead of a bare except, so SystemExit and
                # KeyboardInterrupt are not turned into another retry.
                refresh_running()

                # capture the original failure before anything else can raise
                result = Traceback.build()
                if message:
                    try:
                        message_repository = MessageRepository(connector)
                        result['id'] = message['id']
                        message_repository.state(queue, result, 'FAILED')
                    except Exception:
                        # keep the reason the FAILED state could not be
                        # stored instead of dropping it
                        persist_failure = Traceback.build()
                        logger.fatal(job_name,
                                     'Failed to persist message failure',
                                     persist_failure['backtrace'])

                logger.fatal(job_name, result['message'], result['backtrace'])
                connector.close()
                time.sleep(settings['worker_exception_delay'])
Ejemplo n.º 7
0
    def run(self, scheduled_event, test_mode=False):
        """Fire a scheduled event whenever its expected time has passed.

        Args:
            scheduled_event: mapping that provides at least ``'hashcode'``;
                the current state of the event is looked up by that hashcode
                on every pass.
            test_mode: when true, the loop is ended after the first pass that
                completes without raising.

        On each pass the event's ``expected`` timestamp
        (``'%Y-%m-%d %H:%M:%S'``) is compared with the current time. Once it
        lies in the past:

        * for a repetition of 'HOURS', 'DAYS' or 'MINUTES' the next expected
          time is advanced in steps of ``intermission`` until it is no longer
          in the past, and stored through the repository;
        * a process is created from the event's ``pdn_id`` / ``e_pdn_id``, an
          event record in state 'READY' is inserted, its id is stored as the
          process property ``'event_id'`` and the process is executed.

        Afterwards the running flag is refreshed, the connector is closed and
        the loop sleeps for ``settings['worker_idle_delay']``. A pass that
        raises is logged as fatal and followed by a sleep of
        ``settings['worker_exception_delay']``.

        Raises (inside the loop, hence logged rather than propagated):
            ValueError: when a repeating event has an intermission that is
                not positive; stepping by it would never reach the present.
        """
        abort = self.get_abort()
        stop = self.get_stop()
        paprika_ds = DatasourceBuilder.build('paprika-ds.json')
        connector = ConnectorFactory.create_connector(paprika_ds)
        logger = Logger(connector, self)

        job_repository = JobRepository(connector)
        job = job_repository.job()
        job_name = job['job_name']

        settings = self.get_settings()

        # repetition value -> name of the matching timedelta keyword
        step_units = {'HOURS': 'hours', 'DAYS': 'days', 'MINUTES': 'minutes'}

        def refresh_running():
            # check if we need to abort, can be called from the main thread or other thread
            aborted = abort.is_aborted()
            self.running(not aborted)

            # check if we need to stop, will be set by the agent's WatchWorker thread
            if not aborted:
                self.running(not stop.is_stopped())

        while self.is_running():
            try:
                now = datetime.now()
                scheduled_event_repository = ScheduledEventRepository(
                    connector)
                # Keep the argument untouched: if the lookup yields nothing,
                # the hashcode is still available for the next pass.
                current = scheduled_event_repository.find_by_hashcode(
                    scheduled_event['hashcode'])
                job_repository = JobRepository(connector)
                event_repository = EventRepository(connector)

                repetition = current['repetition']
                intermission = int(current['intermission'])
                expected = datetime.strptime(current['expected'],
                                             "%Y-%m-%d %H:%M:%S")
                next_expected = None
                if expected < now:
                    unit = step_units.get(repetition)
                    if unit:
                        if intermission <= 0:
                            # a zero or negative step would keep the
                            # catch-up loop below spinning forever
                            raise ValueError(
                                'intermission must be positive, got '
                                + str(intermission))
                        step = timedelta(**{unit: intermission})
                        next_expected = expected + step
                        while next_expected < now:
                            next_expected += step

                    if next_expected:
                        message = dict()
                        message['id'] = current['id']
                        message['expected'] = str(next_expected)
                        scheduled_event_repository.expected(message)

                    job = job_repository.job()
                    event_job_name = job['job_name']

                    process = ProcessService.create_process(
                        connector, current['pdn_id'], event_job_name,
                        current['e_pdn_id'])

                    event = dict()
                    event['state'] = 'READY'
                    event['repetition'] = repetition
                    event['intermission'] = intermission
                    event['expected'] = current['expected']
                    event['job_name'] = event_job_name
                    event['pcs_id'] = process['id']
                    event = event_repository.insert(event)

                    process_property_repository = ProcessPropertyRepository(
                        connector)
                    process_property_repository.set_property(
                        process, 'event_id', event['id'])

                    ProcessService.execute_process(connector, process)

                refresh_running()

                # check for test_mode, break the loop
                if test_mode:
                    self.running(False)

                connector.close()
                time.sleep(settings['worker_idle_delay'])
                logger.trace(job_name,
                             'worker #' + str(self.get_id()) + " executed.")
            except Exception:
                # Exception instead of a bare except, so SystemExit and
                # KeyboardInterrupt are not turned into another retry.
                refresh_running()

                result = Traceback.build()
                logger.fatal(job_name, result['message'], result['backtrace'])
                connector.close()
                time.sleep(settings['worker_exception_delay'])
Ejemplo n.º 8
0
    def run(self, location):
        """Scan a location for new stable files and start a process for each.

        Args:
            location: mapping that provides at least ``'url'``,
                ``'patterns'``, ``'recursive'`` and ``'depth'``; it is also
                passed to ``RuleRepository.find_by_location``.

        Each pass creates a VFS client for ``location['url']``, configures it
        from the ``scanner.excluded_extensions`` and
        ``scanner.stable_check_delay`` properties and lists the stable files.
        For every file whose hashcode is not registered yet:

        * the last rule of the location whose pattern matches the filename is
          selected, falling back to ``find_failsafe()``;
        * a process is created from the rule's ``pdn_id`` / ``e_pdn_id``;
        * the file is registered in state 'READY' and the process receives
          the properties ``file_id``, ``pickup_location``, ``path`` and
          ``payload``;
        * the process is executed.

        The client is closed when the listing is finished, also when
        processing raised. Afterwards the running flag is refreshed, the
        connector is closed and the loop sleeps for
        ``settings['worker_idle_delay']``. A pass that raises is logged as
        fatal and followed by a sleep of ``settings['worker_exception_delay']``.
        """
        abort = self.get_abort()
        stop = self.get_stop()
        paprika_ds = DatasourceBuilder.build('paprika-ds.json')
        connector = ConnectorFactory.create_connector(paprika_ds)
        logger = Logger(connector, self)

        job_repository = JobRepository(connector)
        job = job_repository.job()
        job_name = job['job_name']

        settings = self.get_settings()

        def refresh_running():
            # check if we need to abort, can be called from the main thread or other thread
            aborted = abort.is_aborted()
            self.running(not aborted)

            # check if we need to stop, will be set by the agent's WatchWorker thread
            if not aborted:
                self.running(not stop.is_stopped())

        while self.is_running():
            try:
                properties = PropertyRepository(connector)
                registry = FileRepository(connector)
                rule_repository = RuleRepository(connector)

                excluded_extensions = properties.get_property(
                    'scanner.excluded_extensions')
                stable_check_delay = properties.get_property(
                    'scanner.stable_check_delay')

                url = location['url']
                patterns = location['patterns']
                client = VfsFactory.create_client(url)
                client.set_excluded_extensions(excluded_extensions)
                client.set_stable_check_delay(int(stable_check_delay))
                client.set_regular_expressions(patterns)
                path = client.get_path()
                recursive = int(location['recursive'])
                depth = int(location['depth'])

                client.connect()
                try:
                    files = client.list_stable(path,
                                               recursive=recursive,
                                               depth=depth)
                    for entry in files:
                        registered_file = registry.get_by_hashcode(
                            entry['hashcode'])
                        if registered_file:
                            # already picked up on an earlier pass
                            continue

                        # find the rule; there is no break, so the last
                        # matching rule wins
                        found_rule = None
                        rules = rule_repository.find_by_location(location)
                        for rule in rules:
                            if Matcher.match(ReMethod, rule['pattern'],
                                             entry['filename']):
                                found_rule = rule
                        if not found_rule:
                            found_rule = rule_repository.find_failsafe()

                        job = job_repository.job()
                        file_job_name = job['job_name']

                        logger.info(
                            file_job_name, "file: " + entry['url'] + '/' +
                            entry['filename'] + " rule: " +
                            found_rule['rule'] + " hascode:" +
                            entry['hashcode'])

                        process = ProcessService.create_process(
                            connector, found_rule['pdn_id'], file_job_name,
                            found_rule['e_pdn_id'])

                        message = dict()
                        message['job_name'] = file_job_name
                        message['filename'] = entry['filename']
                        message['path'] = entry['path']
                        message['pattern'] = found_rule['pattern']
                        message['rle_id'] = found_rule['id']
                        message['rule'] = found_rule['rule']
                        message['pickup_location'] = entry['url']
                        message['filesize'] = entry['size']
                        message['hashcode'] = entry['hashcode']
                        message['pcs_id'] = process['id']
                        message['state'] = 'READY'
                        registered_file = registry.insert(message)

                        process_property_repository = ProcessPropertyRepository(
                            connector)
                        process_property_repository.set_property(
                            process, 'file_id', registered_file['id'])
                        process_property_repository.set_property(
                            process, 'pickup_location', entry['url'])
                        process_property_repository.set_property(
                            process, 'path', entry['path'])
                        process_property_repository.set_property(
                            process, 'payload',
                            json.dumps({
                                'filename': entry['filename'],
                                'job_name': file_job_name
                            }))

                        ProcessService.execute_process(connector, process)
                finally:
                    # release the connected client even when listing or
                    # processing a file raised
                    client.close()

                refresh_running()

                connector.close()
                time.sleep(settings['worker_idle_delay'])
                logger.trace(job_name,
                             'worker #' + str(self.get_id()) + " executed.")
            except Exception:
                # Exception instead of a bare except, so SystemExit and
                # KeyboardInterrupt are not turned into another retry.
                refresh_running()

                result = Traceback.build()
                logger.fatal(job_name, result['message'], result['backtrace'])
                connector.close()
                time.sleep(settings['worker_exception_delay'])