Exemple #1
0
    def run(self, stream):
        """Listen on ``stream`` until this worker is aborted or stopped.

        Each pass reads ``url``, ``username`` and ``password`` from ``stream``
        and hands them to ``StreamScraper.listen``.  When that call returns
        or raises, the abort and stop flags are consulted to decide whether
        another pass is made.

        Args:
            stream: mapping providing the ``url``, ``username`` and
                ``password`` keys; it is also passed unchanged to
                ``StreamScraper.listen``.
        """
        abort = self.get_abort()
        stop = self.get_stop()
        settings = self.get_settings()

        paprika_ds = DatasourceBuilder.build('paprika-ds.json')
        connector = ConnectorFactory.create_connector(paprika_ds)
        logger = Logger(connector, self)

        job_repository = JobRepository(connector)
        job = job_repository.job()
        job_name = job['job_name']
        logger.trace(job_name, 'worker #' + str(self.get_id()) + " started.")

        def refresh_running_state():
            # abort can be requested from the main thread or another thread
            aborted = abort.is_aborted()
            self.running(not aborted)

            # stop is set by the agent's WatchWorker thread; it is only
            # consulted when no abort was requested
            if not aborted:
                stopped = stop.is_stopped()
                self.running(not stopped)

        while self.is_running():
            try:
                url = stream['url']
                auth = (stream['username'], stream['password'])

                # when streaming works the code stops here.
                # this is the reason why we use a Process instead of a
                # Thread. A process can be terminated.
                StreamScraper.listen(connector, job_name, stream, url, auth)

                refresh_running_state()
                connector.close()
                logger.trace(job_name,
                             'worker #' + str(self.get_id()) + " executed.")
            except Exception:
                # Exception instead of a bare except: KeyboardInterrupt and
                # SystemExit must be able to end the worker instead of being
                # logged and retried.
                refresh_running_state()

                result = Traceback.build()
                logger.fatal(job_name, result['message'], result['backtrace'])
                connector.close()
                # back off before the next attempt
                time.sleep(settings['worker_exception_delay'])
Exemple #2
0
    def run(self, scheduled_event, test_mode=False):
        """Trigger the process of ``scheduled_event`` each time it is due.

        Every pass reloads the scheduled event by its ``hashcode``.  When the
        stored ``expected`` timestamp lies in the past, the next expected
        timestamp is stored (for HOURS, DAYS and MINUTES repetitions), a
        process is created and executed, and an event row is inserted for it.
        The pass ends with the abort/stop check and an idle sleep.

        Args:
            scheduled_event: mapping with at least a ``hashcode`` key; it is
                replaced by the freshly loaded record on every pass.
            test_mode: when true the loop is left after one pass, whether
                that pass succeeded or raised.
        """
        abort = self.get_abort()
        stop = self.get_stop()
        paprika_ds = DatasourceBuilder.build('paprika-ds.json')
        connector = ConnectorFactory.create_connector(paprika_ds)
        logger = Logger(connector, self)

        job_repository = JobRepository(connector)
        job = job_repository.job()
        job_name = job['job_name']

        settings = self.get_settings()

        # repetition value -> timedelta keyword argument
        interval_units = {
            'HOURS': 'hours',
            'DAYS': 'days',
            'MINUTES': 'minutes',
        }

        def refresh_running_state():
            # abort can be requested from the main thread or another thread
            aborted = abort.is_aborted()
            self.running(not aborted)

            # stop is set by the agent's WatchWorker thread; it is only
            # consulted when no abort was requested
            if not aborted:
                stopped = stop.is_stopped()
                self.running(not stopped)

            # test mode always ends the loop after the current pass
            if test_mode:
                self.running(False)

        while self.is_running():
            try:
                now = datetime.now()
                scheduled_event_repository = ScheduledEventRepository(
                    connector)
                scheduled_event = scheduled_event_repository.find_by_hashcode(
                    scheduled_event['hashcode'])
                job_repository = JobRepository(connector)
                event_repository = EventRepository(connector)

                repetition = scheduled_event['repetition']
                intermission = int(scheduled_event['intermission'])
                expected = datetime.strptime(scheduled_event['expected'],
                                             "%Y-%m-%d %H:%M:%S")
                if expected < now:
                    next_expected = None
                    unit = interval_units.get(repetition)
                    if unit is not None:
                        # a non-positive step would never catch up with now
                        # and the loop below would not terminate
                        if intermission <= 0:
                            raise ValueError(
                                'intermission must be positive, got ' +
                                str(intermission))
                        step = timedelta(**{unit: intermission})
                        # skip every occurrence that was already missed
                        next_expected = expected + step
                        while next_expected < now:
                            next_expected += step

                    if next_expected:
                        message = dict()
                        message['id'] = scheduled_event['id']
                        message['expected'] = str(next_expected)
                        scheduled_event_repository.expected(message)

                    job = job_repository.job()
                    event_job_name = job['job_name']

                    process = ProcessService.create_process(
                        connector, scheduled_event['pdn_id'], event_job_name,
                        scheduled_event['e_pdn_id'])

                    event = dict()
                    event['state'] = 'READY'
                    event['repetition'] = repetition
                    event['intermission'] = intermission
                    # the event records the occurrence that just became due,
                    # not the newly computed one
                    event['expected'] = scheduled_event['expected']
                    event['job_name'] = event_job_name
                    event['pcs_id'] = process['id']
                    event = event_repository.insert(event)

                    process_property_repository = ProcessPropertyRepository(
                        connector)
                    process_property_repository.set_property(
                        process, 'event_id', event['id'])

                    ProcessService.execute_process(connector, process)

                refresh_running_state()

                connector.close()
                time.sleep(settings['worker_idle_delay'])
                logger.trace(job_name,
                             'worker #' + str(self.get_id()) + " executed.")
            except Exception:
                # Exception instead of a bare except: KeyboardInterrupt and
                # SystemExit must be able to end the worker instead of being
                # logged and retried.
                refresh_running_state()

                result = Traceback.build()
                logger.fatal(job_name, result['message'], result['backtrace'])
                connector.close()
                # back off before the next attempt
                time.sleep(settings['worker_exception_delay'])
Exemple #3
0
    def run(self, queue):
        """Dequeue and process messages from ``queue`` until aborted/stopped.

        Each pass dequeues one message using this worker's claim.  A message
        is handed to the consumer named in its ``consumer`` field and then
        marked ``PROCESSED``; if anything raises while a message is held, the
        failure details are stored on it with state ``FAILED``.  When no
        message is available the connector is closed and the worker sleeps
        for the idle delay.

        Args:
            queue: queue identifier passed to ``MessageRepository.dequeue``
                and ``MessageRepository.state``.
        """
        abort = self.get_abort()
        claim = self.get_claim()
        stop = self.get_stop()
        settings = self.get_settings()

        paprika_ds = DatasourceBuilder.build('paprika-ds.json')
        connector = ConnectorFactory.create_connector(paprika_ds)

        logger = Logger(connector, self)

        job_repository = JobRepository(connector)
        job = job_repository.job()
        job_name = job['job_name']

        def refresh_running_state():
            # abort can be requested from the main thread or another thread
            aborted = abort.is_aborted()
            self.running(not aborted)

            # stop is set by the agent's WatchWorker thread; it is only
            # consulted when no abort was requested
            if not aborted:
                stopped = stop.is_stopped()
                self.running(not stopped)

        while self.is_running():
            try:
                # reset first so the handler below never reports a message
                # left over from an earlier pass
                message = None

                # retrieve the next message
                message_repository = MessageRepository(connector)
                message = message_repository.dequeue(claim, queue)

                if message:
                    consumer = ClassLoader.find(message['consumer'])
                    consumer.action(connector, message)

                    message_repository.state(queue, message, 'PROCESSED')

                refresh_running_state()

                # no message to process: release the connection and idle;
                # after a processed message the next pass starts immediately
                if not message:
                    connector.close()
                    time.sleep(settings['worker_idle_delay'])

                logger.trace(job_name,
                             'worker #' + str(self.get_id()) + " executed.")
            except Exception:
                # Exception instead of a bare except: KeyboardInterrupt and
                # SystemExit must be able to end the worker instead of being
                # logged and retried.
                refresh_running_state()

                result = Traceback.build()
                if message:
                    try:
                        # persist the failure on the message that caused it
                        message_repository = MessageRepository(connector)
                        result['id'] = message['id']
                        message_repository.state(queue, result, 'FAILED')
                    except Exception:
                        # NOTE(review): every other fatal() call passes a
                        # backtrace as third argument - confirm that two
                        # arguments are accepted here.
                        logger.fatal(job_name,
                                     'Failed to persist message failure')

                logger.fatal(job_name, result['message'], result['backtrace'])
                connector.close()
                # back off before the next attempt
                time.sleep(settings['worker_exception_delay'])
Exemple #4
0
    def run(self, location):
        """Scan ``location`` for new stable files and start a process each.

        Each pass lists the stable files below the location's path.  For
        every file whose hashcode is not yet registered, the matching rule
        (or the failsafe rule when none matches) is looked up, a process is
        created, the file is registered with state ``READY``, the process
        properties are stored and the process is executed.  The pass ends
        with the abort/stop check and an idle sleep.

        Args:
            location: mapping providing the ``url``, ``patterns``,
                ``recursive`` and ``depth`` keys; it is also passed to
                ``RuleRepository.find_by_location``.
        """
        abort = self.get_abort()
        stop = self.get_stop()
        paprika_ds = DatasourceBuilder.build('paprika-ds.json')
        connector = ConnectorFactory.create_connector(paprika_ds)
        logger = Logger(connector, self)

        job_repository = JobRepository(connector)
        job = job_repository.job()
        job_name = job['job_name']

        settings = self.get_settings()

        def refresh_running_state():
            # abort can be requested from the main thread or another thread
            aborted = abort.is_aborted()
            self.running(not aborted)

            # stop is set by the agent's WatchWorker thread; it is only
            # consulted when no abort was requested
            if not aborted:
                stopped = stop.is_stopped()
                self.running(not stopped)

        while self.is_running():
            try:
                properties = PropertyRepository(connector)
                registry = FileRepository(connector)
                rule_repository = RuleRepository(connector)

                excluded_extensions = properties.get_property(
                    'scanner.excluded_extensions')
                stable_check_delay = properties.get_property(
                    'scanner.stable_check_delay')

                url = location['url']
                patterns = location['patterns']
                client = VfsFactory.create_client(url)
                client.set_excluded_extensions(excluded_extensions)
                client.set_stable_check_delay(int(stable_check_delay))
                client.set_regular_expressions(patterns)
                path = client.get_path()
                recursive = int(location['recursive'])
                depth = int(location['depth'])

                client.connect()
                try:
                    entries = client.list_stable(path,
                                                 recursive=recursive,
                                                 depth=depth)
                    for entry in entries:
                        registered_file = registry.get_by_hashcode(
                            entry['hashcode'])
                        if registered_file:
                            # already picked up in an earlier pass
                            continue

                        # find the rule; there is no break, so when several
                        # rules match the last one wins
                        found_rule = None
                        rules = rule_repository.find_by_location(location)
                        for rule in rules:
                            if Matcher.match(ReMethod, rule['pattern'],
                                             entry['filename']):
                                found_rule = rule
                        if not found_rule:
                            found_rule = rule_repository.find_failsafe()

                        job = job_repository.job()
                        file_job_name = job['job_name']

                        logger.info(
                            file_job_name, "file: " + entry['url'] + '/' +
                            entry['filename'] + " rule: " +
                            found_rule['rule'] + " hascode:" +
                            entry['hashcode'])

                        process = ProcessService.create_process(
                            connector, found_rule['pdn_id'], file_job_name,
                            found_rule['e_pdn_id'])

                        message = dict()
                        message['job_name'] = file_job_name
                        message['filename'] = entry['filename']
                        message['path'] = entry['path']
                        message['pattern'] = found_rule['pattern']
                        message['rle_id'] = found_rule['id']
                        message['rule'] = found_rule['rule']
                        message['pickup_location'] = entry['url']
                        message['filesize'] = entry['size']
                        message['hashcode'] = entry['hashcode']
                        message['pcs_id'] = process['id']
                        message['state'] = 'READY'
                        registered_file = registry.insert(message)

                        process_property_repository = \
                            ProcessPropertyRepository(connector)
                        process_property_repository.set_property(
                            process, 'file_id', registered_file['id'])
                        process_property_repository.set_property(
                            process, 'pickup_location', entry['url'])
                        process_property_repository.set_property(
                            process, 'path', entry['path'])
                        process_property_repository.set_property(
                            process, 'payload',
                            json.dumps({
                                'filename': entry['filename'],
                                'job_name': file_job_name
                            }))

                        ProcessService.execute_process(connector, process)
                finally:
                    # release the client even when listing or processing
                    # failed, so a failing pass does not leave it connected
                    client.close()

                refresh_running_state()

                connector.close()
                time.sleep(settings['worker_idle_delay'])
                logger.trace(job_name,
                             'worker #' + str(self.get_id()) + " executed.")
            except Exception:
                # Exception instead of a bare except: KeyboardInterrupt and
                # SystemExit must be able to end the worker instead of being
                # logged and retried.
                refresh_running_state()

                result = Traceback.build()
                logger.fatal(job_name, result['message'], result['backtrace'])
                connector.close()
                # back off before the next attempt
                time.sleep(settings['worker_exception_delay'])