Ejemplo n.º 1
0
    def run_pipeline_by_id(cls, id):
        """Execute every task of the pipeline identified by ``id``.

        Calls ``PipelineScheduleService.pre_run_schedule``, processes the
        pipeline's tasks in FIFO order starting from the graph's source
        tasks, then calls ``PipelineScheduleService.post_run_schedule``.

        :param str id: identifier of the pipeline to run
        """
        log.info('Running pipeline: {}'.format(id))

        PipelineScheduleService.pre_run_schedule(id)

        graph_data = TaskConnectionService.build_graph_for_pipeline(id)

        # Work list of (task id, input data); source tasks start with no input.
        pending = deque(
            (source_id, None) for source_id in graph_data['source_ids']
        )

        while pending:
            current_id, payload = pending.popleft()

            try:
                output = TaskService.process_task_with_data(current_id, payload)

                try:
                    # Hand this task's output to each task connected after it.
                    pending.extend(
                        (next_id, output)
                        for next_id in graph_data['graph'][current_id]
                    )
                except KeyError:
                    # The graph has no outgoing entry for this task.
                    continue
            except StopProcessingException:
                # This branch ends here: nothing after the task is queued.
                continue

        PipelineScheduleService.post_run_schedule(id)
    def test_write_pipeline_schedule(self):
        """Test that a schedule written through the service is persisted."""
        self.install_fixture('pipeline_schedule_interval')

        new_schedule = '150'
        schedule_entity = PipelineScheduleMapper.to_entity(
            self.pipeline_schedule_interval,
        )
        schedule_entity.schedule = new_schedule
        PipelineScheduleService.write_pipeline_schedule(schedule_entity)

        # Reading the pipeline's schedule back must show the new value.
        self._assert_pipeline_attribute_equals(
            self.pipeline.id, 'schedule', new_schedule,
        )
    def _assert_pipeline_attribute_equals(self, pipeline_id, attribute, expected):
        """Assert that an attribute of a pipeline's stored schedule has a value.

        The schedule is fetched through
        ``PipelineScheduleService.fetch_schedule_for_pipeline`` on every call.

        :param pipeline_id: id of the pipeline whose schedule is fetched
        :param str attribute: name of the schedule attribute to inspect
        :param expected: value the attribute must equal
        """
        schedule = PipelineScheduleService.fetch_schedule_for_pipeline(
            pipeline_id,
        )

        # Use assertEqual: the assertEquals alias is deprecated and no longer
        # exists on Python 3.12+.
        self.assertEqual(
            getattr(schedule, attribute),
            expected,
        )
    def test_update_next_run_at_interval_never_ran(self):
        """Test that next_run_at = current time + interval."""
        self.install_fixture('pipeline_schedule_interval')
        pipeline_id = self.pipeline.id

        PipelineScheduleService.update_next_run_at_for_pipeline(pipeline_id)

        # The interval length (in seconds) is read from the stored schedule.
        stored = PipelineScheduleService.fetch_schedule_for_pipeline(
            pipeline_id,
        )

        # NOTE(review): exact equality against utcnow() presumably relies on
        # the test clock being frozen -- confirm.
        expected = datetime.utcnow() + timedelta(seconds=int(stored.schedule))
        self._assert_pipeline_attribute_equals(
            pipeline_id, 'next_run_at', expected,
        )
    def test_update_next_run_at_cron(self):
        """Test that a cron schedule yields the expected next_run_at."""
        self.install_fixture('pipeline_schedule_cron')
        pipeline_id = self.pipeline.id

        # Record a run first, then recompute the next run time.
        PipelineScheduleService.update_last_run_at_for_pipeline(pipeline_id)
        PipelineScheduleService.update_next_run_at_for_pipeline(pipeline_id)

        expected_next_run = datetime(2014, 2, 1, 0, 5)
        self._assert_pipeline_attribute_equals(
            pipeline_id, 'next_run_at', expected_next_run,
        )
    def test_lock_schedule_for_pipeline(self):
        """Test that locking flips the schedule's ``locked`` flag to True."""
        self.install_fixture('pipeline_schedule_interval')
        pipeline_id = self.pipeline.id

        # Before: the fixture's schedule is not locked.
        self._assert_pipeline_attribute_equals(pipeline_id, 'locked', False)

        PipelineScheduleService.lock_schedule_for_pipeline(pipeline_id)

        # After: the stored schedule reports locked.
        self._assert_pipeline_attribute_equals(pipeline_id, 'locked', True)
    def test_fetch_schedule_for_pipeline(self):
        """Test that the schedule fetched for a pipeline matches its fixture."""
        self.install_fixture('pipeline_schedule_interval')

        fetched = PipelineScheduleService.fetch_schedule_for_pipeline(
            self.pipeline.id,
        )
        expected = PipelineScheduleMapper.to_entity(
            self.pipeline_schedule_interval,
        )

        self.assertEqual(fetched, expected)
    def test_update_next_run_at_manual(self):
        """Test that a manual schedule ends up with next_run_at = None."""
        self.install_fixture('pipeline_schedule_manual')
        pipeline_id = self.pipeline.id

        # Record a run first, then recompute the next run time.
        PipelineScheduleService.update_last_run_at_for_pipeline(pipeline_id)
        PipelineScheduleService.update_next_run_at_for_pipeline(pipeline_id)

        self._assert_pipeline_attribute_equals(pipeline_id, 'next_run_at', None)
    def test_fetch_schedules_to_run(self):
        """Test that the fixture's schedule is the only one reported to run."""
        self.install_fixture('pipeline_schedule_interval')

        due_schedules = PipelineScheduleService.fetch_schedules_to_run()
        expected_entity = PipelineScheduleMapper.to_entity(
            self.pipeline_schedule_interval,
        )

        self.assertEqual(due_schedules, [expected_entity])
    def test_update_last_run_at(self):
        """Test that last_run_at is updated to the latest date."""
        self.install_fixture('pipeline_schedule_interval')

        # Assert that last_run_at is empty
        self._assert_pipeline_attribute_equals(
            self.pipeline.id,
            'last_run_at',
            None,
        )

        # Set last_run_at to expected
        PipelineScheduleService.update_last_run_at_for_pipeline(
            self.pipeline.id,
        )

        # Assert that last_run_at is expected.
        # Plain decimal literals: the former `02` / `01` spelling is a
        # SyntaxError on Python 3 and denotes the same values on Python 2.
        # NOTE(review): the fixed date presumably matches a frozen test
        # clock -- confirm.
        self._assert_pipeline_attribute_equals(
            self.pipeline.id,
            'last_run_at',
            datetime(2014, 2, 1),
        )
    def test_post_run_updates_next_run_at(self, mock_update):
        """Test that post_run_schedule triggers the next_run_at update.

        :param mock_update: mock supplied from outside this method --
            presumably a patch of the next_run_at update call; verify against
            the decorator.
        """
        self.install_fixture('pipeline_schedule_interval')
        pipeline_id = self.pipeline.id

        PipelineScheduleService.post_run_schedule(pipeline_id)

        # Exactly one call, made with the pipeline's id.
        mock_update.assert_called_once_with(pipeline_id)
    def test_post_run_schedule_unlocks(self, mock_unlock):
        """Test that post_run_schedule triggers the unlock call.

        :param mock_unlock: mock supplied from outside this method --
            presumably a patch of the schedule unlock call; verify against
            the decorator.
        """
        self.install_fixture('pipeline_schedule_interval')
        pipeline_id = self.pipeline.id

        PipelineScheduleService.post_run_schedule(pipeline_id)

        # Exactly one call, made with the pipeline's id.
        mock_unlock.assert_called_once_with(pipeline_id)
Ejemplo n.º 13
0
    def import_pipeline(cls, pipeline_data):
        """Build and persist a pipeline from a data dictionary.

        Every entity is constructed and validated before the first write, so
        an exception raised during construction or validation happens before
        anything is saved.

        :param dict pipeline_data: must contain non-empty ``pipeline``,
            ``pipeline_schedule``, ``tasks`` (alias -> task data) and
            ``task_connections`` (alias -> iterable of target aliases)
        :raises Exception: if one of those sections is missing or empty
        :raises KeyError: if a connection names an alias absent from ``tasks``
        :returns: the pipeline entity that was written
        """
        # Checked in this order; the first missing section wins.
        required_sections = (
            ('pipeline', 'pipeline data is required'),
            ('pipeline_schedule', 'pipeline schedule data is required'),
            ('tasks', 'task data is required'),
            ('task_connections', 'task connections are required'),
        )
        for section, message in required_sections:
            if not pipeline_data.get(section):
                raise Exception(message)

        # Pipeline
        pipeline = PipelineEntity(pipeline_data['pipeline'])
        pipeline.validate()

        # Schedule, tied to the pipeline and given its first next_run_at
        schedule = PipelineScheduleEntity(pipeline_data['pipeline_schedule'])
        schedule.pipeline_id = pipeline.id
        next_run_at = PipelineScheduleService.calculate_next_run_at_for_schedule(
            schedule
        )
        schedule.next_run_at = next_run_at
        schedule.validate()

        # Tasks, remembering which id each alias received
        tasks = []
        id_for_alias = {}
        for alias, task_data in pipeline_data['tasks'].items():
            task = TaskEntity(task_data)
            task.validate()
            tasks.append(task)
            id_for_alias[alias] = task.id

        # Connections, with aliases resolved to the task ids created above
        connections = []
        for source_alias, target_aliases in pipeline_data['task_connections'].items():
            source_task_id = id_for_alias[source_alias]
            for target_alias in target_aliases:
                connection = TaskConnectionEntity({
                    'from_task_id': source_task_id,
                    'to_task_id': id_for_alias[target_alias],
                    'pipeline_id': pipeline.id,
                })
                connection.validate()
                connections.append(connection)

        # Persist: pipeline, schedule, tasks, then connections
        PipelineService.write_pipeline(pipeline)
        PipelineScheduleService.write_pipeline_schedule(schedule)
        for task in tasks:
            TaskService.write_task(task)
        for connection in connections:
            TaskConnectionService.write_task_connection(connection)

        return pipeline
Ejemplo n.º 14
0
import time

from ocelot.lib import logging
from ocelot.services import datastores
from ocelot.services.pipeline import PipelineService
from ocelot.services.pipeline_schedule import PipelineScheduleService

log = logging.getLogger('ocelot.scheduler')

# Seconds to wait between two polls for schedules to run.
SLEEP_SECONDS = 10

# Scheduler entry point: prepare the datastores, then poll for schedules to
# run and execute each one's pipeline until interrupted from the keyboard.
if __name__ == '__main__':
    datastores.create_tables()
    datastores.initialize()

    log.info('Starting scheduler')

    try:
        while True:
            pipeline_schedules = PipelineScheduleService.fetch_schedules_to_run()

            log.info('Found {} pipelines to run'.format(len(pipeline_schedules)))

            # Iterate the list fetched above instead of querying a second
            # time, so the logged count always matches what is run.
            for schedule in pipeline_schedules:
                PipelineService.run_pipeline_by_id(schedule.pipeline_id)

            time.sleep(SLEEP_SECONDS)
    except KeyboardInterrupt:
        # Ctrl-C leaves the polling loop and falls through to the shutdown log.
        pass

    log.info('Shutting down scheduler')