Beispiel #1
0
    def run_pipeline_by_id(cls, id):
        """Run every task of the pipeline identified by ``id``.

        The schedule's pre-run hook is invoked, the pipeline's task graph is
        built, and the tasks are then processed first-in/first-out starting
        from the graph's source tasks (which receive ``None`` as input data).
        The response of each task is handed to every task it connects to.
        Once nothing is left to process, the schedule's post-run hook is
        invoked.

        A task that raises ``StopProcessingException`` does not enqueue its
        successors; tasks already waiting in the queue are still processed.
        No record of visited tasks is kept, so a task is processed once per
        incoming connection that reaches it.

        :param str id: id of the pipeline to run
        """
        log.info('Running pipeline: {}'.format(id))

        PipelineScheduleService.pre_run_schedule(id)

        graph_data = TaskConnectionService.build_graph_for_pipeline(id)

        # Work queue of (task id, input data) pairs: new work enters on the
        # left and is taken from the right, giving FIFO order.
        pending = deque()
        pending.extendleft(
            (source_id, None) for source_id in graph_data['source_ids']
        )

        while pending:
            current_id, payload = pending.pop()

            try:
                result = TaskService.process_task_with_data(current_id, payload)

                try:
                    # Fan the result out to every successor of this task.
                    pending.extendleft(
                        (successor_id, result)
                        for successor_id in graph_data['graph'][current_id]
                    )
                except KeyError:
                    # No outgoing connections: this task ends its branch.
                    pass
            except StopProcessingException:
                # The task asked to stop this branch; keep draining the queue.
                pass

        PipelineScheduleService.post_run_schedule(id)
Beispiel #2
0
    def test_write_task_connection(self):
        """Test that the provided entity gets written to the repository.

        Verifies that no connections are returned for the mock entity's
        pipeline before the write, and that exactly the written entity is
        returned for that pipeline afterwards.
        """
        entity = TaskConnectionEntity.get_mock_object()

        # Precondition: nothing is stored for this pipeline yet.
        self.assertEqual(
            TaskConnectionService.fetch_task_connections_for_pipeline(entity.pipeline_id),
            [],
        )

        TaskConnectionService.write_task_connection(entity)

        # After the write, the entity is the pipeline's only connection.
        self.assertEqual(
            TaskConnectionService.fetch_task_connections_for_pipeline(entity.pipeline_id),
            [
                entity,
            ],
        )
Beispiel #3
0
    def test_fetch_task_connections_for_pipeline(self, mock_fetch):
        """Check that the service hands back the connections of a pipeline.

        ``mock_fetch`` is configured to return two fixture connections; the
        first entity returned by the service must carry the id of the first
        fixture connection.
        """
        first_connection = self.url_to_log_connection
        stubbed_connections = [
            first_connection,
            self.raw_input_to_log_connection,
        ]
        mock_fetch.return_value = stubbed_connections

        fetched = TaskConnectionService.fetch_task_connections_for_pipeline(self.pipeline.id)

        self.assertEqual(fetched[0].id, first_connection.id)
Beispiel #4
0
    def test_build_graph_for_pipeline(self, mock_fetch):
        """Test that a graph is returned when given a pipeline id.

        ``mock_fetch`` is configured to return two fixture connections. The
        resulting ``graph_data['graph']`` must map each connection's
        ``from_task_id`` to the set of task ids it points to, and
        ``graph_data['source_ids']`` must contain both ``from_task_id`` values.
        """
        mock_fetch.return_value = [
            self.url_to_log_connection,
            self.raw_input_to_log_connection,
        ]

        graph_data = TaskConnectionService.build_graph_for_pipeline(self.pipeline.id)

        self.assertEqual(graph_data['graph'], {
            self.url_to_log_connection.from_task_id: {
                self.url_to_log_connection.to_task_id,
            },

            self.raw_input_to_log_connection.from_task_id: {
                self.raw_input_to_log_connection.to_task_id,
            },
        })

        # Order-insensitive comparison.
        # NOTE: assertItemsEqual is the Python 2 name of this assertion; it is
        # called assertCountEqual on Python 3.
        self.assertItemsEqual(graph_data['source_ids'], [
            self.raw_input_to_log_connection.from_task_id,
            self.url_to_log_connection.from_task_id,
        ])
Beispiel #5
0
    def import_pipeline(cls, pipeline_data):
        """Create and persist a complete pipeline from one import payload.

        ``pipeline_data`` must provide non-empty ``'pipeline'``,
        ``'pipeline_schedule'``, ``'tasks'`` and ``'task_connections'``
        entries. ``'tasks'`` maps a task alias to that task's data, and
        ``'task_connections'`` maps a source alias to an iterable of target
        aliases. Every entity is built and validated before anything is
        written, so a validation failure happens before the first write.

        :param dict pipeline_data: the import payload described above
        :returns: the pipeline entity that was written
        :raises Exception: when one of the four required entries is missing
            or empty
        :raises KeyError: when a connection refers to an alias that is not
            present in ``'tasks'``
        """
        required_sections = (
            ('pipeline', 'pipeline data is required'),
            ('pipeline_schedule', 'pipeline schedule data is required'),
            ('tasks', 'task data is required'),
            ('task_connections', 'task connections are required'),
        )
        for section, message in required_sections:
            if not pipeline_data.get(section):
                raise Exception(message)

        # --- Build and validate everything first ---------------------------
        pipeline = PipelineEntity(pipeline_data['pipeline'])
        pipeline.validate()

        schedule = PipelineScheduleEntity(pipeline_data['pipeline_schedule'])
        schedule.pipeline_id = pipeline.id
        schedule.next_run_at = PipelineScheduleService.calculate_next_run_at_for_schedule(
            schedule
        )
        schedule.validate()

        tasks = []
        id_by_alias = {}
        for alias, task_data in pipeline_data['tasks'].items():
            task = TaskEntity(task_data)
            task.validate()

            tasks.append(task)
            # Aliases only exist in the payload; connections need real ids.
            id_by_alias[alias] = task.id

        connections = []
        for source_alias, target_aliases in pipeline_data['task_connections'].items():
            source_id = id_by_alias[source_alias]

            for target_alias in target_aliases:
                connection = TaskConnectionEntity({
                    'from_task_id': source_id,
                    'to_task_id': id_by_alias[target_alias],
                    'pipeline_id': pipeline.id,
                })
                connection.validate()

                connections.append(connection)

        # --- Persist: pipeline, schedule, tasks, then connections ----------
        PipelineService.write_pipeline(pipeline)

        # NOTE(review): an earlier comment here said "enabled = False", but
        # nothing in this method sets that flag - confirm where it comes from.
        PipelineScheduleService.write_pipeline_schedule(schedule)

        for task in tasks:
            TaskService.write_task(task)

        for connection in connections:
            TaskConnectionService.write_task_connection(connection)

        return pipeline