def setUp(self):
        """Create the fixtures for the super-trigger scenario.

        Saves two TEMPLATE ``no_op_engine`` datastores, one ACTIVE workflow
        per datastore and two TEMPLATE succeeding actions per workflow.  It
        then saves a ``workflow_completion`` trigger keyed on workflow0 and
        a ``super`` trigger that references that trigger and targets
        workflow1.
        """
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        engine_args = {'action_sleep_time_in_seconds': 0}

        def template_datastore(name):
            # unsaved TEMPLATE datastore for the no-op engine
            data = DatastoreData(name, 'no_op_engine', args=engine_args,
                                 state=DatastoreState.TEMPLATE)
            return Datastore(data=data)

        def save_active_workflow(name, datastore):
            # save an ACTIVE workflow attached to the given saved datastore
            data = WorkflowData(name, datastore.id,
                                state=WorkflowState.ACTIVE)
            return self.dart.save_workflow(Workflow(data=data), datastore.id)

        def template_action():
            # unsaved TEMPLATE action of the always-succeeding no-op type
            type_name = NoOpActionTypes.action_that_succeeds.name
            return Action(data=ActionData(type_name, type_name,
                                          state=ActionState.TEMPLATE))

        self.datastore0 = self.dart.save_datastore(
            template_datastore('test-datastore0'))
        self.datastore1 = self.dart.save_datastore(
            template_datastore('test-datastore1'))

        self.workflow0 = save_active_workflow('test-workflow0',
                                              self.datastore0)
        self.workflow1 = save_active_workflow('test-workflow1',
                                              self.datastore1)

        self.action00, self.action01 = self.dart.save_actions(
            [template_action(), template_action()],
            workflow_id=self.workflow0.id)
        self.action10, self.action11 = self.dart.save_actions(
            [template_action(), template_action()],
            workflow_id=self.workflow1.id)

        completion_args = {'completed_workflow_id': self.workflow0.id}
        completion = Trigger(data=TriggerData(
            'test-trigger', 'workflow_completion', None, completion_args,
            TriggerState.ACTIVE))
        self.trigger = self.dart.save_trigger(completion)

        super_args = {
            'fire_after': 'ALL',
            'completed_trigger_ids': [self.trigger.id],
        }
        super_trigger = Trigger(data=TriggerData(
            'test-super-trigger', 'super', [self.workflow1.id], super_args,
            TriggerState.ACTIVE))
        self.super_trigger = self.dart.save_trigger(super_trigger)
Exemple #2
0
    def test_crud_datastore(self):
        """Create, read and delete two actions saved against a datastore.

        The locally built actions are patched with the fields that only
        exist on the saved/fetched copies so that the ``to_dict()``
        comparisons can be exact.  After deletion, fetching either action
        must raise a DartRequestException carrying a 404 response.
        """
        type_name = NoOpActionTypes.action_that_succeeds.name
        expected = [
            Action(data=ActionData(name=type_name,
                                   action_type_name=type_name,
                                   engine_name='no_op_engine'))
            for _ in range(2)
        ]
        posted_actions = self.dart.save_actions(
            actions=expected, datastore_id=self.datastore.id)

        # copy fields that are populated at creation time
        for idx, local in enumerate(expected):
            posted = posted_actions[idx]
            local.data.datastore_id = posted.data.datastore_id
            local.data.args = {}
            local.data.order_idx = posted.data.order_idx
            local.data.user_id = posted.data.user_id

        for idx, local in enumerate(expected):
            self.assertEqual(posted_actions[idx].data.to_dict(),
                             local.data.to_dict())

        # a fetched action differs from the locally built one in its state
        # and queued time, so those are copied over before comparing
        a0 = self.dart.get_action(posted_actions[0].id)
        a1 = self.dart.get_action(posted_actions[1].id)
        fetched = (a0, a1)
        for local, remote in zip(expected, fetched):
            local.data.state = remote.data.state
            local.data.queued_time = remote.data.queued_time

        for local, remote in zip(expected, fetched):
            self.assertEqual(remote.data.to_dict(), local.data.to_dict())

        for remote in fetched:
            self.dart.delete_action(remote.id)

        # each deleted action must now be reported as missing (404)
        for remote in fetched:
            try:
                self.dart.get_action(remote.id)
            except DartRequestException as err:
                self.assertEqual(err.response.status_code, 404)
            else:
                self.fail('action should have been missing after delete!')
Exemple #3
0
    def test_crud_workflow(self):
        """Create, read and delete two TEMPLATE actions saved on a workflow.

        The locally built actions are patched with the fields that only
        exist on the saved copies so that the ``to_dict()`` comparisons can
        be exact.  After deletion, fetching either action must raise a
        DartRequestException carrying a 404 response.
        """
        type_name = NoOpActionTypes.action_that_succeeds.name
        expected = [
            Action(data=ActionData(name=type_name,
                                   action_type_name=type_name,
                                   state=ActionState.TEMPLATE,
                                   engine_name='no_op_engine'))
            for _ in range(2)
        ]
        posted_actions = self.dart.save_actions(
            expected, workflow_id=self.workflow.id)

        # copy fields that are populated at creation time
        for idx, local in enumerate(expected):
            posted = posted_actions[idx]
            local.data.workflow_id = posted.data.workflow_id
            local.data.order_idx = posted.data.order_idx
            local.data.args = {}
            local.data.user_id = posted.data.user_id

        for idx, local in enumerate(expected):
            self.assertEqual(posted_actions[idx].data.to_dict(),
                             local.data.to_dict())

        a0 = self.dart.get_action(posted_actions[0].id)
        a1 = self.dart.get_action(posted_actions[1].id)
        fetched = (a0, a1)
        for local, remote in zip(expected, fetched):
            self.assertEqual(remote.data.to_dict(), local.data.to_dict())

        for remote in fetched:
            self.dart.delete_action(remote.id)

        # each deleted action must now be reported as missing (404)
        for remote in fetched:
            try:
                self.dart.get_action(remote.id)
            except DartRequestException as err:
                self.assertEqual(err.response.status_code, 404)
            else:
                self.fail('action should have been missing after delete!')
Exemple #4
0
    def setUp(self):
        """Create dataset, subscription, datastore, workflow and actions.

        The dataset and the subscription bounds live under the S3 bucket
        named by the ``DART_TEST_BUCKET`` environment variable (a KeyError
        is raised if it is not set).  The workflow gets two TEMPLATE
        actions: one that succeeds and one that consumes the subscription.
        """
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        columns = [
            Column('c1', DataType.VARCHAR, 50),
            Column('c2', DataType.BIGINT),
        ]
        data_format = DataFormat(FileFormat.TEXTFILE, RowFormat.DELIMITED)
        impala_root = 's3://' + os.environ['DART_TEST_BUCKET'] + '/impala'
        dataset = Dataset(data=DatasetData(
            name='test-dataset',
            table_name='test_dataset_table',
            load_type=LoadType.INSERT,
            location=impala_root,
            data_format=data_format,
            columns=columns,
            tags=[]))
        self.dataset = self.dart.save_dataset(dataset)

        # subscription bounds and file filter, all below the dataset root
        start = impala_root + '/impala'
        end = impala_root + '/install'
        regex = '.*\\.rpm'
        subscription = Subscription(data=SubscriptionData(
            'test-subscription', self.dataset.id, start, end, regex))
        self.subscription = self.dart.save_subscription(subscription)

        datastore = Datastore(data=DatastoreData(
            'test-datastore',
            'no_op_engine',
            args={'action_sleep_time_in_seconds': 0},
            state=DatastoreState.TEMPLATE))
        self.datastore = self.dart.save_datastore(datastore)

        workflow = Workflow(data=WorkflowData(
            'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE))
        self.workflow = self.dart.save_workflow(workflow, self.datastore.id)

        consume_args = {'subscription_id': self.subscription.id}
        succeeds = NoOpActionTypes.action_that_succeeds.name
        succeed_action = Action(data=ActionData(
            succeeds, succeeds, state=ActionState.TEMPLATE))
        consumes = NoOpActionTypes.consume_subscription.name
        consume_action = Action(data=ActionData(
            consumes, consumes, consume_args, state=ActionState.TEMPLATE))
        self.action0, self.action1 = self.dart.save_actions(
            [succeed_action, consume_action], workflow_id=self.workflow.id)
Exemple #5
0
    def test_lost_engine_container(self):
        """Expect the action to FAIL and its datastore to end up INACTIVE.

        Saves a HAS_NEVER_RUN action on ``self.datastore``, waits for it to
        complete, checks both states and finally deletes the action.
        """
        type_name = NoOpActionTypes.action_that_succeeds.name
        pending = Action(data=ActionData(type_name, type_name,
                                         state=ActionState.HAS_NEVER_RUN))
        saved = self.dart.save_actions([pending],
                                       datastore_id=self.datastore.id)

        finished = self.dart.await_action_completion(saved[0].id)
        self.assertEqual(finished.data.state, ActionState.FAILED)

        refreshed = self.dart.get_datastore(self.datastore.id)
        self.assertEqual(refreshed.data.state, DatastoreState.INACTIVE)

        self.dart.delete_action(finished.id)
Exemple #6
0
    def test_action_schema_invalid(self):
        """Validation must reject copy_hdfs_to_s3 args lacking a destination."""
        incomplete_args = {
            'source_hdfs_path': 'hdfs:///user/hive/warehouse/dtest4',
            # 'destination_s3_path' is left out on purpose
        }
        with self.assertRaises(DartValidationException) as context:
            invalid = Action(data=ActionData(
                'copy_hdfs_to_s3',
                'copy_hdfs_to_s3',
                incomplete_args,
                engine_name='no_op_engine'))
            schema = action_schema(
                NoOpActionTypes.copy_hdfs_to_s3_action.params_json_schema)
            # should fail because destination_s3_path is required
            default_and_validate(invalid, schema).to_dict()

        self.assertTrue(isinstance(context.exception, DartValidationException))
Exemple #7
0
 def test_action_schema(self):
     """Validating a complete copy_hdfs_to_s3 action must change its dict."""
     complete_args = {
         'source_hdfs_path': 'hdfs:///user/hive/warehouse/dtest4',
         'destination_s3_path': 's3://fake-bucket/dart_testing',
     }
     action = Action(data=ActionData(
         'copy_hdfs_to_s3',
         'copy_hdfs_to_s3',
         complete_args,
         engine_name='no_op_engine',
         last_in_workflow=None))
     before = action.to_dict()
     schema = action_schema(
         NoOpActionTypes.copy_hdfs_to_s3_action.params_json_schema)
     after = default_and_validate(action, schema).to_dict()
     # many fields should have been defaulted, making these unequal
     self.assertNotEqual(before, after)
Exemple #8
0
    def setUp(self):
        """Create an ACTIVE datastore and workflow with one failing action.

        The saved datastore and workflow are kept on ``self``; the saved
        TEMPLATE ``action_that_fails`` action is not retained.
        """
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        datastore_data = DatastoreData(
            'test-datastore',
            'no_op_engine',
            args={'action_sleep_time_in_seconds': 0},
            state=DatastoreState.ACTIVE)
        self.datastore = self.dart.save_datastore(
            Datastore(data=datastore_data))

        workflow_data = WorkflowData(
            'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE)
        self.workflow = self.dart.save_workflow(
            Workflow(data=workflow_data), self.datastore.id)

        failing = NoOpActionTypes.action_that_fails.name
        failing_action = Action(data=ActionData(
            failing, failing, state=ActionState.TEMPLATE))
        self.dart.save_actions([failing_action],
                               workflow_id=self.workflow.id)
    def test_consume_subscription_exceptions(self):
        """Check the exception raised by consume_subscription on failure.

        Four scenarios are run against a fresh engine from
        ``self.init_mocks``, whose argument is a Mock that raises.  In each
        one the expected exception type must be raised and
        ``dart.patch_action`` must not have been called.
        """
        a = Action(id='abc123',
                   data=ActionData('a_name', 'a_name',
                                   workflow_instance_id='abc123',
                                   args={'subscription_id': 0}))
        d = Datastore(id='abc123')

        def expect_raises(side_effect, expected_error, **consume_kwargs):
            # one scenario: fresh mocks, run, verify nothing was patched
            engine = self.init_mocks(Mock(side_effect=side_effect))
            with self.assertRaises(expected_error):
                consume_subscription(engine, d, a, **consume_kwargs)
            engine.dart.patch_action.assert_not_called()

        # plain failure, consume not flagged as successful
        expect_raises(Exception(), Exception)

        # plain failure, caller says the consume itself succeeded
        expect_raises(Exception(), ActionFailedButConsumeSuccessfulException,
                      consume_successful=True)

        # DartActionException whose StepWrapper carries a final False flag
        expect_raises(
            DartActionException('failed', StepWrapper(None, 0, 0, False)),
            Exception)

        # DartActionException whose StepWrapper carries a final True flag
        expect_raises(
            DartActionException('failed', StepWrapper(None, 0, 0, True)),
            ActionFailedButConsumeSuccessfulException)
Exemple #10
0
 def setUp(self):
     """Create a default Action and a minimal ActionData for the tests."""
     self.actionModel = Action()
     data_fields = {
         'name': "action data name",
         'action_type_name': "test",
     }
     self.actionDataModel = ActionData(**data_fields)
Exemple #11
0
    print 'created dataset: %s' % dataset.id

    datastore = dart.save_datastore(
        Datastore(data=DatastoreData(
            name='weblogs_DW-3500_holiday_readiness',
            engine_name='emr_engine',
            state=DatastoreState.ACTIVE,
            args={
                # 'instance_count': 30,
                'data_to_freespace_ratio': 0.30,
            })))
    print 'created datastore: %s' % datastore.id

    actions = dart.save_actions(
        actions=[
            Action(data=ActionData('start_datastore', 'start_datastore')),
            Action(data=ActionData(
                'load_dataset',
                'load_dataset',
                args={
                    'dataset_id': dataset.id,
                    's3_path_start_prefix_inclusive':
                    's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/30',
                    's3_path_end_prefix_exclusive':
                    's3://example-bucket/weblogs/www.retailmenot.com/ec2/2015/36',
                    # 's3_path_regex_filter': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/../www\\.retailmenot\\.com.*',
                    'target_file_format': FileFormat.RCFILE,
                    'target_row_format': RowFormat.NONE,
                    'target_compression': Compression.SNAPPY,
                })),
        ],
    print 'created datastore: %s' % datastore.id

    workflow = dart.save_workflow(Workflow(
        data=WorkflowData(
            'load_beacon_native_app_impala',
            datastore.id,
            state=WorkflowState.ACTIVE,
            on_failure_email=['*****@*****.**'],
            on_success_email=['*****@*****.**'],
            on_started_email=['*****@*****.**'],
        )
    ), datastore.id)
    print 'created workflow: %s' % workflow.id

    a0, a1 = dart.save_actions([
        Action(data=ActionData('start_datastore', 'start_datastore', state=ActionState.TEMPLATE)),
        Action(data=ActionData('load_dataset', 'load_dataset', state=ActionState.TEMPLATE, args={
            'dataset_id': dataset.id,
            's3_path_start_prefix_inclusive': 's3://example-bucket/prd/beacon/native_app/v2/parquet/snappy/createdpartition=2015-06-27',
        })),
    ], workflow_id=workflow.id)
    print 'created action: %s' % a0.id
    print 'created action: %s' % a1.id

    event = dart.save_event(Event(data=EventData('beacon_native_app_to_parquet_emr_job_completion', state=EventState.ACTIVE)))
    print 'created event: %s' % event.id

    trigger = dart.save_trigger(Trigger(data=TriggerData(
        'beacon_native_app_to_parquet_emr_job_completion_trigger',
        'event',
        [workflow.id],
    assert isinstance(dart, Dart)

    datastore = dart.save_datastore(
        Datastore(data=DatastoreData(
            name='amaceiras_beacon_native_app_null_coupons_issue',
            engine_name='emr_engine',
            state=DatastoreState.ACTIVE,
            args={
                # 'data_to_freespace_ratio': 0.05,
                'instance_count': 5,
            })))
    print 'created datastore: %s' % datastore.id

    actions = dart.save_actions(
        actions=[
            Action(data=ActionData('start_datastore', 'start_datastore')),
            Action(data=ActionData(
                'load_dataset',
                'load_dataset',
                args={
                    'dataset_id': 'URBA9XEQEF',
                    's3_path_start_prefix_inclusive':
                    's3://example-bucket/nb.retailmenot.com/parsed_logs/2015/33/beacon-v2-2015-08-18',
                    # 's3_path_end_prefix_exclusive': 's3://example-bucket/nb.retailmenot.com/parsed_logs/2015/31/beacon-v2-2015-08-01',
                    's3_path_regex_filter': '.*\\.tsv',
                    'target_file_format': FileFormat.PARQUET,
                    'target_row_format': RowFormat.NONE,
                    'target_compression': Compression.SNAPPY,
                })),
        ],
        datastore_id=datastore.id
Exemple #14
0
        state=WorkflowState.ACTIVE,
        on_failure_email=['*****@*****.**', '*****@*****.**'],
        on_success_email=['*****@*****.**', '*****@*****.**'],
        on_started_email=['*****@*****.**', '*****@*****.**'],
    )),
                                  datastore_id=datastore.id)
    print 'created workflow: %s' % workflow.id

    a2 = dart.save_actions(actions=[
        Action(data=ActionData('consume_subscription',
                               'consume_subscription',
                               state=ActionState.TEMPLATE,
                               args={
                                   'subscription_id': subscription.id,
                                   'target_file_format': FileFormat.TEXTFILE,
                                   'target_row_format': RowFormat.DELIMITED,
                                   'target_compression': Compression.GZIP,
                                   'target_delimited_by': '\t',
                                   'target_quoted_by': '"',
                                   'target_escaped_by': '\\',
                                   'target_null_string': 'NULL',
                               })),
    ],
                           workflow_id=workflow.id)[0]
    print 'created workflow action: %s' % a2.id

    trigger = dart.save_trigger(
        Trigger(data=TriggerData(name='weblogs_DW-3213_v3',
                                 trigger_type_name='subscription_batch',
                                 workflow_ids=[workflow.id],
                                 args={
Exemple #15
0
def _get_static_subgraphs_by_related_type(engine, graph_entity_service):
    """Build the static sub-graphs offered for an engine, keyed by type.

    For ``EntityType.workflow`` the list holds one TEMPLATE action sub-graph
    per supported action type, followed by the workflow-completion,
    scheduled and super trigger sub-graphs.  For ``EntityType.datastore``
    it holds a workflow sub-graph followed by one HAS_NEVER_RUN action
    sub-graph per supported action type.

    :param engine: engine whose ``data.name`` and
        ``data.supported_action_types`` are read
    :param graph_entity_service: service used to randomize entity ids and
        to derive each sub-graph's graph and entity map
    :return: dict mapping the related entity type to a list of SubGraph
    """
    engine_name = engine.data.name
    randomize = graph_entity_service.to_entity_models_with_randomized_ids

    def sub_graph(name, description, related_type, related_is_a, models,
                  icon):
        # wrap already-randomized entity models in a SubGraph
        return SubGraph(
            name=name,
            description=description,
            related_type=related_type,
            related_is_a=related_is_a,
            graph=graph_entity_service.to_graph(None, models),
            entity_map=graph_entity_service.to_entity_map(models),
            icon=icon,
        )

    def action_sub_graph(action_type, related_type, state, **parent_field):
        # single-action sub-graph whose parent is set through parent_field
        models = randomize([
            Action(
                id=Ref.action(1),
                data=ActionData(
                    name=action_type.name,
                    action_type_name=action_type.name,
                    engine_name=engine_name,
                    state=state,
                    args={} if action_type.params_json_schema else None,
                    **parent_field))
        ])
        return sub_graph(action_type.name, action_type.description,
                         related_type, Relationship.PARENT, models, '●')

    # --- sub-graphs related to a workflow ---------------------------------
    workflow_sub_graphs = []
    for action_type in engine.data.supported_action_types:
        workflow_sub_graphs.append(action_sub_graph(
            action_type, EntityType.workflow, ActionState.TEMPLATE,
            workflow_id=Ref.parent()))

    completion_models = randomize([
        Trigger(id=Ref.trigger(1),
                data=TriggerData(
                    name='%s_trigger' % workflow_completion_trigger.name,
                    trigger_type_name=workflow_completion_trigger.name,
                    state=TriggerState.INACTIVE,
                    workflow_ids=[],
                    args={'completed_workflow_id': Ref.parent()}))
    ])
    workflow_sub_graphs.append(sub_graph(
        'workflow completion trigger',
        'create a new workflow_completion trigger entity',
        EntityType.workflow, Relationship.PARENT, completion_models, '▼'))

    # triggers that point at the workflow as their child
    child_triggers = (
        (scheduled_trigger, 'scheduled trigger',
         'create a new scheduled trigger entity'),
        (super_trigger, 'super trigger',
         'create a new super trigger entity'),
    )
    for trigger_type, label, description in child_triggers:
        trigger_models = randomize([
            Trigger(id=Ref.trigger(1),
                    data=TriggerData(
                        name='%s_trigger' % trigger_type.name,
                        trigger_type_name=trigger_type.name,
                        state=TriggerState.INACTIVE,
                        workflow_ids=[Ref.child()],
                    ))
        ])
        workflow_sub_graphs.append(sub_graph(
            label, description, EntityType.workflow, Relationship.CHILD,
            trigger_models, '▼'))

    # --- sub-graphs related to a datastore --------------------------------
    workflow_models = randomize([
        Workflow(id=Ref.workflow(1),
                 data=WorkflowData(name='workflow',
                                   datastore_id=Ref.parent(),
                                   engine_name=engine_name,
                                   state=WorkflowState.INACTIVE))
    ])
    datastore_sub_graphs = [
        sub_graph('workflow', 'create a new workflow entity',
                  EntityType.datastore, Relationship.PARENT,
                  workflow_models, '◆')
    ]
    for action_type in engine.data.supported_action_types:
        datastore_sub_graphs.append(action_sub_graph(
            action_type, EntityType.datastore, ActionState.HAS_NEVER_RUN,
            datastore_id=Ref.parent()))

    return {
        EntityType.workflow: workflow_sub_graphs,
        EntityType.datastore: datastore_sub_graphs,
    }
Exemple #16
0
    assert subscription.data.state == SubscriptionState.ACTIVE
    print 'done.'

    datastore = dart.save_datastore(
        Datastore(data=DatastoreData(
            name='owen_eu_parquet_DW-3213_v3',
            engine_name='emr_engine',
            state=DatastoreState.ACTIVE,
            args={
                # 'data_to_freespace_ratio': 0.05,
                'instance_count': 3,
            })))
    print 'created datastore: %s' % datastore.id

    a0, a1 = dart.save_actions(actions=[
        Action(data=ActionData('start_datastore', 'start_datastore')),
        Action(data=ActionData(
            'load_dataset',
            'load_dataset',
            args={
                'dataset_id': dataset.id,
                's3_path_end_prefix_exclusive':
                's3://example-bucket/prd/inbound/overlord/eu-all-events/2015/08/05/',
                'target_file_format': FileFormat.PARQUET,
                'target_row_format': RowFormat.NONE,
                'target_compression': Compression.SNAPPY,
            })),
    ],
                               datastore_id=datastore.id)
    print 'created action: %s' % a0.id
    print 'created action: %s' % a1.id
Exemple #17
0
def add_no_op_engine_sub_graphs(config):
    """Save the demo sub-graph definitions for the ``no_op_engine``.

    Builds a Dart client from the ``no_op_engine`` options in *config*,
    looks up the id of the engine named ``no_op_engine`` and saves the
    "workflow chaining demo" sub-graph definition against it: two ACTIVE
    workflows, seven TEMPLATE actions and a workflow-completion trigger
    that targets workflow 2 and is keyed on workflow 1.

    :param config: mapping providing
        ``config['engines']['no_op_engine']['options']`` with the keys
        ``dart_host``, ``dart_port`` and ``dart_api_version``
    :raises RuntimeError: if ``dart.get_engines()`` returns no engine named
        ``no_op_engine``
    """
    engine_config = config['engines']['no_op_engine']
    opts = engine_config['options']
    dart = Dart(opts['dart_host'], opts['dart_port'], opts['dart_api_version'])
    assert isinstance(dart, Dart)

    _logger.info('saving no_op_engine sub_graphs')

    engine_id = None
    for e in dart.get_engines():
        if e.data.name == 'no_op_engine':
            engine_id = e.id
    if not engine_id:
        # A bare ``raise`` has no active exception to re-raise here and only
        # produced an unrelated, message-less error; fail explicitly instead.
        raise RuntimeError(
            "dart.get_engines() returned no engine named 'no_op_engine'")

    def chained_workflow(idx):
        # ACTIVE demo workflow number idx, parented by the related datastore
        return Workflow(id=Ref.workflow(idx),
                        data=WorkflowData(
                            name='no-op-workflow-chaining-wf%d' % idx,
                            datastore_id=Ref.parent(),
                            engine_name='no_op_engine',
                            state=WorkflowState.ACTIVE,
                        ))

    def template_action(action_idx, action_type, workflow_idx, order_idx):
        # TEMPLATE action placed at order_idx inside the given demo workflow
        return Action(id=Ref.action(action_idx),
                      data=ActionData(
                          name=action_type.name,
                          action_type_name=action_type.name,
                          engine_name='no_op_engine',
                          workflow_id=Ref.workflow(workflow_idx),
                          order_idx=order_idx,
                          state=ActionState.TEMPLATE,
                      ))

    succeeds = NoOpActionTypes.action_that_succeeds
    fails = NoOpActionTypes.action_that_fails

    subgraph_definitions = [
        SubGraphDefinition(data=SubGraphDefinitionData(
            name='workflow chaining demo',
            description='demonstrate workflow chaining',
            engine_name='no_op_engine',
            related_type=EntityType.datastore,
            related_is_a=Relationship.PARENT,
            workflows=[
                chained_workflow(1),
                chained_workflow(2),
            ],
            actions=[
                # workflow 1: four succeeding actions
                template_action(1, succeeds, 1, 1),
                template_action(2, succeeds, 1, 2),
                template_action(3, succeeds, 1, 3),
                template_action(4, succeeds, 1, 4),
                # workflow 2: two succeeding actions, then one that fails
                template_action(5, succeeds, 2, 1),
                template_action(6, succeeds, 2, 2),
                template_action(7, fails, 2, 3),
            ],
            triggers=[
                Trigger(id=Ref.trigger(1),
                        data=TriggerData(
                            name='no-op-trigger-workflow-completion',
                            trigger_type_name=workflow_completion_trigger.name,
                            workflow_ids=[Ref.workflow(2)],
                            state=TriggerState.ACTIVE,
                            args={'completed_workflow_id': Ref.workflow(1)})),
            ],
        ))
    ]

    for definition in subgraph_definitions:
        saved = dart.save_subgraph_definition(definition, engine_id)
        _logger.info('created subgraph_definition: %s', saved.id)
Exemple #18
0
    datastore = dart.save_datastore(Datastore(
        data=DatastoreData(
            name='weblogs_DW-3503',
            engine_name='emr_engine',
            state=DatastoreState.ACTIVE,
            args={
                'data_to_freespace_ratio': 0.30,
            }
        )
    ))
    print 'created datastore: %s' % datastore.id

    actions = dart.save_actions(
        actions=[
            Action(data=ActionData('start_datastore', 'start_datastore')),
            Action(data=ActionData('load_dataset', 'load_dataset', args={
                'dataset_id': dataset.id,
                's3_path_start_prefix_inclusive': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/50',
                's3_path_end_prefix_exclusive': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2015/00',
                's3_path_regex_filter': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/../www\\.retailmenot\\.com.*',
                'target_file_format': FileFormat.TEXTFILE,
                'target_row_format': RowFormat.DELIMITED,
                'target_compression': Compression.GZIP,
                'target_delimited_by': '\t',
                'target_quoted_by': '"',
                'target_escaped_by': '\\',
                'target_null_string': 'NULL',
            })),
        ],
        datastore_id=datastore.id
Exemple #19
0
from dart.client.python.dart_client import Dart
from dart.model.action import Action
from dart.model.action import ActionData

if __name__ == '__main__':
    # Client for the dart service addressed as host 'localhost', port 5000.
    dart = Dart('localhost', 5000)
    # NOTE(review): presumably kept as an IDE type hint -- it cannot fail for
    # an object that was just constructed from Dart.
    assert isinstance(dart, Dart)

    # Save a single 'terminate_datastore' action against the hard-coded
    # datastore id; save_actions' result is indexed to pick that one action.
    terminate = Action(data=ActionData('terminate_datastore', 'terminate_datastore'))
    action = dart.save_actions(actions=[terminate], datastore_id='80WJRQDHXK')[0]

    # Parenthesized single-argument form: prints the same text on Python 2
    # and is also valid syntax on Python 3.
    print('created action: %s' % action.id)
Exemple #20
0
from dart.client.python.dart_client import Dart
from dart.model.action import Action
from dart.model.action import ActionData
from dart.model.dataset import FileFormat

if __name__ == '__main__':
    # Client for the dart service addressed as host 'localhost', port 5000.
    dart = Dart('localhost', 5000)
    # NOTE(review): presumably kept as an IDE type hint -- it cannot fail for
    # an object that was just constructed from Dart.
    assert isinstance(dart, Dart)

    # Arguments of the 'load_dataset' action: the dataset to load, the S3 key
    # range (start inclusive, end exclusive, per the key names), a regex
    # filter on the S3 paths, and the target file format.
    load_args = {
        'dataset_id': 'NVVLBI7CWB',
        's3_path_start_prefix_inclusive':
            's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/52',
        's3_path_end_prefix_exclusive':
            's3://example-bucket/weblogs/www.retailmenot.com/ec2/2015/00',
        's3_path_regex_filter':
            's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/../www\\.retailmenot\\.com.*',
        'target_file_format': FileFormat.PARQUET,
    }
    load = Action(data=ActionData('load_dataset', 'load_dataset', args=load_args))

    # Save the action against the hard-coded datastore id; save_actions'
    # result is indexed to pick the single saved action.
    action = dart.save_actions([load], datastore_id='IOMUQ5L8AX')[0]

    # Parenthesized single-argument form: prints the same text on Python 2
    # and is also valid syntax on Python 3.
    print('created action: %s' % action.id)
Exemple #21
0
    def setUp(self):
        """Save the fixture entities used by the tests and keep them on self.

        Everything is saved through a Dart client created for host
        'localhost', port 5000: three datasets, two subscriptions, three
        datastores, three workflows with their actions, two events, two
        event triggers and two super triggers.

        NOTE(review): entities named ``*-no-show`` look like decoys the tests
        expect to be left out of results -- confirm against the test methods.
        """
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        # --- datasets -------------------------------------------------------
        # random_id() is interpolated into the '%s' placeholder so that the
        # location does not contain a literal '%s' followed by the id.
        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        dataset0_location = 's3://test/dataset/0/%s' % random_id()
        dataset_data = DatasetData('test-dataset0', 'test_dataset_table0', dataset0_location, df, cs)
        self.dataset0 = self.dart.save_dataset(Dataset(data=dataset_data))

        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        # Kept in a local: also used below as the copy action's destination.
        dataset1_location = 's3://test/dataset/1/%s' % random_id()
        dataset_data = DatasetData('test-dataset1', 'test_dataset_table1', dataset1_location, df, cs)
        self.dataset1 = self.dart.save_dataset(Dataset(data=dataset_data))

        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        dataset2_location = 's3://test/dataset/2/%s' % random_id()
        dataset_data = DatasetData('test-dataset2-no-show', 'test_dataset_table2', dataset2_location, df, cs)
        self.dataset2 = self.dart.save_dataset(Dataset(data=dataset_data))

        # --- subscriptions (on dataset0 and dataset2) -----------------------
        s = Subscription(data=SubscriptionData('test-subscription0', self.dataset0.id))
        self.subscription0 = self.dart.save_subscription(s)

        s = Subscription(data=SubscriptionData('test-subscription2-no-show', self.dataset2.id))
        self.subscription2 = self.dart.save_subscription(s)

        # --- datastores -----------------------------------------------------
        # datastore0/1 are saved as TEMPLATE, datastore2 as ACTIVE.
        dst_args = {'action_sleep_time_in_seconds': 0}
        dst = Datastore(data=DatastoreData(
            'test-datastore0', 'no_op_engine', args=dst_args, state=DatastoreState.TEMPLATE))
        self.datastore0 = self.dart.save_datastore(dst)
        dst = Datastore(data=DatastoreData(
            'test-datastore1', 'no_op_engine', args=dst_args, state=DatastoreState.TEMPLATE))
        self.datastore1 = self.dart.save_datastore(dst)
        dst = Datastore(data=DatastoreData(
            'test-datastore2-no-show', 'no_op_engine', args=dst_args, state=DatastoreState.ACTIVE))
        self.datastore2 = self.dart.save_datastore(dst)

        # --- workflows (one per datastore) ----------------------------------
        wf0 = Workflow(data=WorkflowData(
            'test-workflow0', self.datastore0.id, state=WorkflowState.ACTIVE))
        self.workflow0 = self.dart.save_workflow(wf0, self.datastore0.id)
        wf1 = Workflow(data=WorkflowData(
            'test-workflow1', self.datastore1.id, state=WorkflowState.ACTIVE))
        self.workflow1 = self.dart.save_workflow(wf1, self.datastore1.id)
        wf2 = Workflow(data=WorkflowData(
            'test-workflow2-no-show', self.datastore2.id, state=WorkflowState.ACTIVE))
        self.workflow2 = self.dart.save_workflow(wf2, self.datastore2.id)

        # --- actions --------------------------------------------------------
        # Each action uses its action type name for both of ActionData's
        # leading positional arguments.
        succeeds_name = NoOpActionTypes.action_that_succeeds.name
        consume_name = NoOpActionTypes.consume_subscription.name
        copy_name = NoOpActionTypes.copy_hdfs_to_s3_action.name
        load_name = NoOpActionTypes.load_dataset.name

        # workflow0: five template actions; the copy action writes to
        # dataset1's location.
        a_args = {'source_hdfs_path': 'hdfs:///user/hive/warehouse/test',
                  'destination_s3_path': dataset1_location}
        a00 = Action(data=ActionData(succeeds_name, succeeds_name, state=ActionState.TEMPLATE))
        a01 = Action(data=ActionData(consume_name, consume_name,
                                     {'subscription_id': self.subscription0.id},
                                     state=ActionState.TEMPLATE))
        a02 = Action(data=ActionData(succeeds_name, succeeds_name, state=ActionState.TEMPLATE))
        a03 = Action(data=ActionData(copy_name, copy_name, a_args, state=ActionState.TEMPLATE))
        a04 = Action(data=ActionData(succeeds_name, succeeds_name, state=ActionState.TEMPLATE))
        self.action00, self.action01, self.action02, self.action03, self.action04 = \
            self.dart.save_actions([a00, a01, a02, a03, a04], workflow_id=self.workflow0.id)

        # workflow1: a single load of dataset1. Note that action10 (like
        # action20/action21 below) holds save_actions' result as returned,
        # not an unpacked element.
        a10 = Action(data=ActionData(load_name, load_name,
                                     {'dataset_id': self.dataset1.id},
                                     state=ActionState.TEMPLATE))
        self.action10 = self.dart.save_actions([a10], workflow_id=self.workflow1.id)

        # "no-show" side: one action saved directly on datastore2 and one
        # template action on workflow2.
        a20 = Action(data=ActionData(succeeds_name, succeeds_name, state=ActionState.HAS_NEVER_RUN))
        a21 = Action(data=ActionData(load_name, load_name,
                                     {'dataset_id': self.dataset2.id},
                                     state=ActionState.TEMPLATE))
        self.action20 = self.dart.save_actions([a20], datastore_id=self.datastore2.id)
        self.action21 = self.dart.save_actions([a21], workflow_id=self.workflow2.id)

        # --- events ---------------------------------------------------------
        self.event1 = self.dart.save_event(
            Event(data=EventData('test-event1', state=EventState.ACTIVE)))
        self.event2 = self.dart.save_event(
            Event(data=EventData('test-event2-no-show', state=EventState.ACTIVE)))

        # --- event triggers: event1 -> workflow1, event2 -> workflow2 -------
        tr_args = {'event_id': self.event1.id}
        tr = Trigger(data=TriggerData(
            'test-event-trigger1', 'event', [self.workflow1.id], tr_args, TriggerState.ACTIVE))
        self.event_trigger1 = self.dart.save_trigger(tr)

        tr_args = {'event_id': self.event2.id}
        tr = Trigger(data=TriggerData(
            'test-event-trigger2-no-show', 'event', [self.workflow2.id], tr_args, TriggerState.ACTIVE))
        self.event_trigger2 = self.dart.save_trigger(tr)

        # --- super triggers -------------------------------------------------
        # super_trigger1 references event_trigger1 and has no workflows;
        # super_trigger2 references super_trigger1 and targets workflow1.
        st_args = {'fire_after': 'ALL', 'completed_trigger_ids': [self.event_trigger1.id]}
        st = Trigger(data=TriggerData(
            'test-super-trigger1', 'super', None, st_args, TriggerState.ACTIVE))
        self.super_trigger1 = self.dart.save_trigger(st)

        st_args = {'fire_after': 'ANY', 'completed_trigger_ids': [self.super_trigger1.id]}
        st = Trigger(data=TriggerData(
            'test-super-trigger2', 'super', [self.workflow1.id], st_args, TriggerState.ACTIVE))
        self.super_trigger2 = self.dart.save_trigger(st)