예제 #1
0
 def test_subscription_schema(self):
     # Running a subscription whose state is None through
     # default_and_validate must change its dict representation.
     data = SubscriptionData(
         'test-subscription',
         'ABC123',
         's3://my-test-bucket/impala/impala',
         's3://my-test-bucket/impala/install',
         '.*\\.rpm',
         state=None)
     subscription = Subscription(data=data)
     snapshot = subscription.to_dict()
     validated = default_and_validate(subscription, subscription_schema())
     # state should be defaulted to INACTIVE
     self.assertNotEqual(snapshot, validated.to_dict())
예제 #2
0
    def test_subscription_schema_invalid(self):
        # A subscription whose name is None must be rejected by
        # default_and_validate with a DartValidationException.
        start = 's3://my-test-bucket/impala/impala'
        end = 's3://my-test-bucket/impala/install'
        regex = '.*\\.rpm'
        name = None
        sub = Subscription(
            data=SubscriptionData(name, 'ABC123', start, end, regex))

        # Only the validation call lives inside assertRaises, so an
        # exception raised while building the fixture above cannot make
        # this test pass by accident.
        with self.assertRaises(DartValidationException) as context:
            # should fail because the name is missing
            default_and_validate(sub, subscription_schema())

        self.assertIsInstance(context.exception, DartValidationException)
예제 #3
0
    def setUp(self):
        """Save a dataset, subscription, datastore, workflow and two
        template actions through a Dart client pointed at localhost:5000.

        The S3 bucket name is read from the DART_TEST_BUCKET environment
        variable; a KeyError is raised if it is not set.
        """
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        columns = [
            Column('c1', DataType.VARCHAR, 50),
            Column('c2', DataType.BIGINT),
        ]
        data_format = DataFormat(FileFormat.TEXTFILE, RowFormat.DELIMITED)
        # every S3 path used below hangs off this single prefix
        impala_root = 's3://' + os.environ['DART_TEST_BUCKET'] + '/impala'
        dataset = Dataset(data=DatasetData(
            name='test-dataset',
            table_name='test_dataset_table',
            load_type=LoadType.INSERT,
            location=impala_root,
            data_format=data_format,
            columns=columns,
            tags=[]))
        self.dataset = self.dart.save_dataset(dataset)

        subscription_data = SubscriptionData(
            'test-subscription',
            self.dataset.id,
            impala_root + '/impala',
            impala_root + '/install',
            '.*\\.rpm')
        self.subscription = self.dart.save_subscription(
            Subscription(data=subscription_data))

        datastore_data = DatastoreData(
            'test-datastore',
            'no_op_engine',
            args={'action_sleep_time_in_seconds': 0},
            state=DatastoreState.TEMPLATE)
        self.datastore = self.dart.save_datastore(
            Datastore(data=datastore_data))

        workflow_data = WorkflowData(
            'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE)
        self.workflow = self.dart.save_workflow(
            Workflow(data=workflow_data), self.datastore.id)

        # each action is given its action type name as both of the first
        # two ActionData arguments
        succeeds = NoOpActionTypes.action_that_succeeds.name
        consume = NoOpActionTypes.consume_subscription.name
        templates = [
            Action(data=ActionData(
                succeeds, succeeds, state=ActionState.TEMPLATE)),
            Action(data=ActionData(
                consume,
                consume,
                {'subscription_id': self.subscription.id},
                state=ActionState.TEMPLATE)),
        ]
        self.action0, self.action1 = self.dart.save_actions(
            templates, workflow_id=self.workflow.id)
예제 #4
0
                Column('merchantId',
                       DataType.STRING,
                       path='owen.context.custom.legacy.merchantId'),
                Column('facebookConnect',
                       DataType.STRING,
                       path='owen.context.custom.facebookConnect'),
                Column('schemaKey', DataType.STRING, path='schema.key'),
            ],
        ))))
    print 'created dataset: %s' % dataset.id

    subscription = dart.save_subscription(
        Subscription(data=SubscriptionData(
            name='owen_eu_subscription_DW-3213_v3',
            dataset_id=dataset.id,
            s3_path_start_prefix_inclusive=
            's3://example-bucket/prd/inbound/overlord/eu-all-events/2015/08/05/',
            on_success_email=['*****@*****.**'],
            on_failure_email=['*****@*****.**'],
        )))
    print 'created subscription: %s' % subscription.id

    print 'awaiting subscription generation...'
    subscription = dart.await_subscription_generation(subscription.id)
    assert subscription.data.state == SubscriptionState.ACTIVE
    print 'done.'

    datastore = dart.save_datastore(
        Datastore(data=DatastoreData(
            name='owen_eu_parquet_DW-3213_v3',
            engine_name='emr_engine',
            state=DatastoreState.ACTIVE,
예제 #5
0
                Column('vegSlice', DataType.STRING),
                Column('fruitSlice', DataType.STRING),
                Column('cacheHitMiss', DataType.STRING),
            ],
            compression=Compression.BZ2,
            partitions=[
                Column('year', DataType.STRING),
                Column('week', DataType.STRING),
            ],
        )))
    print 'created dataset: %s' % dataset.id

    subscription = dart.save_subscription(
        Subscription(data=SubscriptionData(
            name='weblogs_rmn_subscription',
            dataset_id=dataset.id,
            on_failure_email=['*****@*****.**', '*****@*****.**'],
            on_success_email=['*****@*****.**', '*****@*****.**'],
        )))
    print 'created subscription: %s' % subscription.id

    print 'awaiting subscription generation...'
    subscription = dart.await_subscription_generation(subscription.id)
    assert subscription.data.state == SubscriptionState.ACTIVE
    print 'done.'

    datastore = dart.save_datastore(
        Datastore(data=DatastoreData(name='weblogs_rmn_legacy',
                                     engine_name='emr_engine',
                                     state=DatastoreState.TEMPLATE,
                                     args={
                                         'data_to_freespace_ratio': 0.50,
예제 #6
0
파일: sub_graph.py 프로젝트: ophiradi/dart
def _get_engineless_static_subgraphs_by_related_type(graph_entity_service):
    """Build the engineless static sub-graphs, grouped by related entity type.

    :param graph_entity_service: service used to turn entities into entity
        models (with randomized ids), graphs and entity maps
    :return: dict whose keys are None, EntityType.dataset, EntityType.event
        and EntityType.subscription, each mapped to a list of SubGraph
    """
    service = graph_entity_service

    def as_models(entity):
        # wrap a single entity and let the service randomize its ids
        return service.to_entity_models_with_randomized_ids([entity])

    def as_sub_graph(name, description, related_type, related_is_a, models,
                     icon):
        # every sub-graph here derives its graph and entity map from one
        # list of entity models
        return SubGraph(
            name=name,
            description=description,
            related_type=related_type,
            related_is_a=related_is_a,
            graph=service.to_graph(None, models),
            entity_map=service.to_entity_map(models),
            icon=icon,
        )

    result = {}

    # stand-alone entities: not related to any existing entity
    dataset_models = as_models(
        Dataset(id=Ref.dataset(1),
                data=DatasetData(None, None, None, None, None,
                                 columns=[], partitions=[])))
    event_models = as_models(Event(id=Ref.event(1), data=EventData('event')))
    result[None] = [
        as_sub_graph('dataset', 'create a new dataset entity',
                     None, None, dataset_models, '⬟'),
        as_sub_graph('event', 'create a new event entity',
                     None, None, event_models, '★'),
    ]

    # subscription whose dataset reference points at the parent
    subscription_models = as_models(
        Subscription(id=Ref.subscription(1),
                     data=SubscriptionData('subscription', Ref.parent())))
    result[EntityType.dataset] = [
        as_sub_graph('subscription', 'create a new subscription entity',
                     EntityType.dataset, Relationship.PARENT,
                     subscription_models, '⬢'),
    ]

    # event trigger whose event_id argument points at the parent
    event_trigger_models = as_models(
        Trigger(id=Ref.trigger(1),
                data=TriggerData(
                    name='%s_trigger' % event_trigger.name,
                    trigger_type_name=event_trigger.name,
                    state=TriggerState.INACTIVE,
                    workflow_ids=[],
                    args={'event_id': Ref.parent()})))
    result[EntityType.event] = [
        as_sub_graph('event trigger', 'create a new event trigger entity',
                     EntityType.event, Relationship.PARENT,
                     event_trigger_models, '▼'),
    ]

    # subscription batch trigger whose subscription_id points at the parent
    batch_trigger_args = {
        'subscription_id': Ref.parent(),
        'unconsumed_data_size_in_bytes': 1000000
    }
    batch_trigger_models = as_models(
        Trigger(id=Ref.trigger(1),
                data=TriggerData(
                    name='%s_trigger' % subscription_batch_trigger.name,
                    trigger_type_name=subscription_batch_trigger.name,
                    state=TriggerState.INACTIVE,
                    workflow_ids=[],
                    args=batch_trigger_args)))
    result[EntityType.subscription] = [
        as_sub_graph('subscription batch trigger',
                     'create a new subscription batch trigger entity',
                     EntityType.subscription, Relationship.PARENT,
                     batch_trigger_models, '▼'),
    ]

    return result
예제 #7
0
    def setUp(self):
        """Save a set of related entities through a Dart client pointed at
        localhost:5000 and keep the saved results on ``self``.

        Saved, in order: three datasets, two subscriptions, three
        datastores, three workflows, their actions, two events, two event
        triggers and two super triggers.
        """
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        # NOTE: the locations are built with the % operator; the previous
        # '...%s' + random_id() concatenation left a literal "%s" in the path.
        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        dataset0_location = 's3://test/dataset/0/%s' % random_id()
        dataset_data = DatasetData(
            'test-dataset0', 'test_dataset_table0', dataset0_location, df, cs)
        self.dataset0 = self.dart.save_dataset(Dataset(data=dataset_data))

        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        # also used below as the destination of the copy_hdfs_to_s3 action
        dataset1_location = 's3://test/dataset/1/%s' % random_id()
        dataset_data = DatasetData(
            'test-dataset1', 'test_dataset_table1', dataset1_location, df, cs)
        self.dataset1 = self.dart.save_dataset(Dataset(data=dataset_data))

        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        dataset2_location = 's3://test/dataset/2/%s' % random_id()
        dataset_data = DatasetData(
            'test-dataset2-no-show', 'test_dataset_table2',
            dataset2_location, df, cs)
        self.dataset2 = self.dart.save_dataset(Dataset(data=dataset_data))

        s = Subscription(
            data=SubscriptionData('test-subscription0', self.dataset0.id))
        self.subscription0 = self.dart.save_subscription(s)

        s = Subscription(
            data=SubscriptionData('test-subscription2-no-show',
                                  self.dataset2.id))
        self.subscription2 = self.dart.save_subscription(s)

        dst_args = {'action_sleep_time_in_seconds': 0}
        dst = Datastore(data=DatastoreData(
            'test-datastore0', 'no_op_engine', args=dst_args,
            state=DatastoreState.TEMPLATE))
        self.datastore0 = self.dart.save_datastore(dst)
        dst = Datastore(data=DatastoreData(
            'test-datastore1', 'no_op_engine', args=dst_args,
            state=DatastoreState.TEMPLATE))
        self.datastore1 = self.dart.save_datastore(dst)
        dst = Datastore(data=DatastoreData(
            'test-datastore2-no-show', 'no_op_engine', args=dst_args,
            state=DatastoreState.ACTIVE))
        self.datastore2 = self.dart.save_datastore(dst)

        wf0 = Workflow(data=WorkflowData(
            'test-workflow0', self.datastore0.id, state=WorkflowState.ACTIVE))
        self.workflow0 = self.dart.save_workflow(wf0, self.datastore0.id)
        wf1 = Workflow(data=WorkflowData(
            'test-workflow1', self.datastore1.id, state=WorkflowState.ACTIVE))
        self.workflow1 = self.dart.save_workflow(wf1, self.datastore1.id)
        wf2 = Workflow(data=WorkflowData(
            'test-workflow2-no-show', self.datastore2.id,
            state=WorkflowState.ACTIVE))
        self.workflow2 = self.dart.save_workflow(wf2, self.datastore2.id)

        # action type names, each passed as both of the first two
        # ActionData arguments
        succeeds = NoOpActionTypes.action_that_succeeds.name
        consume = NoOpActionTypes.consume_subscription.name
        copy_to_s3 = NoOpActionTypes.copy_hdfs_to_s3_action.name
        load = NoOpActionTypes.load_dataset.name

        a_args = {
            'source_hdfs_path': 'hdfs:///user/hive/warehouse/test',
            'destination_s3_path': dataset1_location,
        }
        a00 = Action(data=ActionData(
            succeeds, succeeds, state=ActionState.TEMPLATE))
        a01 = Action(data=ActionData(
            consume, consume, {'subscription_id': self.subscription0.id},
            state=ActionState.TEMPLATE))
        a02 = Action(data=ActionData(
            succeeds, succeeds, state=ActionState.TEMPLATE))
        a03 = Action(data=ActionData(
            copy_to_s3, copy_to_s3, a_args, state=ActionState.TEMPLATE))
        a04 = Action(data=ActionData(
            succeeds, succeeds, state=ActionState.TEMPLATE))
        (self.action00, self.action01, self.action02, self.action03,
         self.action04) = self.dart.save_actions(
            [a00, a01, a02, a03, a04], workflow_id=self.workflow0.id)

        # action10, action20 and action21 hold whatever save_actions
        # returns for a one-element list; they are not unpacked
        a10 = Action(data=ActionData(
            load, load, {'dataset_id': self.dataset1.id},
            state=ActionState.TEMPLATE))
        self.action10 = self.dart.save_actions(
            [a10], workflow_id=self.workflow1.id)

        a20 = Action(data=ActionData(
            succeeds, succeeds, state=ActionState.HAS_NEVER_RUN))
        a21 = Action(data=ActionData(
            load, load, {'dataset_id': self.dataset2.id},
            state=ActionState.TEMPLATE))
        # a20 is saved against the datastore, a21 against the workflow
        self.action20 = self.dart.save_actions(
            [a20], datastore_id=self.datastore2.id)
        self.action21 = self.dart.save_actions(
            [a21], workflow_id=self.workflow2.id)

        self.event1 = self.dart.save_event(
            Event(data=EventData('test-event1', state=EventState.ACTIVE)))
        self.event2 = self.dart.save_event(
            Event(data=EventData('test-event2-no-show',
                                 state=EventState.ACTIVE)))

        tr_args = {'event_id': self.event1.id}
        tr = Trigger(data=TriggerData(
            'test-event-trigger1', 'event', [self.workflow1.id], tr_args,
            TriggerState.ACTIVE))
        self.event_trigger1 = self.dart.save_trigger(tr)

        tr_args = {'event_id': self.event2.id}
        tr = Trigger(data=TriggerData(
            'test-event-trigger2-no-show', 'event', [self.workflow2.id],
            tr_args, TriggerState.ACTIVE))
        self.event_trigger2 = self.dart.save_trigger(tr)

        # super_trigger1 lists event_trigger1 in completed_trigger_ids and
        # is given no workflow ids; super_trigger2 lists super_trigger1 and
        # is given workflow1
        st_args = {
            'fire_after': 'ALL',
            'completed_trigger_ids': [self.event_trigger1.id],
        }
        st = Trigger(data=TriggerData(
            'test-super-trigger1', 'super', None, st_args,
            TriggerState.ACTIVE))
        self.super_trigger1 = self.dart.save_trigger(st)

        st_args = {
            'fire_after': 'ANY',
            'completed_trigger_ids': [self.super_trigger1.id],
        }
        st = Trigger(data=TriggerData(
            'test-super-trigger2', 'super', [self.workflow1.id], st_args,
            TriggerState.ACTIVE))
        self.super_trigger2 = self.dart.save_trigger(st)
예제 #8
0
from dart.model.subscription import Subscription, SubscriptionState
from dart.model.subscription import SubscriptionData
from dart.model.trigger import TriggerData
from dart.model.trigger import Trigger
from dart.model.workflow import WorkflowState, Workflow
from dart.model.workflow import WorkflowData

if __name__ == '__main__':
    dart = Dart('localhost', 5000)
    assert isinstance(dart, Dart)

    subscription = dart.save_subscription(
        Subscription(data=SubscriptionData(
            name='rmn_direct_subscription_DW-3307',
            dataset_id='34HWJLF5N9',
            s3_path_start_prefix_inclusive=
            's3://example-bucket/prd/inbound/overlord/raw/rmndirect/2015/08/18/',
            on_success_email=['*****@*****.**'],
            on_failure_email=['*****@*****.**'],
        )))
    print 'created subscription: %s' % subscription.id

    print 'awaiting subscription generation...'
    subscription = dart.await_subscription_generation(subscription.id)
    assert subscription.data.state == SubscriptionState.ACTIVE
    print 'done.'

    datastore = dart.save_datastore(
        Datastore(data=DatastoreData(
            name='rmn_direct_adhoc',
            engine_name='emr_engine',
            state=DatastoreState.ACTIVE,