def test_subscription_schema(self):
    """Validating a subscription built with ``state=None`` changes its dict form.

    A snapshot of ``to_dict()`` is taken before ``default_and_validate`` runs
    and compared with the dict form of the object it returns; the two are
    expected to differ.
    """
    subscription_data = SubscriptionData(
        'test-subscription',
        'ABC123',
        's3://my-test-bucket/impala/impala',
        's3://my-test-bucket/impala/install',
        '.*\\.rpm',
        state=None,
    )
    unvalidated = Subscription(data=subscription_data)
    snapshot = unvalidated.to_dict()

    validated = default_and_validate(unvalidated, subscription_schema())

    # state should be defaulted to INACTIVE
    self.assertNotEqual(snapshot, validated.to_dict())
def test_subscription_schema_invalid(self):
    """A subscription whose name is ``None`` must fail schema validation.

    Only the ``default_and_validate`` call is placed inside ``assertRaises``
    so that an exception raised while building the fixture cannot satisfy
    the expectation by accident.
    """
    start = 's3://my-test-bucket/impala/impala'
    end = 's3://my-test-bucket/impala/install'
    regex = '.*\\.rpm'
    name = None
    sub = Subscription(
        data=SubscriptionData(name, 'ABC123', start, end, regex))

    with self.assertRaises(DartValidationException) as context:
        # should fail because the name is missing
        default_and_validate(sub, subscription_schema())

    # context.exception is only populated once the with-block has exited,
    # so this check has to live outside of it
    self.assertIsInstance(context.exception, DartValidationException)
def setUp(self):
    """Save the dataset, subscription, datastore, workflow and two template
    actions used by the tests, through a Dart client on localhost:5000.

    The S3 locations are derived from the DART_TEST_BUCKET environment
    variable; a KeyError is raised if it is not set.
    """
    dart = Dart(host='localhost', port=5000)
    """ :type dart: dart.client.python.dart_client.Dart """
    self.dart = dart

    columns = [
        Column('c1', DataType.VARCHAR, 50),
        Column('c2', DataType.BIGINT),
    ]
    data_format = DataFormat(FileFormat.TEXTFILE, RowFormat.DELIMITED)

    # every S3 path used below lives under s3://<DART_TEST_BUCKET>/impala
    impala_root = 's3://' + os.environ['DART_TEST_BUCKET'] + '/impala'

    dataset = Dataset(data=DatasetData(
        name='test-dataset',
        table_name='test_dataset_table',
        load_type=LoadType.INSERT,
        location=impala_root,
        data_format=data_format,
        columns=columns,
        tags=[]))
    self.dataset = self.dart.save_dataset(dataset)

    subscription_data = SubscriptionData(
        'test-subscription',
        self.dataset.id,
        impala_root + '/impala',
        impala_root + '/install',
        '.*\\.rpm')
    self.subscription = self.dart.save_subscription(
        Subscription(data=subscription_data))

    datastore_data = DatastoreData(
        'test-datastore',
        'no_op_engine',
        args={'action_sleep_time_in_seconds': 0},
        state=DatastoreState.TEMPLATE)
    self.datastore = self.dart.save_datastore(Datastore(data=datastore_data))

    workflow = Workflow(data=WorkflowData(
        'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE))
    self.workflow = self.dart.save_workflow(workflow, self.datastore.id)

    # each template action uses its action type name for both leading
    # positional arguments of ActionData
    succeeds_name = NoOpActionTypes.action_that_succeeds.name
    consume_name = NoOpActionTypes.consume_subscription.name
    templates = [
        Action(data=ActionData(succeeds_name, succeeds_name,
                               state=ActionState.TEMPLATE)),
        Action(data=ActionData(consume_name, consume_name,
                               {'subscription_id': self.subscription.id},
                               state=ActionState.TEMPLATE)),
    ]
    self.action0, self.action1 = self.dart.save_actions(
        templates, workflow_id=self.workflow.id)
Column('merchantId', DataType.STRING, path='owen.context.custom.legacy.merchantId'), Column('facebookConnect', DataType.STRING, path='owen.context.custom.facebookConnect'), Column('schemaKey', DataType.STRING, path='schema.key'), ], )))) print 'created dataset: %s' % dataset.id subscription = dart.save_subscription( Subscription(data=SubscriptionData( name='owen_eu_subscription_DW-3213_v3', dataset_id=dataset.id, s3_path_start_prefix_inclusive= 's3://example-bucket/prd/inbound/overlord/eu-all-events/2015/08/05/', on_success_email=['*****@*****.**'], on_failure_email=['*****@*****.**'], ))) print 'created subscription: %s' % subscription.id print 'awaiting subscription generation...' subscription = dart.await_subscription_generation(subscription.id) assert subscription.data.state == SubscriptionState.ACTIVE print 'done.' datastore = dart.save_datastore( Datastore(data=DatastoreData( name='owen_eu_parquet_DW-3213_v3', engine_name='emr_engine', state=DatastoreState.ACTIVE,
Column('vegSlice', DataType.STRING), Column('fruitSlice', DataType.STRING), Column('cacheHitMiss', DataType.STRING), ], compression=Compression.BZ2, partitions=[ Column('year', DataType.STRING), Column('week', DataType.STRING), ], ))) print 'created dataset: %s' % dataset.id subscription = dart.save_subscription( Subscription(data=SubscriptionData( name='weblogs_rmn_subscription', dataset_id=dataset.id, on_failure_email=['*****@*****.**', '*****@*****.**'], on_success_email=['*****@*****.**', '*****@*****.**'], ))) print 'created subscription: %s' % subscription.id print 'awaiting subscription generation...' subscription = dart.await_subscription_generation(subscription.id) assert subscription.data.state == SubscriptionState.ACTIVE print 'done.' datastore = dart.save_datastore( Datastore(data=DatastoreData(name='weblogs_rmn_legacy', engine_name='emr_engine', state=DatastoreState.TEMPLATE, args={ 'data_to_freespace_ratio': 0.50,
def _get_engineless_static_subgraphs_by_related_type(graph_entity_service):
    """Build the static sub-graph templates, keyed by related entity type.

    The returned dict maps ``None`` to the stand-alone dataset and event
    templates, and ``EntityType.dataset`` / ``EntityType.event`` /
    ``EntityType.subscription`` to the templates that attach to an existing
    entity of that type as its child (``Relationship.PARENT``).

    :param graph_entity_service: service providing
        ``to_entity_models_with_randomized_ids``, ``to_graph`` and
        ``to_entity_map``
    :return: dict of related entity type (or ``None``) to a list of SubGraph
    """

    def randomized(entities):
        # shorthand for the service call that turns entities into models
        return graph_entity_service.to_entity_models_with_randomized_ids(entities)

    def sub_graph(name, description, related_type, related_is_a, models, icon):
        # the graph and the entity map are both derived from the same models
        return SubGraph(
            name=name,
            description=description,
            related_type=related_type,
            related_is_a=related_is_a,
            graph=graph_entity_service.to_graph(None, models),
            entity_map=graph_entity_service.to_entity_map(models),
            icon=icon,
        )

    result = {}

    # templates that are not attached to any existing entity
    dataset_models = randomized([
        Dataset(id=Ref.dataset(1),
                data=DatasetData(None, None, None, None, None, columns=[], partitions=[]))
    ])
    event_models = randomized([Event(id=Ref.event(1), data=EventData('event'))])
    result[None] = [
        sub_graph('dataset', 'create a new dataset entity',
                  None, None, dataset_models, '⬟'),
        sub_graph('event', 'create a new event entity',
                  None, None, event_models, '★'),
    ]

    # subscription hanging off a dataset
    subscription_models = randomized([
        Subscription(id=Ref.subscription(1),
                     data=SubscriptionData('subscription', Ref.parent()))
    ])
    result[EntityType.dataset] = [
        sub_graph('subscription', 'create a new subscription entity',
                  EntityType.dataset, Relationship.PARENT,
                  subscription_models, '⬢'),
    ]

    # event trigger hanging off an event
    event_trigger_models = randomized([
        Trigger(id=Ref.trigger(1),
                data=TriggerData(name='%s_trigger' % event_trigger.name,
                                 trigger_type_name=event_trigger.name,
                                 state=TriggerState.INACTIVE,
                                 workflow_ids=[],
                                 args={'event_id': Ref.parent()}))
    ])
    result[EntityType.event] = [
        sub_graph('event trigger', 'create a new event trigger entity',
                  EntityType.event, Relationship.PARENT,
                  event_trigger_models, '▼'),
    ]

    # subscription batch trigger hanging off a subscription
    batch_trigger_models = randomized([
        Trigger(id=Ref.trigger(1),
                data=TriggerData(
                    name='%s_trigger' % subscription_batch_trigger.name,
                    trigger_type_name=subscription_batch_trigger.name,
                    state=TriggerState.INACTIVE,
                    workflow_ids=[],
                    args={
                        'subscription_id': Ref.parent(),
                        'unconsumed_data_size_in_bytes': 1000000
                    }))
    ])
    result[EntityType.subscription] = [
        sub_graph('subscription batch trigger',
                  'create a new subscription batch trigger entity',
                  EntityType.subscription, Relationship.PARENT,
                  batch_trigger_models, '▼'),
    ]

    return result
def setUp(self):
    """Save the interconnected entities the tests run against.

    Through a Dart client on localhost:5000 this creates three datasets, two
    subscriptions, three datastores, three workflows, their actions, two
    events, two event triggers and two super triggers, storing each saved
    entity on ``self``.

    NOTE(review): entities named ``*-no-show`` are presumably the ones that
    must NOT appear in the result under test -- confirm against the test
    methods.
    """
    dart = Dart(host='localhost', port=5000)
    """ :type dart: dart.client.python.dart_client.Dart """
    self.dart = dart

    # --- datasets -----------------------------------------------------------
    # The random id is interpolated into the location with %-formatting;
    # concatenating it instead would leave a literal "%s" in the S3 path.
    cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
    df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
    dataset_data = DatasetData('test-dataset0', 'test_dataset_table0',
                               's3://test/dataset/0/%s' % random_id(), df, cs)
    self.dataset0 = self.dart.save_dataset(Dataset(data=dataset_data))

    cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
    df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
    # kept in a local: the copy_hdfs_to_s3 action below writes to this location
    dataset1_location = 's3://test/dataset/1/%s' % random_id()
    dataset_data = DatasetData('test-dataset1', 'test_dataset_table1',
                               dataset1_location, df, cs)
    self.dataset1 = self.dart.save_dataset(Dataset(data=dataset_data))

    cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
    df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
    dataset_data = DatasetData('test-dataset2-no-show', 'test_dataset_table2',
                               's3://test/dataset/2/%s' % random_id(), df, cs)
    self.dataset2 = self.dart.save_dataset(Dataset(data=dataset_data))

    # --- subscriptions ------------------------------------------------------
    s = Subscription(data=SubscriptionData('test-subscription0', self.dataset0.id))
    self.subscription0 = self.dart.save_subscription(s)

    s = Subscription(data=SubscriptionData('test-subscription2-no-show', self.dataset2.id))
    self.subscription2 = self.dart.save_subscription(s)

    # --- datastores ---------------------------------------------------------
    dst_args = {'action_sleep_time_in_seconds': 0}
    dst = Datastore(data=DatastoreData('test-datastore0', 'no_op_engine',
                                       args=dst_args, state=DatastoreState.TEMPLATE))
    self.datastore0 = self.dart.save_datastore(dst)
    dst = Datastore(data=DatastoreData('test-datastore1', 'no_op_engine',
                                       args=dst_args, state=DatastoreState.TEMPLATE))
    self.datastore1 = self.dart.save_datastore(dst)
    # unlike the two templates above, this datastore is saved as ACTIVE
    dst = Datastore(data=DatastoreData('test-datastore2-no-show', 'no_op_engine',
                                       args=dst_args, state=DatastoreState.ACTIVE))
    self.datastore2 = self.dart.save_datastore(dst)

    # --- workflows (one per datastore) --------------------------------------
    wf0 = Workflow(data=WorkflowData('test-workflow0', self.datastore0.id,
                                     state=WorkflowState.ACTIVE))
    self.workflow0 = self.dart.save_workflow(wf0, self.datastore0.id)
    wf1 = Workflow(data=WorkflowData('test-workflow1', self.datastore1.id,
                                     state=WorkflowState.ACTIVE))
    self.workflow1 = self.dart.save_workflow(wf1, self.datastore1.id)
    wf2 = Workflow(data=WorkflowData('test-workflow2-no-show', self.datastore2.id,
                                     state=WorkflowState.ACTIVE))
    self.workflow2 = self.dart.save_workflow(wf2, self.datastore2.id)

    # --- actions for workflow0 ----------------------------------------------
    # a01 consumes subscription0; a03 copies to dataset1's location
    a_args = {'source_hdfs_path': 'hdfs:///user/hive/warehouse/test',
              'destination_s3_path': dataset1_location}
    a00 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name,
                                 NoOpActionTypes.action_that_succeeds.name,
                                 state=ActionState.TEMPLATE))
    a01 = Action(data=ActionData(NoOpActionTypes.consume_subscription.name,
                                 NoOpActionTypes.consume_subscription.name,
                                 {'subscription_id': self.subscription0.id},
                                 state=ActionState.TEMPLATE))
    a02 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name,
                                 NoOpActionTypes.action_that_succeeds.name,
                                 state=ActionState.TEMPLATE))
    a03 = Action(data=ActionData(NoOpActionTypes.copy_hdfs_to_s3_action.name,
                                 NoOpActionTypes.copy_hdfs_to_s3_action.name,
                                 a_args, state=ActionState.TEMPLATE))
    a04 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name,
                                 NoOpActionTypes.action_that_succeeds.name,
                                 state=ActionState.TEMPLATE))
    self.action00, self.action01, self.action02, self.action03, self.action04 = \
        self.dart.save_actions([a00, a01, a02, a03, a04], workflow_id=self.workflow0.id)

    # --- action for workflow1: loads dataset1 -------------------------------
    a10 = Action(data=ActionData(NoOpActionTypes.load_dataset.name,
                                 NoOpActionTypes.load_dataset.name,
                                 {'dataset_id': self.dataset1.id},
                                 state=ActionState.TEMPLATE))
    # note: holds whatever save_actions returns for the one-element list,
    # it is not unpacked like the workflow0 actions above
    self.action10 = self.dart.save_actions([a10], workflow_id=self.workflow1.id)

    # --- actions around datastore2 / workflow2 ------------------------------
    a20 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name,
                                 NoOpActionTypes.action_that_succeeds.name,
                                 state=ActionState.HAS_NEVER_RUN))
    a21 = Action(data=ActionData(NoOpActionTypes.load_dataset.name,
                                 NoOpActionTypes.load_dataset.name,
                                 {'dataset_id': self.dataset2.id},
                                 state=ActionState.TEMPLATE))
    # a20 is saved against the datastore, a21 against the workflow
    self.action20 = self.dart.save_actions([a20], datastore_id=self.datastore2.id)
    self.action21 = self.dart.save_actions([a21], workflow_id=self.workflow2.id)

    # --- events and event triggers ------------------------------------------
    self.event1 = self.dart.save_event(
        Event(data=EventData('test-event1', state=EventState.ACTIVE)))
    self.event2 = self.dart.save_event(
        Event(data=EventData('test-event2-no-show', state=EventState.ACTIVE)))

    tr_args = {'event_id': self.event1.id}
    tr = Trigger(data=TriggerData('test-event-trigger1', 'event',
                                  [self.workflow1.id], tr_args, TriggerState.ACTIVE))
    self.event_trigger1 = self.dart.save_trigger(tr)

    tr_args = {'event_id': self.event2.id}
    tr = Trigger(data=TriggerData('test-event-trigger2-no-show', 'event',
                                  [self.workflow2.id], tr_args, TriggerState.ACTIVE))
    self.event_trigger2 = self.dart.save_trigger(tr)

    # --- super triggers -----------------------------------------------------
    # super_trigger1 lists event_trigger1 as its completed trigger and has no
    # workflows; super_trigger2 lists super_trigger1 and targets workflow1
    st_args = {'fire_after': 'ALL', 'completed_trigger_ids': [self.event_trigger1.id]}
    st = Trigger(data=TriggerData('test-super-trigger1', 'super', None,
                                  st_args, TriggerState.ACTIVE))
    self.super_trigger1 = self.dart.save_trigger(st)

    st_args = {'fire_after': 'ANY', 'completed_trigger_ids': [self.super_trigger1.id]}
    st = Trigger(data=TriggerData('test-super-trigger2', 'super',
                                  [self.workflow1.id], st_args, TriggerState.ACTIVE))
    self.super_trigger2 = self.dart.save_trigger(st)
from dart.model.subscription import Subscription, SubscriptionState from dart.model.subscription import SubscriptionData from dart.model.trigger import TriggerData from dart.model.trigger import Trigger from dart.model.workflow import WorkflowState, Workflow from dart.model.workflow import WorkflowData if __name__ == '__main__': dart = Dart('localhost', 5000) assert isinstance(dart, Dart) subscription = dart.save_subscription( Subscription(data=SubscriptionData( name='rmn_direct_subscription_DW-3307', dataset_id='34HWJLF5N9', s3_path_start_prefix_inclusive= 's3://example-bucket/prd/inbound/overlord/raw/rmndirect/2015/08/18/', on_success_email=['*****@*****.**'], on_failure_email=['*****@*****.**'], ))) print 'created subscription: %s' % subscription.id print 'awaiting subscription generation...' subscription = dart.await_subscription_generation(subscription.id) assert subscription.data.state == SubscriptionState.ACTIVE print 'done.' datastore = dart.save_datastore( Datastore(data=DatastoreData( name='rmn_direct_adhoc', engine_name='emr_engine', state=DatastoreState.ACTIVE,