def test_action_schema(self):
    """Defaulting and validating a sparse action must alter its dict form."""
    params = {
        'source_hdfs_path': 'hdfs:///user/hive/warehouse/dtest4',
        'destination_s3_path': 's3://fake-bucket/dart_testing',
    }
    last_in_workflow = None
    action_data = ActionData(
        'copy_hdfs_to_s3',
        'copy_hdfs_to_s3',
        params,
        engine_name='no_op_engine',
        last_in_workflow=last_in_workflow,
    )
    action = Action(data=action_data)

    # snapshot taken before validation so the two dicts can be compared
    obj_before = action.to_dict()
    schema = action_schema(
        NoOpActionTypes.copy_hdfs_to_s3_action.params_json_schema)
    obj_after = default_and_validate(action, schema).to_dict()

    # many fields should have been defaulted, making these unequal
    self.assertNotEqual(obj_before, obj_after)
def setUp(self):
    """Save two template datastores, a workflow with two template actions
    on each, a workflow_completion trigger on workflow0 and a super
    trigger that targets workflow1."""
    # :type client: dart.client.python.dart_client.Dart
    client = Dart(host='localhost', port=5000)
    self.dart = client

    dst_args = {'action_sleep_time_in_seconds': 0}

    def new_datastore(name):
        return Datastore(data=DatastoreData(
            name, 'no_op_engine', args=dst_args,
            state=DatastoreState.TEMPLATE))

    def new_workflow(name, datastore_id):
        return Workflow(data=WorkflowData(
            name, datastore_id, state=WorkflowState.ACTIVE))

    def new_template_action():
        type_name = NoOpActionTypes.action_that_succeeds.name
        return Action(data=ActionData(
            type_name, type_name, state=ActionState.TEMPLATE))

    self.datastore0 = self.dart.save_datastore(
        new_datastore('test-datastore0'))
    self.datastore1 = self.dart.save_datastore(
        new_datastore('test-datastore1'))

    self.workflow0 = self.dart.save_workflow(
        new_workflow('test-workflow0', self.datastore0.id),
        self.datastore0.id)
    self.workflow1 = self.dart.save_workflow(
        new_workflow('test-workflow1', self.datastore1.id),
        self.datastore1.id)

    self.action00, self.action01 = self.dart.save_actions(
        [new_template_action(), new_template_action()],
        workflow_id=self.workflow0.id)
    self.action10, self.action11 = self.dart.save_actions(
        [new_template_action(), new_template_action()],
        workflow_id=self.workflow1.id)

    completion_args = {'completed_workflow_id': self.workflow0.id}
    completion_trigger = Trigger(data=TriggerData(
        'test-trigger', 'workflow_completion', None, completion_args,
        TriggerState.ACTIVE))
    self.trigger = self.dart.save_trigger(completion_trigger)

    # the super trigger needs the id of the saved completion trigger
    chained_args = {
        'fire_after': 'ALL',
        'completed_trigger_ids': [self.trigger.id],
    }
    chained_trigger = Trigger(data=TriggerData(
        'test-super-trigger', 'super', [self.workflow1.id], chained_args,
        TriggerState.ACTIVE))
    self.super_trigger = self.dart.save_trigger(chained_trigger)
def test_crud_datastore(self):
    """Save two actions on the datastore, read them back, delete them and
    expect a 404 on every later lookup."""
    type_name = NoOpActionTypes.action_that_succeeds.name
    action0 = Action(data=ActionData(
        name=type_name, action_type_name=type_name,
        engine_name='no_op_engine'))
    action1 = Action(data=ActionData(
        name=type_name, action_type_name=type_name,
        engine_name='no_op_engine'))

    posted_actions = self.dart.save_actions(
        actions=[action0, action1], datastore_id=self.datastore.id)
    posted_pairs = (
        (action0, posted_actions[0]),
        (action1, posted_actions[1]),
    )

    # copy fields that are populated at creation time
    for local, posted in posted_pairs:
        local.data.datastore_id = posted.data.datastore_id
        local.data.args = {}
        local.data.order_idx = posted.data.order_idx
        local.data.user_id = posted.data.user_id
    for local, posted in posted_pairs:
        self.assertEqual(posted.data.to_dict(), local.data.to_dict())

    # When retrieving an action, its queue time and state differ from the
    # default values the locally built actions carry
    a0 = self.dart.get_action(posted_actions[0].id)
    a1 = self.dart.get_action(posted_actions[1].id)
    fetched_pairs = ((action0, a0), (action1, a1))
    for local, fetched in fetched_pairs:
        local.data.state = fetched.data.state
        local.data.queued_time = fetched.data.queued_time
    for local, fetched in fetched_pairs:
        self.assertEqual(fetched.data.to_dict(), local.data.to_dict())

    self.dart.delete_action(a0.id)
    self.dart.delete_action(a1.id)

    # every deleted action must now be reported as missing
    for deleted in (a0, a1):
        try:
            self.dart.get_action(deleted.id)
        except DartRequestException as error:
            self.assertEqual(error.response.status_code, 404)
        else:
            self.fail('action should have been missing after delete!')
def test_crud_workflow(self):
    """Save two template actions on the workflow, read them back, delete
    them and expect a 404 on every later lookup."""
    type_name = NoOpActionTypes.action_that_succeeds.name
    action0 = Action(data=ActionData(
        name=type_name, action_type_name=type_name,
        state=ActionState.TEMPLATE, engine_name='no_op_engine'))
    action1 = Action(data=ActionData(
        name=type_name, action_type_name=type_name,
        state=ActionState.TEMPLATE, engine_name='no_op_engine'))

    posted_actions = self.dart.save_actions(
        [action0, action1], workflow_id=self.workflow.id)
    posted_pairs = (
        (action0, posted_actions[0]),
        (action1, posted_actions[1]),
    )

    # copy fields that are populated at creation time
    for local, posted in posted_pairs:
        local.data.workflow_id = posted.data.workflow_id
        local.data.order_idx = posted.data.order_idx
        local.data.args = {}
        local.data.user_id = posted.data.user_id
    for local, posted in posted_pairs:
        self.assertEqual(posted.data.to_dict(), local.data.to_dict())

    a0 = self.dart.get_action(posted_actions[0].id)
    a1 = self.dart.get_action(posted_actions[1].id)
    self.assertEqual(a0.data.to_dict(), action0.data.to_dict())
    self.assertEqual(a1.data.to_dict(), action1.data.to_dict())

    self.dart.delete_action(a0.id)
    self.dart.delete_action(a1.id)

    # every deleted action must now be reported as missing
    for deleted in (a0, a1):
        try:
            self.dart.get_action(deleted.id)
        except DartRequestException as error:
            self.assertEqual(error.response.status_code, 404)
        else:
            self.fail('action should have been missing after delete!')
def setUp(self):
    """Save a dataset with a subscription on it, plus a template datastore
    and workflow holding a succeeding action and a consume_subscription
    action. The S3 bucket comes from the DART_TEST_BUCKET environment
    variable."""
    # :type client: dart.client.python.dart_client.Dart
    client = Dart(host='localhost', port=5000)
    self.dart = client

    columns = [
        Column('c1', DataType.VARCHAR, 50),
        Column('c2', DataType.BIGINT),
    ]
    data_format = DataFormat(FileFormat.TEXTFILE, RowFormat.DELIMITED)
    impala_root = 's3://' + os.environ['DART_TEST_BUCKET'] + '/impala'
    dataset_data = DatasetData(
        name='test-dataset',
        table_name='test_dataset_table',
        load_type=LoadType.INSERT,
        location=impala_root,
        data_format=data_format,
        columns=columns,
        tags=[])
    self.dataset = self.dart.save_dataset(Dataset(data=dataset_data))

    # subscription boundaries are S3 key prefixes under the dataset location
    start = impala_root + '/impala'
    end = impala_root + '/install'
    regex = '.*\\.rpm'
    subscription_data = SubscriptionData(
        'test-subscription', self.dataset.id, start, end, regex)
    self.subscription = self.dart.save_subscription(
        Subscription(data=subscription_data))

    datastore_data = DatastoreData(
        'test-datastore', 'no_op_engine',
        args={'action_sleep_time_in_seconds': 0},
        state=DatastoreState.TEMPLATE)
    self.datastore = self.dart.save_datastore(
        Datastore(data=datastore_data))

    workflow_data = WorkflowData(
        'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE)
    self.workflow = self.dart.save_workflow(
        Workflow(data=workflow_data), self.datastore.id)

    succeed_name = NoOpActionTypes.action_that_succeeds.name
    consume_name = NoOpActionTypes.consume_subscription.name
    consume_args = {'subscription_id': self.subscription.id}
    template_actions = [
        Action(data=ActionData(
            succeed_name, succeed_name, state=ActionState.TEMPLATE)),
        Action(data=ActionData(
            consume_name, consume_name, consume_args,
            state=ActionState.TEMPLATE)),
    ]
    self.action0, self.action1 = self.dart.save_actions(
        template_actions, workflow_id=self.workflow.id)
def post_datastore_actions(datastore):
    """Save the action(s) in the request body on ``datastore`` and ask the
    trigger proxy to try the datastore's next action.

    The request JSON may be a single action or a list of actions.

    :type datastore: dart.model.datastore.Datastore
    :returns: ``{'results': [...]}`` holding the saved actions as dicts
    """
    payload = request.get_json()
    action_dicts = payload if isinstance(payload, list) else [payload]

    def as_new_action(action_dict):
        # every posted action is bound to this datastore and starts unrun
        new_action = Action.from_dict(action_dict)
        new_action.data.datastore_id = datastore.id
        new_action.data.state = ActionState.HAS_NEVER_RUN
        return new_action

    actions = [as_new_action(action_dict) for action_dict in action_dicts]

    engine_name = datastore.data.engine_name
    saved = action_service().save_actions(
        actions, engine_name, datastore=datastore)
    saved_actions = [saved_action.to_dict() for saved_action in saved]

    log_info = {'user_id': current_user.email}
    trigger_proxy().try_next_action({
        'datastore_id': datastore.id,
        'log_info': log_info,
    })
    return {'results': saved_actions}
def patch_action(action):
    """Apply the JSON patch in the request body to ``action``, keeping
    only changes to the editable fields, then revalidate and persist.

    :type action: dart.model.action.Action
    :returns: ``{'results': <patched action as dict>}``
    """
    patch = JsonPatch(request.get_json())
    sanitized_action = action.copy()
    patched_action = Action.from_dict(patch.apply(action.to_dict()))

    # only allow updating fields that are editable
    editable_fields = (
        'name',
        'args',
        'tags',
        'progress',
        'order_idx',
        'on_failure',
        'on_failure_email',
        'on_success_email',
        'extra_data',
    )
    for field_name in editable_fields:
        setattr(sanitized_action.data, field_name,
                getattr(patched_action.data, field_name))

    # revalidate
    sanitized_action = action_service().default_and_validate_action(
        sanitized_action)

    patched = action_service().patch_action(action, sanitized_action)
    return {'results': patched.to_dict()}
def test_action_model_with_data(self):
    """str() of an Action built from self.actionDataModel must match the
    expected flattened representation exactly."""
    expected = (
        "updated='None', data='"
        "first_in_workflow='False', "
        "workflow_instance_id='None', "
        "on_success_email='[]', "
        "workflow_id='None', "
        "ecs_task_arn='None', "
        "on_failure='DEACTIVATE', "
        "user_id='anonymous', "
        "order_idx='None', "
        "state='HAS_NEVER_RUN', "
        "workflow_action_id='None', "
        "progress='None', "
        "extra_data='None', "
        "tags='[]', "
        "batch_job_id='None', "
        "start_time='None', "
        "args='None', "
        "last_in_workflow='False', "
        "datastore_id='None', "
        "on_failure_email='[]', "
        "avg_runtime='None', "
        "name='action data name', "
        "engine_name='None', "
        "error_message='None', "
        "queued_time='None', "
        "end_time='None', "
        "action_type_name='test', "
        "completed_runs='0'"
        "', id='1', version_id='2', created='None'"
    )
    self.actionModel = Action(
        id="1", data=self.actionDataModel, version_id=2)
    self.assertEqual(str(self.actionModel), expected)
def test_action_schema(self):
    """A valid copy_hdfs_to_s3 action gains defaulted fields when it is
    run through default_and_validate."""
    last_in_workflow = None
    hdfs_to_s3_args = {
        'source_hdfs_path': 'hdfs:///user/hive/warehouse/dtest4',
        'destination_s3_path': 's3://fake-bucket/dart_testing',
    }
    unvalidated = Action(data=ActionData(
        'copy_hdfs_to_s3',
        'copy_hdfs_to_s3',
        hdfs_to_s3_args,
        engine_name='no_op_engine',
        last_in_workflow=last_in_workflow))

    obj_before = unvalidated.to_dict()
    params_schema = NoOpActionTypes.copy_hdfs_to_s3_action.params_json_schema
    validated = default_and_validate(unvalidated, action_schema(params_schema))
    obj_after = validated.to_dict()

    # many fields should have been defaulted, making these unequal
    self.assertNotEqual(obj_before, obj_after)
def test_lost_engine_container(self):
    """Expect the saved action to finish FAILED and its datastore to end
    up INACTIVE; the action is deleted afterwards."""
    type_name = NoOpActionTypes.action_that_succeeds.name
    pending = Action(data=ActionData(
        type_name, type_name, state=ActionState.HAS_NEVER_RUN))

    saved = self.dart.save_actions(
        [pending], datastore_id=self.datastore.id)[0]
    finished = self.dart.await_action_completion(saved.id)
    self.assertEqual(finished.data.state, ActionState.FAILED)

    datastore = self.dart.get_datastore(self.datastore.id)
    self.assertEqual(datastore.data.state, DatastoreState.INACTIVE)

    self.dart.delete_action(finished.id)
def test_action_schema_invalid(self):
    """Validation must reject a copy_hdfs_to_s3 action whose args lack the
    required ``destination_s3_path``."""
    a = Action(data=ActionData(
        'copy_hdfs_to_s3',
        'copy_hdfs_to_s3',
        {
            'source_hdfs_path': 'hdfs:///user/hive/warehouse/dtest4',
            # 'destination_s3_path': 's3://fake-bucket/dart_testing',
        },
        engine_name='no_op_engine'))
    schema = action_schema(
        NoOpActionTypes.copy_hdfs_to_s3_action.params_json_schema)

    # Only the validating call lives inside assertRaises, so an exception
    # raised while building the fixture cannot make the test pass.
    with self.assertRaises(DartValidationException) as context:
        # should fail because destination_s3_path is required
        default_and_validate(a, schema).to_dict()

    self.assertIsInstance(context.exception, DartValidationException)
def post_datastore_actions(datastore):
    """Persist the posted action(s) against ``datastore`` and notify the
    trigger proxy so the datastore's next action can be attempted.

    :type datastore: dart.model.datastore.Datastore
    :returns: dict with a ``results`` list of the saved actions as dicts
    """
    body = request.get_json()
    # a single posted action is handled as a one-element list
    posted = body if isinstance(body, list) else [body]

    actions = []
    for item in posted:
        candidate = Action.from_dict(item)
        candidate.data.datastore_id = datastore.id
        candidate.data.state = ActionState.HAS_NEVER_RUN
        actions.append(candidate)

    engine_name = datastore.data.engine_name
    saved_actions = []
    for stored in action_service().save_actions(
            actions, engine_name, datastore=datastore):
        saved_actions.append(stored.to_dict())

    message = {
        'datastore_id': datastore.id,
        'log_info': {'user_id': current_user.email},
    }
    trigger_proxy().try_next_action(message)
    return {'results': saved_actions}
def post_workflow_actions(workflow):
    """Save the action(s) in the request body as templates of ``workflow``.

    The engine name is taken from the workflow's datastore.

    :type workflow: dart.model.workflow.Workflow
    :returns: ``{'results': [...]}`` holding the saved actions as dicts
    """
    payload = request.get_json()
    action_dicts = payload if isinstance(payload, list) else [payload]

    def as_template(action_dict):
        template = Action.from_dict(action_dict)
        template.data.workflow_id = workflow.id
        template.data.state = ActionState.TEMPLATE
        return template

    actions = [as_template(action_dict) for action_dict in action_dicts]

    datastore = datastore_service().get_datastore(workflow.data.datastore_id)
    engine_name = datastore.data.engine_name
    saved = action_service().save_actions(actions, engine_name)
    return {'results': [saved_action.to_dict() for saved_action in saved]}
def _resolve_and_save_action(self, entity_id, entity_map, actual_entities_by_node_id, actual_entities_by_unsaved_id):
    """Return the real id for an action reference, saving the action first
    when it is still unsaved.

    Before an unsaved action is saved, its datastore, workflow,
    ``subscription_id`` arg and ``dataset_id`` arg are each resolved (and
    saved if needed). The saved action is then recorded in both lookup
    dicts.
    """
    actual_id, unsaved_id = self._resolve(
        EntityType.action, entity_id, entity_map,
        actual_entities_by_unsaved_id)
    if actual_id:
        return actual_id

    node_id = self._node_id(EntityType.action, unsaved_id)
    action = Action.from_dict(entity_map['unsaved_entities'][node_id])
    assert isinstance(action, Action)

    # the three lookup structures are threaded through every resolver call
    lookups = (entity_map, actual_entities_by_node_id,
               actual_entities_by_unsaved_id)

    if action.data.datastore_id:
        action.data.datastore_id = self._resolve_and_save_datastore(
            action.data.datastore_id, *lookups)
    if action.data.workflow_id:
        action.data.workflow_id = self._resolve_and_save_workflow(
            action.data.workflow_id, *lookups)

    arg_resolvers = (
        ('subscription_id', self._resolve_and_save_subscription),
        ('dataset_id', self._resolve_and_save_dataset),
    )
    for arg_name, resolver in arg_resolvers:
        if action.data.args and action.data.args.get(arg_name):
            action.data.args[arg_name] = resolver(
                action.data.args[arg_name], *lookups)

    engine_name, datastore = self._find_engine_name_and_datastore(action)
    saved = self._action_service.save_actions(
        [action], engine_name, datastore, commit=False, flush=True)
    action = saved[0]

    actual_entities_by_node_id[node_id] = action
    actual_entities_by_unsaved_id[unsaved_id] = action
    return action.id
def setUp(self):
    """Save an active datastore and workflow holding a single template
    action of the ``action_that_fails`` type."""
    # :type client: dart.client.python.dart_client.Dart
    client = Dart(host='localhost', port=5000)
    self.dart = client

    datastore_data = DatastoreData(
        'test-datastore', 'no_op_engine',
        args={'action_sleep_time_in_seconds': 0},
        state=DatastoreState.ACTIVE)
    self.datastore = self.dart.save_datastore(
        Datastore(data=datastore_data))

    workflow_data = WorkflowData(
        'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE)
    self.workflow = self.dart.save_workflow(
        Workflow(data=workflow_data), self.datastore.id)

    failing_type = NoOpActionTypes.action_that_fails.name
    failing_action = Action(data=ActionData(
        failing_type, failing_type, state=ActionState.TEMPLATE))
    self.dart.save_actions([failing_action], workflow_id=self.workflow.id)
def test_action_model_with_data(self):
    """Check the top-level and ``data`` attributes of an Action built
    from self.actionDataModel, one attribute at a time."""
    self.actionModel = Action(
        id="1", data=self.actionDataModel, version_id=2)
    model = self.actionModel

    self.assertEqual(model.id, "1")
    self.assertEqual(model.version_id, 2)
    self.assertEqual(model.created, None)
    self.assertEqual(model.updated, None)

    # (attribute name, expected value), checked in this order
    expected_data = [
        ('first_in_workflow', False),
        ('workflow_instance_id', None),
        ('on_success_email', []),
        ('workflow_id', None),
        ('ecs_task_arn', None),
        ('on_failure', 'DEACTIVATE'),
        ('user_id', 'anonymous'),
        ('order_idx', None),
        ('state', 'HAS_NEVER_RUN'),
        ('workflow_action_id', None),
        ('progress', None),
        ('extra_data', None),
        ('tags', []),
        ('parallelization_idx', None),
        ('batch_job_id', None),
        ('start_time', None),
        ('args', None),
        ('last_in_workflow', False),
        ('datastore_id', None),
        ('on_failure_email', []),
        ('avg_runtime', None),
        ('name', 'action data name'),
        ('engine_name', None),
        ('parallelization_parents', []),
        ('error_message', None),
        ('queued_time', None),
        ('end_time', None),
        ('action_type_name', 'test'),
        ('completed_runs', 0),
    ]
    for attr_name, expected in expected_data:
        self.assertEqual(getattr(self.actionModel.data, attr_name), expected)
def test_consume_subscription_exceptions(self):
    """For each failure mode of the mocked dependency, consume_subscription
    must raise the expected exception type and never patch the action."""
    a = Action(id='abc123', data=ActionData(
        'a_name', 'a_name', workflow_instance_id='abc123',
        args={'subscription_id': 0}))
    d = Datastore(id='abc123')

    # (side effect handed to init_mocks, expected exception, extra kwargs)
    scenarios = [
        (Exception(), Exception, {}),
        (Exception(), ActionFailedButConsumeSuccessfulException,
         {'consume_successful': True}),
        (DartActionException('failed', StepWrapper(None, 0, 0, False)),
         Exception, {}),
        (DartActionException('failed', StepWrapper(None, 0, 0, True)),
         ActionFailedButConsumeSuccessfulException, {}),
    ]

    for side_effect, expected_error, extra_kwargs in scenarios:
        mock_engine = self.init_mocks(Mock(side_effect=side_effect))
        with self.assertRaises(expected_error):
            consume_subscription(mock_engine, d, a, **extra_kwargs)
        mock_engine.dart.patch_action.assert_not_called()
from dart.client.python.dart_client import Dart
from dart.model.action import Action
from dart.model.action import ActionData
from dart.model.dataset import FileFormat

if __name__ == '__main__':
    # Saves one load_dataset action on an existing datastore through the
    # Dart client at localhost:5000 and prints the new action's id.
    dart = Dart('localhost', 5000)
    assert isinstance(dart, Dart)

    load_args = {
        'dataset_id': 'NVVLBI7CWB',
        's3_path_start_prefix_inclusive': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/52',
        's3_path_end_prefix_exclusive': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2015/00',
        's3_path_regex_filter': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/../www\\.retailmenot\\.com.*',
        'target_file_format': FileFormat.PARQUET,
    }
    load_action = Action(
        data=ActionData('load_dataset', 'load_dataset', args=load_args))

    action = dart.save_actions([load_action], datastore_id='IOMUQ5L8AX')[0]
    print('created action: %s' % action.id)
def add_no_op_engine_sub_graphs(config):
    """Save the 'workflow chaining demo' subgraph definition for the
    no_op_engine.

    The definition holds two active workflows, seven template actions
    (four on workflow 1, three on workflow 2, the last of which uses the
    ``action_that_fails`` type) and a workflow_completion trigger that
    targets workflow 2 once workflow 1 completes.

    :param config: dict whose ``['engines']['no_op_engine']['options']``
        entry provides ``dart_host``, ``dart_port`` and
        ``dart_api_version``
    :raises RuntimeError: if ``dart.get_engines()`` returns no engine
        named ``no_op_engine``
    """
    engine_config = config['engines']['no_op_engine']
    opts = engine_config['options']
    dart = Dart(opts['dart_host'], opts['dart_port'], opts['dart_api_version'])
    assert isinstance(dart, Dart)

    _logger.info('saving no_op_engine sub_graphs')

    engine_id = None
    for e in dart.get_engines():
        if e.data.name == 'no_op_engine':
            engine_id = e.id
    if not engine_id:
        # A bare ``raise`` here had no active exception to re-raise and so
        # produced an unrelated error; fail with an explicit message.
        raise RuntimeError(
            'no_op_engine was not found among the registered engines')

    def chained_workflow(ref_idx, name):
        # workflow template attached to the parent datastore
        return Workflow(id=Ref.workflow(ref_idx), data=WorkflowData(
            name=name,
            datastore_id=Ref.parent(),
            engine_name='no_op_engine',
            state=WorkflowState.ACTIVE,
        ))

    def template_action(ref_idx, action_type, workflow_idx, order_idx):
        # template action of the given type at a position in a workflow
        return Action(id=Ref.action(ref_idx), data=ActionData(
            name=action_type.name,
            action_type_name=action_type.name,
            engine_name='no_op_engine',
            workflow_id=Ref.workflow(workflow_idx),
            order_idx=order_idx,
            state=ActionState.TEMPLATE,
        ))

    succeeds = NoOpActionTypes.action_that_succeeds
    fails = NoOpActionTypes.action_that_fails

    subgraph_definitions = [
        SubGraphDefinition(data=SubGraphDefinitionData(
            name='workflow chaining demo',
            description='demonstrate workflow chaining',
            engine_name='no_op_engine',
            related_type=EntityType.datastore,
            related_is_a=Relationship.PARENT,
            workflows=[
                chained_workflow(1, 'no-op-workflow-chaining-wf1'),
                chained_workflow(2, 'no-op-workflow-chaining-wf2'),
            ],
            actions=[
                template_action(1, succeeds, 1, 1),
                template_action(2, succeeds, 1, 2),
                template_action(3, succeeds, 1, 3),
                template_action(4, succeeds, 1, 4),
                template_action(5, succeeds, 2, 1),
                template_action(6, succeeds, 2, 2),
                template_action(7, fails, 2, 3),
            ],
            triggers=[
                Trigger(id=Ref.trigger(1), data=TriggerData(
                    name='no-op-trigger-workflow-completion',
                    trigger_type_name=workflow_completion_trigger.name,
                    workflow_ids=[Ref.workflow(2)],
                    state=TriggerState.ACTIVE,
                    args={'completed_workflow_id': Ref.workflow(1)})),
            ],
        ))
    ]

    for e in subgraph_definitions:
        s = dart.save_subgraph_definition(e, engine_id)
        _logger.info('created subgraph_definition: %s', s.id)
datastore = dart.save_datastore(Datastore( data=DatastoreData( name='weblogs_DW-3503', engine_name='emr_engine', state=DatastoreState.ACTIVE, args={ 'data_to_freespace_ratio': 0.30, } ) )) print 'created datastore: %s' % datastore.id actions = dart.save_actions( actions=[ Action(data=ActionData('start_datastore', 'start_datastore')), Action(data=ActionData('load_dataset', 'load_dataset', args={ 'dataset_id': dataset.id, 's3_path_start_prefix_inclusive': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/50', 's3_path_end_prefix_exclusive': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2015/00', 's3_path_regex_filter': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/../www\\.retailmenot\\.com.*', 'target_file_format': FileFormat.TEXTFILE, 'target_row_format': RowFormat.DELIMITED, 'target_compression': Compression.GZIP, 'target_delimited_by': '\t', 'target_quoted_by': '"', 'target_escaped_by': '\\', 'target_null_string': 'NULL', })), ], datastore_id=datastore.id
def patch_action(action):
    """Apply the JSON patch from the request body to ``action`` and hand
    the patched copy to ``update_action``.

    :type action: dart.model.action.Action
    """
    patch = JsonPatch(request.get_json())
    patched_dict = patch.apply(action.to_dict())
    patched_action = Action.from_dict(patched_dict)
    return update_action(action, patched_action)
def put_action(action):
    """Build an Action from the request body and hand it to
    ``update_action`` as the replacement for ``action``.

    :type action: dart.model.action.Action
    """
    replacement = Action.from_dict(request.get_json())
    return update_action(action, replacement)
from dart.client.python.dart_client import Dart
from dart.model.action import Action
from dart.model.action import ActionData

if __name__ == '__main__':
    # Saves one terminate_datastore action on an existing datastore through
    # the Dart client at localhost:5000 and prints the new action's id.
    dart = Dart('localhost', 5000)
    assert isinstance(dart, Dart)

    terminate = Action(
        data=ActionData('terminate_datastore', 'terminate_datastore'))
    action = dart.save_actions(
        actions=[terminate], datastore_id='80WJRQDHXK')[0]
    print('created action: %s' % action.id)
print 'created datastore: %s' % datastore.id workflow = dart.save_workflow(Workflow( data=WorkflowData( 'load_beacon_native_app_impala', datastore.id, state=WorkflowState.ACTIVE, on_failure_email=['*****@*****.**'], on_success_email=['*****@*****.**'], on_started_email=['*****@*****.**'], ) ), datastore.id) print 'created workflow: %s' % workflow.id a0, a1 = dart.save_actions([ Action(data=ActionData('start_datastore', 'start_datastore', state=ActionState.TEMPLATE)), Action(data=ActionData('load_dataset', 'load_dataset', state=ActionState.TEMPLATE, args={ 'dataset_id': dataset.id, 's3_path_start_prefix_inclusive': 's3://example-bucket/prd/beacon/native_app/v2/parquet/snappy/createdpartition=2015-06-27', })), ], workflow_id=workflow.id) print 'created action: %s' % a0.id print 'created action: %s' % a1.id event = dart.save_event(Event(data=EventData('beacon_native_app_to_parquet_emr_job_completion', state=EventState.ACTIVE))) print 'created event: %s' % event.id trigger = dart.save_trigger(Trigger(data=TriggerData( 'beacon_native_app_to_parquet_emr_job_completion_trigger', 'event', [workflow.id],
print 'created dataset: %s' % dataset.id datastore = dart.save_datastore( Datastore(data=DatastoreData( name='weblogs_DW-3500_holiday_readiness', engine_name='emr_engine', state=DatastoreState.ACTIVE, args={ # 'instance_count': 30, 'data_to_freespace_ratio': 0.30, }))) print 'created datastore: %s' % datastore.id actions = dart.save_actions( actions=[ Action(data=ActionData('start_datastore', 'start_datastore')), Action(data=ActionData( 'load_dataset', 'load_dataset', args={ 'dataset_id': dataset.id, 's3_path_start_prefix_inclusive': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/30', 's3_path_end_prefix_exclusive': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2015/36', # 's3_path_regex_filter': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/../www\\.retailmenot\\.com.*', 'target_file_format': FileFormat.RCFILE, 'target_row_format': RowFormat.NONE, 'target_compression': Compression.SNAPPY, })), ],
assert isinstance(dart, Dart) datastore = dart.save_datastore( Datastore(data=DatastoreData( name='amaceiras_beacon_native_app_null_coupons_issue', engine_name='emr_engine', state=DatastoreState.ACTIVE, args={ # 'data_to_freespace_ratio': 0.05, 'instance_count': 5, }))) print 'created datastore: %s' % datastore.id actions = dart.save_actions( actions=[ Action(data=ActionData('start_datastore', 'start_datastore')), Action(data=ActionData( 'load_dataset', 'load_dataset', args={ 'dataset_id': 'URBA9XEQEF', 's3_path_start_prefix_inclusive': 's3://example-bucket/nb.retailmenot.com/parsed_logs/2015/33/beacon-v2-2015-08-18', # 's3_path_end_prefix_exclusive': 's3://example-bucket/nb.retailmenot.com/parsed_logs/2015/31/beacon-v2-2015-08-01', 's3_path_regex_filter': '.*\\.tsv', 'target_file_format': FileFormat.PARQUET, 'target_row_format': RowFormat.NONE, 'target_compression': Compression.SNAPPY, })), ], datastore_id=datastore.id
state=WorkflowState.ACTIVE, on_failure_email=['*****@*****.**', '*****@*****.**'], on_success_email=['*****@*****.**', '*****@*****.**'], on_started_email=['*****@*****.**', '*****@*****.**'], )), datastore_id=datastore.id) print 'created workflow: %s' % workflow.id a2 = dart.save_actions(actions=[ Action(data=ActionData('consume_subscription', 'consume_subscription', state=ActionState.TEMPLATE, args={ 'subscription_id': subscription.id, 'target_file_format': FileFormat.TEXTFILE, 'target_row_format': RowFormat.DELIMITED, 'target_compression': Compression.GZIP, 'target_delimited_by': '\t', 'target_quoted_by': '"', 'target_escaped_by': '\\', 'target_null_string': 'NULL', })), ], workflow_id=workflow.id)[0] print 'created workflow action: %s' % a2.id trigger = dart.save_trigger( Trigger(data=TriggerData(name='weblogs_DW-3213_v3', trigger_type_name='subscription_batch', workflow_ids=[workflow.id], args={
def _get_static_subgraphs_by_related_type(engine, graph_entity_service):
    """Build the fixed sub graphs for ``engine``, keyed by the entity type
    they relate to.

    Under ``EntityType.workflow``: one sub graph per supported action type
    (as a template action), then the workflow completion, scheduled and
    super trigger sub graphs. Under ``EntityType.datastore``: the workflow
    sub graph, then one sub graph per supported action type (as a
    never-run action).

    :returns: dict mapping an EntityType to a list of SubGraph
    """
    engine_name = engine.data.name

    def randomized(entity):
        return graph_entity_service.to_entity_models_with_randomized_ids(
            [entity])

    def sub_graph(name, description, related_type, related_is_a,
                  entity_models, icon):
        return SubGraph(
            name=name,
            description=description,
            related_type=related_type,
            related_is_a=related_is_a,
            graph=graph_entity_service.to_graph(None, entity_models),
            entity_map=graph_entity_service.to_entity_map(entity_models),
            icon=icon,
        )

    def action_sub_graph(action_type, related_type, parent_field, state):
        # parent_field names the ActionData field that points at the parent
        action_kwargs = {
            'name': action_type.name,
            'action_type_name': action_type.name,
            'engine_name': engine_name,
            parent_field: Ref.parent(),
            'state': state,
            'args': {} if action_type.params_json_schema else None,
        }
        entity_models = randomized(
            Action(id=Ref.action(1), data=ActionData(**action_kwargs)))
        return sub_graph(
            action_type.name, action_type.description, related_type,
            Relationship.PARENT, entity_models, '●')

    def child_trigger_sub_graph(trigger_type, name, description):
        entity_models = randomized(
            Trigger(id=Ref.trigger(1), data=TriggerData(
                name='%s_trigger' % trigger_type.name,
                trigger_type_name=trigger_type.name,
                state=TriggerState.INACTIVE,
                workflow_ids=[Ref.child()],
            )))
        return sub_graph(
            name, description, EntityType.workflow, Relationship.CHILD,
            entity_models, '▼')

    workflow_graphs = []
    for action_type in engine.data.supported_action_types:
        workflow_graphs.append(action_sub_graph(
            action_type, EntityType.workflow, 'workflow_id',
            ActionState.TEMPLATE))

    completion_models = randomized(
        Trigger(id=Ref.trigger(1), data=TriggerData(
            name='%s_trigger' % workflow_completion_trigger.name,
            trigger_type_name=workflow_completion_trigger.name,
            state=TriggerState.INACTIVE,
            workflow_ids=[],
            args={'completed_workflow_id': Ref.parent()})))
    workflow_graphs.append(sub_graph(
        'workflow completion trigger',
        'create a new workflow_completion trigger entity',
        EntityType.workflow, Relationship.PARENT, completion_models, '▼'))

    workflow_graphs.append(child_trigger_sub_graph(
        scheduled_trigger, 'scheduled trigger',
        'create a new scheduled trigger entity'))
    workflow_graphs.append(child_trigger_sub_graph(
        super_trigger, 'super trigger',
        'create a new super trigger entity'))

    workflow_models = randomized(
        Workflow(id=Ref.workflow(1), data=WorkflowData(
            name='workflow',
            datastore_id=Ref.parent(),
            engine_name=engine_name,
            state=WorkflowState.INACTIVE)))
    datastore_graphs = [
        sub_graph(
            'workflow', 'create a new workflow entity',
            EntityType.datastore, Relationship.PARENT, workflow_models, '◆'),
    ]
    for action_type in engine.data.supported_action_types:
        datastore_graphs.append(action_sub_graph(
            action_type, EntityType.datastore, 'datastore_id',
            ActionState.HAS_NEVER_RUN))

    return {
        EntityType.workflow: workflow_graphs,
        EntityType.datastore: datastore_graphs,
    }
def test_action_model_with_no_data(self):
    """An Action built with an empty data dict renders all fields in str()."""
    expected = "updated='None', data='{}', id='1', version_id='2', created='None'"

    self.actionModel = Action(id="1", data={}, version_id=2)
    rendered = str(self.actionModel)

    self.assertEqual(rendered, expected)
assert subscription.data.state == SubscriptionState.ACTIVE
print('done.')

# Save an ACTIVE datastore that runs on the emr_engine.
datastore_args = {
    # 'data_to_freespace_ratio': 0.05,
    'instance_count': 3,
}
datastore = dart.save_datastore(Datastore(data=DatastoreData(
    name='owen_eu_parquet_DW-3213_v3',
    engine_name='emr_engine',
    state=DatastoreState.ACTIVE,
    args=datastore_args,
)))
print('created datastore: %s' % datastore.id)

# Two actions are saved against that datastore: start_datastore first,
# then load_dataset with Parquet/Snappy target settings.
load_dataset_args = {
    'dataset_id': dataset.id,
    's3_path_end_prefix_exclusive': 's3://example-bucket/prd/inbound/overlord/eu-all-events/2015/08/05/',
    'target_file_format': FileFormat.PARQUET,
    'target_row_format': RowFormat.NONE,
    'target_compression': Compression.SNAPPY,
}
a0, a1 = dart.save_actions(
    actions=[
        Action(data=ActionData('start_datastore', 'start_datastore')),
        Action(data=ActionData('load_dataset', 'load_dataset', args=load_dataset_args)),
    ],
    datastore_id=datastore.id,
)
print('created action: %s' % a0.id)
print('created action: %s' % a1.id)
def setUp(self):
    """Create the entities used by the tests through the Dart client.

    Saved in order: three datasets, two subscriptions, three datastores,
    three workflows, the actions attached to them, two events, two event
    triggers and two super triggers. Every saved entity is kept on
    ``self``. Entities whose names end in ``-no-show`` form a separate
    chain (dataset2 / subscription2 / datastore2 / workflow2 / event2 /
    event_trigger2).

    Note: ``action10``, ``action20`` and ``action21`` hold the raw return
    value of ``save_actions`` (not unpacked), unlike ``action00``..``action04``.
    """
    dart = Dart(host='localhost', port=5000)
    """ :type dart: dart.client.python.dart_client.Dart """
    self.dart = dart

    # --- datasets -------------------------------------------------------
    # The random id is interpolated into the location. Previously the
    # string was concatenated, which left a literal "%s" in the S3 path.
    cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
    df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
    dataset0_location = 's3://test/dataset/0/%s' % random_id()
    dataset_data = DatasetData('test-dataset0', 'test_dataset_table0', dataset0_location, df, cs)
    self.dataset0 = self.dart.save_dataset(Dataset(data=dataset_data))

    cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
    df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
    # dataset1's location is reused below as the copy action's destination
    dataset1_location = 's3://test/dataset/1/%s' % random_id()
    dataset_data = DatasetData('test-dataset1', 'test_dataset_table1', dataset1_location, df, cs)
    self.dataset1 = self.dart.save_dataset(Dataset(data=dataset_data))

    cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
    df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
    dataset2_location = 's3://test/dataset/2/%s' % random_id()
    dataset_data = DatasetData('test-dataset2-no-show', 'test_dataset_table2', dataset2_location, df, cs)
    self.dataset2 = self.dart.save_dataset(Dataset(data=dataset_data))

    # --- subscriptions --------------------------------------------------
    s = Subscription(data=SubscriptionData('test-subscription0', self.dataset0.id))
    self.subscription0 = self.dart.save_subscription(s)

    s = Subscription(data=SubscriptionData('test-subscription2-no-show', self.dataset2.id))
    self.subscription2 = self.dart.save_subscription(s)

    # --- datastores (0 and 1 are templates, 2 is active) ----------------
    dst_args = {'action_sleep_time_in_seconds': 0}
    dst = Datastore(data=DatastoreData('test-datastore0', 'no_op_engine', args=dst_args,
                                       state=DatastoreState.TEMPLATE))
    self.datastore0 = self.dart.save_datastore(dst)
    dst = Datastore(data=DatastoreData('test-datastore1', 'no_op_engine', args=dst_args,
                                       state=DatastoreState.TEMPLATE))
    self.datastore1 = self.dart.save_datastore(dst)
    dst = Datastore(data=DatastoreData('test-datastore2-no-show', 'no_op_engine', args=dst_args,
                                       state=DatastoreState.ACTIVE))
    self.datastore2 = self.dart.save_datastore(dst)

    # --- workflows, one per datastore -----------------------------------
    wf0 = Workflow(data=WorkflowData('test-workflow0', self.datastore0.id, state=WorkflowState.ACTIVE))
    self.workflow0 = self.dart.save_workflow(wf0, self.datastore0.id)
    wf1 = Workflow(data=WorkflowData('test-workflow1', self.datastore1.id, state=WorkflowState.ACTIVE))
    self.workflow1 = self.dart.save_workflow(wf1, self.datastore1.id)
    wf2 = Workflow(data=WorkflowData('test-workflow2-no-show', self.datastore2.id, state=WorkflowState.ACTIVE))
    self.workflow2 = self.dart.save_workflow(wf2, self.datastore2.id)

    # --- actions for workflow0 ------------------------------------------
    a_args = {'source_hdfs_path': 'hdfs:///user/hive/warehouse/test',
              'destination_s3_path': dataset1_location}
    a00 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name,
                                 NoOpActionTypes.action_that_succeeds.name,
                                 state=ActionState.TEMPLATE))
    a01 = Action(data=ActionData(NoOpActionTypes.consume_subscription.name,
                                 NoOpActionTypes.consume_subscription.name,
                                 {'subscription_id': self.subscription0.id},
                                 state=ActionState.TEMPLATE))
    a02 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name,
                                 NoOpActionTypes.action_that_succeeds.name,
                                 state=ActionState.TEMPLATE))
    a03 = Action(data=ActionData(NoOpActionTypes.copy_hdfs_to_s3_action.name,
                                 NoOpActionTypes.copy_hdfs_to_s3_action.name,
                                 a_args,
                                 state=ActionState.TEMPLATE))
    a04 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name,
                                 NoOpActionTypes.action_that_succeeds.name,
                                 state=ActionState.TEMPLATE))
    self.action00, self.action01, self.action02, self.action03, self.action04 = \
        self.dart.save_actions([a00, a01, a02, a03, a04], workflow_id=self.workflow0.id)

    # --- action for workflow1 -------------------------------------------
    a10 = Action(data=ActionData(NoOpActionTypes.load_dataset.name,
                                 NoOpActionTypes.load_dataset.name,
                                 {'dataset_id': self.dataset1.id},
                                 state=ActionState.TEMPLATE))
    self.action10 = self.dart.save_actions([a10], workflow_id=self.workflow1.id)

    # --- actions for datastore2 / workflow2 -----------------------------
    a20 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name,
                                 NoOpActionTypes.action_that_succeeds.name,
                                 state=ActionState.HAS_NEVER_RUN))
    a21 = Action(data=ActionData(NoOpActionTypes.load_dataset.name,
                                 NoOpActionTypes.load_dataset.name,
                                 {'dataset_id': self.dataset2.id},
                                 state=ActionState.TEMPLATE))
    self.action20 = self.dart.save_actions([a20], datastore_id=self.datastore2.id)
    self.action21 = self.dart.save_actions([a21], workflow_id=self.workflow2.id)

    # --- events ---------------------------------------------------------
    self.event1 = self.dart.save_event(Event(data=EventData('test-event1', state=EventState.ACTIVE)))
    self.event2 = self.dart.save_event(Event(data=EventData('test-event2-no-show', state=EventState.ACTIVE)))

    # --- event triggers: event1 -> workflow1, event2 -> workflow2 -------
    tr_args = {'event_id': self.event1.id}
    tr = Trigger(data=TriggerData('test-event-trigger1', 'event', [self.workflow1.id], tr_args,
                                  TriggerState.ACTIVE))
    self.event_trigger1 = self.dart.save_trigger(tr)

    tr_args = {'event_id': self.event2.id}
    tr = Trigger(data=TriggerData('test-event-trigger2-no-show', 'event', [self.workflow2.id], tr_args,
                                  TriggerState.ACTIVE))
    self.event_trigger2 = self.dart.save_trigger(tr)

    # --- super triggers: trigger1 watches event_trigger1 and has no
    # workflows; trigger2 watches trigger1 and targets workflow1 ---------
    st_args = {'fire_after': 'ALL', 'completed_trigger_ids': [self.event_trigger1.id]}
    st = Trigger(data=TriggerData('test-super-trigger1', 'super', None, st_args, TriggerState.ACTIVE))
    self.super_trigger1 = self.dart.save_trigger(st)

    st_args = {'fire_after': 'ANY', 'completed_trigger_ids': [self.super_trigger1.id]}
    st = Trigger(data=TriggerData('test-super-trigger2', 'super', [self.workflow1.id], st_args,
                                  TriggerState.ACTIVE))
    self.super_trigger2 = self.dart.save_trigger(st)
def setUp(self):
    """Provide a default Action and a named ActionData for each test."""
    default_action = Action()
    named_action_data = ActionData(
        name="action data name",
        action_type_name="test",
    )

    self.actionModel = default_action
    self.actionDataModel = named_action_data