def setUp(self):
        """Build the fixture entities through the Dart client.

        Saves two TEMPLATE no-op datastores, one ACTIVE workflow per
        datastore, two TEMPLATE "succeeds" actions per workflow, a
        'workflow_completion' trigger whose args reference workflow0, and a
        'super' trigger whose args reference that first trigger.
        """
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        # one args dict is shared by both datastores, as in the fixture data
        shared_args = {'action_sleep_time_in_seconds': 0}

        def template_datastore(name):
            return Datastore(data=DatastoreData(
                name, 'no_op_engine', args=shared_args,
                state=DatastoreState.TEMPLATE))

        def active_workflow(name, datastore_id):
            return Workflow(data=WorkflowData(
                name, datastore_id, state=WorkflowState.ACTIVE))

        def succeeding_action():
            return Action(data=ActionData(
                NoOpActionTypes.action_that_succeeds.name,
                NoOpActionTypes.action_that_succeeds.name,
                state=ActionState.TEMPLATE))

        self.datastore0 = self.dart.save_datastore(
            template_datastore('test-datastore0'))
        self.datastore1 = self.dart.save_datastore(
            template_datastore('test-datastore1'))

        self.workflow0 = self.dart.save_workflow(
            active_workflow('test-workflow0', self.datastore0.id),
            self.datastore0.id)
        self.workflow1 = self.dart.save_workflow(
            active_workflow('test-workflow1', self.datastore1.id),
            self.datastore1.id)

        self.action00, self.action01 = self.dart.save_actions(
            [succeeding_action(), succeeding_action()],
            workflow_id=self.workflow0.id)
        self.action10, self.action11 = self.dart.save_actions(
            [succeeding_action(), succeeding_action()],
            workflow_id=self.workflow1.id)

        completion_args = {'completed_workflow_id': self.workflow0.id}
        completion_trigger = Trigger(data=TriggerData(
            'test-trigger', 'workflow_completion', None, completion_args,
            TriggerState.ACTIVE))
        self.trigger = self.dart.save_trigger(completion_trigger)

        super_args = {
            'fire_after': 'ALL',
            'completed_trigger_ids': [self.trigger.id]
        }
        # third positional argument carries [workflow1.id]
        # (presumably the workflows the trigger starts -- TODO confirm)
        super_trigger = Trigger(data=TriggerData(
            'test-super-trigger', 'super', [self.workflow1.id], super_args,
            TriggerState.ACTIVE))
        self.super_trigger = self.dart.save_trigger(super_trigger)
Beispiel #2
0
    def test_create_cluster_with_configuration_overrides(self, mock_call):
        """Check that create_cluster passes a ``--configurations`` argument.

        The value following ``--configurations`` in the issued command is
        handed to ``self.check_extra_configs`` for inspection.

        :param mock_call: mock that receives the command issued by
            create_cluster (its patch decorator is not visible in this
            chunk); it is primed to return a JSON document with a ClusterId.
        """
        mock_call.return_value = json.dumps({'ClusterId': '123'})

        datastore = Datastore()
        datastore.data = DatastoreData(
            name='test_create_cluster_with_configurations_datastore')
        datastore.data.args = {
            "data_to_freespace_ratio": 0.5,
            "dry_run": False,
            "instance_count": 1,
            "instance_type": "m3.xlarge",
            "release_label": "emr-4.8.2"
        }

        emr_engine = EmrEngine(
            ec2_keyname='xxx-yyy-ec2-key-pair-rpt',
            instance_profile=
            'xxx-yyy-iam-rpt-1-UdsEc2InstanceProfile-1SIA38TXQ7OY1',
            service_role=
            'xxx-yyy-iam-rpt-1-UdsInstanceProfileRole-FX98BLTMCK60',
            region='region',
            core_node_limit=30,
            impala_docker_repo_base_url='111111111111.wwww/xxx',
            impala_version='2.3.0',
            cluster_tags={
                'Name': 'xxx-yyy-uds',
                'Product': 'xxx',
                'Function': 'a-b',
                'Accounting': '222-1111111'
            },
            subnet_id='subnetid12345',
            dart_host='somehost',
            dart_port=5000)

        instance_groups_args = [
            (1, 'MASTER', 'm3.xlarge', 'ON_DEMAND', 'master'),
            (1, 'CORE', 'm3.xlarge', 'ON_DEMAND', 'core'),
        ]

        create_cluster(bootstrap_actions_args=[],
                       cluster_name="test_create_cluster_with_configurations",
                       datastore=datastore,
                       emr_engine=emr_engine,
                       instance_groups_args=instance_groups_args,
                       configuration_overrides=self.CONFIGURATION_OVERRIDES)

        # call_count is a plain attribute on every mock release, whereas
        # assert_called_once() only exists on newer ones and, on old ones,
        # is an auto-created attribute that "passes" without checking anything
        self.assertEqual(mock_call.call_count, 1)

        # first positional argument of the call is the command as one string
        command_line = mock_call.call_args[0][0]
        cmd_list = command_line.split(' ')
        pos = cmd_list.index('--configurations')
        file_url = cmd_list[pos + 1]
        self.check_extra_configs(file_url)
    def test_datastore_schema(self):
        """Validate a datastore against the schema, then purge its secret arg."""
        engine_args = {'data_to_freespace_ratio': 0, 'secret': 'hi'}
        unvalidated = Datastore(data=DatastoreData('test-datastore', 'fake_engine', args=engine_args))
        before = unvalidated.to_dict()

        schema = datastore_schema(self.options_json_schema)
        validated = default_and_validate(unvalidated, schema)
        after = validated.to_dict()
        # default_and_validate is expected to change the serialized form
        self.assertNotEqual(before, after)

        # the secret survives validation ...
        self.assertEqual(after['data']['args']['secret'], 'hi')

        # ... and is moved out of the dict into `collected` by purge_secrets
        collected = {}
        purge_secrets(after, schema, collected)
        self.assertEqual(after['data']['args'].get('secret'), None)
        self.assertEqual(collected, {'secret': 'hi'})
    def test_create_cluster_with_configuration_overrides(self, mock_call):
        """Check that create_cluster passes a ``--configurations`` argument.

        The token that follows ``--configurations`` in the issued command is
        passed to ``self.check_extra_configs``.

        :param mock_call: mock that receives the command issued by
            create_cluster (the decorator injecting it is outside this
            chunk); primed here to return a JSON document with a ClusterId.
        """
        mock_call.return_value = json.dumps({'ClusterId': '123'})

        datastore = Datastore()
        datastore.data = DatastoreData(name='test_create_cluster_with_configurations_datastore')
        datastore.data.args = {
            "data_to_freespace_ratio": 0.5,
            "dry_run": False,
            "instance_count": 1,
            "instance_type": "m3.xlarge",
            "release_label": "emr-4.8.2"
        }

        emr_engine = EmrEngine(
            ec2_keyname='xxx-yyy-ec2-key-pair-rpt',
            instance_profile='xxx-yyy-iam-rpt-1-UdsEc2InstanceProfile-1SIA38TXQ7OY1',
            service_role='xxx-yyy-iam-rpt-1-UdsInstanceProfileRole-FX98BLTMCK60',
            region='region',
            core_node_limit=30,
            impala_docker_repo_base_url='111111111111.wwww/xxx',
            impala_version='2.3.0',
            cluster_tags={
                'Name': 'xxx-yyy-uds',
                'Product': 'xxx',
                'Function': 'a-b',
                'Accounting': '222-1111111'
            },
            subnet_id='subnetid12345',
            dart_host='somehost',
            dart_port=5000
        )

        instance_groups_args = [
            (1, 'MASTER', 'm3.xlarge', 'ON_DEMAND', 'master'),
            (1, 'CORE', 'm3.xlarge', 'ON_DEMAND', 'core'),
        ]

        create_cluster(bootstrap_actions_args=[],
                       cluster_name="test_create_cluster_with_configurations",
                       datastore=datastore,
                       emr_engine=emr_engine,
                       instance_groups_args=instance_groups_args,
                       configuration_overrides=self.CONFIGURATION_OVERRIDES)

        # Explicit count check: assert_called_once() is missing from older
        # mock releases, where the attribute is auto-created and the call
        # succeeds without verifying anything.
        self.assertEqual(mock_call.call_count, 1)

        # first positional argument of the call is the command as one string
        args = mock_call.call_args
        cmd_list = args[0][0].split(' ')
        pos = cmd_list.index('--configurations')
        file_url = cmd_list[pos + 1]
        self.check_extra_configs(file_url)
Beispiel #5
0
 def setUp(self):
     """Save a TEMPLATE no-op datastore and an ACTIVE workflow attached to it."""
     self.dart = Dart(host='localhost', port=5000)

     datastore_data = DatastoreData(
         'test-datastore',
         'no_op_engine',
         args={'action_sleep_time_in_seconds': 0},
         state=DatastoreState.TEMPLATE)
     self.datastore = self.dart.save_datastore(Datastore(data=datastore_data))

     workflow_data = WorkflowData(
         'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE)
     self.workflow = self.dart.save_workflow(
         Workflow(data=workflow_data), self.datastore.id)
Beispiel #6
0
    def save_datastore(self, datastore, commit_and_handle_state_change=True, flush=False):
        """Validate a new datastore, stash its secrets and add it to the session.

        :type datastore: dart.model.datastore.Datastore
        :param commit_and_handle_state_change: when true, commit the session
            and then call handle_datastore_state_change with a previous state
            of None and the stored 'state' value.
        :param flush: when true, flush the session right after the add.
        :return: the model built by the DAO's to_model().
        """
        schema = self.get_schema(datastore)
        validated = self.default_and_validate_datastore(datastore, schema)
        validated.id = random_id()

        # purge_secrets operates on the dict form and fills `purged`; the
        # model is then rebuilt from that dict and each collected value is
        # written to the secret store under a key derived from the new id
        purged = {}
        as_dict = validated.to_dict()
        purge_secrets(as_dict, schema, purged)
        stripped = Datastore.from_dict(as_dict)
        for secret_name, secret_value in purged.iteritems():
            secret_key = 'dart-datastore-%s-%s' % (stripped.id, secret_name)
            self._secrets.put(secret_key, secret_value)

        self._set_s3_paths(stripped)

        dao = DatastoreDao()
        dao.id = stripped.id
        dao.data = stripped.data.to_dict()
        db.session.add(dao)
        if flush:
            db.session.flush()

        saved = dao.to_model()
        if not commit_and_handle_state_change:
            return saved

        db.session.commit()
        # rebuild the model once more after the commit
        saved = dao.to_model()
        self.handle_datastore_state_change(saved, None, dao.data['state'])
        return saved
Beispiel #7
0
    def test_crud(self):
        """Create, read, update and delete a datastore through the client."""
        new_datastore = Datastore(data=DatastoreData(
            name='test-datastore',
            engine_name='no_op_engine',
            args={'action_sleep_time_in_seconds': 0},
            tags=['foo']))
        created = self.dart.save_datastore(new_datastore)

        # the s3 paths are populated at creation time, so mirror them onto
        # the local object before comparing
        new_datastore.data.s3_artifacts_path = created.data.s3_artifacts_path
        new_datastore.data.s3_logs_path = created.data.s3_logs_path
        self.assertEqual(created.data.to_dict(), new_datastore.data.to_dict())

        fetched = self.dart.get_datastore(created.id)
        self.assertEqual(created.to_dict(), fetched.to_dict())

        # not every property can be modified: the engine name is expected to
        # stay as it was while the state change is expected to stick
        fetched.data.engine_name = 'not_existing_engine'
        fetched.data.state = DatastoreState.ACTIVE
        updated = self.dart.save_datastore(fetched)
        self.assertEqual(updated.data.engine_name, 'no_op_engine')
        self.assertEqual(updated.data.state, DatastoreState.ACTIVE)
        self.assertNotEqual(created.to_dict(), updated.to_dict())

        self.dart.delete_datastore(fetched.id)
        try:
            self.dart.get_datastore(fetched.id)
        except DartRequestException as e:
            self.assertEqual(e.response.status_code, 404)
        else:
            self.fail('datastore should have been missing after delete!')
Beispiel #8
0
    def save_datastore(self,
                       datastore,
                       commit_and_handle_state_change=True,
                       flush=False):
        """Validate a new datastore, store its secrets and persist it.

        :type datastore: dart.model.datastore.Datastore
        :param commit_and_handle_state_change: when true, the session is
            committed and handle_datastore_state_change is called with a
            previous state of None and the stored 'state' value.
        :param flush: when true, the session is flushed after the add.
        :return: the model built by the DAO's to_model().
        """
        schema = self.get_schema(datastore)
        new_datastore = self.default_and_validate_datastore(datastore, schema)
        new_datastore.id = random_id()

        # secrets are removed from the dict form and kept in the secret
        # store instead, keyed by the new datastore id and the secret name
        extracted_secrets = {}
        serialized = new_datastore.to_dict()
        purge_secrets(serialized, schema, extracted_secrets)
        new_datastore = Datastore.from_dict(serialized)
        for name, value in extracted_secrets.iteritems():
            self._secrets.put(
                'dart-datastore-%s-%s' % (new_datastore.id, name), value)

        self._set_s3_paths(new_datastore)

        row = DatastoreDao()
        row.id = new_datastore.id
        row.data = new_datastore.data.to_dict()
        db.session.add(row)
        if flush:
            db.session.flush()

        result = row.to_model()
        if commit_and_handle_state_change:
            db.session.commit()
            # rebuild the model after the commit before reporting the state
            result = row.to_model()
            self.handle_datastore_state_change(result, None,
                                               row.data['state'])
        return result
Beispiel #9
0
def patch_datastore(datastore):
    """Apply the request's JSON patch to a datastore, editable fields only.

    :type datastore: dart.model.datastore.Datastore
    :return: dict with the patched datastore's dict form under 'results'.
    """
    json_patch = JsonPatch(request.get_json())
    sanitized = datastore.copy()
    patched = Datastore.from_dict(json_patch.apply(datastore.to_dict()))

    # only allow updating fields that are editable; every other field keeps
    # the value it has on the copy of the stored datastore
    editable_fields = (
        'name',
        'host',
        'port',
        'connection_url',
        'state',
        'concurrency',
        'args',
        'extra_data',
        'tags',
    )
    for field in editable_fields:
        setattr(sanitized.data, field, getattr(patched.data, field))

    # revalidate
    sanitized = datastore_service().default_and_validate_datastore(sanitized)

    result = datastore_service().patch_datastore(datastore, sanitized)
    return {'results': result.to_dict()}
Beispiel #10
0
 def setUp(self):
     """Create a Dart client for localhost:5000 and save one ACTIVE no-op datastore."""
     self.dart = Dart(host='localhost', port=5000)
     datastore_data = DatastoreData(
         name='test-datastore',
         engine_name='no_op_engine',
         args={'action_sleep_time_in_seconds': 0},
         state=DatastoreState.ACTIVE)
     self.datastore = self.dart.save_datastore(Datastore(data=datastore_data))
Beispiel #11
0
    def test_datastore_schema(self):
        """Validation keeps the secret arg; purge_secrets then extracts it."""
        datastore_data = DatastoreData(
            'test-datastore',
            'fake_engine',
            args={'data_to_freespace_ratio': 0, 'secret': 'hi'})
        raw_datastore = Datastore(data=datastore_data)
        dict_before = raw_datastore.to_dict()

        schema = datastore_schema(self.options_json_schema)
        dict_after = default_and_validate(raw_datastore, schema).to_dict()
        # validation is expected to alter the serialized datastore
        self.assertNotEqual(dict_before, dict_after)

        self.assertEqual(dict_after['data']['args']['secret'], 'hi')

        # purge_secrets removes the secret from the dict and records it
        # in the mapping passed as its third argument
        removed = {}
        purge_secrets(dict_after, schema, removed)
        self.assertEqual(dict_after['data']['args'].get('secret'), None)
        self.assertEqual(removed, {'secret': 'hi'})
Beispiel #12
0
 def patch_datastore(self, source_datastore, datastore):
     """Persist the difference between two datastores and report the state change.

     Secrets are purged from ``datastore`` before patch_difference runs.

     :return: the datastore returned by patch_difference.
     """
     schema = self.get_schema(datastore)

     # NOTE(review): the purged values land in `purged` and are not used
     # again in this method -- confirm that dropping them here is intended.
     purged = {}
     as_dict = datastore.to_dict()
     purge_secrets(as_dict, schema, purged)
     stripped = Datastore.from_dict(as_dict)

     patched = patch_difference(DatastoreDao, source_datastore, stripped)
     previous_state = source_datastore.data.state
     current_state = patched.data.state
     self.handle_datastore_state_change(patched, previous_state, current_state)
     return patched
Beispiel #13
0
    def test_datastore_schema_invalid(self):
        """default_and_validate must raise DartValidationException for this input.

        The datastore's only arg is 'data_to_freespace_ratio_yo'
        (presumably a name the options schema does not define).
        """
        # Build the inputs outside the assertRaises block so that only the
        # validation call itself can satisfy the expected exception.
        dst = Datastore(
            data=DatastoreData('test-datastore',
                               'fake_engine',
                               args={'data_to_freespace_ratio_yo': 0}))
        schema = datastore_schema(self.options_json_schema)

        with self.assertRaises(DartValidationException) as context:
            default_and_validate(dst, schema)

        self.assertIsInstance(context.exception, DartValidationException)
Beispiel #14
0
def put_datastore(datastore):
    """Update a datastore's extra_data and state from the request body.

    :type datastore: dart.model.datastore.Datastore
    :return: a dict with the updated datastore under 'results', or an
        (error dict, 400, None) tuple when the requested state is rejected.
    """
    updated_datastore = Datastore.from_dict(request.get_json())
    requested_state = updated_datastore.data.state

    leaving_template = (datastore.data.state == DatastoreState.TEMPLATE
                        and requested_state != DatastoreState.TEMPLATE)
    if leaving_template:
        error = {'results': 'ERROR', 'error_message': 'TEMPLATE state cannot be changed'}
        return error, 400, None

    # NOTE(review): TEMPLATE is not in this list, so a TEMPLATE -> TEMPLATE
    # request passes the check above but is rejected here -- confirm intended.
    allowed_states = (DatastoreState.ACTIVE, DatastoreState.INACTIVE, DatastoreState.DONE)
    if requested_state not in allowed_states:
        error = {'results': 'ERROR', 'error_message': 'state must be ACTIVE, INACTIVE, or DONE'}
        return error, 400, None

    with_extra_data = datastore_service().update_datastore_extra_data(
        datastore, updated_datastore.data.extra_data)
    with_state = datastore_service().update_datastore_state(
        with_extra_data, updated_datastore.data.state)
    return {'results': with_state.to_dict()}
Beispiel #15
0
 def _resolve_and_save_datastore(self, entity_id, entity_map, actual_entities_by_node_id, actual_entities_by_unsaved_id):
     """Return the datastore id for ``entity_id``, saving the datastore first if needed.

     When self._resolve yields an actual id it is returned as is. Otherwise
     the datastore is built from entity_map['unsaved_entities'], saved with
     a flush but without a commit, and recorded in both lookup dicts.
     """
     resolved = self._resolve(EntityType.datastore, entity_id, entity_map, actual_entities_by_unsaved_id)
     actual_id, unsaved_id = resolved
     if not actual_id:
         node_id = self._node_id(EntityType.datastore, unsaved_id)
         unsaved = Datastore.from_dict(entity_map['unsaved_entities'][node_id])
         saved = self._datastore_service.save_datastore(
             unsaved, commit_and_handle_state_change=False, flush=True)
         actual_entities_by_node_id[node_id] = saved
         actual_entities_by_unsaved_id[unsaved_id] = saved
         return saved.id
     return actual_id
Beispiel #16
0
 def _resolve_and_save_datastore(self, entity_id, entity_map, actual_entities_by_node_id, actual_entities_by_unsaved_id):
     """Resolve ``entity_id`` to a datastore id, creating the datastore when unsaved.

     :return: the actual id from self._resolve when there is one, otherwise
         the id of the datastore saved here.
     """
     existing_id, pending_id = self._resolve(
         EntityType.datastore, entity_id, entity_map, actual_entities_by_unsaved_id)
     if existing_id:
         return existing_id

     # not saved yet: build it from the entity map and save it with a
     # flush but without committing or handling the state change
     node_key = self._node_id(EntityType.datastore, pending_id)
     pending = Datastore.from_dict(entity_map['unsaved_entities'][node_key])
     created = self._datastore_service.save_datastore(
         pending, commit_and_handle_state_change=False, flush=True)

     # remember the saved entity under both of its lookup keys
     actual_entities_by_node_id[node_key] = created
     actual_entities_by_unsaved_id[pending_id] = created
     return created.id
Beispiel #17
0
 def patch_datastore(self, source_datastore, datastore):
     """Patch the stored datastore with ``datastore`` and handle the state change.

     :param source_datastore: the datastore as it was before the patch; its
         state is passed on as the previous state.
     :param datastore: the desired datastore; secrets are purged from it
         before it is handed to patch_difference.
     :return: the datastore returned by patch_difference.
     """
     schema = self.get_schema(datastore)

     # NOTE(review): `collected` is filled by purge_secrets and never read
     # afterwards in this method -- verify the secrets need not be stored.
     collected = {}
     serialized = datastore.to_dict()
     purge_secrets(serialized, schema, collected)
     without_secrets = Datastore.from_dict(serialized)

     result = patch_difference(DatastoreDao, source_datastore, without_secrets)
     self.handle_datastore_state_change(
         result, source_datastore.data.state, result.data.state)
     return result
Beispiel #18
0
    def setUp(self):
        """Create the Dart client and save one ACTIVE no-op datastore."""
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        datastore_data = DatastoreData(
            'test-datastore',
            'no_op_engine',
            args={'action_sleep_time_in_seconds': 0},
            state=DatastoreState.ACTIVE)
        self.datastore = self.dart.save_datastore(
            Datastore(data=datastore_data))
Beispiel #19
0
 def setUp(self):
     """Save a TEMPLATE no-op datastore plus a workflow on it, and raise maxDiff."""
     self.dart = Dart(host='localhost', port=5000)

     datastore_data = DatastoreData(
         name='test-datastore',
         engine_name='no_op_engine',
         args={'action_sleep_time_in_seconds': 0},
         state=DatastoreState.TEMPLATE)
     self.datastore = self.dart.save_datastore(Datastore(data=datastore_data))

     workflow_data = WorkflowData(
         name='test-workflow', datastore_id=self.datastore.id)
     self.workflow = self.dart.save_workflow(
         workflow=Workflow(data=workflow_data),
         datastore_id=self.datastore.id)

     self.maxDiff = 99999
Beispiel #20
0
    def setUp(self):
        """Build a dataset, a subscription on it, a datastore, a workflow and two actions.

        The S3 locations are derived from the DART_TEST_BUCKET environment
        variable; a KeyError is raised when it is not set.
        """
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        columns = [
            Column('c1', DataType.VARCHAR, 50),
            Column('c2', DataType.BIGINT)
        ]
        data_format = DataFormat(FileFormat.TEXTFILE, RowFormat.DELIMITED)
        impala_root = 's3://' + os.environ['DART_TEST_BUCKET'] + '/impala'
        dataset = Dataset(data=DatasetData(
            name='test-dataset',
            table_name='test_dataset_table',
            load_type=LoadType.INSERT,
            location=impala_root,
            data_format=data_format,
            columns=columns,
            tags=[]))
        self.dataset = self.dart.save_dataset(dataset)

        # subscription bounds and the file-name pattern it matches
        start = impala_root + '/impala'
        end = impala_root + '/install'
        regex = '.*\\.rpm'
        subscription = Subscription(data=SubscriptionData(
            'test-subscription', self.dataset.id, start, end, regex))
        self.subscription = self.dart.save_subscription(subscription)

        datastore_data = DatastoreData(
            'test-datastore',
            'no_op_engine',
            args={'action_sleep_time_in_seconds': 0},
            state=DatastoreState.TEMPLATE)
        self.datastore = self.dart.save_datastore(
            Datastore(data=datastore_data))

        workflow_data = WorkflowData(
            'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE)
        self.workflow = self.dart.save_workflow(
            Workflow(data=workflow_data), self.datastore.id)

        # first action simply succeeds; the second consumes the subscription
        succeed = Action(data=ActionData(
            NoOpActionTypes.action_that_succeeds.name,
            NoOpActionTypes.action_that_succeeds.name,
            state=ActionState.TEMPLATE))
        consume = Action(data=ActionData(
            NoOpActionTypes.consume_subscription.name,
            NoOpActionTypes.consume_subscription.name,
            {'subscription_id': self.subscription.id},
            state=ActionState.TEMPLATE))
        self.action0, self.action1 = self.dart.save_actions(
            [succeed, consume], workflow_id=self.workflow.id)
Beispiel #21
0
    def clone_datastore(self, source_datastore, **data_property_overrides):
        """Persist a copy of ``source_datastore`` under a fresh random id.

        The copy starts INACTIVE with its connection-related fields and
        extra_data cleared; any keyword arguments are then set as attributes
        on the copy's data, so they win over those defaults.

        :return: the model built from the newly committed DAO.
        """
        clone = Datastore.from_dict(source_datastore.to_dict())
        clone.data.state = DatastoreState.INACTIVE
        cleared_fields = ('host', 'port', 'username', 'password',
                          'connection_url', 'extra_data')
        for field in cleared_fields:
            setattr(clone.data, field, None)

        # NOTE(review): _set_s3_paths runs on an object rebuilt from the
        # source's dict, while the fresh id is only assigned to the DAO
        # below -- confirm the paths do not depend on the datastore id.
        self._set_s3_paths(clone)
        for prop, value in data_property_overrides.iteritems():
            setattr(clone.data, prop, value)

        dao = DatastoreDao()
        dao.id = random_id()
        dao.data = clone.data.to_dict()
        db.session.add(dao)
        db.session.commit()
        return dao.to_model()
Beispiel #22
0
    def clone_datastore(self, source_datastore, **data_property_overrides):
        """Create and commit an INACTIVE copy of ``source_datastore``.

        :param source_datastore: datastore whose dict form seeds the copy.
        :param data_property_overrides: attribute values applied to the
            copy's data after the reset and after the s3 paths are set.
        :return: the committed copy as a model, carrying a new random id.
        """
        copied = Datastore.from_dict(source_datastore.to_dict())
        data = copied.data

        # reset state and drop connection details and extra data
        data.state = DatastoreState.INACTIVE
        data.host = data.port = None
        data.username = data.password = None
        data.connection_url = None
        data.extra_data = None

        # NOTE(review): the paths are set before a new id exists for the
        # copy (the id is generated for the DAO below) -- verify this order.
        self._set_s3_paths(copied)
        for attr_name, attr_value in data_property_overrides.iteritems():
            setattr(copied.data, attr_name, attr_value)

        row = DatastoreDao()
        row.id = random_id()
        row.data = copied.data.to_dict()
        db.session.add(row)
        db.session.commit()
        return row.to_model()
Beispiel #23
0
    def setUp(self):
        """Save an ACTIVE datastore, an ACTIVE workflow and one failing action."""
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        datastore_data = DatastoreData(
            'test-datastore',
            'no_op_engine',
            args={'action_sleep_time_in_seconds': 0},
            state=DatastoreState.ACTIVE)
        self.datastore = self.dart.save_datastore(
            Datastore(data=datastore_data))

        workflow_data = WorkflowData(
            'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE)
        self.workflow = self.dart.save_workflow(
            Workflow(data=workflow_data), self.datastore.id)

        # the workflow's only action is the no-op engine's failing action
        failing_action = Action(data=ActionData(
            NoOpActionTypes.action_that_fails.name,
            NoOpActionTypes.action_that_fails.name,
            state=ActionState.TEMPLATE))
        self.dart.save_actions([failing_action], workflow_id=self.workflow.id)
Beispiel #24
0
def patch_datastore(datastore):
    """Patch a datastore from the request body, copying only editable fields.

    :type datastore: dart.model.datastore.Datastore
    :return: dict with the patched datastore's dict form under 'results'.
    """
    patch = JsonPatch(request.get_json())
    candidate = datastore.copy()
    fully_patched = Datastore.from_dict(patch.apply(datastore.to_dict()))

    # only allow updating fields that are editable: these are copied from
    # the patched result, everything else stays as on the original copy
    for field_name in ('name', 'host', 'port', 'connection_url', 'state',
                       'concurrency', 'args', 'extra_data', 'tags'):
        setattr(candidate.data, field_name,
                getattr(fully_patched.data, field_name))

    # revalidate
    candidate = datastore_service().default_and_validate_datastore(candidate)

    saved = datastore_service().patch_datastore(datastore, candidate)
    return {'results': saved.to_dict()}
    def test_consume_subscription_exceptions(self):
        """consume_subscription re-raises failures and never patches the action.

        Four cases are run; each expects a specific exception type and
        expects ``dart.patch_action`` not to have been called.
        """
        action = Action(id='abc123', data=ActionData('a_name', 'a_name', workflow_instance_id='abc123', args={'subscription_id': 0}))
        datastore = Datastore(id='abc123')

        def expect_raises(side_effect, expected_exception, **consume_kwargs):
            # fresh mocks for every case so call records do not leak between them
            engine = self.init_mocks(Mock(side_effect=side_effect))
            with self.assertRaises(expected_exception):
                consume_subscription(engine, datastore, action, **consume_kwargs)
            engine.dart.patch_action.assert_not_called()

        # plain exception, consume not flagged as successful
        expect_raises(Exception(), Exception)

        # plain exception, caller states the consume already succeeded
        expect_raises(Exception(), ActionFailedButConsumeSuccessfulException,
                      consume_successful=True)

        # DartActionException; the last StepWrapper argument differs between
        # the two cases (presumably its consume-successful flag -- TODO confirm)
        expect_raises(
            DartActionException('failed', StepWrapper(None, 0, 0, False)),
            Exception)
        expect_raises(
            DartActionException('failed', StepWrapper(None, 0, 0, True)),
            ActionFailedButConsumeSuccessfulException)
Beispiel #26
0
def put_datastore(datastore):
    """Apply the extra_data and state from the request body to a datastore.

    :type datastore: dart.model.datastore.Datastore
    :return: {'results': <updated datastore dict>} on success, otherwise an
        (error dict, 400, None) tuple describing the rejected state.
    """

    def rejected(message):
        return {'results': 'ERROR', 'error_message': message}, 400, None

    updated_datastore = Datastore.from_dict(request.get_json())
    new_state = updated_datastore.data.state

    if datastore.data.state == DatastoreState.TEMPLATE:
        if new_state != DatastoreState.TEMPLATE:
            return rejected('TEMPLATE state cannot be changed')

    # NOTE(review): TEMPLATE itself is not accepted below, so an unchanged
    # TEMPLATE state is rejected as well -- confirm this is intended.
    accepted_states = (DatastoreState.ACTIVE,
                       DatastoreState.INACTIVE,
                       DatastoreState.DONE)
    if new_state not in accepted_states:
        return rejected('state must be ACTIVE, INACTIVE, or DONE')

    datastore = datastore_service().update_datastore_extra_data(
        datastore, updated_datastore.data.extra_data)
    final = datastore_service().update_datastore_state(
        datastore, updated_datastore.data.state)
    return {'results': final.to_dict()}
Beispiel #27
0
def get_static_subgraphs_by_engine_name_all_engines_related_none(
        engine_names, graph_entity_service):
    """Map each engine name to a one-element list holding a 'datastore' SubGraph.

    :param engine_names: iterable of engine names; a repeated name keeps
        the SubGraph built for its last occurrence.
    :param graph_entity_service: supplies to_entity_models_with_randomized_ids,
        to_graph and to_entity_map.
    :return: dict of engine name -> [SubGraph].
    """

    def datastore_subgraph(engine_name):
        # a single INACTIVE datastore named after the engine
        template = Datastore(
            id=Ref.datastore(1),
            data=DatastoreData(name='%s_datastore' % engine_name,
                               engine_name=engine_name,
                               state=DatastoreState.INACTIVE))
        models = graph_entity_service.to_entity_models_with_randomized_ids(
            [template])
        return SubGraph(
            name='datastore',
            description='create a new datastore entity',
            related_type=None,
            related_is_a=None,
            graph=graph_entity_service.to_graph(None, models),
            entity_map=graph_entity_service.to_entity_map(models),
            icon='⬟',
        )

    return {
        engine_name: [datastore_subgraph(engine_name)]
        for engine_name in engine_names
    }
Beispiel #28
0
            's3://example-bucket/prd/inbound/overlord/eu-all-events/2015/08/05/',
            on_success_email=['*****@*****.**'],
            on_failure_email=['*****@*****.**'],
        )))
    print 'created subscription: %s' % subscription.id

    print 'awaiting subscription generation...'
    subscription = dart.await_subscription_generation(subscription.id)
    assert subscription.data.state == SubscriptionState.ACTIVE
    print 'done.'

    datastore = dart.save_datastore(
        Datastore(data=DatastoreData(
            name='owen_eu_parquet_DW-3213_v3',
            engine_name='emr_engine',
            state=DatastoreState.ACTIVE,
            args={
                # 'data_to_freespace_ratio': 0.05,
                'instance_count': 3,
            })))
    print 'created datastore: %s' % datastore.id

    a0, a1 = dart.save_actions(actions=[
        Action(data=ActionData('start_datastore', 'start_datastore')),
        Action(data=ActionData(
            'load_dataset',
            'load_dataset',
            args={
                'dataset_id': dataset.id,
                's3_path_end_prefix_exclusive':
                's3://example-bucket/prd/inbound/overlord/eu-all-events/2015/08/05/',
                'target_file_format': FileFormat.PARQUET,
Beispiel #29
0
def put_datastore(datastore):
    """Update ``datastore`` with the datastore parsed from the request's JSON body.

    :type datastore: dart.model.datastore.Datastore
    :return: whatever update_datastore returns.
    """
    payload = request.get_json()
    replacement = Datastore.from_dict(payload)
    return update_datastore(datastore, replacement)
            Column('featuredCouponPosition', DataType.INT),
            Column('commentCount', DataType.INT),
            Column('mallCount', DataType.INT),
            Column('clickCount', DataType.INT),
            Column('merchantName', DataType.STRING),
            Column('merchantPosition', DataType.INT),
        ],
        compression=Compression.SNAPPY,
        partitions=[Column('createdpartition', DataType.STRING)],
    ))))
    print 'created dataset: %s' % dataset.id

    datastore = dart.save_datastore(Datastore(
        data=DatastoreData(
            'beacon_native_app_impala',
            'emr_engine',
            state=DatastoreState.TEMPLATE,
            args={'data_to_freespace_ratio': 0.25}
        )
    ))
    print 'created datastore: %s' % datastore.id

    workflow = dart.save_workflow(Workflow(
        data=WorkflowData(
            'load_beacon_native_app_impala',
            datastore.id,
            state=WorkflowState.ACTIVE,
            on_failure_email=['*****@*****.**'],
            on_success_email=['*****@*****.**'],
            on_started_email=['*****@*****.**'],
        )
    ), datastore.id)
Beispiel #31
0
def patch_datastore(datastore):
    """Apply the patch in the request body to an existing datastore.

    The request's JSON payload is wrapped in a ``JsonPatch`` and applied to
    the dict form of ``datastore``.  The patched dict is turned back into a
    ``Datastore`` and passed, along with the original, to
    ``update_datastore``, whose result is returned unchanged.

    :type datastore: dart.model.datastore.Datastore
    """
    patch = JsonPatch(request.get_json())
    patched_dict = patch.apply(datastore.to_dict())
    patched_datastore = Datastore.from_dict(patched_dict)
    return update_datastore(datastore, patched_datastore)
Beispiel #32
0
            's3://example-bucket/prd/inbound/overlord/raw/rmndirect/2015/08/18/',
            on_success_email=['*****@*****.**'],
            on_failure_email=['*****@*****.**'],
        )))
    print 'created subscription: %s' % subscription.id

    print 'awaiting subscription generation...'
    subscription = dart.await_subscription_generation(subscription.id)
    assert subscription.data.state == SubscriptionState.ACTIVE
    print 'done.'

    datastore = dart.save_datastore(
        Datastore(data=DatastoreData(
            name='rmn_direct_adhoc',
            engine_name='emr_engine',
            state=DatastoreState.ACTIVE,
            args={
                # 'data_to_freespace_ratio': 0.10,
                'instance_count': 2,
            })))
    print 'created datastore: %s' % datastore.id

    actions = dart.save_actions(actions=[
        Action(data=ActionData('start_datastore', 'start_datastore')),
        Action(data=ActionData(
            'load_dataset',
            'load_dataset',
            args={
                'dataset_id': '34HWJLF5N9',
                's3_path_end_prefix_exclusive':
                's3://example-bucket/prd/inbound/overlord/raw/rmndirect/2015/08/18/',
                'target_file_format': FileFormat.PARQUET,
Beispiel #33
0
def post_datastore():
    """Save the datastore described by the request body.

    Builds a ``Datastore`` from the request's JSON payload, saves it through
    the datastore service, and returns the saved datastore's dict form under
    the ``'results'`` key.
    """
    service = datastore_service()
    incoming = Datastore.from_dict(request.get_json())
    saved = service.save_datastore(incoming)
    return {'results': saved.to_dict()}
Beispiel #34
0
    def setUp(self):
        """Create the fixture entities through a Dart client on localhost:5000.

        Saves, in this order: three datasets, two subscriptions, three
        datastores, three workflows, the actions attached to them, two events
        and four triggers (two event triggers, two super triggers).  Every
        saved entity is kept on ``self`` for use by the tests.

        NOTE(review): entities named ``*-no-show`` are presumably ones the
        tests expect to be absent from results -- confirm against the test
        methods.
        """
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        # --- datasets -------------------------------------------------------
        # Each dataset gets a unique location.  The random id is substituted
        # into the '%s' placeholder (the previous code concatenated it with
        # '+', which left a literal '%s' inside the S3 path).
        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        dataset0_location = 's3://test/dataset/0/%s' % random_id()
        dataset_data = DatasetData(
            'test-dataset0', 'test_dataset_table0', dataset0_location, df, cs)
        self.dataset0 = self.dart.save_dataset(Dataset(data=dataset_data))

        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        # Kept in a variable because the copy action below targets this path.
        dataset1_location = 's3://test/dataset/1/%s' % random_id()
        dataset_data = DatasetData(
            'test-dataset1', 'test_dataset_table1', dataset1_location, df, cs)
        self.dataset1 = self.dart.save_dataset(Dataset(data=dataset_data))

        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        dataset2_location = 's3://test/dataset/2/%s' % random_id()
        dataset_data = DatasetData(
            'test-dataset2-no-show', 'test_dataset_table2', dataset2_location, df, cs)
        self.dataset2 = self.dart.save_dataset(Dataset(data=dataset_data))

        # --- subscriptions --------------------------------------------------
        s = Subscription(data=SubscriptionData('test-subscription0', self.dataset0.id))
        self.subscription0 = self.dart.save_subscription(s)

        s = Subscription(data=SubscriptionData('test-subscription2-no-show', self.dataset2.id))
        self.subscription2 = self.dart.save_subscription(s)

        # --- datastores -----------------------------------------------------
        dst_args = {'action_sleep_time_in_seconds': 0}
        dst = Datastore(data=DatastoreData(
            'test-datastore0', 'no_op_engine', args=dst_args, state=DatastoreState.TEMPLATE))
        self.datastore0 = self.dart.save_datastore(dst)
        dst = Datastore(data=DatastoreData(
            'test-datastore1', 'no_op_engine', args=dst_args, state=DatastoreState.TEMPLATE))
        self.datastore1 = self.dart.save_datastore(dst)
        dst = Datastore(data=DatastoreData(
            'test-datastore2-no-show', 'no_op_engine', args=dst_args, state=DatastoreState.ACTIVE))
        self.datastore2 = self.dart.save_datastore(dst)

        # --- workflows (one per datastore) ----------------------------------
        wf0 = Workflow(data=WorkflowData(
            'test-workflow0', self.datastore0.id, state=WorkflowState.ACTIVE))
        self.workflow0 = self.dart.save_workflow(wf0, self.datastore0.id)
        wf1 = Workflow(data=WorkflowData(
            'test-workflow1', self.datastore1.id, state=WorkflowState.ACTIVE))
        self.workflow1 = self.dart.save_workflow(wf1, self.datastore1.id)
        wf2 = Workflow(data=WorkflowData(
            'test-workflow2-no-show', self.datastore2.id, state=WorkflowState.ACTIVE))
        self.workflow2 = self.dart.save_workflow(wf2, self.datastore2.id)

        # --- actions --------------------------------------------------------
        succeeds = NoOpActionTypes.action_that_succeeds.name
        consume = NoOpActionTypes.consume_subscription.name
        copy_to_s3 = NoOpActionTypes.copy_hdfs_to_s3_action.name
        load = NoOpActionTypes.load_dataset.name

        # workflow0: five template actions; a01 consumes subscription0 and
        # a03 copies to dataset1's location.
        a_args = {
            'source_hdfs_path': 'hdfs:///user/hive/warehouse/test',
            'destination_s3_path': dataset1_location,
        }
        a00 = Action(data=ActionData(succeeds, succeeds, state=ActionState.TEMPLATE))
        a01 = Action(data=ActionData(
            consume, consume, {'subscription_id': self.subscription0.id},
            state=ActionState.TEMPLATE))
        a02 = Action(data=ActionData(succeeds, succeeds, state=ActionState.TEMPLATE))
        a03 = Action(data=ActionData(copy_to_s3, copy_to_s3, a_args, state=ActionState.TEMPLATE))
        a04 = Action(data=ActionData(succeeds, succeeds, state=ActionState.TEMPLATE))
        self.action00, self.action01, self.action02, self.action03, self.action04 = \
            self.dart.save_actions([a00, a01, a02, a03, a04], workflow_id=self.workflow0.id)

        # workflow1: a single action that loads dataset1.  The attribute holds
        # whatever save_actions returns for the one-element list (it is not
        # unpacked); the same applies to action20 and action21 below.
        a10 = Action(data=ActionData(
            load, load, {'dataset_id': self.dataset1.id}, state=ActionState.TEMPLATE))
        self.action10 = self.dart.save_actions([a10], workflow_id=self.workflow1.id)

        # a20 is attached directly to datastore2; a21 belongs to workflow2.
        a20 = Action(data=ActionData(succeeds, succeeds, state=ActionState.HAS_NEVER_RUN))
        a21 = Action(data=ActionData(
            load, load, {'dataset_id': self.dataset2.id}, state=ActionState.TEMPLATE))
        self.action20 = self.dart.save_actions([a20], datastore_id=self.datastore2.id)
        self.action21 = self.dart.save_actions([a21], workflow_id=self.workflow2.id)

        # --- events ---------------------------------------------------------
        self.event1 = self.dart.save_event(
            Event(data=EventData('test-event1', state=EventState.ACTIVE)))
        self.event2 = self.dart.save_event(
            Event(data=EventData('test-event2-no-show', state=EventState.ACTIVE)))

        # --- triggers -------------------------------------------------------
        # Event triggers: event1 -> workflow1, event2 -> workflow2.
        tr_args = {'event_id': self.event1.id}
        tr = Trigger(data=TriggerData(
            'test-event-trigger1', 'event', [self.workflow1.id], tr_args, TriggerState.ACTIVE))
        self.event_trigger1 = self.dart.save_trigger(tr)

        tr_args = {'event_id': self.event2.id}
        tr = Trigger(data=TriggerData(
            'test-event-trigger2-no-show', 'event', [self.workflow2.id], tr_args,
            TriggerState.ACTIVE))
        self.event_trigger2 = self.dart.save_trigger(tr)

        # Super triggers: super_trigger1 references event_trigger1 and has no
        # workflows; super_trigger2 references super_trigger1 and targets
        # workflow1.
        st_args = {'fire_after': 'ALL', 'completed_trigger_ids': [self.event_trigger1.id]}
        st = Trigger(data=TriggerData(
            'test-super-trigger1', 'super', None, st_args, TriggerState.ACTIVE))
        self.super_trigger1 = self.dart.save_trigger(st)

        st_args = {'fire_after': 'ANY', 'completed_trigger_ids': [self.super_trigger1.id]}
        st = Trigger(data=TriggerData(
            'test-super-trigger2', 'super', [self.workflow1.id], st_args, TriggerState.ACTIVE))
        self.super_trigger2 = self.dart.save_trigger(st)
from dart.model.action import ActionData
from dart.model.dataset import FileFormat, Compression
from dart.model.dataset import RowFormat
from dart.model.datastore import Datastore
from dart.model.datastore import DatastoreData
from dart.model.datastore import DatastoreState

if __name__ == '__main__':
    dart = Dart('localhost', 5000)
    assert isinstance(dart, Dart)

    datastore = dart.save_datastore(
        Datastore(data=DatastoreData(
            name='amaceiras_beacon_native_app_null_coupons_issue',
            engine_name='emr_engine',
            state=DatastoreState.ACTIVE,
            args={
                # 'data_to_freespace_ratio': 0.05,
                'instance_count': 5,
            })))
    print 'created datastore: %s' % datastore.id

    actions = dart.save_actions(
        actions=[
            Action(data=ActionData('start_datastore', 'start_datastore')),
            Action(data=ActionData(
                'load_dataset',
                'load_dataset',
                args={
                    'dataset_id': 'URBA9XEQEF',
                    's3_path_start_prefix_inclusive':
                    's3://example-bucket/nb.retailmenot.com/parsed_logs/2015/33/beacon-v2-2015-08-18',
Beispiel #36
0
def patch_datastore(datastore):
    """Apply the patch in the request body to an existing datastore.

    The request's JSON payload is wrapped in a ``JsonPatch`` and applied to
    the dict form of ``datastore``.  The patched dict is turned back into a
    ``Datastore`` and passed, along with the original, to
    ``update_datastore``, whose result is returned unchanged.

    :type datastore: dart.model.datastore.Datastore
    """
    patch = JsonPatch(request.get_json())
    patched_dict = patch.apply(datastore.to_dict())
    patched_datastore = Datastore.from_dict(patched_dict)
    return update_datastore(datastore, patched_datastore)
Beispiel #37
0
def put_datastore(datastore):
    """Replace a datastore with the one described by the request body.

    A new ``Datastore`` is built from the request's JSON payload and handed,
    together with the existing ``datastore``, to ``update_datastore``; that
    call's result is returned unchanged.

    :type datastore: dart.model.datastore.Datastore
    """
    replacement = Datastore.from_dict(request.get_json())
    return update_datastore(datastore, replacement)
Beispiel #38
0
            Column('fruitSlice', DataType.STRING),
            Column('cacheHitMiss', DataType.STRING),
        ],
        compression=Compression.BZ2,
        partitions=[
            Column('year', DataType.STRING),
            Column('week', DataType.STRING),
        ],
    )))
    print 'created dataset: %s' % dataset.id

    datastore = dart.save_datastore(Datastore(
        data=DatastoreData(
            name='weblogs_DW-3503',
            engine_name='emr_engine',
            state=DatastoreState.ACTIVE,
            args={
                'data_to_freespace_ratio': 0.30,
            }
        )
    ))
    print 'created datastore: %s' % datastore.id

    actions = dart.save_actions(
        actions=[
            Action(data=ActionData('start_datastore', 'start_datastore')),
            Action(data=ActionData('load_dataset', 'load_dataset', args={
                'dataset_id': dataset.id,
                's3_path_start_prefix_inclusive': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/50',
                's3_path_end_prefix_exclusive': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2015/00',
                's3_path_regex_filter': 's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/../www\\.retailmenot\\.com.*',
                'target_file_format': FileFormat.TEXTFILE,
Beispiel #39
0
            name='weblogs_rmn_subscription',
            dataset_id=dataset.id,
            on_failure_email=['*****@*****.**', '*****@*****.**'],
            on_success_email=['*****@*****.**', '*****@*****.**'],
        )))
    print 'created subscription: %s' % subscription.id

    print 'awaiting subscription generation...'
    subscription = dart.await_subscription_generation(subscription.id)
    assert subscription.data.state == SubscriptionState.ACTIVE
    print 'done.'

    datastore = dart.save_datastore(
        Datastore(data=DatastoreData(name='weblogs_rmn_legacy',
                                     engine_name='emr_engine',
                                     state=DatastoreState.TEMPLATE,
                                     args={
                                         'data_to_freespace_ratio': 0.50,
                                     })))
    print 'created datastore: %s' % datastore.id

    workflow = dart.save_workflow(workflow=Workflow(data=WorkflowData(
        name='weblogs_rmn_legacy_parse_to_delimited',
        datastore_id=datastore.id,
        state=WorkflowState.ACTIVE,
        on_failure_email=['*****@*****.**', '*****@*****.**'],
        on_success_email=['*****@*****.**', '*****@*****.**'],
        on_started_email=['*****@*****.**', '*****@*****.**'],
    )),
                                  datastore_id=datastore.id)
    print 'created workflow: %s' % workflow.id
Beispiel #40
0
def post_datastore():
    """Save the datastore described by the request body.

    Builds a ``Datastore`` from the request's JSON payload, saves it through
    the datastore service, and returns the saved datastore's dict form under
    the ``'results'`` key.
    """
    service = datastore_service()
    incoming = Datastore.from_dict(request.get_json())
    saved = service.save_datastore(incoming)
    return {'results': saved.to_dict()}
Beispiel #41
0
                Column('fruitSlice', DataType.STRING),
                Column('cacheHitMiss', DataType.STRING),
            ],
            compression=Compression.BZ2,
            partitions=[
                Column('year', DataType.STRING),
                Column('week', DataType.STRING),
            ],
        )))
    print 'created dataset: %s' % dataset.id

    datastore = dart.save_datastore(
        Datastore(data=DatastoreData(
            name='weblogs_DW-3500_holiday_readiness',
            engine_name='emr_engine',
            state=DatastoreState.ACTIVE,
            args={
                # 'instance_count': 30,
                'data_to_freespace_ratio': 0.30,
            })))
    print 'created datastore: %s' % datastore.id

    actions = dart.save_actions(
        actions=[
            Action(data=ActionData('start_datastore', 'start_datastore')),
            Action(data=ActionData(
                'load_dataset',
                'load_dataset',
                args={
                    'dataset_id': dataset.id,
                    's3_path_start_prefix_inclusive':
                    's3://example-bucket/weblogs/www.retailmenot.com/ec2/2014/30',