Exemple #1
0
 def conditional_insert_subscription_element(subscription, s3_path, size):
     """Insert a subscription_element row for (subscription, s3_path) only if
     one does not already exist.

     :param subscription: subscription owning the element (its .id is used)
     :param s3_path: s3 path of the element; part of the uniqueness check
     :param size: file size stored in the file_size column
     :return: True if a new row was inserted (committed), False if a row for
         (subscription_id, s3_path) already existed (rolled back)
     """
     # SQLAlchemy does not support conditional inserts as a part of its expression language.  Furthermore,
     # this form of conditional update is required until postgres 9.5 is out and supported by RDS:
     #
     #    http://www.postgresql.org/docs/devel/static/sql-insert.html#SQL-ON-CONFLICT
     #
     sql = """
         INSERT INTO subscription_element (
             id,
             version_id,
             created,
             updated,
             subscription_id,
             s3_path,
             file_size,
             state
         )
         SELECT :id, 0, NOW(), NOW(), :sid, :s3_path, :size, :state
         WHERE NOT EXISTS
             (SELECT NULL FROM subscription_element WHERE subscription_id = :sid AND s3_path = :s3_path)
         """
     sid = subscription.id
     state = SubscriptionElementState.UNCONSUMED
     statement = text(sql).bindparams(id=random_id(), sid=sid, s3_path=s3_path, size=size, state=state)
     results = db.session.execute(statement)
     # rowcount == 1 means the INSERT...SELECT inserted a row; 0 means the
     # WHERE NOT EXISTS guard suppressed it (duplicate element)
     if results.rowcount != 1:
         db.session.rollback()
         return False
     else:
         db.session.commit()
         return True
Exemple #2
0
    def save_datastore(self, datastore, commit_and_handle_state_change=True, flush=False):
        """Validate, persist, and return a new datastore.

        :type datastore: dart.model.datastore.Datastore
        :param commit_and_handle_state_change: when True, commit the session
            and run the state-change handler for the newly saved datastore
        :param flush: when True, flush the session before building the model
        :return: the saved datastore re-read from the DAO
        """
        schema = self.get_schema(datastore)
        datastore = self.default_and_validate_datastore(datastore, schema)
        # assign the id before storing secrets: the secret key names below
        # embed the datastore id
        datastore.id = random_id()

        # strip secret values out of the serialized datastore and store them
        # in the secrets backend instead of the database
        secrets = {}
        datastore_dict = datastore.to_dict()
        purge_secrets(datastore_dict, schema, secrets)
        datastore = Datastore.from_dict(datastore_dict)
        for k, v in secrets.iteritems():
            self._secrets.put('dart-datastore-%s-%s' % (datastore.id, k), v)

        self._set_s3_paths(datastore)
        datastore_dao = DatastoreDao()
        datastore_dao.id = datastore.id
        datastore_dao.data = datastore.data.to_dict()
        db.session.add(datastore_dao)
        if flush:
            db.session.flush()
        datastore = datastore_dao.to_model()
        if commit_and_handle_state_change:
            db.session.commit()
            # re-read after commit so the returned model reflects DB state
            datastore = datastore_dao.to_model()
            self.handle_datastore_state_change(datastore, None, datastore_dao.data['state'])
        return datastore
Exemple #3
0
    def save_datastore(self,
                       datastore,
                       commit_and_handle_state_change=True,
                       flush=False):
        """Validate, persist, and return a new datastore.

        :type datastore: dart.model.datastore.Datastore
        :param commit_and_handle_state_change: when True, commit the session
            and run the state-change handler for the newly saved datastore
        :param flush: when True, flush the session before building the model
        :return: the saved datastore re-read from the DAO
        """
        schema = self.get_schema(datastore)
        datastore = self.default_and_validate_datastore(datastore, schema)
        # assign the id before storing secrets: the secret key names below
        # embed the datastore id
        datastore.id = random_id()

        # strip secret values out of the serialized datastore and store them
        # in the secrets backend instead of the database
        secrets = {}
        datastore_dict = datastore.to_dict()
        purge_secrets(datastore_dict, schema, secrets)
        datastore = Datastore.from_dict(datastore_dict)
        for k, v in secrets.iteritems():
            self._secrets.put('dart-datastore-%s-%s' % (datastore.id, k), v)

        self._set_s3_paths(datastore)
        datastore_dao = DatastoreDao()
        datastore_dao.id = datastore.id
        datastore_dao.data = datastore.data.to_dict()
        db.session.add(datastore_dao)
        if flush:
            db.session.flush()
        datastore = datastore_dao.to_model()
        if commit_and_handle_state_change:
            db.session.commit()
            # re-read after commit so the returned model reflects DB state
            datastore = datastore_dao.to_model()
            self.handle_datastore_state_change(datastore, None,
                                               datastore_dao.data['state'])
        return datastore
Exemple #4
0
    def save_trigger(self, trigger, commit_and_initialize=True, flush=False):
        """Validate and persist a trigger, optionally initializing it.

        :type trigger: dart.model.trigger.Trigger
        :param commit_and_initialize: when True, commit and let the trigger
            processor initialize the trigger; on failure the saved row is
            deleted before the exception propagates
        :param flush: when True, flush the session before building the model
        :return: the saved trigger as a model
        :raises DartValidationException: for manual or unknown trigger types
        """
        trigger_type_name = trigger.data.trigger_type_name
        if trigger_type_name == self._manual_trigger_processor.trigger_type().name:
            raise DartValidationException('manual triggers cannot be saved')
        trigger_processor = self._trigger_processors.get(trigger_type_name)
        if not trigger_processor:
            raise DartValidationException('unknown trigger_type_name: %s' % trigger_type_name)
        assert isinstance(trigger_processor, TriggerProcessor)
        trigger = default_and_validate(trigger, trigger_schema(trigger_processor.trigger_type().params_json_schema))

        trigger_dao = TriggerDao()
        trigger_dao.id = random_id()
        trigger_dao.data = trigger.data.to_dict()
        db.session.add(trigger_dao)
        if flush:
            db.session.flush()
        trigger = trigger_dao.to_model()
        if commit_and_initialize:
            db.session.commit()
            trigger = trigger_dao.to_model()
            try:
                trigger_processor.initialize_trigger(trigger, self)
            # deliberate bare except: undo the committed row on ANY failure,
            # then re-raise the original exception
            except:
                db.session.delete(trigger_dao)
                db.session.commit()
                raise
        return trigger
Exemple #5
0
    def save_trigger(self, trigger, commit_and_initialize=True, flush=False):
        """Validate and persist a trigger, optionally initializing it.

        :type trigger: dart.model.trigger.Trigger
        :param commit_and_initialize: when True, commit and let the trigger
            processor initialize the trigger; on failure the saved row is
            deleted before the exception propagates
        :param flush: when True, flush the session before building the model
        :return: the saved trigger as a model
        :raises DartValidationException: for manual or unknown trigger types
        """
        trigger_type_name = trigger.data.trigger_type_name
        if trigger_type_name == self._manual_trigger_processor.trigger_type(
        ).name:
            raise DartValidationException('manual triggers cannot be saved')
        trigger_processor = self._trigger_processors.get(trigger_type_name)
        if not trigger_processor:
            raise DartValidationException('unknown trigger_type_name: %s' %
                                          trigger_type_name)
        assert isinstance(trigger_processor, TriggerProcessor)
        trigger = default_and_validate(
            trigger,
            trigger_schema(
                trigger_processor.trigger_type().params_json_schema))

        trigger_dao = TriggerDao()
        trigger_dao.id = random_id()
        trigger_dao.data = trigger.data.to_dict()
        db.session.add(trigger_dao)
        if flush:
            db.session.flush()
        trigger = trigger_dao.to_model()
        if commit_and_initialize:
            db.session.commit()
            trigger = trigger_dao.to_model()
            try:
                trigger_processor.initialize_trigger(trigger, self)
            # deliberate bare except: undo the committed row on ANY failure,
            # then re-raise the original exception
            except:
                db.session.delete(trigger_dao)
                db.session.commit()
                raise
        return trigger
    def save_accounting_event(accounting_event, commit=True, flush=False):
        """Persist an accounting event row.

        :type accounting_event: dart.model.accounting.Accounting
        :param commit: when True, commit the session after adding the row
        :param flush: when True, flush the session before (optionally) committing
        :return: the accounting_event that was passed in
        :raises DartValidationException: on a unique-constraint violation
        """
        accounting_dao = AccountingDao()
        accounting_dao.id = random_id()
        accounting_dao.user_id = accounting_event.user_id
        accounting_dao.state = accounting_event.state
        accounting_dao.entity = accounting_event.entity
        accounting_dao.params = accounting_event.params
        accounting_dao.return_code = accounting_event.return_code
        accounting_dao.api_version = accounting_event.api_version
        accounting_dao.extra = accounting_event.extra

        db.session.add(accounting_dao)
        try:
            if flush:
                db.session.flush()
            if commit:
                db.session.commit()
            return accounting_event
        except SqlAlchemyIntegrityError as e:
            # 23505 is the postgres unique_violation error code
            if hasattr(e, 'orig') and isinstance(e.orig, PostgresIntegrityError) and e.orig.pgcode == '23505':
                # BUG FIX: the original formatted the message with the undefined
                # name `dataset`, which raised NameError instead of the intended
                # validation error; report the conflicting event instead
                raise DartValidationException('accounting event already exists: %s' % accounting_dao.id)
            raise e
Exemple #7
0
    def save_actions(self, actions, engine_name, datastore=None, commit=True, flush=False):
        """Validate and persist a batch of actions for the given engine.

        :type actions: list[dart.model.action.Action]
        :type datastore: dart.model.datastore.Datastore
        :param commit: when True, commit after all actions are added
        :param flush: when True, flush before (optionally) committing
        :return: the saved actions as models, in input order
        """

        engine = self._engine_service.get_engine_by_name(engine_name)
        assert isinstance(engine, Engine)
        action_types_by_name = {at.name: at for at in engine.data.supported_action_types}

        action_daos = []
        # when saving into a datastore, continue numbering after its current max
        max_order_idx = ActionService._get_max_order_idx(datastore.id) + 1 if datastore else 0
        for action in actions:
            action.data.engine_name = engine_name
            # only assign an order_idx when the action does not carry one
            if not action.data.order_idx:
                action.data.order_idx = max_order_idx
            max_order_idx = action.data.order_idx + 1
            action_dao = ActionDao()
            action_dao.id = random_id()

            action_type = action_types_by_name.get(action.data.action_type_name)
            action = self.default_and_validate_action(action, action_type)

            action_dao.data = action.data.to_dict()
            db.session.add(action_dao)
            action_daos.append(action_dao)
        if flush:
            db.session.flush()
        if commit:
            db.session.commit()
        return [a.to_model() for a in action_daos]
Exemple #8
0
    def save_accounting_event(accounting_event, commit=True, flush=False):
        """Persist an accounting event row.

        :type accounting_event: dart.model.accounting.Accounting
        :param commit: when True, commit the session after adding the row
        :param flush: when True, flush the session before (optionally) committing
        :return: the accounting_event that was passed in
        :raises DartValidationException: on a unique-constraint violation
        """
        accounting_dao = AccountingDao()
        accounting_dao.id = random_id()
        accounting_dao.user_id = accounting_event.user_id
        accounting_dao.state = accounting_event.state
        accounting_dao.entity = accounting_event.entity
        accounting_dao.params = accounting_event.params
        accounting_dao.return_code = accounting_event.return_code
        accounting_dao.api_version = accounting_event.api_version
        accounting_dao.extra = accounting_event.extra

        db.session.add(accounting_dao)
        try:
            if flush:
                db.session.flush()
            if commit:
                db.session.commit()
            return accounting_event
        except SqlAlchemyIntegrityError as e:
            # 23505 is the postgres unique_violation error code
            if hasattr(e, 'orig') and isinstance(
                    e.orig,
                    PostgresIntegrityError) and e.orig.pgcode == '23505':
                # BUG FIX: the original formatted the message with the undefined
                # name `dataset`, which raised NameError instead of the intended
                # validation error; report the conflicting event instead
                raise DartValidationException('accounting event already exists: %s' % accounting_dao.id)
            raise e
Exemple #9
0
    def save_workflow_instance(workflow, trigger_type, trigger_id, state, log_info=None):
        """Create, persist (committed), and return a new workflow instance.

        :type workflow: dart.model.workflow.Workflow
        :type trigger_type: dart.model.trigger.TriggerType
        :param trigger_id: id of the trigger that fired this instance
        :param state: initial workflow instance state
        :param log_info: optional dict; 'wf_uuid' is appended to the tags and
            'user_id' overrides the default 'anonymous' user
        :return: the saved workflow instance as a model
        """
        wf_instance_dao = WorkflowInstanceDao()
        wf_instance_dao.id = random_id()
        wf_data = workflow.data

        wf_data_tags = wf_data.tags if(wf_data.tags) else []
        if (log_info and log_info.get('wf_uuid')):
            wf_data_tags.append(log_info.get('wf_uuid'))

        user_id = 'anonymous'
        if (log_info and log_info.get('user_id')):
            user_id = log_info.get('user_id')


        data = WorkflowInstanceData(
            workflow_id=workflow.id,
            engine_name=wf_data.engine_name,
            state=state,
            trigger_type=trigger_type.name,
            trigger_id=trigger_id,
            queued_time=datetime.now(),
            tags=wf_data_tags,
            user_id=user_id,
        )
        wf_instance_dao.data = data.to_dict()
        db.session.add(wf_instance_dao)
        db.session.commit()
        return wf_instance_dao.to_model()
Exemple #10
0
    def save_workflow_instance(workflow,
                               trigger_type,
                               trigger_id,
                               state,
                               log_info=None,
                               retry_num=0):
        """Create, persist (committed), and return a new workflow instance.

        :type workflow: dart.model.workflow.Workflow
        :type trigger_type: dart.model.trigger.TriggerType
        :param trigger_id: id of the trigger that fired this instance
        :param state: initial workflow instance state
        :param log_info: optional dict; 'wf_uuid' is appended to the tags and
            'user_id' overrides the default 'anonymous' user
        :param retry_num: retry counter recorded on the instance data
        :return: the saved workflow instance as a model
        """
        wf_instance_dao = WorkflowInstanceDao()
        wf_instance_dao.id = random_id()
        wf_data = workflow.data

        wf_data_tags = wf_data.tags if (wf_data.tags) else []
        if (log_info and log_info.get('wf_uuid')):
            wf_data_tags.append(log_info.get('wf_uuid'))

        user_id = 'anonymous'
        if (log_info and log_info.get('user_id')):
            user_id = log_info.get('user_id')

        data = WorkflowInstanceData(
            workflow_id=workflow.id,
            engine_name=wf_data.engine_name,
            state=state,
            trigger_type=trigger_type.name,
            trigger_id=trigger_id,
            queued_time=datetime.now(),
            tags=wf_data_tags,
            user_id=user_id,
            retry_num=retry_num,
        )
        wf_instance_dao.data = data.to_dict()
        db.session.add(wf_instance_dao)
        db.session.commit()
        return wf_instance_dao.to_model()
Exemple #11
0
def create_all():
    """Create all database tables and seed required rows.

    Seeds one mutex row per known mutex (insert-if-absent), then provisions
    the dart client user and api keys required for authenticated operation.

    :return: the string 'OK'
    """
    db.create_all()

    for mutex in Mutexes.all():
        # conditional insert: only add the mutex row if the name is absent
        sql = """
            INSERT INTO mutex (id, version_id, created, updated, name, state)
            SELECT :id, 0, NOW(), NOW(), :name, :state
            WHERE NOT EXISTS (SELECT NULL FROM mutex WHERE name = :name)
            """
        statement = text(sql).bindparams(id=random_id(), name=mutex, state=MutexState.READY)
        db.session.execute(statement)
        db.session.commit()
    # config values for dart_client_key/secret are extracted.
    if AUTH_CONFIG.get('use_auth'):
        if AUTH_CONFIG.get('dart_client_key') and AUTH_CONFIG.get('dart_client_secret'):
            _credential = AUTH_CONFIG.get('dart_client_key')
            _secret = AUTH_CONFIG.get('dart_client_secret')
        else:
            raise Exception('dart_client_key and dart_client_secret must both exist.')
    else:
        # The credential/secret default values are set in order to prevent exception while calculating to hmac.
        _credential = 'cred'
        _secret = 'secret'
    # We set a user for dart_client with a apikey/secret (read from config) so that dart_client can work.
    # api auth expects a user to exist in the user table and have an entry in the api_key table (with key/secret values set).
    populate_dart_client_user(DART_CLIENT_NAME)
    populate_dart_client_apikeys(_credential, _secret, DART_CLIENT_NAME)
    # populate user and keys for external service (not dart client)
    populate_user_api_secret_keys()
    return 'OK'
Exemple #12
0
    def save_actions(self, actions, engine_name, datastore=None, commit=True, flush=False):
        """Validate and persist a batch of actions for the given engine.

        :type actions: list[dart.model.action.Action]
        :type datastore: dart.model.datastore.Datastore
        :param commit: when True, commit after all actions are added
        :param flush: when True, flush before (optionally) committing
        :return: the saved actions as models, in input order
        """

        engine = self._engine_service.get_engine_by_name(engine_name)
        assert isinstance(engine, Engine)
        action_types_by_name = {at.name: at for at in engine.data.supported_action_types}

        action_daos = []
        # when saving into a datastore, continue numbering after its current max
        max_order_idx = ActionService._get_max_order_idx(datastore.id) + 1 if datastore else 0
        for action in actions:
            action.data.engine_name = engine_name
            # only assign an order_idx when the action does not carry one
            if not action.data.order_idx:
                action.data.order_idx = max_order_idx
            max_order_idx = action.data.order_idx + 1
            action_dao = ActionDao()
            action_dao.id = random_id()

            action_type = action_types_by_name.get(action.data.action_type_name)
            action = self.default_and_validate_action(action, action_type)

            action_dao.data = action.data.to_dict()
            db.session.add(action_dao)
            action_daos.append(action_dao)
        if flush:
            db.session.flush()
        if commit:
            db.session.commit()
        return [a.to_model() for a in action_daos]
 def conditional_insert_subscription_element(subscription, s3_path, size):
     """Insert a subscription_element row for (subscription, s3_path) only if
     one does not already exist.

     :param subscription: subscription owning the element (its .id is used)
     :param s3_path: s3 path of the element; part of the uniqueness check
     :param size: file size stored in the file_size column
     :return: True if a new row was inserted (committed), False if a row for
         (subscription_id, s3_path) already existed (rolled back)
     """
     # SQLAlchemy does not support conditional inserts as a part of its expression language.  Furthermore,
     # this form of conditional update is required until postgres 9.5 is out and supported by RDS:
     #
     #    http://www.postgresql.org/docs/devel/static/sql-insert.html#SQL-ON-CONFLICT
     #
     sql = """
         INSERT INTO subscription_element (
             id,
             version_id,
             created,
             updated,
             subscription_id,
             s3_path,
             file_size,
             state
         )
         SELECT :id, 0, NOW(), NOW(), :sid, :s3_path, :size, :state
         WHERE NOT EXISTS
             (SELECT NULL FROM subscription_element WHERE subscription_id = :sid AND s3_path = :s3_path)
         """
     sid = subscription.id
     state = SubscriptionElementState.UNCONSUMED
     statement = text(sql).bindparams(id=random_id(), sid=sid, s3_path=s3_path, size=size, state=state)
     results = db.session.execute(statement)
     # rowcount == 1 means the INSERT...SELECT inserted a row; 0 means the
     # WHERE NOT EXISTS guard suppressed it (duplicate element)
     if results.rowcount != 1:
         db.session.rollback()
         return False
     else:
         db.session.commit()
         return True
Exemple #14
0
    def generate_subscription_elements(self, subscription):
        """List the dataset's s3 keys and insert one element row per key.

        Transitions the subscription GENERATING -> ACTIVE, batching inserts of
        element rows, then re-lists from the last inserted key to fill any gap
        created while generation ran.

        :type subscription: dart.model.subscription.Subscription
        """
        _update_subscription_state(subscription, SubscriptionState.GENERATING)

        dataset = self._dataset_service.get_dataset(
            subscription.data.dataset_id)
        conn = boto.connect_s3()
        bucket = get_bucket(conn, dataset.data.location)
        s3_keys = yield_s3_keys(
            bucket,
            dataset.data.location,
            subscription.data.s3_path_start_prefix_inclusive,
            subscription.data.s3_path_end_prefix_exclusive,
            subscription.data.s3_path_regex_filter,
        )
        elements = []
        subscription_element_dict = {}
        for i, key_obj in enumerate(s3_keys):
            sid = subscription.id
            s3_path = get_s3_path(key_obj)
            state = SubscriptionElementState.UNCONSUMED
            now = datetime.now()
            subscription_element_dict = {
                'id': random_id(),
                'version_id': 0,
                'created': now,
                'updated': now,
                'subscription_id': sid,
                's3_path': s3_path,
                'file_size': key_obj.size,
                'state': state
            }
            elements.append(subscription_element_dict)

            # insert in batches of _batch_size to bound memory/transaction size
            batch_size_reached = (i + 1) % _batch_size == 0
            if batch_size_reached:
                self._insert_elements(elements)
                elements = []

        # flush the final partial batch, if any
        if len(elements) > 0:
            self._insert_elements(elements)

        _update_subscription_state(subscription, SubscriptionState.ACTIVE)

        # Now that the subscription is ACTIVE, s3 events for new files will cause conditional inserts to be
        # performed to keep the subscription up to date.  However, in the time it took for the subscription
        # elements to be generated, s3 events for new objects could have been missed.  So we will do one final
        # s3 list operation (starting with the last inserted key) to fill in the potential gap.
        s3_keys = yield_s3_keys(
            bucket,
            dataset.data.location,
            subscription_element_dict.get('s3_path'),
            subscription.data.s3_path_end_prefix_exclusive,
            subscription.data.s3_path_regex_filter,
        )
        for key_obj in s3_keys:
            self.conditional_insert_subscription_element(
                subscription, get_s3_path(key_obj), key_obj.size)
    def _handle_docker_concerns(self, cwl_image, eng_cfg, misc_log_group_name,
                                output_config, syslog_log_group_name):
        """Build and push all docker images required by the deployment.

        Clones and configures the cloudwatch-logs image, runs the UI grunt
        build, builds every ECR repo image, then pushes them all. Skipped
        entirely when 'docker' is in self.stacks_to_skip. Shells out heavily
        via call().

        :param cwl_image: tag for the cloudwatch-logs docker image
        :param eng_cfg: engine config dict; impala version is read from it
        :param output_config: dict holding the ECR repo names to build/push
        """
        if 'docker' in self.stacks_to_skip:
            _logger.info('skipping docker concerns')
            return

        _logger.info(
            'configuring and building cloudwatch logs docker image (a special snowflake)'
        )
        dart_root = dart_root_relative_path()
        # random suffix keeps concurrent clones from colliding
        r_id = random_id()
        values = (dart_root, r_id)
        call(
            'cd %s && cd .. && git clone https://github.com/awslabs/ecs-cloudwatch-logs dart-cwl-%s'
            % values)
        docker_init = dart_root_relative_path('tools', 'docker',
                                              'docker-local-init.sh')
        # render the awslogs config template with the actual log group names
        with open(dart_root_relative_path('aws', 'cloudwatch-logs', 'awslogs_template.conf')) as cwl_conf_template, \
                open(dart_root_relative_path('..', 'dart-cwl-%s/awslogs.conf' % r_id), mode='w') as cwl_conf:
            contents = cwl_conf_template.read()
            contents = contents.replace('{DART_LOG_GROUP_SYSLOG}',
                                        syslog_log_group_name)
            contents = contents.replace('{DART_LOG_GROUP_MISC}',
                                        misc_log_group_name)
            cwl_conf.write(contents)
        cwl_root = dart_root_relative_path('..', 'dart-cwl-%s' % r_id)
        call('source %s && cd %s && docker build -f Dockerfile -t %s .' %
             (docker_init, cwl_root, cwl_image))

        _logger.info('running grunt build')
        call('cd %s && grunt build' %
             dart_root_relative_path('src', 'python', 'dart', 'web', 'ui'))

        _logger.info('building other docker images')
        # the cloudwatchlogs repo was already built above, so skip it here
        for repo_name in [
                rn for rn in output_config['ecr']['repo_names']
                if not rn.endswith('cloudwatchlogs')
        ]:
            version = eng_cfg['emr_engine']['options'][
                'impala_version'] if 'impala' in repo_name else '1.0.0'
            docker_img = self._docker_image(repo_name,
                                            output_config,
                                            version=version)
            docker_file_suffix = repo_name.split('/')[-1]
            values = (docker_init, dart_root, docker_file_suffix, docker_img)
            call(
                'source %s && cd %s && docker build -f tools/docker/Dockerfile-%s -t %s .'
                % values)

        _logger.info('pushing docker images')
        cmd = ('source %s && cd %s && $(aws ecr get-login)' %
               (docker_init, dart_root)) + ' && docker push %s'
        for repo_name in output_config['ecr']['repo_names']:
            version = eng_cfg['emr_engine']['options'][
                'impala_version'] if 'impala' in repo_name else '1.0.0'
            call(cmd %
                 self._docker_image(repo_name, output_config, version=version))
Exemple #16
0
 def reserve_subscription_elements(element_ids):
     """Mark the given subscription elements RESERVED under a fresh batch id."""
     # Only the trigger worker (a single consumer) calls this, so optimistic
     # locking is not needed here.
     reserve_stmt = (
         update(SubscriptionElementDao)
         .where(SubscriptionElementDao.id.in_(element_ids))
         .values(state=SubscriptionElementState.RESERVED, batch_id=random_id())
     )
     db.session.execute(reserve_stmt)
     db.session.commit()
    def generate_subscription_elements(self, subscription):
        """List the dataset's s3 keys and insert one element row per key.

        Transitions the subscription GENERATING -> ACTIVE, batching inserts of
        element rows, then re-lists from the last inserted key to fill any gap
        created while generation ran.

        :type subscription: dart.model.subscription.Subscription
        """
        _update_subscription_state(subscription, SubscriptionState.GENERATING)

        dataset = self._dataset_service.get_dataset(subscription.data.dataset_id)
        conn = boto.connect_s3()
        bucket = get_bucket(conn, dataset.data.location)
        s3_keys = yield_s3_keys(
            bucket,
            dataset.data.location,
            subscription.data.s3_path_start_prefix_inclusive,
            subscription.data.s3_path_end_prefix_exclusive,
            subscription.data.s3_path_regex_filter,
        )
        elements = []
        subscription_element_dict = {}
        for i, key_obj in enumerate(s3_keys):
            sid = subscription.id
            s3_path = get_s3_path(key_obj)
            state = SubscriptionElementState.UNCONSUMED
            now = datetime.now()
            subscription_element_dict = {
                'id': random_id(),
                'version_id': 0,
                'created': now,
                'updated': now,
                'subscription_id': sid,
                's3_path': s3_path,
                'file_size': key_obj.size,
                'state': state
            }
            elements.append(subscription_element_dict)

            # insert in batches of _batch_size to bound memory/transaction size
            batch_size_reached = (i + 1) % _batch_size == 0
            if batch_size_reached:
                self._insert_elements(elements)
                elements = []

        # flush the final partial batch, if any
        if len(elements) > 0:
            self._insert_elements(elements)

        _update_subscription_state(subscription, SubscriptionState.ACTIVE)

        # Now that the subscription is ACTIVE, s3 events for new files will cause conditional inserts to be
        # performed to keep the subscription up to date.  However, in the time it took for the subscription
        # elements to be generated, s3 events for new objects could have been missed.  So we will do one final
        # s3 list operation (starting with the last inserted key) to fill in the potential gap.
        s3_keys = yield_s3_keys(
            bucket,
            dataset.data.location,
            subscription_element_dict.get('s3_path'),
            subscription.data.s3_path_end_prefix_exclusive,
            subscription.data.s3_path_regex_filter,
        )
        for key_obj in s3_keys:
            self.conditional_insert_subscription_element(subscription, get_s3_path(key_obj), key_obj.size)
Exemple #18
0
    def save_trigger(self,
                     trigger,
                     commit_and_initialize=True,
                     flush=False,
                     user_id=None):
        """Validate and persist a trigger, optionally initializing it.

        :type trigger: dart.model.trigger.Trigger
        :param commit_and_initialize: when True, commit and let the trigger
            processor initialize the trigger; on failure the saved row is
            deleted before the exception propagates
        :param flush: when True, flush the session before building the model
        :param user_id: recorded on the trigger data when provided
        :return: the saved trigger as a model
        :raises DartValidationException: for manual, retry, or unknown
            trigger types
        """
        # tag the trigger with a uuid so related workflow activity can be
        # correlated in logs
        wf_uuid = uuid.uuid4().hex  # to avoid uuid serialization issues
        trigger.data.tags = trigger.data.tags if (trigger.data.tags) else []
        trigger.data.tags.append(wf_uuid)
        if user_id:
            trigger.data.user_id = user_id
        # NOTE(review): misplaced docstring below (not the first statement,
        # so it is a no-op string expression); left untouched
        """ :type trigger: dart.model.trigger.Trigger """
        trigger_type_name = trigger.data.trigger_type_name
        if trigger_type_name == self._manual_trigger_processor.trigger_type(
        ).name:
            raise DartValidationException('manual triggers cannot be saved')
        if trigger_type_name == self._retry_trigger_processor.trigger_type(
        ).name:
            raise DartValidationException('retry triggers cannot be saved')
        trigger_processor = self._trigger_processors.get(trigger_type_name)
        if not trigger_processor:
            raise DartValidationException('unknown trigger_type_name: %s' %
                                          trigger_type_name)
        assert isinstance(trigger_processor, TriggerProcessor)
        trigger = default_and_validate(
            trigger,
            trigger_schema(
                trigger_processor.trigger_type().params_json_schema))

        trigger_dao = TriggerDao()
        trigger_dao.id = random_id()
        # subscription_batch triggers may need to notify the nudge service
        if trigger_type_name == 'subscription_batch':
            sub = self._subscription_service.get_subscription(
                trigger.data.args['subscription_id'])
            if sub.data.nudge_id:
                response = self.update_nudge_with_trigger(
                    sub.data.nudge_id,
                    trigger.data.args['unconsumed_data_size_in_bytes'],
                    trigger_dao.id, trigger.data.trigger_type_name)
                assert (response.status_code == 200)
        trigger_dao.data = trigger.data.to_dict()
        db.session.add(trigger_dao)
        if flush:
            db.session.flush()
        trigger = trigger_dao.to_model()
        if commit_and_initialize:
            db.session.commit()
            trigger = trigger_dao.to_model()
            try:
                trigger_processor.initialize_trigger(trigger, self)
            # deliberate bare except: undo the committed row on ANY failure,
            # then re-raise the original exception
            except:
                db.session.delete(trigger_dao)
                db.session.commit()
                raise
        return trigger
 def reserve_subscription_elements(element_ids):
     """Mark the given subscription elements RESERVED under a fresh batch id."""
     # because this is called by the trigger worker (always a single consumer),
     # we shouldn't have to deal with optimistic locking
     db.session.execute(
         update(SubscriptionElementDao)
         .where(SubscriptionElementDao.id.in_(element_ids))
         .values(
             state=SubscriptionElementState.RESERVED,
             batch_id=random_id()
         )
     )
     db.session.commit()
Exemple #20
0
 def save_api_key(api_key, commit=True, flush=False):
     """Persist (merge) an api key, assigning an id when absent.

     :type api_key: dart.model.api_key.ApiKey
     :param commit: when True, commit the session
     :param flush: when True, flush the session before (optionally) committing
     :return: the saved api key as a model
     """
     key_dao = ApiKeyDao()
     # BUG FIX: the original iterated over the undefined name `user`
     # (NameError); the fields must come from the api_key being saved
     for key, value in api_key.to_dict().iteritems():
         setattr(key_dao, key, value)
     if not key_dao.id:
         key_dao.id = random_id()
     # merge handles both insert and update of an existing row
     db.session.merge(key_dao)
     if flush:
         db.session.flush()
     if commit:
         db.session.commit()
     return key_dao.to_model()
Exemple #21
0
 def save_user(user, commit=True, flush=False):
     """Persist (merge) a user, assigning an id when absent.

     :type user: dart.model.user.User
     :param commit: when True, commit the session
     :param flush: when True, flush the session before (optionally) committing
     :return: the saved user as a model
     """
     user_dao = UserDao()
     # copy every serialized field onto the DAO
     for field_name, field_value in user.to_dict().iteritems():
         setattr(user_dao, field_name, field_value)
     if not user_dao.id:
         user_dao.id = random_id()
     # merge handles both insert and update of an existing row
     db.session.merge(user_dao)
     if flush:
         db.session.flush()
     if commit:
         db.session.commit()
     return user_dao.to_model()
Exemple #22
0
 def save_user(user, commit=True, flush=False):
     """Persist (merge) a user, assigning an id when absent.

     :type user: dart.model.user.User
     :param commit: when True, commit the session
     :param flush: when True, flush the session before (optionally) committing
     :return: the saved user as a model
     """
     user_dao = UserDao()
     # copy every serialized field onto the DAO
     for key, value in user.to_dict().iteritems():
         setattr(user_dao, key, value)
     if not user_dao.id:
         user_dao.id = random_id()
     # merge handles both insert and update of an existing row
     db.session.merge(user_dao)
     if flush:
         db.session.flush()
     if commit:
         db.session.commit()
     return user_dao.to_model()
Exemple #23
0
 def save_api_key(api_key, commit=True, flush=False):
     """Persist (merge) an api key, assigning an id when absent.

     :type api_key: dart.model.api_key.ApiKey
     :param commit: when True, commit the session
     :param flush: when True, flush the session before (optionally) committing
     :return: the saved api key as a model
     """
     key_dao = ApiKeyDao()
     # BUG FIX: the original iterated over the undefined name `user`
     # (NameError); the fields must come from the api_key being saved
     for key, value in api_key.to_dict().iteritems():
         setattr(key_dao, key, value)
     if not key_dao.id:
         key_dao.id = random_id()
     # merge handles both insert and update of an existing row
     db.session.merge(key_dao)
     if flush:
         db.session.flush()
     if commit:
         db.session.commit()
     return key_dao.to_model()
Exemple #24
0
def create_all():
    """Create all database tables and seed one READY mutex row per known mutex.

    :return: the string 'OK'
    """
    db.create_all()

    for mutex in Mutexes.all():
        # conditional insert: only add the mutex row if the name is absent
        sql = """
            INSERT INTO mutex (id, version_id, created, updated, name, state)
            SELECT :id, 0, NOW(), NOW(), :name, :state
            WHERE NOT EXISTS (SELECT NULL FROM mutex WHERE name = :name)
            """
        statement = text(sql).bindparams(id=random_id(), name=mutex, state=MutexState.READY)
        db.session.execute(statement)
        db.session.commit()

    return 'OK'
Exemple #25
0
    def save_event(event, commit=True, flush=False):
        """Validate and persist an event, returning the saved model.

        :type event: dart.model.event.Event
        :param commit: when True, commit the session
        :param flush: when True, flush before (optionally) committing
        :return: the saved event as a model
        """
        validated_event = default_and_validate(event, event_schema())

        dao = EventDao()
        dao.id = random_id()
        dao.data = validated_event.data.to_dict()
        db.session.add(dao)
        if flush:
            db.session.flush()
        if commit:
            db.session.commit()
        return dao.to_model()
Exemple #26
0
    def save_workflow(workflow, commit=True, flush=False):
        """Validate and persist a workflow, returning the saved model.

        :type workflow: dart.model.workflow.Workflow
        :param commit: when True, commit the session
        :param flush: when True, flush before (optionally) committing
        :return: the saved workflow as a model
        """
        validated_workflow = default_and_validate(workflow, workflow_schema())

        dao = WorkflowDao()
        dao.id = random_id()
        dao.data = validated_workflow.data.to_dict()
        db.session.add(dao)
        if flush:
            db.session.flush()
        if commit:
            db.session.commit()
        return dao.to_model()
Exemple #27
0
 def save_subgraph_definition(subgraph_definition, engine, trigger_schemas):
     """Validate and persist (committed) a subgraph definition for an engine.

     :type engine: dart.model.engine.Engine
     :type subgraph_definition: dart.model.graph.SubGraphDefinition
     :param trigger_schemas: trigger schemas folded into the combined schema
     :return: the saved subgraph definition as a model
     """
     # build the combined validation schema from the engine's supported
     # action types, its datastore options, and the given trigger schemas
     action_schemas = [action_schema(e.params_json_schema) for e in engine.data.supported_action_types]
     ds_schema = datastore_schema(engine.data.options_json_schema)
     schema = subgraph_definition_schema(trigger_schemas, action_schemas, ds_schema)
     subgraph_definition = default_and_validate(subgraph_definition, schema)
     subgraph_definition_dao = SubGraphDefinitionDao()
     subgraph_definition_dao.id = random_id()
     subgraph_definition_dao.data = subgraph_definition.data.to_dict()
     # record which engine this definition belongs to
     subgraph_definition_dao.data['engine_name'] = engine.data.name
     db.session.add(subgraph_definition_dao)
     db.session.commit()
     return subgraph_definition_dao.to_model()
Exemple #28
0
    def save_event(event, commit=True, flush=False):
        """Validate and persist an event, returning the saved model.

        :type event: dart.model.event.Event
        :param commit: when True, commit the session
        :param flush: when True, flush before (optionally) committing
        :return: the saved event as a model
        """
        event = default_and_validate(event, event_schema())

        event_dao = EventDao()
        event_dao.id = random_id()
        event_dao.data = event.data.to_dict()
        db.session.add(event_dao)
        if flush:
            db.session.flush()
        if commit:
            db.session.commit()
        event = event_dao.to_model()
        return event
Exemple #29
0
    def save_workflow(workflow, commit=True, flush=False):
        """Validate and persist a workflow, returning the saved model.

        :type workflow: dart.model.workflow.Workflow
        :param commit: when True, commit the session
        :param flush: when True, flush before (optionally) committing
        :return: the saved workflow as a model
        """
        workflow = default_and_validate(workflow, workflow_schema())

        workflow_dao = WorkflowDao()
        workflow_dao.id = random_id()
        workflow_dao.data = workflow.data.to_dict()
        db.session.add(workflow_dao)
        if flush:
            db.session.flush()
        if commit:
            db.session.commit()
        workflow = workflow_dao.to_model()
        return workflow
    def save_subscription(self, subscription, commit_and_generate=True, flush=False):
        """Validate and persist a subscription in the ACTIVE state.

        :type subscription: dart.model.subscription.Subscription
        :param commit_and_generate: when True, commit the session
        :param flush: when True, flush the session before building the model
        :return: the saved subscription as a model
        """
        subscription = default_and_validate(subscription, subscription_schema())

        subscription_dao = SubscriptionDao()
        subscription_dao.id = random_id()
        # new subscriptions are saved directly in the ACTIVE state
        subscription.data.state = SubscriptionState.ACTIVE
        subscription_dao.data = subscription.data.to_dict()
        db.session.add(subscription_dao)
        if flush:
            db.session.flush()
        subscription = subscription_dao.to_model()
        if commit_and_generate:
            db.session.commit()
            # re-read after commit so the returned model reflects DB state
            subscription = subscription_dao.to_model()
        return subscription
Exemple #31
0
def create_all():
    """Create all tables, then idempotently seed one mutex row per known mutex.

    The INSERT ... WHERE NOT EXISTS form makes the seeding safe to re-run.
    """
    db.create_all()

    # loop-invariant: same statement text for every mutex name
    insert_sql = """
        INSERT INTO mutex (id, version_id, created, updated, name, state)
        SELECT :id, 0, NOW(), NOW(), :name, :state
        WHERE NOT EXISTS (SELECT NULL FROM mutex WHERE name = :name)
        """
    for mutex_name in Mutexes.all():
        statement = text(insert_sql).bindparams(
            id=random_id(), name=mutex_name, state=MutexState.READY)
        db.session.execute(statement)
        db.session.commit()

    return 'OK'
Exemple #32
0
    def save_subscription(self, subscription, commit_and_generate=True, flush=False):
        """Validate *subscription*, mark it ACTIVE, persist, and optionally commit.

        NOTE(review): no subscription-element generation occurs in this variant
        even though the flag is named commit_and_generate.

        :type subscription: dart.model.subscription.Subscription
        """
        subscription = default_and_validate(subscription, subscription_schema())
        subscription.data.state = SubscriptionState.ACTIVE

        dao = SubscriptionDao()
        dao.id = random_id()
        dao.data = subscription.data.to_dict()
        db.session.add(dao)
        if flush:
            db.session.flush()
        result = dao.to_model()
        if commit_and_generate:
            db.session.commit()
            result = dao.to_model()
        return result
Exemple #33
0
 def _clone_workflow_action_to_dao(source_action, **data_property_overrides):
     """Copy *source_action* into a fresh ActionDao, reset to a never-run state.

     The clone remembers its template via workflow_action_id; all runtime
     bookkeeping fields are wiped before *data_property_overrides* are applied.
     """
     clone = source_action.copy()
     assert isinstance(clone, Action)
     clone.data.workflow_action_id = source_action.id
     clone.data.state = ActionState.HAS_NEVER_RUN
     # wipe run-specific bookkeeping so the clone starts fresh
     for field in ('progress', 'queued_time', 'start_time', 'end_time', 'error_message'):
         setattr(clone.data, field, None)
     for k, v in data_property_overrides.iteritems():
         setattr(clone.data, k, v)
     dao = ActionDao()
     dao.id = random_id()
     dao.data = clone.data.to_dict()
     return dao
Exemple #34
0
 def _clone_workflow_action_to_dao(source_action, **data_property_overrides):
     """Copy *source_action* into a fresh ActionDao in the PENDING state.

     Runtime bookkeeping fields are cleared first; *data_property_overrides*
     are then applied on top.
     """
     clone = source_action.copy()
     assert isinstance(clone, Action)
     clone.data.workflow_action_id = source_action.id
     clone.data.state = ActionState.PENDING
     # clear run-specific fields so the clone carries no execution history
     for field in ('progress', 'queued_time', 'start_time', 'end_time', 'error_message'):
         setattr(clone.data, field, None)
     for k, v in data_property_overrides.iteritems():
         setattr(clone.data, k, v)
     dao = ActionDao()
     dao.id = random_id()
     dao.data = clone.data.to_dict()
     return dao
Exemple #35
0
    def save_trigger(self,
                     trigger,
                     commit_and_initialize=True,
                     flush=False,
                     user_id=None):
        """Validate, persist, and (optionally) initialize a trigger.

        :type trigger: dart.model.trigger.Trigger
        :raises DartValidationException: for manual, retry, or unknown
            trigger types
        """
        # tag the trigger with a fresh uuid (hex form avoids uuid
        # serialization issues) before validation
        wf_uuid = uuid.uuid4().hex  # to avoid uuid serialization issues
        trigger.data.tags = trigger.data.tags if (trigger.data.tags) else []
        trigger.data.tags.append(wf_uuid)
        if user_id:
            trigger.data.user_id = user_id
        """ :type trigger: dart.model.trigger.Trigger """
        trigger_type_name = trigger.data.trigger_type_name
        # manual and retry triggers are never persisted
        if trigger_type_name == self._manual_trigger_processor.trigger_type(
        ).name:
            raise DartValidationException('manual triggers cannot be saved')
        if trigger_type_name == self._retry_trigger_processor.trigger_type(
        ).name:
            raise DartValidationException('retry triggers cannot be saved')
        trigger_processor = self._trigger_processors.get(trigger_type_name)
        if not trigger_processor:
            raise DartValidationException('unknown trigger_type_name: %s' %
                                          trigger_type_name)
        assert isinstance(trigger_processor, TriggerProcessor)
        # validate against the schema specific to this trigger type's params
        trigger = default_and_validate(
            trigger,
            trigger_schema(
                trigger_processor.trigger_type().params_json_schema))

        trigger_dao = TriggerDao()
        trigger_dao.id = random_id()
        trigger_dao.data = trigger.data.to_dict()
        db.session.add(trigger_dao)
        if flush:
            db.session.flush()
        trigger = trigger_dao.to_model()
        if commit_and_initialize:
            db.session.commit()
            trigger = trigger_dao.to_model()
            try:
                trigger_processor.initialize_trigger(trigger, self)
            except:
                # compensate: initialization failed after commit, so delete
                # the row we just persisted and re-raise
                db.session.delete(trigger_dao)
                db.session.commit()
                raise
        return trigger
Exemple #36
0
 def save_subgraph_definition(subgraph_definition, engine, trigger_schemas):
     """Validate and persist a subgraph definition for the given engine.

     :type engine: dart.model.engine.Engine
     :type subgraph_definition: dart.model.graph.SubGraphDefinition
     """
     supported_action_schemas = [action_schema(t.params_json_schema)
                                 for t in engine.data.supported_action_types]
     full_schema = subgraph_definition_schema(
         trigger_schemas,
         supported_action_schemas,
         datastore_schema(engine.data.options_json_schema),
     )
     validated = default_and_validate(subgraph_definition, full_schema)

     dao = SubGraphDefinitionDao()
     dao.id = random_id()
     dao.data = validated.data.to_dict()
     # record which engine this definition belongs to
     dao.data['engine_name'] = engine.data.name
     db.session.add(dao)
     db.session.commit()
     return dao.to_model()
Exemple #37
0
    def save_subscription(self, subscription, commit_and_generate=True, flush=False):
        """Validate and persist a subscription in the QUEUED state; after a
        commit, kick off generation of its subscription elements.

        :type subscription: dart.model.subscription.Subscription
        """
        validated = default_and_validate(subscription, subscription_schema())
        validated.data.state = SubscriptionState.QUEUED
        validated.data.queued_time = datetime.now()

        dao = SubscriptionDao()
        dao.id = random_id()
        dao.data = validated.data.to_dict()
        db.session.add(dao)
        if flush:
            db.session.flush()
        saved = dao.to_model()
        if commit_and_generate:
            db.session.commit()
            saved = dao.to_model()
            self._subscription_proxy.generate_subscription_elements(saved)
        return saved
Exemple #38
0
    def save_trigger(self, trigger, commit_and_initialize=True, flush=False, user_id=None):
        """Validate, persist, and (optionally) initialize a trigger; for
        subscription_batch triggers, also notify the subscription's nudge.

        :type trigger: dart.model.trigger.Trigger
        :raises DartValidationException: for manual, retry, or unknown
            trigger types
        """
        # tag the trigger with a fresh uuid (hex form avoids uuid
        # serialization issues) before validation
        wf_uuid = uuid.uuid4().hex  # to avoid uuid serialization issues
        trigger.data.tags = trigger.data.tags if (trigger.data.tags) else []
        trigger.data.tags.append(wf_uuid)
        if user_id:
            trigger.data.user_id = user_id

        """ :type trigger: dart.model.trigger.Trigger """
        trigger_type_name = trigger.data.trigger_type_name
        # manual and retry triggers are never persisted
        if trigger_type_name == self._manual_trigger_processor.trigger_type().name:
            raise DartValidationException('manual triggers cannot be saved')
        if trigger_type_name == self._retry_trigger_processor.trigger_type().name:
            raise DartValidationException('retry triggers cannot be saved')
        trigger_processor = self._trigger_processors.get(trigger_type_name)
        if not trigger_processor:
            raise DartValidationException('unknown trigger_type_name: %s' % trigger_type_name)
        assert isinstance(trigger_processor, TriggerProcessor)
        # validate against the schema specific to this trigger type's params
        trigger = default_and_validate(trigger, trigger_schema(trigger_processor.trigger_type().params_json_schema))

        trigger_dao = TriggerDao()
        trigger_dao.id = random_id()
        # subscription_batch triggers must register themselves with the
        # subscription's nudge (if any) before being persisted; a non-200
        # response aborts the save via the assert
        if trigger_type_name == 'subscription_batch':
            sub = self._subscription_service.get_subscription(trigger.data.args['subscription_id'])
            if sub.data.nudge_id:
                response = self.update_nudge_with_trigger(sub.data.nudge_id,
                                                          trigger.data.args['unconsumed_data_size_in_bytes'],
                                                          trigger_dao.id,
                                                          trigger.data.trigger_type_name)
                assert(response.status_code == 200)
        trigger_dao.data = trigger.data.to_dict()
        db.session.add(trigger_dao)
        if flush:
            db.session.flush()
        trigger = trigger_dao.to_model()
        if commit_and_initialize:
            db.session.commit()
            trigger = trigger_dao.to_model()
            try:
                trigger_processor.initialize_trigger(trigger, self)
            except:
                # compensate: initialization failed after commit, so delete
                # the row we just persisted and re-raise
                db.session.delete(trigger_dao)
                db.session.commit()
                raise
        return trigger
Exemple #39
0
    def clone_datastore(self, source_datastore, **data_property_overrides):
        """Create and commit a copy of *source_datastore* with connection
        details cleared, fresh s3 paths set, and any *data_property_overrides*
        applied on top.
        """
        clone = Datastore.from_dict(source_datastore.to_dict())
        clone.data.state = DatastoreState.INACTIVE
        # connection details belong to the source instance only
        for field in ('host', 'port', 'username', 'password', 'connection_url', 'extra_data'):
            setattr(clone.data, field, None)
        self._set_s3_paths(clone)
        for k, v in data_property_overrides.iteritems():
            setattr(clone.data, k, v)

        dao = DatastoreDao()
        dao.id = random_id()
        dao.data = clone.data.to_dict()
        db.session.add(dao)
        db.session.commit()
        return dao.to_model()
Exemple #40
0
    def clone_datastore(self, source_datastore, **data_property_overrides):
        """Clone *source_datastore* into a brand-new INACTIVE datastore row.

        Connection-specific fields are wiped, new s3 paths are assigned, and
        *data_property_overrides* are applied last so callers can customize
        the clone.
        """
        datastore = Datastore.from_dict(source_datastore.to_dict())
        datastore.data.state = DatastoreState.INACTIVE
        # the clone must not inherit the source's connection details
        for attr in ('host', 'port', 'username', 'password', 'connection_url', 'extra_data'):
            setattr(datastore.data, attr, None)
        self._set_s3_paths(datastore)
        for k, v in data_property_overrides.iteritems():
            setattr(datastore.data, k, v)

        dao = DatastoreDao()
        dao.id = random_id()
        dao.data = datastore.data.to_dict()
        db.session.add(dao)
        db.session.commit()
        return dao.to_model()
Exemple #41
0
 def save_workflow_instance(workflow, trigger_type, trigger_id, state):
     """Create and commit a WorkflowInstanceDao for one run of *workflow*.

     :type workflow: dart.model.workflow.Workflow
     :type trigger_type: dart.model.trigger.TriggerType
     """
     dao = WorkflowInstanceDao()
     dao.id = random_id()
     dao.data = WorkflowInstanceData(
         workflow_id=workflow.id,
         engine_name=workflow.data.engine_name,
         state=state,
         trigger_type=trigger_type.name,
         trigger_id=trigger_id,
         queued_time=datetime.now(),
         tags=workflow.data.tags,
     ).to_dict()
     db.session.add(dao)
     db.session.commit()
     return dao.to_model()
Exemple #42
0
 def save_workflow_instance(workflow, trigger_type, trigger_id, state):
     """Persist a new workflow-instance row recording what triggered this run.

     :type workflow: dart.model.workflow.Workflow
     :type trigger_type: dart.model.trigger.TriggerType
     """
     instance_data = WorkflowInstanceData(
         workflow_id=workflow.id,
         engine_name=workflow.data.engine_name,
         state=state,
         trigger_type=trigger_type.name,
         trigger_id=trigger_id,
         queued_time=datetime.now(),
         tags=workflow.data.tags,
     )
     instance_dao = WorkflowInstanceDao()
     instance_dao.id = random_id()
     instance_dao.data = instance_data.to_dict()
     db.session.add(instance_dao)
     db.session.commit()
     return instance_dao.to_model()
Exemple #43
0
    def to_entity_models_with_randomized_ids(entity_models):
        """Return copies of *entity_models* with every id (and every reference
        to it) replaced by a randomized id, so multiple subgraphs can be
        edited concurrently without id collisions.

        The replacement runs over the JSON-serialized form (a bit hacky, but
        simple and fast).
        """
        id_map = {}
        for model in entity_models:
            if model.id.startswith('PARENT'):
                prefix = 'PARENT'
            elif model.id.startswith('CHILD'):
                prefix = 'CHILD'
            else:
                prefix = 'UNSAVED'
            id_map[model.id] = '%s-%s' % (prefix, random_id())

        randomized = []
        for model in entity_models:
            serialized = json.dumps(model.to_dict())
            # replace quoted ids so substrings of longer values are untouched
            for old_id, new_id in id_map.iteritems():
                serialized = serialized.replace('"%s"' % old_id, '"%s"' % new_id)
            randomized.append(type(model).from_dict(json.loads(serialized)))
        return randomized
Exemple #44
0
    def save_engine(self, engine):
        """Validate and persist a new engine, then register its ECS task
        definition and store the resulting ARN.

        :type engine: dart.model.engine.Engine
        :raises DartValidationException: if an engine with the same name exists
        """
        engine = default_and_validate(engine, engine_schema())
        self._validate_ecs_task_definition(engine.data.ecs_task_definition)

        dao = EngineDao()
        dao.id = random_id()
        dao.name = engine.data.name
        dao.data = engine.data.to_dict()
        db.session.add(dao)
        try:
            db.session.commit()
            saved = dao.to_model()
            saved.data.ecs_task_definition_arn = self._register_ecs_task_definition(saved)
            return self.update_engine_data(saved.id, saved.data)

        except SqlAlchemyIntegrityError as e:
            # 23505 = postgres unique_violation: surface as a validation error
            is_unique_violation = (hasattr(e, 'orig')
                                   and isinstance(e.orig, PostgresIntegrityError)
                                   and e.orig.pgcode == '23505')
            if is_unique_violation:
                raise DartValidationException('name already exists: %s' % engine.data.name)
            raise e
Exemple #45
0
    def to_entity_models_with_randomized_ids(entity_models):
        """Copy *entity_models*, giving each a randomized id and rewriting all
        references to the old ids, to support concurrent subgraph editing.

        Works on the JSON-serialized form (a bit hacky, but simple and fast).
        """
        replacements = {}
        for entity in entity_models:
            if entity.id.startswith('PARENT'):
                prefix = 'PARENT'
            elif entity.id.startswith('CHILD'):
                prefix = 'CHILD'
            else:
                prefix = 'UNSAVED'
            replacements[entity.id] = '%s-%s' % (prefix, random_id())

        out = []
        for entity in entity_models:
            text_form = json.dumps(entity.to_dict())
            # quoted replacement keeps longer strings containing an id intact
            for before, after in replacements.iteritems():
                text_form = text_form.replace('"%s"' % before, '"%s"' % after)
            out.append(type(entity).from_dict(json.loads(text_form)))
        return out
Exemple #46
0
    def save_dataset(dataset, commit=True, flush=False):
        """Validate and persist a dataset.

        :type dataset: dart.model.dataset.Dataset
        :raises DartValidationException: if the dataset name already exists
        """
        dataset = default_and_validate(dataset, dataset_schema())

        dao = DatasetDao()
        dao.id = random_id()
        dao.name = dataset.data.name
        # normalize the location: trailing slashes would otherwise make
        # equal locations compare unequal
        dataset.data.location = dataset.data.location.rstrip('/')
        dao.data = dataset.data.to_dict()
        db.session.add(dao)
        try:
            if flush:
                db.session.flush()
            if commit:
                db.session.commit()
            return dao.to_model()
        except SqlAlchemyIntegrityError as e:
            # 23505 = postgres unique_violation on the name column
            if hasattr(e, 'orig') and isinstance(e.orig, PostgresIntegrityError) and e.orig.pgcode == '23505':
                raise DartValidationException('name already exists: %s' % dataset.data.name)
            raise e
Exemple #47
0
    def save_subscription(self, subscription, commit_and_generate=True, flush=False):
        """Validate and queue a subscription; after committing, trigger
        generation of its subscription elements.

        :type subscription: dart.model.subscription.Subscription
        """
        subscription = default_and_validate(subscription, subscription_schema())
        subscription.data.state = SubscriptionState.QUEUED
        subscription.data.queued_time = datetime.now()

        dao = SubscriptionDao()
        dao.id = random_id()
        dao.data = subscription.data.to_dict()
        db.session.add(dao)
        if flush:
            db.session.flush()
        result = dao.to_model()
        if commit_and_generate:
            db.session.commit()
            result = dao.to_model()
            self._subscription_proxy.generate_subscription_elements(result)
        return result
Exemple #48
0
    def save_trigger(self, trigger, commit_and_initialize=True, flush=False, user_id=None):
        """Validate, persist, and (optionally) initialize a trigger.

        :type trigger: dart.model.trigger.Trigger
        :raises DartValidationException: for manual, retry, or unknown
            trigger types
        """
        # tag the trigger with a fresh uuid (hex form avoids uuid
        # serialization issues) before validation
        wf_uuid = uuid.uuid4().hex  # to avoid uuid serialization issues
        trigger.data.tags = trigger.data.tags if (trigger.data.tags) else []
        trigger.data.tags.append(wf_uuid)
        if user_id:
            trigger.data.user_id = user_id


        """ :type trigger: dart.model.trigger.Trigger """
        trigger_type_name = trigger.data.trigger_type_name
        # manual and retry triggers are never persisted
        if trigger_type_name == self._manual_trigger_processor.trigger_type().name:
            raise DartValidationException('manual triggers cannot be saved')
        if trigger_type_name == self._retry_trigger_processor.trigger_type().name:
            raise DartValidationException('retry triggers cannot be saved')
        trigger_processor = self._trigger_processors.get(trigger_type_name)
        if not trigger_processor:
            raise DartValidationException('unknown trigger_type_name: %s' % trigger_type_name)
        assert isinstance(trigger_processor, TriggerProcessor)
        # validate against the schema specific to this trigger type's params
        trigger = default_and_validate(trigger, trigger_schema(trigger_processor.trigger_type().params_json_schema))

        trigger_dao = TriggerDao()
        trigger_dao.id = random_id()
        trigger_dao.data = trigger.data.to_dict()
        db.session.add(trigger_dao)
        if flush:
            db.session.flush()
        trigger = trigger_dao.to_model()
        if commit_and_initialize:
            db.session.commit()
            trigger = trigger_dao.to_model()
            try:
                trigger_processor.initialize_trigger(trigger, self)
            except:
                # compensate: initialization failed after commit, so delete
                # the row we just persisted and re-raise
                db.session.delete(trigger_dao)
                db.session.commit()
                raise
        return trigger
    def _handle_docker_concerns(self, cwl_image, eng_cfg, misc_log_group_name, output_config, syslog_log_group_name):
        """Build and push all docker images for the deployment, including a
        specially-configured cloudwatch-logs image.

        No-op when 'docker' appears in self.stacks_to_skip.
        """
        if 'docker' in self.stacks_to_skip:
            _logger.info('skipping docker concerns')
            return

        _logger.info('configuring and building cloudwatch logs docker image (a special snowflake)')
        dart_root = dart_root_relative_path()
        # clone the upstream agent repo into a uniquely-named sibling
        # directory so concurrent runs do not collide
        r_id = random_id()
        values = (dart_root, r_id)
        call('cd %s && cd .. && git clone https://github.com/awslabs/ecs-cloudwatch-logs dart-cwl-%s' % values)
        docker_init = dart_root_relative_path('tools', 'docker', 'docker-local-init.sh')
        # render the awslogs config template with the two log group names
        with open(dart_root_relative_path('aws', 'cloudwatch-logs', 'awslogs_template.conf')) as cwl_conf_template, \
                open(dart_root_relative_path('..', 'dart-cwl-%s/awslogs.conf' % r_id), mode='w') as cwl_conf:
            contents = cwl_conf_template.read()
            contents = contents.replace('{DART_LOG_GROUP_SYSLOG}', syslog_log_group_name)
            contents = contents.replace('{DART_LOG_GROUP_MISC}', misc_log_group_name)
            cwl_conf.write(contents)
        cwl_root = dart_root_relative_path('..', 'dart-cwl-%s' % r_id)
        call('source %s && cd %s && docker build -f Dockerfile -t %s .' % (docker_init, cwl_root, cwl_image))

        _logger.info('running grunt build')
        call('cd %s && grunt build' % dart_root_relative_path('src', 'python', 'dart', 'web', 'ui'))

        _logger.info('building other docker images')
        # the cloudwatchlogs repo was built above; every other repo builds
        # from a per-repo Dockerfile suffix under tools/docker
        for repo_name in [rn for rn in output_config['ecr']['repo_names'] if not rn.endswith('cloudwatchlogs')]:
            # impala images are versioned by the configured impala_version;
            # everything else is pinned at 1.0.0
            version = eng_cfg['emr_engine']['options']['impala_version'] if 'impala' in repo_name else '1.0.0'
            docker_img = self._docker_image(repo_name, output_config, version=version)
            docker_file_suffix = repo_name.split('/')[-1]
            values = (docker_init, dart_root, docker_file_suffix, docker_img)
            call('source %s && cd %s && docker build -f tools/docker/Dockerfile-%s -t %s .' % values)

        _logger.info('pushing docker images')
        # authenticate to ECR once per push command, then push each image
        cmd = ('source %s && cd %s && $(aws ecr get-login)' % (docker_init, dart_root)) + ' && docker push %s'
        for repo_name in output_config['ecr']['repo_names']:
            version = eng_cfg['emr_engine']['options']['impala_version'] if 'impala' in repo_name else '1.0.0'
            call(cmd % self._docker_image(repo_name, output_config, version=version))
Exemple #50
0
    def save_engine(self, engine):
        """Persist a validated engine and register its ECS task definition,
        storing the resulting ARN back onto the engine.

        :type engine: dart.model.engine.Engine
        :raises DartValidationException: if an engine with the same name exists
        """
        engine = default_and_validate(engine, engine_schema())
        self._validate_ecs_task_definition(engine.data.ecs_task_definition)

        engine_dao = EngineDao()
        engine_dao.id = random_id()
        engine_dao.name = engine.data.name
        engine_dao.data = engine.data.to_dict()
        db.session.add(engine_dao)
        try:
            db.session.commit()
            persisted = engine_dao.to_model()
            persisted.data.ecs_task_definition_arn = self._register_ecs_task_definition(persisted)
            return self.update_engine_data(persisted.id, persisted.data)

        except SqlAlchemyIntegrityError as e:
            # 23505 = postgres unique_violation: report as a validation error
            if hasattr(e, 'orig') and isinstance(e.orig, PostgresIntegrityError) and e.orig.pgcode == '23505':
                raise DartValidationException('name already exists: %s' % engine.data.name)
            raise e
Exemple #51
0
    def save_dataset(dataset, commit=True, flush=False):
        """Validate and persist a dataset row.

        :type dataset: dart.model.dataset.Dataset
        :raises DartValidationException: if the dataset name already exists
        """
        dataset = default_and_validate(dataset, dataset_schema())
        # strip trailing slashes so equivalent locations compare equal
        dataset.data.location = dataset.data.location.rstrip('/')

        dataset_dao = DatasetDao()
        dataset_dao.id = random_id()
        dataset_dao.name = dataset.data.name
        dataset_dao.data = dataset.data.to_dict()
        db.session.add(dataset_dao)
        try:
            if flush:
                db.session.flush()
            if commit:
                db.session.commit()
            return dataset_dao.to_model()
        except SqlAlchemyIntegrityError as e:
            # 23505 = postgres unique_violation on the name column
            is_dup_name = (hasattr(e, 'orig')
                           and isinstance(e.orig, PostgresIntegrityError)
                           and e.orig.pgcode == '23505')
            if is_dup_name:
                raise DartValidationException('name already exists: %s' % dataset.data.name)
            raise e
Exemple #52
0
    def setUp(self):
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        dataset_data = DatasetData('test-dataset0', 'test_dataset_table0', 's3://test/dataset/0/%s' + random_id(), df, cs)
        self.dataset0 = self.dart.save_dataset(Dataset(data=dataset_data))

        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        dataset1_location = 's3://test/dataset/1/%s' + random_id()
        dataset_data = DatasetData('test-dataset1', 'test_dataset_table1', dataset1_location, df, cs)
        self.dataset1 = self.dart.save_dataset(Dataset(data=dataset_data))

        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        dataset_data = DatasetData('test-dataset2-no-show', 'test_dataset_table2', 's3://test/dataset/2/%s' + random_id(), df, cs)
        self.dataset2 = self.dart.save_dataset(Dataset(data=dataset_data))

        s = Subscription(data=SubscriptionData('test-subscription0', self.dataset0.id))
        self.subscription0 = self.dart.save_subscription(s)

        s = Subscription(data=SubscriptionData('test-subscription2-no-show', self.dataset2.id))
        self.subscription2 = self.dart.save_subscription(s)

        dst_args = {'action_sleep_time_in_seconds': 0}
        dst = Datastore(data=DatastoreData('test-datastore0', 'no_op_engine', args=dst_args, state=DatastoreState.TEMPLATE))
        self.datastore0 = self.dart.save_datastore(dst)
        dst = Datastore(data=DatastoreData('test-datastore1', 'no_op_engine', args=dst_args, state=DatastoreState.TEMPLATE))
        self.datastore1 = self.dart.save_datastore(dst)
        dst = Datastore(data=DatastoreData('test-datastore2-no-show', 'no_op_engine', args=dst_args, state=DatastoreState.ACTIVE))
        self.datastore2 = self.dart.save_datastore(dst)

        wf0 = Workflow(data=WorkflowData('test-workflow0', self.datastore0.id, state=WorkflowState.ACTIVE))
        self.workflow0 = self.dart.save_workflow(wf0, self.datastore0.id)
        wf1 = Workflow(data=WorkflowData('test-workflow1', self.datastore1.id, state=WorkflowState.ACTIVE))
        self.workflow1 = self.dart.save_workflow(wf1, self.datastore1.id)
        wf2 = Workflow(data=WorkflowData('test-workflow2-no-show', self.datastore2.id, state=WorkflowState.ACTIVE))
        self.workflow2 = self.dart.save_workflow(wf2, self.datastore2.id)

        a_args = {'source_hdfs_path': 'hdfs:///user/hive/warehouse/test', 'destination_s3_path': dataset1_location}
        a00 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name, NoOpActionTypes.action_that_succeeds.name, state=ActionState.TEMPLATE))
        a01 = Action(data=ActionData(NoOpActionTypes.consume_subscription.name, NoOpActionTypes.consume_subscription.name, {'subscription_id': self.subscription0.id}, state=ActionState.TEMPLATE))
        a02 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name, NoOpActionTypes.action_that_succeeds.name, state=ActionState.TEMPLATE))
        a03 = Action(data=ActionData(NoOpActionTypes.copy_hdfs_to_s3_action.name, NoOpActionTypes.copy_hdfs_to_s3_action.name, a_args, state=ActionState.TEMPLATE))
        a04 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name, NoOpActionTypes.action_that_succeeds.name, state=ActionState.TEMPLATE))
        self.action00, self.action01, self.action02, self.action03, self.action04 = \
            self.dart.save_actions([a00, a01, a02, a03, a04], workflow_id=self.workflow0.id)

        a10 = Action(data=ActionData(NoOpActionTypes.load_dataset.name, NoOpActionTypes.load_dataset.name, {'dataset_id': self.dataset1.id}, state=ActionState.TEMPLATE))
        self.action10 = self.dart.save_actions([a10], workflow_id=self.workflow1.id)

        a20 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name, NoOpActionTypes.action_that_succeeds.name, state=ActionState.HAS_NEVER_RUN))
        a21 = Action(data=ActionData(NoOpActionTypes.load_dataset.name, NoOpActionTypes.load_dataset.name, {'dataset_id': self.dataset2.id}, state=ActionState.TEMPLATE))
        self.action20 = self.dart.save_actions([a20], datastore_id=self.datastore2.id)
        self.action21 = self.dart.save_actions([a21], workflow_id=self.workflow2.id)

        self.event1 = self.dart.save_event(Event(data=EventData('test-event1', state=EventState.ACTIVE)))
        self.event2 = self.dart.save_event(Event(data=EventData('test-event2-no-show', state=EventState.ACTIVE)))

        tr_args = {'event_id': self.event1.id}
        tr = Trigger(data=TriggerData('test-event-trigger1', 'event', [self.workflow1.id], tr_args, TriggerState.ACTIVE))
        self.event_trigger1 = self.dart.save_trigger(tr)

        tr_args = {'event_id': self.event2.id}
        tr = Trigger(data=TriggerData('test-event-trigger2-no-show', 'event', [self.workflow2.id], tr_args, TriggerState.ACTIVE))
        self.event_trigger2 = self.dart.save_trigger(tr)

        st_args = {'fire_after': 'ALL', 'completed_trigger_ids': [self.event_trigger1.id]}
        st = Trigger(data=TriggerData('test-super-trigger1', 'super', None, st_args, TriggerState.ACTIVE))
        self.super_trigger1 = self.dart.save_trigger(st)

        st_args = {'fire_after': 'ANY', 'completed_trigger_ids': [self.super_trigger1.id]}
        st = Trigger(data=TriggerData('test-super-trigger2', 'super', [self.workflow1.id], st_args, TriggerState.ACTIVE))
        self.super_trigger2 = self.dart.save_trigger(st)