Ejemplo n.º 1
0
def get_datastore_actions():
    """Return a paginated, filtered listing of actions as a JSON-ready dict.

    Query-string parameters: limit (default 20), offset (default 0),
    filters / order_by (JSON-encoded lists of filter strings), plus
    optional datastore_id / workflow_id equality constraints.
    """
    args = request.args
    limit = int(args.get('limit', 20))
    offset = int(args.get('offset', 0))

    filters = [filter_service().from_string(expr)
               for expr in json.loads(args.get('filters', '[]'))]
    order_by = [order_by_service().from_string(expr)
                for expr in json.loads(args.get('order_by', '[]'))]

    # Narrow results to a specific datastore and/or workflow when requested.
    for field in ('datastore_id', 'workflow_id'):
        value = args.get(field)
        if value:
            filters.append(Filter(field, Operator.EQ, value))

    actions = action_service().query_actions(filters, limit, offset, order_by)
    return {
        'results': [action.to_dict() for action in actions],
        'limit': limit,
        'offset': offset,
        'total': action_service().query_actions_count(filters),
    }
Ejemplo n.º 2
0
def start_datastore(emr_engine, datastore, action):
    """Launch an EMR cluster for the datastore and block until it is ready.

    Prepares instance-group and bootstrap arguments, creates the cluster
    (unless this is a dry run), polls until the cluster leaves its startup
    states, then records connection details on the datastore.

    :type emr_engine: dart.engine.emr.emr.EmrEngine
    :type datastore: dart.model.datastore.Datastore
    :type action: dart.model.action.Action
    :raises Exception: if the cluster settles in a non-WAITING state
    """
    cluster_name = 'dart-datastore-%s-%s' % (datastore.id, datastore.data.name)
    # Pending load_dataset actions for this datastore drive sizing decisions.
    actions = emr_engine.dart.find_actions([
        Filter('datastore_id', Operator.EQ, datastore.id),
        Filter('state', Operator.EQ, ActionState.HAS_NEVER_RUN),
        Filter('action_type_name', Operator.EQ,
               EmrActionTypes.load_dataset.name),
    ])
    instance_groups_args = prepare_instance_groups(
        emr_engine, datastore, actions, emr_engine.core_node_limit,
        datastore.data.args['data_to_freespace_ratio'])
    bootstrap_actions_args = prepare_bootstrap_actions(
        datastore, emr_engine.impala_docker_repo_base_url,
        emr_engine.impala_version, False, action)

    extra_data = {
        'instance_groups_args': instance_groups_args,
        'bootstrap_action_args': bootstrap_actions_args,
    }
    # Dry runs only record what would have been launched; no cluster is made.
    if datastore.data.args['dry_run']:
        emr_engine.dart.patch_action(action, progress=1, extra_data=extra_data)
        return

    action = emr_engine.dart.patch_action(action,
                                          progress=0,
                                          extra_data=extra_data)

    cluster_id = create_cluster(bootstrap_actions_args, cluster_name,
                                datastore, emr_engine, instance_groups_args)
    emr_engine.dart.patch_datastore(datastore,
                                    extra_data={'cluster_id': cluster_id})
    emr_engine.dart.patch_action(action, progress=0.1)

    # http://docs.aws.amazon.com/ElasticMapReduce/latest/DeveloperGuide/ProcessingCycle.html
    # using cluster state since describe_jobflow is deprecated
    # https://aws.amazon.com/elasticmapreduce/faqs/
    while True:
        # BUG FIX: keep the full cluster description around. The original
        # left `cluster` as None forever, so the attribute reads below
        # (cluster.state / cluster.masterpublicdnsname) raised AttributeError.
        cluster = emr_engine.conn.describe_cluster(cluster_id)
        state = cluster.status.state
        if state in ['STARTING', 'BOOTSTRAPPING', 'RUNNING']:
            time.sleep(30)
        else:
            break

    if state not in ['WAITING']:
        # Report the polled state (the original dereferenced cluster.state,
        # which does not exist on the boto cluster description).
        raise Exception(
            'cluster_id=%s not in WAITING state, but in state: %s' %
            (cluster_id, state))

    emr_engine.dart.patch_datastore(
        datastore,
        host=cluster.masterpublicdnsname,
        port=21050,
        connection_url='jdbc:impala://%s:%s/default' %
        (cluster.masterpublicdnsname, 21050))
Ejemplo n.º 3
0
    def _handle_complete_action(self, message_id, message, previous_handler_failed):
        """Process an action-completion message.

        Persists the action's final state, cascades failure handling to the
        enclosing workflow instance / workflow / datastore as configured,
        and defers all notifications (emails, retry triggers) into a
        callbacks list so they fire only after the state updates have been
        attempted. Finally, kicks the datastore to try its next action
        unless a failure deactivated it.
        """
        if previous_handler_failed:
            _logger.error('previous handler for message id=%s failed... see if retrying is possible' % message_id)
            return

        _logger.info("Complete Action Trigger: message_id={message_id}, message={message}".format(message_id=message_id, message=message))
        state = message['action_state']
        action = self._action_service.get_action(message['action_id'])
        assert isinstance(action, Action)
        datastore = self._datastore_service.get_datastore(action.data.datastore_id)
        error_message = message.get('error_message')
        # Record the terminal state; keep any pre-existing error message if
        # the incoming message carried none.
        self._action_service.update_action_state(action, state, error_message or action.data.error_message)
        wfid = action.data.workflow_id
        wfiid = action.data.workflow_instance_id
        # The action may run outside a workflow, so both lookups are optional.
        wf = self._workflow_service.get_workflow(wfid) if wfid else None
        wfi = self._workflow_service.get_workflow_instance(wfiid) if wfiid else None
        # Side-effect notifications are queued here and executed in the
        # `finally` block, after all state mutations have been attempted.
        callbacks = []
        try_next_action = True
        try:
            if state == ActionState.FAILED:
                callbacks.append(lambda: self._emailer.send_action_failed_email(action, datastore))

                if action.data.on_failure == ActionOnFailure.DEACTIVATE:
                    # Failure halts this datastore's pipeline; do not poke it
                    # for the next action below.
                    try_next_action = False
                    if wf and wfi:
                        self._workflow_service.update_workflow_instance_state(wfi, WorkflowInstanceState.FAILED)
                        if wfi.data.retry_num < wf.data.retries_on_failure:
                            # Retries remain: schedule another workflow run.
                            retry_num = wfi.data.retry_num + 1
                            callbacks.append(lambda: self._trigger_proxy.trigger_workflow_retry(wfid, retry_num))
                        else:
                            # Retries exhausted: deactivate the workflow and,
                            # if configured, the datastore as well.
                            self._workflow_service.update_workflow_state(wf, WorkflowState.INACTIVE)
                            if wf.data.on_failure == WorkflowOnFailure.DEACTIVATE:
                                self._datastore_service.update_datastore_state(datastore, DatastoreState.INACTIVE)
                        # Mark all not-yet-run actions of this instance as
                        # skipped so they are not picked up later.
                        f1 = Filter('workflow_instance_id', Operator.EQ, wfiid)
                        f2 = Filter('state', Operator.EQ, ActionState.HAS_NEVER_RUN)
                        for a in self._action_service.query_actions_all(filters=[f1, f2]):
                            error_msg = 'A prior action (id=%s) in this workflow instance failed' % action.id
                            self._action_service.update_action_state(a, ActionState.SKIPPED, error_msg)
                        callbacks.append(lambda: self._emailer.send_workflow_failed_email(wf, wfi))
                    else:
                        # Standalone (non-workflow) action: deactivate just
                        # the datastore.
                        self._datastore_service.update_datastore_state(datastore, DatastoreState.INACTIVE)
                else:
                    # on_failure allows continuing; if this failed action was
                    # the last in its workflow, still close out the workflow.
                    if wfi and action.data.last_in_workflow:
                        self._handle_complete_workflow(callbacks, wf, wfi, wfid)

            elif state == ActionState.COMPLETED:
                if action.data.on_success_email:
                    callbacks.append(lambda: self._emailer.send_action_completed_email(action, datastore))
                if wfi and action.data.last_in_workflow:
                    self._handle_complete_workflow(callbacks, wf, wfi, wfid)

        finally:
            # Fire queued notifications even if a state update above raised.
            for f in callbacks:
                f()

        if try_next_action:
            self._trigger_proxy.try_next_action({'datastore_id': datastore.id, 'log_info': message.get('log_info')})
Ejemplo n.º 4
0
 def get_actions(self, datastore_id=None, workflow_id=None):
     """ Fetch all actions matching the given datastore and/or workflow.

         :type datastore_id: str
         :type workflow_id: str
         :rtype: list[dart.model.action.Action] """
     assert datastore_id or workflow_id, 'datastore_id and/or workflow_id must be provided'
     criteria = [
         Filter(field, Operator.EQ, value)
         for field, value in (('datastore_id', datastore_id),
                              ('workflow_id', workflow_id))
         if value
     ]
     return self.find_actions(criteria)
Ejemplo n.º 5
0
 def from_string(self, f_string):
     """Parse a "<subject> <operator> <value>" string into a Filter.

     :type f_string: str
     :rtype: dart.model.query.Filter
     :raises DartValidationException: if f_string cannot be parsed
     """
     # Raw strings: '\s'/'\S' in plain literals are invalid escape
     # sequences in Python 3 (DeprecationWarning, eventually an error).
     # NOTE(review): operator tokens are interpolated unescaped, as in the
     # original -- assumes they contain no regex metacharacters; confirm
     # against the registered _operator_handlers keys.
     pattern = re.compile(r'\s*(\S+?)\s*(' +
                          '|'.join(self._operator_handlers.keys()) +
                          r')\s*(\S+)\s*')
     m = pattern.match(f_string)
     if m is None:
         # Explicit check replaces the bare `except:`, which also swallowed
         # KeyboardInterrupt/SystemExit and masked unrelated errors.
         raise DartValidationException('could not parse filter: %s' %
                                       f_string)
     return Filter(m.group(1), m.group(2), m.group(3))
Ejemplo n.º 6
0
def _find_workflow_instances(workflow=None):
    """Return a paginated dict of workflow instances, optionally scoped
    to a single workflow.

    Query-string parameters: limit (default 20), offset (default 0), and
    filters (a JSON-encoded list of filter strings).
    """
    args = request.args
    limit = int(args.get('limit', 20))
    offset = int(args.get('offset', 0))
    raw_filters = json.loads(args.get('filters', '[]'))
    filters = [filter_service().from_string(expr) for expr in raw_filters]
    if workflow:
        # Restrict results to instances of the given workflow.
        filters.append(Filter('workflow_id', Operator.EQ, workflow.id))
    instances = workflow_service().query_workflow_instances(filters, limit, offset)
    return {
        'results': [instance.to_dict() for instance in instances],
        'limit': limit,
        'offset': offset,
        'total': workflow_service().query_workflow_instances_count(filters),
    }