def get_datastore_actions():
    """List actions, optionally scoped to a datastore and/or workflow.

    Reads paging (``limit``/``offset``), ``filters``, and ``order_by``
    parameters from the query string and returns a paged envelope of
    serialized actions.
    """
    page_size = int(request.args.get('limit', 20))
    page_offset = int(request.args.get('offset', 0))
    criteria = [
        filter_service().from_string(spec)
        for spec in json.loads(request.args.get('filters', '[]'))
    ]
    ordering = [
        order_by_service().from_string(spec)
        for spec in json.loads(request.args.get('order_by', '[]'))
    ]
    # Convenience query params become equality filters (datastore first,
    # then workflow, matching the parameter order above).
    for field in ('datastore_id', 'workflow_id'):
        value = request.args.get(field)
        if value:
            criteria.append(Filter(field, Operator.EQ, value))
    matching = action_service().query_actions(criteria, page_size, page_offset, ordering)
    return {
        'results': [a.to_dict() for a in matching],
        'limit': page_size,
        'offset': page_offset,
        'total': action_service().query_actions_count(criteria)
    }
def start_datastore(emr_engine, datastore, action):
    """Provision an EMR cluster for the datastore and wait until it is ready.

    Prepares instance-group and bootstrap-action arguments, records them on
    the action, creates the cluster (unless this is a dry run), then polls
    the cluster state and finally patches the datastore with its connection
    details.

    :type emr_engine: dart.engine.emr.emr.EmrEngine
    :type datastore: dart.model.datastore.Datastore
    :type action: dart.model.action.Action
    """
    cluster_name = 'dart-datastore-%s-%s' % (datastore.id, datastore.data.name)
    # Pending load_dataset actions influence how the instance groups are sized.
    actions = emr_engine.dart.find_actions([
        Filter('datastore_id', Operator.EQ, datastore.id),
        Filter('state', Operator.EQ, ActionState.HAS_NEVER_RUN),
        Filter('action_type_name', Operator.EQ, EmrActionTypes.load_dataset.name),
    ])
    instance_groups_args = prepare_instance_groups(
        emr_engine,
        datastore,
        actions,
        emr_engine.core_node_limit,
        datastore.data.args['data_to_freespace_ratio']
    )
    bootstrap_actions_args = prepare_bootstrap_actions(
        datastore,
        emr_engine.impala_docker_repo_base_url,
        emr_engine.impala_version,
        False,
        action
    )
    extra_data = {
        'instance_groups_args': instance_groups_args,
        'bootstrap_action_args': bootstrap_actions_args,
    }
    if datastore.data.args['dry_run']:
        # Dry run: record what would have been launched and stop.
        emr_engine.dart.patch_action(action, progress=1, extra_data=extra_data)
        return

    action = emr_engine.dart.patch_action(action, progress=0, extra_data=extra_data)
    cluster_id = create_cluster(bootstrap_actions_args, cluster_name, datastore, emr_engine, instance_groups_args)
    emr_engine.dart.patch_datastore(datastore, extra_data={'cluster_id': cluster_id})
    emr_engine.dart.patch_action(action, progress=0.1)

    while True:
        # BUG FIX: keep the whole describe_cluster response. The original
        # kept only ``.status.state`` and left ``cluster`` as None, so both
        # the failure message (``cluster.state``) and the success path
        # (``cluster.masterpublicdnsname``) raised AttributeError.
        cluster = emr_engine.conn.describe_cluster(cluster_id)
        state = cluster.status.state
        # http://docs.aws.amazon.com/ElasticMapReduce/latest/DeveloperGuide/ProcessingCycle.html
        # using cluster state since describe_jobflow is deprecated
        # https://aws.amazon.com/elasticmapreduce/faqs/
        if state in ['STARTING', 'BOOTSTRAPPING', 'RUNNING']:
            time.sleep(30)
        else:
            break
    if state not in ['WAITING']:
        raise Exception('cluster_id=%s not in WAITING state, but in state: %s' % (cluster_id, state))

    emr_engine.dart.patch_datastore(
        datastore,
        host=cluster.masterpublicdnsname,
        port=21050,
        connection_url='jdbc:impala://%s:%s/default' % (cluster.masterpublicdnsname, 21050)
    )
def _handle_complete_action(self, message_id, message, previous_handler_failed):
    """Process an action-completion message: persist the action's new state,
    cascade workflow/datastore state changes on failure, and kick off the
    next action and any notification callbacks.

    Callbacks (emails, retry triggers) are collected during processing and
    flushed in the ``finally`` block so that state updates are persisted
    before side effects run.
    """
    if previous_handler_failed:
        _logger.error('previous handler for message id=%s failed... see if retrying is possible' % message_id)
        return
    _logger.info("Complete Action Trigger: message_id={message_id}, message={message}".format(message_id=message_id, message=message))
    state = message['action_state']
    action = self._action_service.get_action(message['action_id'])
    assert isinstance(action, Action)
    datastore = self._datastore_service.get_datastore(action.data.datastore_id)
    error_message = message.get('error_message')
    # Keep a pre-existing error message if the incoming message has none.
    self._action_service.update_action_state(action, state, error_message or action.data.error_message)
    wfid = action.data.workflow_id
    wfiid = action.data.workflow_instance_id
    # Actions may run outside a workflow; wf/wfi stay None in that case.
    wf = self._workflow_service.get_workflow(wfid) if wfid else None
    wfi = self._workflow_service.get_workflow_instance(wfiid) if wfiid else None
    callbacks = []
    try_next_action = True
    try:
        if state == ActionState.FAILED:
            callbacks.append(lambda: self._emailer.send_action_failed_email(action, datastore))
            if action.data.on_failure == ActionOnFailure.DEACTIVATE:
                try_next_action = False
                if wf and wfi:
                    self._workflow_service.update_workflow_instance_state(wfi, WorkflowInstanceState.FAILED)
                    if wfi.data.retry_num < wf.data.retries_on_failure:
                        # Retry budget remains: schedule another run of the workflow.
                        retry_num = wfi.data.retry_num + 1
                        callbacks.append(lambda: self._trigger_proxy.trigger_workflow_retry(wfid, retry_num))
                    else:
                        # Retries exhausted: deactivate the workflow (and
                        # possibly the datastore), skip the remaining
                        # never-run actions of this instance, and notify.
                        self._workflow_service.update_workflow_state(wf, WorkflowState.INACTIVE)
                        if wf.data.on_failure == WorkflowOnFailure.DEACTIVATE:
                            self._datastore_service.update_datastore_state(datastore, DatastoreState.INACTIVE)
                        f1 = Filter('workflow_instance_id', Operator.EQ, wfiid)
                        f2 = Filter('state', Operator.EQ, ActionState.HAS_NEVER_RUN)
                        for a in self._action_service.query_actions_all(filters=[f1, f2]):
                            error_msg = 'A prior action (id=%s) in this workflow instance failed' % action.id
                            self._action_service.update_action_state(a, ActionState.SKIPPED, error_msg)
                        callbacks.append(lambda: self._emailer.send_workflow_failed_email(wf, wfi))
                else:
                    # Standalone (non-workflow) action: deactivate only the datastore.
                    self._datastore_service.update_datastore_state(datastore, DatastoreState.INACTIVE)
            else:
                # on_failure != DEACTIVATE: a failed last action still completes the workflow instance.
                if wfi and action.data.last_in_workflow:
                    self._handle_complete_workflow(callbacks, wf, wfi, wfid)
        elif state == ActionState.COMPLETED:
            if action.data.on_success_email:
                callbacks.append(lambda: self._emailer.send_action_completed_email(action, datastore))
            if wfi and action.data.last_in_workflow:
                self._handle_complete_workflow(callbacks, wf, wfi, wfid)
    finally:
        # Side effects run after all state updates, even if one of them raised.
        for f in callbacks:
            f()
        if try_next_action:
            self._trigger_proxy.try_next_action({'datastore_id': datastore.id, 'log_info': message.get('log_info')})
def get_actions(self, datastore_id=None, workflow_id=None):
    """Fetch actions scoped to a datastore, a workflow, or both.

    :type datastore_id: str
    :type workflow_id: str
    :rtype: list[dart.model.action.Action]
    """
    assert datastore_id or workflow_id, 'datastore_id and/or workflow_id must be provided'
    # Build an equality filter for each id that was supplied.
    criteria = [
        Filter(field, Operator.EQ, value)
        for field, value in (('datastore_id', datastore_id), ('workflow_id', workflow_id))
        if value
    ]
    return self.find_actions(criteria)
def from_string(self, f_string):
    """Parse a filter expression of the form ``<key> <operator> <value>``.

    :type f_string: str
    :rtype: dart.model.query.Filter
    :raises: DartValidationException if f_string cannot be parsed
    """
    # Escape each operator token so regex metacharacters in an operator
    # (e.g. '|', '*') cannot corrupt the alternation; also use raw strings
    # for the regex fragments.
    operators = '|'.join(re.escape(op) for op in self._operator_handlers.keys())
    pattern = re.compile(r'\s*(\S+?)\s*(' + operators + r')\s*(\S+)\s*')
    m = pattern.match(f_string)
    try:
        # m is None when the pattern did not match; the resulting
        # AttributeError is converted to a validation error below.
        return Filter(m.group(1), m.group(2), m.group(3))
    except Exception:
        # Narrowed from a bare `except:` so that KeyboardInterrupt /
        # SystemExit are no longer swallowed.
        raise DartValidationException('could not parse filter: %s' % f_string)
def _find_workflow_instances(workflow=None):
    """Return a paged envelope of workflow instances, optionally scoped to one workflow.

    Paging and filter parameters are read from the request query string.
    """
    page_size = int(request.args.get('limit', 20))
    start = int(request.args.get('offset', 0))
    criteria = [
        filter_service().from_string(spec)
        for spec in json.loads(request.args.get('filters', '[]'))
    ]
    if workflow:
        # Scope the query to the supplied workflow.
        criteria.append(Filter('workflow_id', Operator.EQ, workflow.id))
    instances = workflow_service().query_workflow_instances(criteria, page_size, start)
    return {
        'results': [instance.to_dict() for instance in instances],
        'limit': page_size,
        'offset': start,
        'total': workflow_service().query_workflow_instances_count(criteria)
    }