Example #1
0
def install_arcadia(operations, **kwargs):
    """Build and run the install task graph, then deploy the service graph.

    For every node instance a sequence "starting event ->
    ``create_and_configure`` -> done event" is added to the graph.  For every
    node, one sequence collects the source-side ``preconfigure`` operation of
    each relationship of its instances.  A dependency is then registered
    between each instance's starting event and the done event of every
    relationship target that has one.  Once the graph has executed, the
    service graph is generated and installed through ``actx.client``.

    :param operations: not read by this function.
    :param kwargs: not read by this function.
    :raises api.ExecutionCancelled: when generating/installing the service
        graph raises ``NotImplementedError`` or ``ARCADIAServerRequestError``.
    """
    graph = wctx.graph_mode()

    # The bracketing event tasks are created for all instances first, so the
    # dependency wiring further down can look up any instance's events by id.
    starting_events = {}
    done_events = {}
    for node in wctx.nodes:
        for instance in node.instances:
            starting_events[instance.id] = instance.send_event('Starting to run operation')
            done_events[instance.id] = instance.send_event('Done running operation')

    # One sequence per instance: starting event, the operation, done event.
    for node in wctx.nodes:
        for instance in node.instances:
            instance_sequence = graph.sequence()
            operation_kwargs = {'id': actx.test_component(instance)}
            instance_sequence.add(
                starting_events[instance.id],
                instance.execute_operation("create_and_configure", kwargs=operation_kwargs),
                done_events[instance.id])

    # One sequence per node holding the preconfigure step of every
    # relationship of every instance of that node.
    for node in wctx.nodes:
        node_sequence = graph.sequence()
        for instance in node.instances:
            for relationship in instance.relationships:
                relationship_kwargs = {'id': actx.test_relationship(relationship)}
                node_sequence.add(
                    relationship.execute_source_operation('preconfigure', kwargs=relationship_kwargs))

    # Wire instance -> relationship-target ordering; targets without a
    # recorded done event are skipped.
    for node in wctx.nodes:
        for instance in node.instances:
            for relationship in instance.relationships:
                source_starting = starting_events.get(instance.id)
                target_done = done_events.get(relationship.target_id)
                if source_starting and target_done:
                    graph.add_dependency(source_starting, target_done)

    graph.execute()

    try:
        actx.client.generate_service_graph(actx.service_graph)
        actx.client.install_service_graph()
    except NotImplementedError:
        message = 'cancel service graph deployment: failed to generate or install graph, due too some missing functionality'
        wctx.logger.error(message)
        raise api.ExecutionCancelled(message)
    except ARCADIAServerRequestError as ex:
        message = 'cancel service graph deployment: arcadia server responded with an error message: {0}'.format(ex.message)
        wctx.logger.error(message)
        raise api.ExecutionCancelled(message)
Example #2
0
def sleep_with_cancel_support(ctx, use_legacy_cancel, **kwargs):
    """Run a marker operation, sleep while polling for cancel, run it again.

    The instance obtained from ``get_instance(ctx)`` executes
    ``test_interface.operation`` with key ``before-sleep``, is put in the
    ``asleep`` state, and then up to ten one-second sleeps follow.  Before
    each sleep the cancel request flag is checked; if it is set the workflow
    stops without running the ``after-sleep`` operation.

    :param ctx: passed to ``get_instance``.
    :param use_legacy_cancel: when truthy, a cancel is reported by returning
        ``api.EXECUTION_CANCELLED_RESULT`` instead of raising.
    :return: ``api.EXECUTION_CANCELLED_RESULT`` on a legacy cancel,
        otherwise ``None``.
    :raises api.ExecutionCancelled: on a non-legacy cancel.
    """
    instance = get_instance(ctx)

    def run_marker_operation(key):
        instance.execute_operation('test_interface.operation',
                                   kwargs={'key': key, 'value': None})

    run_marker_operation('before-sleep')
    instance.set_state('asleep')

    for _ in range(10):
        if api.has_cancel_request():
            if use_legacy_cancel:
                return api.EXECUTION_CANCELLED_RESULT
            raise api.ExecutionCancelled()
        time.sleep(1)

    run_marker_operation('after-sleep')
Example #3
0
    def get(self, retry_on_failure=True):
        """Block until the task execution ends and return its result.

        The wait is released either by the ``on_result`` callback or by a
        cancel callback registered in ``api.cancel_callbacks``.

        :param retry_on_failure: when the task ended as failed/rescheduled
            and the termination handler produced a retried task, apply that
            task and return its result instead of raising.
        :return: The task result
        :raises api.ExecutionCancelled: if a cancel was requested and no
            result has been set yet.
        """
        finished = threading.Event()

        api.cancel_callbacks.add(finished.set)
        self.on_result(lambda _result: finished.set())
        finished.wait()
        api.cancel_callbacks.discard(finished.set)

        # Woken up by a cancel rather than by a result.
        if api.has_cancel_request() and self._result is self._NOT_SET:
            self.result = api.ExecutionCancelled()
            raise self.result

        workflow_ctx = self.task.workflow_context
        if not workflow_ctx.internal.graph_mode:
            workflow_ctx.internal.task_graph.remove_task(self.task)

        if self.task.get_state() not in (TASK_FAILED, TASK_RESCHEDULED):
            return self._result

        handler_result = self.task.handle_task_terminated()
        if handler_result.retried_task and retry_on_failure:
            handler_result.retried_task.apply_async()
            return handler_result.retried_task.async_result.get()
        # NOTE(review): presumably self.result holds the failure exception
        # at this point -- confirm against the result setter.
        raise self.result
def _wait_for_sent_tasks(ctx, graph):
    """Wait until no task of ``graph`` is in the SENT state any more.

    Every task is first logged at debug level.  The wait is bounded by
    ``ctx.wait_after_fail`` seconds (1800 when the context has no such
    attribute).  Terminated tasks are handled on every poll; a
    ``RuntimeError`` raised while handling one is logged and ignored.

    :param ctx: workflow context providing ``logger`` and, optionally,
        ``wait_after_fail``.
    :param graph: task graph being drained.
    :raises api.ExecutionCancelled: if a cancel is detected while waiting.
    """
    for task in graph.tasks_iter():
        ctx.logger.debug(
            'Parallel task to failed task: {0}. State: {1}'.format(
                task.id, task.get_state()))

    def cancel_requested():
        # Fall back to the graph's own check when ``api`` lacks the helper.
        try:
            return api.has_cancel_request()
        except AttributeError:
            return graph._is_execution_cancelled()

    def collect_finished():
        # The graph exposes this under one of two names.
        try:
            return graph._finished_tasks()
        except AttributeError:
            return graph._terminated_tasks()

    deadline = time.time() + getattr(ctx, 'wait_after_fail', 1800)
    while time.time() < deadline:
        if cancel_requested():
            raise api.ExecutionCancelled()

        for finished in collect_finished():
            try:
                graph._handle_terminated_task(finished)
            except RuntimeError:
                ctx.logger.error(
                    'Unhandled Failed task: {0}'.format(finished))

        still_sent = any(pending.get_state() == tasks.TASK_SENT
                         for pending in graph.tasks_iter())
        if not still_sent:
            break
        time.sleep(0.1)
Example #5
0
def preconfig_rship_source(**kwargs):
    """Run the source-side preconfiguration of one ARCADIA relationship.

    The relationship is looked up in ``actx.relationships`` under
    ``kwargs['id']`` (``None`` when the key is absent).

    :raises api.ExecutionCancelled: if the ARCADIA server request fails; the
        server error message is logged and reused as the exception message.
    """
    relationship_id = kwargs.get('id')
    try:
        relationship_api = ARCADIARelationshipAPI(client=actx.client)
        relationship_api.preconfig_src_relationship(
            _instance=actx.relationships[relationship_id])
    except ARCADIAServerRequestError as error:
        ctx.logger.error(error.message)
        raise api.ExecutionCancelled(error.message)
Example #6
0
def create_policy(**kwargs):
    """Initialise an ARCADIA policy for one registered component.

    The component is looked up in ``actx.components`` under ``kwargs['id']``
    (``None`` when the key is absent).

    :raises api.ExecutionCancelled: if the ARCADIA server request fails; the
        server error message is logged and reused as the exception message.
    """
    component_id = kwargs.get('id')
    try:
        policy_api = ARCADIAPolicyAPI(client=actx.client)
        policy_api.init_policy(_instance=actx.components[component_id])
    except ARCADIAServerRequestError as error:
        ctx.logger.error(error.message)
        raise api.ExecutionCancelled(error.message)
Example #7
0
def run_jobs(**kwargs):  # pylint: disable=W0613
    """Workflow to execute long running batch operations.

    Starting from the root nodes returned by ``build_graph``, nodes are
    launched, monitored, and replaced by their ready children as they
    complete.  When a node reports a bad status every execution is cancelled
    and the workflow returns.  After the loop, any reservation held by a job
    instance is deleted once.

    :raises api.ExecutionCancelled: if something was still executing when
        the monitoring loop ended.
    """
    interrupted = False
    root_nodes, job_instances_map = build_graph(ctx.nodes)
    monitor = Monitor(job_instances_map, ctx.logger)

    pending_nodes = root_nodes

    # Monitoring and next executions loop.
    # NOTE(review): ``and`` binds tighter than ``or``, so a cancel request is
    # only honoured once there are no pending nodes left to launch -- confirm
    # this is intended.
    while pending_nodes or (monitor.is_something_executing()
                            and not api.has_cancel_request()):
        # Launch everything that became ready in the previous round.
        launched_results = []
        for pending in pending_nodes:
            monitor.add_node(pending)
            if pending.is_job:
                launched_results.extend(pending.launch_all_instances())

        wait_jobs_to_finish(launched_results)

        # Monitor the infrastructure
        monitor.update_status()
        finished_names = []
        pending_nodes = []
        for node_name, exec_node in monitor.get_executions_iterator():
            if not exec_node.check_status():
                # Something went wrong in the node, cancel execution
                cancel_all(monitor.get_executions_iterator())
                return
            if exec_node.completed:
                exec_node.clean_all_instances()
                finished_names.append(node_name)
                pending_nodes.extend(exec_node.get_children_ready())

        # Finished nodes are removed only after the iteration above is over.
        for node_name in finished_names:
            monitor.finish_node(node_name)

        wait_jobs_to_finish(launched_results)

    if monitor.is_something_executing():
        ctx.logger.info("Cancelling jobs...")
        cancel_all(monitor.get_executions_iterator())
        interrupted = True

    # Several instances may share a reservation; delete each one only once.
    deleted_reservations = []
    for instance_name in job_instances_map:
        instance = job_instances_map[instance_name]
        if (instance.reservation in deleted_reservations
                or not instance.reservation):
            continue
        instance.delete_reservation()
        deleted_reservations.append(instance.reservation)

    if interrupted:
        raise api.ExecutionCancelled()
    ctx.logger.info(
        "------------------Workflow Finished-----------------------")
Example #8
0
def create_serv_graph(**kwargs):
    """Initialise the ARCADIA service graph from one registered component.

    The component is looked up in ``actx.components`` under ``kwargs['id']``
    (``None`` when the key is absent) and, once initialised, stored as
    ``actx.service_graph``.

    :raises api.ExecutionCancelled: if the ARCADIA server request fails; the
        server error message is logged and reused as the exception message.
    """
    component_id = kwargs.get('id')
    try:
        graph_api = ARCADIAServiceGraphAPI(client=actx.client)
        graph_api.init_service_graph(_instance=actx.components[component_id])
        actx.service_graph = actx.components[component_id]
    except ARCADIAServerRequestError as error:
        ctx.logger.error(error.message)
        raise api.ExecutionCancelled(error.message)
Example #9
0
    def execute(self):
        """
        Start executing the graph based on tasks and dependencies between
        them.
        Calling this method will block until one of the following occurs:
            1. all tasks terminated
            2. a task failed
            3. an unhandled exception is raised
            4. the execution is cancelled

        Note: This method will raise an api.ExecutionCancelled error if the
        execution has been cancelled. When catching errors raised from this
        method, make sure to re-raise the error if it's
        api.ExecutionCancelled in order to allow the execution to be set in
        cancelled mode properly.

        Also note that for the time being, if such a cancelling event
        occurs, the method might return even while there's some operations
        still being executed.
        """
        while True:
            if self._is_execution_cancelled():
                raise api.ExecutionCancelled()

            self._check_dump_request()

            # Terminated tasks are handled before executable ones so that
            # tasks unblocked by a termination can be started in this same
            # iteration rather than the next.
            for terminated in self._terminated_tasks():
                self._handle_terminated_task(terminated)

            for runnable in self._executable_tasks():
                self._handle_executable_task(runnable)

            # An empty graph means there is nothing left to process.
            graph_is_empty = len(self.graph.node) == 0
            if graph_is_empty:
                return

            # Otherwise pause briefly and poll again.
            time.sleep(0.1)
Example #10
0
    def _is_finished(self):
        """Tell whether execution should stop waiting.

        Returns ``True`` when a cancel was requested (recording an
        ``api.ExecutionCancelled`` in ``self._error``), when there are no
        tasks, or when an error is recorded and either nothing is being
        waited for or ``ctx.wait_after_fail`` seconds have passed since
        ``self._error_time``.  While an error's grace period is still
        running, a daemon timer is started that sets ``self._tasks_wait``
        when the period ends, and ``False`` is returned.
        """
        if api.has_cancel_request():
            self._error = api.ExecutionCancelled()
            return True

        if not self._tasks:
            return True

        if not self._error:
            return False

        # An error is recorded: finish unless tasks are still outstanding
        # and the grace period has not expired yet.
        if not self._waiting_for:
            return True

        deadline = self._error_time + self.ctx.wait_after_fail
        if time.time() > deadline:
            return True

        self._wake_after_fail = threading.Timer(
            deadline - time.time(), self._tasks_wait.set)
        self._wake_after_fail.daemon = True
        self._wake_after_fail.start()
        return False
Example #11
0
 def _check_execution_cancelled():
     """Raise ``api.ExecutionCancelled`` if a cancel request is pending."""
     cancel_requested = api.has_cancel_request()
     if cancel_requested:
         raise api.ExecutionCancelled()
def cancel_all(executions):
    """Cancel all pending or running jobs, then abort the workflow.

    :param executions: iterable of ``(name, exec_node)`` pairs; only the
        second element of each pair is used.
    :raises api.ExecutionCancelled: always, after every node was cancelled.
    """
    for _name, node in executions:
        node.cancel_all_instances()
    raise api.ExecutionCancelled()
Example #13
0
    def execute(self):
        """
        Start executing the graph based on tasks and dependencies between
        them.
        Calling this method will block until one of the following occurs:
            1. all tasks terminated
            2. a task failed
            3. an unhandled exception is raised
            4. the execution is cancelled

        Note: This method will raise an api.ExecutionCancelled error if the
        execution has been cancelled. When catching errors raised from this
        method, make sure to re-raise the error if it's
        api.ExecutionCancelled in order to allow the execution to be set in
        cancelled mode properly.

        Also note that for the time being, if such a cancelling event
        occurs, the method might return even while there's some operations
        still being executed.
        """
        # Reset any error left over from a previous run of this graph.
        self._error = None

        while self._error is None:
            if self._is_execution_cancelled():
                raise api.ExecutionCancelled()

            # Terminated tasks are handled before executable ones so that
            # tasks unblocked by a termination can be started in this same
            # iteration rather than the next.
            for terminated in self._terminated_tasks():
                self._handle_terminated_task(terminated)

            # An error recorded while handling terminated tasks stops any
            # further tasks from being sent.
            if self._error:
                break

            for runnable in self._executable_tasks():
                self._handle_executable_task(runnable)

            # An empty graph means there is nothing left to process.
            if len(self.graph.node) == 0:
                if self._error:
                    raise self._error
                return

            # Otherwise pause briefly and poll again.
            time.sleep(0.1)

        # A task errored: send nothing new, just give already-sent tasks up
        # to ``ctx.wait_after_fail`` seconds to return before raising.
        deadline = time.time() + self.ctx.wait_after_fail
        while time.time() < deadline:
            if self._is_execution_cancelled():
                raise api.ExecutionCancelled()
            for terminated in self._terminated_tasks():
                self._handle_terminated_task(terminated)
            if not any(self._sent_tasks()):
                break
            time.sleep(0.1)
        raise self._error
def _run_scale_settings(ctx,
                        scale_settings,
                        scalable_entity_properties,
                        scale_transaction_field=None,
                        scale_transaction_value=None,
                        ignore_failure=False,
                        ignore_rollback_failure=True,
                        instances_remove_ids=None,
                        node_sequence=None):
    """Apply a deployment modification, installing/uninstalling instances.

    Starts a deployment modification from ``scale_settings``, installs the
    instances it reports as added, uninstalls the ones it reports as
    removed, and finishes the modification.  On any exception the
    modification is rolled back and the exception is re-raised.

    :param ctx: workflow context (``deployment``, ``logger``,
        ``graph_mode()`` and optionally ``wait_after_fail`` are used).
    :param scale_settings: passed to ``ctx.deployment.start_modification``.
    :param scalable_entity_properties: mapping of node id to a collection of
        runtime-property dicts (presumably a list -- confirm against the
        caller); one dict is popped per added instance, so the caller's
        collection is consumed.
    :param scale_transaction_field: if set, runtime-property name under
        which the transaction marker is stored on each added instance.
    :param scale_transaction_value: marker value to store; when falsy the
        modification id is stored instead.
    :param ignore_failure: forwarded to the install (``node_sequence`` path
        only) and uninstall helpers.
    :param ignore_rollback_failure: used as ``ignore_failure`` when
        uninstalling added instances after a failed scale out.
    :param instances_remove_ids: if truthy, every removed instance id must
        be contained in it, otherwise an ``Exception`` is raised.
    :param node_sequence: if truthy, instances are installed through
        ``_process_node_instances`` with this sequence; otherwise through
        ``lifecycle.install_node_instances``.
    :raises api.ExecutionCancelled: if a cancel is detected while waiting
        for sent tasks after a failure (``modification.rollback()`` is not
        reached in that case).
    :raises Exception: whatever failed is re-raised after the rollback.
    """
    modification = ctx.deployment.start_modification(scale_settings)
    graph = ctx.graph_mode()
    try:
        ctx.logger.info('Deployment modification started. '
                        '[modification_id={0}]'.format(modification.id))
        # ---- scale out: instances added by the modification ----
        if len(set(modification.added.node_instances)):
            ctx.logger.info('Added: {}'.format(
                repr([
                    node_instance._node_instance.id
                    for node_instance in modification.added.node_instances
                    if node_instance.modification == 'added'
                ])))
            # Entries not marked 'added' are kept separately and passed
            # along as related instances.
            added_and_related = set(modification.added.node_instances)
            added = set(i for i in added_and_related
                        if i.modification == 'added')
            related = added_and_related - added
            try:
                for node_instance in added:
                    properties_updates = scalable_entity_properties.get(
                        node_instance._node_instance.node_id, {})
                    # save properties updates
                    properties = {}
                    if properties_updates:
                        # pop one dict for runtime properties
                        properties.update(properties_updates.pop())
                    # save transaction list
                    if scale_transaction_field:
                        # save original set of instances in scale up.
                        if scale_transaction_value:
                            properties.update({
                                scale_transaction_field:
                                scale_transaction_value
                            })
                        else:
                            properties.update(
                                {scale_transaction_field: modification.id})
                    # check properties to update
                    if properties:
                        ctx.logger.debug(
                            "{}: Updating {} runtime properties by {}".format(
                                node_instance._node_instance.node_id,
                                node_instance._node_instance.id,
                                repr(properties)))
                        _update_runtime_properties(
                            ctx, node_instance._node_instance.id, properties)
                # Install either in the caller-supplied node order or via
                # the regular lifecycle install.
                if node_sequence:
                    subgraph_func = lifecycle.install_node_instance_subgraph
                    _process_node_instances(
                        ctx=ctx,
                        graph=graph,
                        node_instances=added,
                        ignore_failure=ignore_failure,
                        node_instance_subgraph_func=subgraph_func,
                        node_sequence=node_sequence)
                else:
                    lifecycle.install_node_instances(graph=graph,
                                                     node_instances=added,
                                                     related_nodes=related)
            except Exception as ex:
                # Undo the partial scale out before the outer handler rolls
                # back the modification itself.
                ctx.logger.error(
                    'Scale out failed, scaling back in. {}'.format(repr(ex)))
                _uninstall_instances(ctx=ctx,
                                     graph=graph,
                                     removed=added,
                                     related=related,
                                     ignore_failure=ignore_rollback_failure,
                                     node_sequence=node_sequence)
                raise ex

        # ---- scale in: instances removed by the modification ----
        if len(set(modification.removed.node_instances)):
            ctx.logger.info('Removed: {}'.format(
                repr([
                    node_instance._node_instance.id
                    for node_instance in modification.removed.node_instances
                    if node_instance.modification == 'removed'
                ])))
            removed_and_related = set(modification.removed.node_instances)
            removed = set(i for i in removed_and_related
                          if i.modification == 'removed')
            ctx.logger.info('Proposed: {}'.format(repr(instances_remove_ids)))
            # Refuse to remove anything the caller did not explicitly
            # propose for removal.
            if instances_remove_ids:
                for instance in removed:
                    if instance._node_instance.id not in instances_remove_ids:
                        raise Exception(
                            "Instance {} not in proposed list {}.".format(
                                repr(instance._node_instance.id),
                                repr(instances_remove_ids)))
            related = removed_and_related - removed
            _uninstall_instances(ctx=ctx,
                                 graph=graph,
                                 removed=removed,
                                 ignore_failure=ignore_failure,
                                 related=related,
                                 node_sequence=node_sequence)
    except Exception as ex:
        ctx.logger.warn('Rolling back deployment modification. '
                        '[modification_id={0}]: {1}'.format(
                            modification.id, repr(ex)))
        # Give tasks that were already sent a bounded amount of time to
        # return before rolling back; 1800 seconds is the fallback when the
        # context has no ``wait_after_fail`` attribute.
        try:
            deadline = time.time() + ctx.wait_after_fail
        except AttributeError:
            deadline = time.time() + 1800
        while deadline > time.time():
            if graph._is_execution_cancelled():
                raise api.ExecutionCancelled()
            for task in graph._terminated_tasks():
                graph._handle_terminated_task(task)
            # Stop waiting as soon as no task is left in the SENT state.
            if not any(task.get_state() == tasks.TASK_SENT
                       for task in graph.tasks_iter()):
                break
            else:
                time.sleep(0.1)
        modification.rollback()
        raise ex
    else:
        # Nothing failed: commit the modification.
        modification.finish()