Ejemplo n.º 1
0
    def test_child_parent_relationship(self):
        """
        Verify the parent/children relationship between executions.

        A stored execution starts with no children. After appending two
        children and committing, the test expects them back in insertion
        order, and each child must reference the parent.
        """

        parent = Execution()
        parent.status = "x1"
        db.session.add(parent)
        db.session.commit()

        # A freshly stored execution has an empty list of children
        parent = db.session.query(Execution).filter_by(status="x1").first()
        self.assertEqual(0, len(parent.children))

        # We add two children
        child_1 = Execution()
        child_1.status = "x2"
        parent.children.append(child_1)

        child_2 = Execution()
        child_2.status = "x3"
        parent.children.append(child_2)

        db.session.commit()

        # The parent sees both children, in the order they were appended
        parent = db.session.query(Execution).filter_by(status="x1").first()
        self.assertEqual(2, len(parent.children))
        self.assertEqual(child_1, parent.children[0])
        self.assertEqual(child_2, parent.children[1])

        # Each child points back to the parent
        child_1 = db.session.query(Execution).filter_by(status="x2").first()
        self.assertEqual(parent, child_1.parent)

        child_2 = db.session.query(Execution).filter_by(status="x3").first()
        self.assertEqual(parent, child_2.parent)
Ejemplo n.º 2
0
    def test_get_number_extra_jobs(self):
        """
        Check the value reported by Execution.get_number_extra_jobs().

        With no children the expected count is 0. After attaching one child
        with status "x2" and one child in running status, the expected count
        is 1.
        """

        parent = Execution()
        parent.status = "x1"
        db.session.add(parent)
        db.session.commit()

        # Empty list of children
        self.assertEqual(0, parent.get_number_extra_jobs())

        # We add two children, only the second one in running status
        child_1 = Execution()
        child_1.status = "x2"
        parent.children.append(child_1)

        child_2 = Execution()
        child_2.status = Execution.__status_running__
        parent.children.append(child_2)

        db.session.commit()

        self.assertEqual(1, parent.get_number_extra_jobs())
Ejemplo n.º 3
0
    def test_many_to_many_relations_with_nodes(self):
        """
        Verify the many-to-many relation between Execution and Node.

        Two executions are linked to the same two nodes in opposite orders,
        and each execution is expected to return its nodes in the order in
        which they were assigned.
        """

        node_1 = Node()
        node_1.name = "node1"
        node_1.information_retrieved = False
        node_2 = Node()
        node_2.name = "node2"
        node_2.information_retrieved = False
        db.session.add(node_1)
        db.session.add(node_2)

        execution_1 = Execution()
        execution_1.status = "x1"
        execution_2 = Execution()
        execution_2.status = "x2"
        db.session.add(execution_1)
        db.session.add(execution_2)

        db.session.commit()

        # Same nodes for both executions, assigned in opposite orders
        execution_1.nodes = [node_1, node_2]
        execution_2.nodes = [node_2, node_1]

        db.session.commit()

        execution = db.session.query(Execution).filter_by(status="x1").first()
        self.assertEqual(node_1, execution.nodes[0])
        self.assertEqual(node_2, execution.nodes[1])

        execution = db.session.query(Execution).filter_by(status="x2").first()
        self.assertEqual(node_2, execution.nodes[0])
        self.assertEqual(node_1, execution.nodes[1])
Ejemplo n.º 4
0
def stop_execution(execution):
    """
    Stop an execution.

    For a checkpointable application the execution is marked as stopped and
    its running child is cancelled. If the execution itself is in running
    status, a new running child is created first; that child takes over the
    execution's slurm_sbatch_id and the execution keeps -1. Otherwise the
    first child found in running status is cancelled (StopIteration is raised
    if there is none).

    For any other application type the stop is delegated to
    slurm.stop_execution().
    """

    application_type = execution.execution_configuration.application.application_type
    is_checkpointable = Application.CHECKPOINTABLE == application_type

    if not is_checkpointable:
        slurm.stop_execution(
            execution.slurm_sbatch_id,
            execution.execution_configuration.testbed.endpoint)
        return

    if execution.status == Execution.__status_running__:
        # The running job is handed over to a new child execution
        running_child = Execution()
        running_child.status = Execution.__status_running__
        running_child.execution_configuration = execution.execution_configuration
        running_child.execution_type = execution.execution_configuration.execution_type
        running_child.slurm_sbatch_id = execution.slurm_sbatch_id

        execution.slurm_sbatch_id = -1
        execution.children.append(running_child)
    else:
        # Only one execution can be running
        running_child = next(
            candidate for candidate in execution.children
            if candidate.status == Execution.__status_running__)

    execution.status = Execution.__status_stopped__
    db.session.commit()

    cancel_execution(running_child,
                     execution.execution_configuration.testbed.endpoint)
Ejemplo n.º 5
0
    def test_crud_execution(self):
        """Exercise the basic CRUD operations of the Execution class."""

        # A newly created object has no id before it is stored
        execution = Execution()
        execution.execution_type = "execution_type"
        execution.status = "status"
        self.assertIsNone(execution.id)

        # We store the object in the db
        # NOTE(review): there is no explicit commit here; the query below
        # presumably relies on the session flushing pending objects.
        db.session.add(execution)

        # We recover the execution from the db
        execution = db.session.query(Execution).filter_by(
            execution_type="execution_type").first()
        self.assertIsNotNone(execution.id)
        self.assertEqual("execution_type", execution.execution_type)
        self.assertEqual("status", execution.status)

        # We check that we can update the execution
        execution.execution_type = "X"
        db.session.commit()
        execution_2 = db.session.query(Execution).filter_by(
            execution_type="X").first()
        self.assertEqual(execution.id, execution_2.id)
        self.assertEqual("X", execution.execution_type)

        # We check the deletion
        db.session.delete(execution_2)
        count = db.session.query(Execution).filter_by(
            execution_type="X").count()
        self.assertEqual(0, count)
Ejemplo n.º 6
0
    def setUp(self):
        """
        Build an Execution wired to an execution configuration, an
        application and a testbed, and store all of them in the database.

        The execution is kept in ``self.execution`` for the tests.
        """
        super(RankingTests, self).setUp()

        testbed = Testbed("nova", True, "SLURM", "SSH", "*****@*****.**",
                          ["SINGULARITY"])

        application = Application()
        application.name = "Matmul"

        execution_configuration = ExecutionConfiguration()
        execution_configuration.id = 22
        execution_configuration.application = application
        execution_configuration.testbed = testbed

        self.execution = Execution()
        self.execution.slurm_sbatch_id = 2333
        self.execution.execution_configuration = execution_configuration

        # Same registration order as before: testbed, application,
        # configuration, execution
        for entity in (testbed, application, execution_configuration,
                       self.execution):
            db.session.add(entity)
        db.session.commit()
Ejemplo n.º 7
0
    def setUp(self):
        """
        Create the database tables and seed them with fixtures for the tests.

        Stored fixtures: two applications (the second one owning two
        execution configurations), three testbeds, one deployment, two nodes
        and one execution.
        """

        db.create_all()

        # Applications
        application_1 = Application()
        application_1.name = 'AppName_1'
        application_2 = Application()
        application_2.name = 'AppName_2'

        # Execution configurations, attached to the second application
        execution_script_1 = ExecutionConfiguration()
        execution_script_1.execution_type = "slurm:sbatch"
        execution_script_2 = ExecutionConfiguration()
        execution_script_2.execution_type = "slurm:sbatch2"
        application_2.execution_configurations = [
            execution_script_1, execution_script_2
        ]

        for application in (application_1, application_2):
            db.session.add(application)

        # Testbeds
        testbeds = (
            Testbed("name_1", True, "slurm", "ssh", "user@server",
                    ['slurm']),
            Testbed("name_2", False, "slurm", "ssh", "user@server",
                    ['slurm']),
            Testbed("name_3", True, "slurm", "ssh", "user@server",
                    ['slurm', 'slurm:singularity']),
        )
        for testbed in testbeds:
            db.session.add(testbed)

        # Commit so the ids used by the deployment below are available
        db.session.commit()

        # NOTE(review): executable_id is filled with the id of an
        # ExecutionConfiguration, not of an Executable - confirm this is
        # intended.
        deployment = Deployment()
        deployment.executable_id = execution_script_1.id
        deployment.testbed_id = testbeds[0].id
        db.session.add(deployment)

        # Nodes
        node_1 = Node()
        node_1.name = "node_1"
        node_1.information_retrieved = True
        node_2 = Node()
        node_2.name = "node_2"
        node_2.information_retrieved = False
        for node in (node_1, node_2):
            db.session.add(node)

        # Execution
        execution = Execution()
        execution.execution_type = "execution_type"
        execution.status = "status"
        db.session.add(execution)

        db.session.commit()
Ejemplo n.º 8
0
    def test_initialization_execution(self):
        """Check that attributes set on a new Execution can be read back."""

        execution = Execution()
        execution.execution_type = "execution_type"
        execution.status = "status"

        self.assertEqual("execution_type", execution.execution_type)
        self.assertEqual("status", execution.status)
Ejemplo n.º 9
0
def add_resource(execution):
    """
    Add resources to a running execution.

    Only executions whose type is ``execute_type_singularity_pm`` and whose
    status is running are adapted; any other case is just logged. If the
    application defines a non-zero ``scaling_upper_bound`` and the execution
    already has that many extra jobs, no adaptation is attempted.

    The adaptation sources the testbed environment file and then runs::

        adapt_compss_resources <master_node> <master_job_id> CREATE SLURM-Cluster default <singularity_image>

    When a job id is obtained for the new job, a child execution in running
    status holding that id is attached to ``execution``, committed, and its
    nodes are registered.
    """

    if execution.execution_type != execute_type_singularity_pm:
        logging.info("Execution: " + execution.execution_type + " it is not compatible with add resource action")
        return

    logging.info("Executing type corresponds with SINGULARITY_PM, trying adaptation")

    if execution.status != Execution.__status_running__:
        logging.info("Execution is not in RUNNING status, no action can be done")
        return

    url = execution.execution_configuration.testbed.endpoint
    scaling_upper_bound = execution.execution_configuration.application.scaling_upper_bound
    enqueue_env_file = execution.execution_configuration.testbed.extra_config['enqueue_env_file']
    singularity_image_file = execution.execution_configuration.executable.singularity_image_file
    sbatch_id = execution.batch_id

    # A missing or zero upper bound means "no limit"
    if (scaling_upper_bound is not None and scaling_upper_bound != 0
            and scaling_upper_bound <= execution.get_number_extra_jobs()):
        logging.info('Execution already reached its maximum number of extra jobs, no adaptation possible')
        return

    node = find_first_node(sbatch_id, url)

    command = "source"
    params = [
        enqueue_env_file,
        ";",
        "adapt_compss_resources",
        node,
        sbatch_id,
        'CREATE SLURM-Cluster default',
        singularity_image_file,
    ]
    output = shell.execute_command(command, url, params)

    job_name = parse_add_resource_output(output)
    print(job_name)
    time.sleep(2)
    extra_job_id = get_job_id_after_adaptation(job_name, url)
    print(extra_job_id)

    # Both conditions must hold. The previous "or" was always true, so a
    # child execution was created even when no job id had been obtained.
    if extra_job_id is None or extra_job_id == '':
        return

    child = Execution()
    child.status = Execution.__status_running__
    child.execution_type = execute_type_singularity_pm
    child.batch_id = extra_job_id
    execution.children.append(child)
    db.session.commit()
    time.sleep(5)
    __add_nodes_to_execution__(child, url)
Ejemplo n.º 10
0
def execute_application(execution_configuration,
                        create_profile=False,
                        use_stored_profile=False):
    """
    Execute an application in the selected testbed, using the given
    execution configuration.

    A new Execution in submitted status is stored, and the launcher matching
    its execution type is started in a separate thread. The thread is
    returned. If the execution type is not recognised, the execution is
    marked as failed and nothing is returned.
    """

    # We create and store the execution
    execution = Execution()
    execution.execution_type = execution_configuration.execution_type
    execution.status = execute_status_submitted

    profile_folder = app.config['APP_PROFILE_FOLDER']

    db.session.add(execution)
    db.session.commit()

    # We pick the launcher for the type of execution; the PM launchers also
    # receive the profile options
    if execution.execution_type == execute_type_slurm_sbatch:
        launcher = execute_application_type_slurm_sbatch
        needs_profile = False
    elif execution.execution_type == execute_type_singularity_pm:
        launcher = execute_application_type_singularity_pm
        needs_profile = True
    elif execution.execution_type == execute_type_singularity_srun:
        launcher = execute_application_type_singularity_srun
        needs_profile = False
    elif execution.execution_type == execute_type_slurm_srun:
        launcher = execute_application_type_slurm_srun
        needs_profile = False
    elif execution.execution_type == Executable.__type_pm__:
        launcher = execute_application_type_pm
        needs_profile = True
    else:
        execution.status = execute_status_failed
        execution.output = "No support for execurtion type: " + execution.execution_type
        db.session.commit()
        return None

    launcher_args = (execution, execution_configuration.id)
    if needs_profile:
        launcher_args += (create_profile, use_stored_profile, profile_folder)

    worker = Thread(target=launcher, args=launcher_args)
    worker.start()
    return worker
Ejemplo n.º 11
0
def execute_application_type_slurm_sbatch(execution, identifier):
    """
    Launch an application through ``sbatch`` on a SLURM testbed.

    If the testbed category is not SLURM, or the testbed is off-line, the
    given execution is marked as failed with an explanatory output.
    Otherwise the executable file is submitted with ``sbatch``, a new
    Execution in running status carrying the sbatch id is appended to the
    execution configuration, and its nodes are registered.
    """

    execution_configuration, testbed, _deployment, executable = __get_srun_info__(
        execution, identifier)

    if testbed.category != Testbed.slurm_category:
        # If the category is not SLURM we can not execute the app
        execution.status = execute_status_failed
        execution.output = "Testbed does not support " + execute_type_slurm_sbatch + " applications"
        db.session.commit()
        return

    if not testbed.on_line:
        # If the testbed is off-line we can not execute the app
        execution.status = execute_status_failed
        execution.output = "Testbed is off-line"
        db.session.commit()
        return

    # Preparing the command to be executed
    command = "sbatch"
    endpoint = testbed.endpoint
    params = [executable.executable_file]

    logging.info("Launching execution of application: command: " +
                 command + " | endpoint: " + endpoint + " | params: " +
                 str(params))

    output = shell.execute_command(command, endpoint, params)
    print(output)

    # A new execution (not the one received) records the launched job
    launched = Execution()
    launched.execution_type = execution_configuration.execution_type
    launched.status = Execution.__status_running__
    launched.slurm_sbatch_id = __extract_id_from_sbatch__(output)
    execution_configuration.executions.append(launched)
    db.session.commit()

    # Add nodes
    __add_nodes_to_execution__(launched, endpoint)
Ejemplo n.º 12
0
def restart_execution(execution):
    """
    Restart an execution by launching a new child execution.

    A child in submitted status is attached to the execution, which is
    marked as restarted. If the configured execution type is
    ``execute_type_slurm_srun`` the child is launched and marked as running;
    for any other type the child is marked as failed.
    """

    execution_type = execution.execution_configuration.execution_type

    # We create the child execution
    child = Execution()
    child.execution_type = execution_type
    child.status = Execution.__status_submitted__
    execution.children.append(child)
    execution.status = Execution.__status_restarted__
    db.session.commit()

    if execution_type == execute_type_slurm_srun:
        execute_application_type_slurm_srun(
            child, execution.execution_configuration_id, True)
        child.status = Execution.__status_running__
    else:
        child.status = Execution.__status_failed__

    db.session.commit()
Ejemplo n.º 13
0
def __parse_output__(output, endpoint, execution_configuration, child_execution=None):
    """
    Record a launched job from the command output and add its nodes.

    The batch id is extracted from ``output``. If ``child_execution`` is
    given it is updated; otherwise a new Execution is created and appended
    to ``execution_configuration``. In both cases the execution is set to
    running status with the extracted batch id, committed, and its nodes are
    added using ``endpoint``.
    """

    sbatch_id = __extract_id_from_squeue__(output)

    execution = child_execution
    if not execution:
        # No child supplied: create the execution under the configuration
        execution = Execution()
        execution.execution_type = execution_configuration.execution_type
        execution_configuration.executions.append(execution)

    execution.batch_id = sbatch_id
    execution.status = Execution.__status_running__
    db.session.commit()

    # Add nodes
    __add_nodes_to_execution__(execution, endpoint)
Ejemplo n.º 14
0
def __execute_pm_applications__(execution, identifier, create_profile,
                                use_storage_profile, profile_folder,
                                singularity):
    """
    Launch a PM application in a testbed through ``enqueue_compss``.

    The execution configuration is loaded by ``identifier`` together with
    its testbed, deployment and executable. After the launch a new Execution
    in running status, holding the extracted sbatch id, is appended to the
    configuration and its nodes are added.

    - execution: not read; the name is rebound to the new Execution.
    - create_profile: when true, a new profile path is generated under
      ``profile_folder``, passed as --output_profile and stored in the
      execution configuration.
    - use_storage_profile: when true, the profile file of the configuration
      is passed as --input_profile.
    - singularity: when true, the container image and fixed container paths
      are passed; otherwise the app folder of the executable is used.
    """

    # A profile path is only generated when a new profile is requested
    if create_profile:
        profile_file = profile_folder + '/' + str(uuid.uuid4()) + '.profile'
    else:
        profile_file = ''

    # Everything is queried again to avoid reusing objects from other thread
    execution_configuration = db.session.query(
        ExecutionConfiguration).filter_by(id=identifier).first()
    testbed = db.session.query(Testbed).filter_by(
        id=execution_configuration.testbed_id).first()
    deployment = db.session.query(Deployment).filter_by(
        executable_id=execution_configuration.executable_id,
        testbed_id=testbed.id).first()
    executable = db.session.query(Executable).filter_by(
        id=execution_configuration.executable_id).first()

    # Preparing the command to be executed
    command = "source"
    endpoint = testbed.endpoint
    params = [
        testbed.extra_config['enqueue_env_file'],
        ";",
        "enqueue_compss",
        "--sc_cfg=" + testbed.extra_config['enqueue_compss_sc_cfg'],
        "--num_nodes=" + str(execution_configuration.num_nodes),
        "--gpus_per_node=" + str(execution_configuration.num_gpus_per_node),
        "--cpus_per_node=" + str(execution_configuration.num_cpus_per_node),
    ]

    if singularity:
        # TODO: both paths below are hard-coded; the app dir should come
        # from executable.singularity_app_folder
        params.extend([
            "--container_image=" + deployment.path,
            "--container_compss_path=/opt/TANGO/TANGO_ProgrammingModel/COMPSs/",
            "--appdir=/apps/application/",
        ])
    else:
        params.append("--appdir=" + executable.singularity_app_folder)
    params.append("--exec_time=" + str(execution_configuration.exec_time))

    # Profile options
    if create_profile:
        params.append("--output_profile=" + profile_file)
    if use_storage_profile:
        params.append("--input_profile=" +
                      execution_configuration.profile_file)
    params.extend([execution_configuration.compss_config,
                   execution_configuration.command])

    logging.info("Launching execution of application: command: " + command +
                 " | endpoint: " + endpoint + " | params: " + str(params))

    output = shell.execute_command(command, endpoint, params)
    sbatch_id = __extract_id_from_sigularity_pm_app__(output)

    execution = Execution()
    execution.execution_type = execution_configuration.execution_type
    execution.status = Execution.__status_running__
    execution.slurm_sbatch_id = sbatch_id
    execution_configuration.executions.append(execution)
    # If we created the profile, we add it to the execution configuration
    if create_profile:
        execution_configuration.profile_file = profile_file
    db.session.commit()

    # Add nodes
    time.sleep(5)
    __add_nodes_to_execution__(execution, endpoint)
Ejemplo n.º 15
0
    def test_patch_execution_preprocessor(self, mock_restart_execution, mock_executor_stop, mock_executor_cancel, mock_executor_add, mock_executor_remove):
        """
        Exercise the PATCH endpoint of an execution.

        Covers: unknown execution id, invalid status value, payload without
        a recognised field, CANCEL, add_resource, remove_resource, and the
        STOP / RESTART requests on a checkpointable application, including
        the rejection when the execution is in the wrong state.
        """

        def patch_execution(execution_id, payload):
            # Send a JSON PATCH request for the given execution id
            return self.client.patch("/api/v1/executions/" + str(execution_id),
                                     data=json.dumps(payload),
                                     content_type='application/json')

        # First we verify the error for an execution id that does not exist
        data = {'status': 'PEPITO'}

        response = patch_execution(100, data)

        self.assertEqual(409, response.status_code)
        self.assertEqual(
            'No execution by the given id',
            response.json['message'])

        # Preparing the data for the rest of the test
        testbed = Testbed("name", False, "slurm", "ssh", "user@server", ['slurm'])
        db.session.add(testbed)
        db.session.commit()
        application = Application()
        application.name = "xxx"
        application.application_type = "XXX"
        db.session.add(application)
        db.session.commit()
        execution_configuration = ExecutionConfiguration()
        execution_configuration.testbed = testbed
        execution_configuration.application = application
        db.session.add(execution_configuration)
        db.session.commit()
        execution = Execution()
        execution.execution_type = Executable.__type_singularity_srun__
        execution.status = Execution.__status_running__
        execution.execution_configuration = execution_configuration

        db.session.add(execution)
        db.session.commit()

        # Unknown status value
        response = patch_execution(execution.id, data)
        self.assertEqual(409, response.status_code)
        self.assertEqual(
            'No valid state to try to change',
            response.json['message'])

        # Payload without any recognised field
        data = {'PEPITO': 'PEPITO'}
        response = patch_execution(execution.id, data)

        self.assertEqual(409, response.status_code)
        self.assertEqual(
            'No status, remove_resource, or add_resource field in the payload',
            response.json['message'])

        # Cancel
        data = {'status': 'CANCEL'}
        response = patch_execution(execution.id, data)

        self.assertEqual(200, response.status_code)
        mock_executor_cancel.assert_called_with(execution, 'user@server')

        # Add resource
        data = {'add_resource': ''}
        response = patch_execution(execution.id, data)

        mock_executor_add.assert_called_with(execution)

        # Remove resource
        data = {'remove_resource': ''}
        response = patch_execution(execution.id, data)

        mock_executor_remove.assert_called_with(execution)

        # Adding Checkpointable changes of status at ALDE level.
        execution.status = Execution.__status_running__
        application.application_type = Application.CHECKPOINTABLE
        db.session.commit()

        data = {'status': 'STOP'}
        response = patch_execution(execution.id, data)

        mock_executor_stop.assert_called_with(execution)

        # STOP is rejected when the execution is in cancel status
        execution.status = Execution.__status_cancel__
        db.session.commit()
        response = patch_execution(execution.id, data)
        self.assertEqual(409, response.status_code)
        self.assertEqual(
            'Execution is not in right state',
            response.json['message'])

        # Checkpointable restart
        execution.status = Execution.__status_stopped__
        db.session.commit()
        data = {'status': 'RESTART'}

        response = patch_execution(execution.id, data)

        mock_restart_execution.assert_called_with(execution)

        # RESTART is rejected when the execution is in cancel status
        execution.status = Execution.__status_cancel__
        db.session.commit()
        response = patch_execution(execution.id, data)
        self.assertEqual(409, response.status_code)
        self.assertEqual(
            'Execution is not in right state',
            response.json['message'])
Ejemplo n.º 16
0
    def test_execute_application_type_torque_qsub(self, mock_shell,
                                                  mock_add_nodes):
        """
        Verify torque.execute_batch() for the torque qsub execution type.

        Three scenarios are checked: a testbed of the wrong category, an
        off-line testbed (both must mark the execution as failed), and a
        successful launch through ``qsub``.
        """

        def submitted_execution():
            # Fresh, unsaved execution of the torque qsub type in submitted
            # status
            new_execution = Execution()
            new_execution.execution_type = Executable.__type_torque_qsub__
            new_execution.status = Execution.__status_submitted__
            return new_execution

        # First we verify that the testbed is of type TORQUE to be able
        # to execute it, in this case it should give an error since it is
        # not of type torque

        # We define the different entities necessary for the test.
        testbed = Testbed(
            name="nova2",
            on_line=True,
            category="xxxx",
            protocol="SSH",
            endpoint="*****@*****.**",
            package_formats=['sbatch', 'SINGULARITY'],
            extra_config={
                "enqueue_compss_sc_cfg":
                "nova.cfg",
                "enqueue_env_file":
                "/home_nfs/home_ejarquej/installations/rc1707/COMPSs/compssenv"
            })
        db.session.add(testbed)

        application = Application(name="super_app")
        db.session.add(application)
        db.session.commit()  # So application and testbed get an id

        executable = Executable()
        executable.compilation_type = Executable.__type_torque_qsub__
        executable.executable_file = "pepito.sh"
        db.session.add(executable)
        db.session.commit()  # We do this so executable gets an id

        deployment = Deployment()
        deployment.testbed_id = testbed.id
        deployment.executable_id = executable.id
        db.session.add(deployment)

        execution_config = ExecutionConfiguration()
        execution_config.execution_type = Executable.__type_torque_qsub__
        execution_config.application = application
        execution_config.testbed = testbed
        execution_config.executable = executable
        db.session.add(execution_config)
        db.session.commit()

        execution = submitted_execution()

        torque.execute_batch(execution, execution_config.id)

        self.assertEqual(Execution.__status_failed__, execution.status)
        self.assertEqual("Testbed does not support TORQUE:QSUB applications",
                         execution.output)

        # If the testbed is off-line, execution isn't allowed either
        testbed.category = Testbed.torque_category
        testbed.on_line = False
        db.session.commit()

        execution = submitted_execution()

        torque.execute_batch(execution, execution_config.id)

        self.assertEqual(Executable.__type_torque_qsub__,
                         execution.execution_type)
        self.assertEqual(Execution.__status_failed__, execution.status)
        self.assertEqual("Testbed is off-line", execution.output)

        # Test executing
        output = b'1208.cloudserver'
        mock_shell.return_value = output

        testbed.category = Testbed.torque_category
        testbed.on_line = True
        db.session.commit()

        execution = submitted_execution()

        torque.execute_batch(execution, execution_config.id)

        mock_shell.assert_called_with("qsub", "*****@*****.**",
                                      ["pepito.sh"])
        # The launched execution is looked up through its configuration
        execution = db.session.query(Execution).filter_by(
            execution_configuration_id=execution_config.id).first()
        self.assertEqual(execution.execution_type,
                         execution_config.execution_type)
        self.assertEqual(execution.status, Execution.__status_running__)
        self.assertEqual("1208.cloudserver", execution.batch_id)