Esempio n. 1
0
    def execute_pattern(self, pattern, resource):
        """Execute an all-pairs comparison pattern on ``resource``.

        The run has two phases.  First, one compute unit per element of set 1
        (and of set 2, when ``pattern.set2_elements()`` is not ``None``) is
        submitted; each unit's output file is linked into the ``staging:///``
        area.  Second, one comparison compute unit is submitted per pair of
        element windows, with the previously staged files linked in as input.
        With a single set only the windows ``j >= i`` are compared and the
        second input list is not staged again when ``i == j``.

        The timing information returned by ``extract_timing_info`` for the
        comparison phase is appended to ``pattern._execution_profile`` under
        the step name ``"AllPairs"``.

        A ``KeyboardInterrupt`` raised while units are being built, submitted
        or awaited is caught and its traceback printed; any other exception
        propagates to the caller.

        :param pattern: all-pairs pattern object providing the element sets,
            window sizes and kernel factories used below.
        :param resource: execution context; its ``_umgr`` unit manager,
            ``_cores`` and ``_resource_key`` attributes are used.
        """
        pattern_start_time = datetime.datetime.now()
        log = self.get_logger()

        STAGING_AREA = 'staging:///'

        def unit_state_cb(unit, state):
            # Unit-manager callback: report units that reach a final state.
            if state == radical.pilot.DONE:
                log.info("Task {0} state has finished succefully.".format(unit.uid))

            if state == radical.pilot.FAILED:
                # Surface the unit's stderr so the failure can be diagnosed.
                log.error("Task {0} FAILED.".format(unit.uid))
                log.error("Error: {0}".format(unit.stderr))

        def element_descriptions(initializer, count):
            # Build one CU description per element (1-based) using the given
            # kernel factory; each unit links its output into the staging area.
            descriptions = []
            for element in range(1, count + 1):
                kernel = initializer(element=element)
                link_out_data = kernel.get_arg("--filename=")
                kernel._bind_to_resource(resource._resource_key)
                log.debug("Kernels : {0}, Name: {1}".format(kernel, dir(kernel)))

                output_file = {'source': link_out_data,
                               'target': os.path.join(STAGING_AREA, link_out_data),
                               'action': radical.pilot.LINK}
                cudesc = radical.pilot.ComputeUnitDescription()
                cudesc.pre_exec = kernel._cu_def_pre_exec
                cudesc.executable = kernel._cu_def_executable
                cudesc.arguments = kernel.arguments
                cudesc.mpi = kernel.uses_mpi
                cudesc.output_staging = [output_file]
                log.debug("Pre Exec: {0} Executable: {1} Arguments: {2} MPI: {3} Output: {4}".format(
                    cudesc.pre_exec, kernel._cu_def_executable, cudesc.arguments,
                    cudesc.mpi, cudesc.output_staging))
                descriptions.append(cudesc)
            return descriptions

        def file_list_arg(kernel, flag):
            # Return the kernel argument as a list of file names.  The value is
            # either the literal representation of a list or a single bare name;
            # the latter is not a valid Python literal and is wrapped in a list.
            raw = kernel.get_arg(flag)
            try:
                return ast.literal_eval(raw)
            except (ValueError, SyntaxError, TypeError):
                return [raw]

        def staged_links(names, count):
            # Staging directives linking the first `count` names out of the
            # staging area into the unit's working directory.
            return [{'source': os.path.join(STAGING_AREA, names[k]),
                     'target': names[k],
                     'action': radical.pilot.LINK} for k in range(count)]

        def comparison_description(i, j, size1, size2, stage_second):
            # Build the CU description comparing window i (size1 elements of
            # the first set) with window j (size2 elements of the second set).
            kernel = pattern.element_comparison(elements1=range(i, i + size1),
                                                elements2=range(j, j + size2))
            link_input1 = file_list_arg(kernel, "--inputfile1=")
            link_input2 = file_list_arg(kernel, "--inputfile2=")
            link_output = kernel.get_arg("--outputfile=")
            kernel._bind_to_resource(resource._resource_key)
            log.info("Kernels : {0}, Name: {1}".format(kernel, dir(kernel)))
            log.debug("i = {0}, j = {1}, window size = {2}".format(i, j, size1))
            log.debug("Link Input 1 = {0}".format(link_input1))
            log.debug("Link Input 2 = {0}".format(link_input2))

            input_file1 = staged_links(link_input1, size1)
            # A window compared with itself already has its files staged by
            # the first list, so the caller disables the second list there.
            input_file2 = staged_links(link_input2, size2) if stage_second else []

            cudesc = radical.pilot.ComputeUnitDescription()
            cudesc.name = "comp; {el11};{el21}".format(el11=i, el21=j)
            cudesc.pre_exec = kernel._cu_def_pre_exec
            cudesc.executable = kernel._cu_def_executable
            cudesc.arguments = kernel.arguments
            cudesc.mpi = kernel.uses_mpi
            log.debug("Input File 1: {0}".format(input_file1))
            log.debug("Input File 2: {0}".format(input_file2))

            if kernel._cu_def_input_data is None:
                log.debug("Input Staging without Kernel CU DEF Input Data")
                cudesc.input_staging = input_file1 + input_file2
            else:
                log.debug("Input Staging with Kernel CU DEF Input Data")
                cudesc.input_staging = kernel._cu_def_input_data + input_file1 + input_file2
            cudesc.output_staging = [link_output]
            log.debug("Pre Exec: {0} Executable: {1} Arguments: {2} MPI: {3} Input: {4} Output: {5}".format(
                cudesc.pre_exec, kernel._cu_def_executable, cudesc.arguments,
                cudesc.mpi, cudesc.input_staging, cudesc.output_staging))
            return cudesc

        #-----------------------------------------------------------------------
        # Starting Plugin Execution

        set1 = pattern.set1_elements()
        set2 = pattern.set2_elements()
        log.debug("Set 1 is {0}".format(set1))
        log.debug("Set 2 is {0}".format(set2))

        num_set1 = len(set1)
        num_set2 = len(set2) if set2 is not None else None

        if set2 is None:
            log.info("Number of Elements {0}".format(num_set1))
            log.info("Executing All Pairs Pattern on the set {0} with {1} cores on {2}"
                     .format(set1, resource._cores, resource._resource_key))
        else:
            log.info("Number of Elements of the First Set {0}".format(num_set1))
            log.info("Number of Elements of the Second Set {0}".format(num_set2))
            log.info("Executing All Pairs Pattern on the sets {0}-{1} with {2} cores on {3}"
                     .format(set1, set2, resource._cores, resource._resource_key))

        try:
            resource._umgr.register_callback(unit_state_cb)

            # Phase 1: create the elements of both sets in a single batch.
            log.info("Creating the Elements of Set 1")
            element_cuds = element_descriptions(pattern.set1element_initialization, num_set1)

            if set2 is not None:
                log.info("Creating the Elements of Set 2")
                element_cuds += element_descriptions(pattern.set2element_initialization, num_set2)

            resource._umgr.submit_units(element_cuds)
            resource._umgr.wait_units()

            # Phase 2: compare the elements window by window.
            windowsize1 = pattern._windowsize1
            windowsize2 = pattern._windowsize2

            step_timings = {
                "name": "AllPairs",
                "timings": {}
            }
            step_start_time_abs = datetime.datetime.now()

            all_cus = []
            for i in range(1, num_set1 + 1, windowsize1):
                if set2 is None:
                    # Single set: only the upper triangle (j >= i) is needed.
                    for j in range(i, num_set1 + 1, windowsize1):
                        all_cus.append(
                            comparison_description(i, j, windowsize1, windowsize1, i != j))
                else:
                    for j in range(1, num_set2 + 1, windowsize2):
                        all_cus.append(
                            comparison_description(i, j, windowsize1, windowsize2, True))

            sub_unit = resource._umgr.submit_units(all_cus)
            resource._umgr.wait_units()
            step_end_time_abs = datetime.datetime.now()
            log.info("Pattern execution successful.")

            # Process CU information and append it to the dictionary
            tinfo = extract_timing_info(sub_unit, pattern_start_time,
                                        step_start_time_abs, step_end_time_abs)
            log.debug("Extracted timings Information")

            # Nothing to average when no comparison unit was submitted.
            if sub_unit:
                total_unit_time = sum((unit.stop_time - unit.start_time).total_seconds()
                                      for unit in sub_unit)
                mean_unit_time = total_unit_time / len(sub_unit)
                log.debug("Mean CU execution time is %f" % mean_unit_time)

            step_timings['timings'].update(tinfo)
            log.debug("Created step timings")

            # Write the whole thing to the profiling dict
            pattern._execution_profile.append(step_timings)
            log.debug("Wrote the whole thing to the profiling dict")
        except KeyboardInterrupt:
            traceback.print_exc()
    def execute_pattern(self, pattern, resource):
        """Execute a replica-exchange pattern on ``resource``.

        For each of ``pattern.nr_cycles`` cycles the method

        1. builds the input files and an MD kernel for every replica returned
           by ``pattern.get_replicas()`` and submits one compute unit each,
        2. waits for all units and aborts via ``sys.exit`` if any unit did not
           reach the ``DONE`` state, reporting the failed units,
        3. computes the swap matrix and performs the parameter exchanges
           locally through ``pattern.exchange`` / ``pattern.perform_swap``.

        When the environment variable ``RADICAL_ENMD_PROFILING`` is ``"1"``,
        timing records for every MD step and every local exchange step are
        appended to ``pattern._execution_profile`` (which is reset first).

        :param pattern: replica-exchange pattern; must define ``nr_cycles``.
        :param resource: execution context; its ``_pmgr``, ``_pilot``,
            ``_umgr`` and ``_resource_key`` attributes are used.
        :raises Exception: any exception raised during execution is logged
            and re-raised unchanged.
        :raises SystemExit: if an MD unit fails or a profiling timestamp is
            not a ``datetime.datetime`` instance.
        """
        logger = self.get_logger()

        try:
            try:
                cycles = pattern.nr_cycles + 1
            except Exception:
                logger.exception("Number of cycles (nr_cycles) must be defined for pattern ReplicaExchange!")
                raise

            profiling = os.getenv("RADICAL_ENMD_PROFILING", "0") == "1"
            if profiling:
                pattern._execution_profile = []

            # shared data needs to be processed here
            #

            # Pilot must be active
            resource._pmgr.wait_pilots(resource._pilot.uid, "Active")

            if profiling:
                pattern_start_time = datetime.datetime.now()

            replicas = pattern.get_replicas()

            for c in range(1, cycles):

                if profiling:
                    step_timings = {"name": "md_run_{0}".format(c), "timings": {}}
                    step_start_time_abs = datetime.datetime.now()

                md_units = []
                for r in replicas:

                    logger.info("Building input files for replica %d" % r.id)
                    pattern.build_input_file(r)
                    logger.info("Preparing replica %d for MD run" % r.id)
                    r_kernel = pattern.prepare_replica_for_md(r)
                    r_kernel._bind_to_resource(resource._resource_key)

                    # need to process data directives here
                    #

                    cu = radical.pilot.ComputeUnitDescription()
                    cu.pre_exec = r_kernel._cu_def_pre_exec
                    cu.executable = r_kernel._cu_def_executable
                    cu.arguments = r_kernel.arguments
                    cu.mpi = r_kernel.uses_mpi
                    cu.cores = r_kernel.cores
                    cu.input_staging = r_kernel._cu_def_input_data
                    cu.output_staging = r_kernel._cu_def_output_data

                    md_units.append(resource._umgr.submit_units(cu))

                logger.info("Performing MD step for replicas")
                resource._umgr.wait_units()

                if profiling:
                    step_end_time_abs = datetime.datetime.now()

                failures = [
                    " * MD step: Unit {0} failed with an error: {1}\n".format(unit.uid, unit.stderr)
                    for unit in md_units
                    if unit.state != radical.pilot.DONE
                ]
                if failures:
                    # Report which units failed instead of exiting silently;
                    # passing the report to sys.exit also yields a non-zero
                    # exit status.
                    failure_report = "".join(failures)
                    logger.error(failure_report)
                    sys.exit(failure_report)

                if profiling:
                    # Sanity-check the timestamps before computing timings.
                    if not isinstance(pattern_start_time, datetime.datetime):
                        sys.exit("Ensemble MD Toolkit Error: pattern_start_time is not datetime.datetime instance.")
                    if not isinstance(step_start_time_abs, datetime.datetime):
                        sys.exit(
                            "Ensemble MD Toolkit Error: step_start_time_abs for {0} is not datetime.datetime instance.".format(
                                step_timings["name"]
                            )
                        )
                    if not isinstance(step_end_time_abs, datetime.datetime):
                        sys.exit(
                            "Ensemble MD Toolkit Error: step_end_time_abs for {0} is not datetime.datetime instance.".format(
                                step_timings["name"]
                            )
                        )

                    # Process CU information and append it to the dictionary
                    tinfo = extract_timing_info(
                        md_units, pattern_start_time, step_start_time_abs, step_end_time_abs
                    )
                    step_timings["timings"].update(tinfo)

                    # Write the whole thing to the profiling dict
                    pattern._execution_profile.append(step_timings)
                # ---------------------------------------------------------------

                # NOTE(review): c comes from range(1, cycles), so this
                # condition always holds and the exchange runs after every
                # cycle, including the last one. Confirm that is intended.
                if c < cycles:
                    if profiling:
                        step_timings = {"name": "local_exchange_{0}".format(c), "timings": {}}
                        step_start_time_abs = datetime.datetime.now()

                    # computing swap matrix
                    logger.info("Computing swap matrix")
                    swap_matrix = pattern.get_swap_matrix(replicas)

                    # this is actual exchange
                    for r_i in replicas:
                        r_j = pattern.exchange(r_i, replicas, swap_matrix)
                        if r_j != r_i:
                            # swap parameters
                            logger.info(
                                "Performing exchange of parameters between replica %d and replica %d" % (r_j.id, r_i.id)
                            )
                            pattern.perform_swap(r_i, r_j)

                    if profiling:
                        step_end_time_abs = datetime.datetime.now()

                        # The exchange runs locally, so only the wall-clock
                        # boundaries of the step are recorded.
                        step_timings["timings"].update({
                            "step_start_time": {
                                "abs": step_start_time_abs,
                                "rel": step_start_time_abs - pattern_start_time,
                            },
                            "step_end_time": {
                                "abs": step_end_time_abs,
                                "rel": step_end_time_abs - pattern_start_time,
                            },
                        })

                        # Write the whole thing to the profiling dict
                        pattern._execution_profile.append(step_timings)
                    # --------------------------------------------------------------------

            # End of simulation loop
            # ------------------------

        except Exception as ex:
            logger.exception("Fatal error during execution: {0}.".format(str(ex)))
            raise
Esempio n. 3
0
    def execute_pattern(self, pattern, resource):

        pattern_start_time = datetime.datetime.now()

        #-----------------------------------------------------------------------
        #
        def unit_state_cb (unit, state) :

            if state == radical.pilot.FAILED:
                self.get_logger().error("ComputeUnit error: STDERR: {0}, STDOUT: {0}".format(unit.stderr, unit.stdout))
                self.get_logger().error("Pattern execution FAILED.")


        self.get_logger().info("Executing simulation-analysis loop with {0} iterations on {1} allocated core(s) on '{2}'".format(pattern.iterations, resource._cores, resource._resource_key))

        working_dirs = {}
        all_cus = []

        pattern._execution_profile = []

        try:
            resource._umgr.register_callback(unit_state_cb)

            ########################################################################
            # execute pre_loop
            #
            try:
                ################################################################
                # EXECUTE PRE-LOOP

                step_timings = {
                    "name": "pre_loop",
                    "timings": {}
                }
                step_start_time_abs = datetime.datetime.now()

                pre_loop = pattern.pre_loop()
                pre_loop._bind_to_resource(resource._resource_key)

                cu = radical.pilot.ComputeUnitDescription()
                cu.name = "pre_loop"

                cu.pre_exec       = pre_loop._cu_def_pre_exec
                cu.executable     = pre_loop._cu_def_executable
                cu.arguments      = pre_loop.arguments
                cu.mpi            = pre_loop.uses_mpi
                cu.input_staging  = pre_loop._cu_def_input_data
                cu.output_staging = pre_loop._cu_def_output_data

                self.get_logger().debug("Created pre_loop CU: {0}.".format(cu.as_dict()))

                unit = resource._umgr.submit_units(cu)
                all_cus.append(unit)

                self.get_logger().info("Submitted ComputeUnit(s) for pre_loop step.")
                self.get_logger().info("Waiting for ComputeUnit(s) in pre_loop step to complete.")
                resource._umgr.wait_units()
                self.get_logger().info("Pre_loop completed.")

                step_end_time_abs = datetime.datetime.now()

                if unit.state != radical.pilot.DONE:
                    raise EnsemblemdError("Pre-loop CU failed with error: {0}".format(unit.stdout))
                pre_loop_cu = [unit]
                working_dirs["pre_loop"] = saga.Url(unit.working_directory).path

                # Process CU information and append it to the dictionary
                tinfo = extract_timing_info(pre_loop_cu, pattern_start_time, step_start_time_abs, step_end_time_abs)

                for key, val in tinfo.iteritems():
                    step_timings['timings'][key] = val

                # Write the whole thing to the profiling dict
                pattern._execution_profile.append(step_timings)

            except Exception:
                # Doesn't exist. That's fine as it is not mandatory.
                self.get_logger().info("pre_loop() not defined. Skipping.")
                pass

            ########################################################################
            # execute simulation analysis loop
            #
            for iteration in range(1, pattern.iterations+1):

                working_dirs['iteration_{0}'.format(iteration)] = {}

                ################################################################
                # EXECUTE SIMULATION STEPS
                step_timings = {
                    "name": "simulation_iteration_{0}".format(iteration),
                    "timings": {}
                }
                step_start_time_abs = datetime.datetime.now()

                if isinstance(pattern.simulation_step(iteration=1, instance=1),list):
                    num_sim_kerns = len(pattern.simulation_step(iteration=1, instance=1))
                else:
                    num_sim_kerns = 1
                #print num_sim_kerns

                all_sim_cus = []

                for kern_step in range(0,num_sim_kerns):

                    s_units = []
                    for s_instance in range(1, pattern._simulation_instances+1):

                        if isinstance(pattern.simulation_step(iteration=iteration, instance=s_instance),list):
                            sim_step = pattern.simulation_step(iteration=iteration, instance=s_instance)[kern_step]
                        else:
                            sim_step = pattern.simulation_step(iteration=iteration, instance=s_instance)

                        sim_step._bind_to_resource(resource._resource_key)

                        # Resolve all placeholders
                        #if sim_step.link_input_data is not None:
                        #    for i in range(len(sim_step.link_input_data)):
                        #        sim_step.link_input_data[i] = resolve_placeholder_vars(working_dirs, s_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "simulation", sim_step.link_input_data[i])


                        cud = radical.pilot.ComputeUnitDescription()
                        cud.name = "sim ;{iteration} ;{instance}".format(iteration=iteration, instance=s_instance)

                        cud.pre_exec       = sim_step._cu_def_pre_exec
                        cud.executable     = sim_step._cu_def_executable
                        cud.arguments      = sim_step.arguments
                        cud.mpi            = sim_step.uses_mpi
                        cud.input_staging  = None
                        cud.output_staging = None

                        # INPUT DATA:
                        #------------------------------------------------------------------------------------------------------------------
                        # upload_input_data
                        data_in = []
                        if sim_step._kernel._upload_input_data is not None:
                            if isinstance(sim_step._kernel._upload_input_data,list):
                                pass
                            else:
                                sim_step._kernel._upload_input_data = [sim_step._kernel._upload_input_data]
                            for i in range(0,len(sim_step._kernel._upload_input_data)):
                                var=resolve_placeholder_vars(working_dirs, s_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "simulation", sim_step._kernel._upload_input_data[i])
                                if len(var.split('>')) > 1:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': var.split('>')[1].strip()
                                        }
                                else:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': os.path.basename(var.split('>')[0].strip())
                                        }
                                data_in.append(temp)

                        if cud.input_staging is None:
                            cud.input_staging = data_in
                        else:
                            cud.input_staging += data_in
                        #------------------------------------------------------------------------------------------------------------------

                        #------------------------------------------------------------------------------------------------------------------
                        # link_input_data
                        data_in = []
                        if sim_step._kernel._link_input_data is not None:
                            if isinstance(sim_step._kernel._link_input_data,list):
                                pass
                            else:
                                sim_step._kernel._link_input_data = [sim_step._kernel._link_input_data]
                            for i in range(0,len(sim_step._kernel._link_input_data)):
                                var=resolve_placeholder_vars(working_dirs, s_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "simulation", sim_step._kernel._link_input_data[i])
                                if len(var.split('>')) > 1:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': var.split('>')[1].strip(),
                                            'action': radical.pilot.LINK
                                        }
                                else:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': os.path.basename(var.split('>')[0].strip()),
                                            'action': radical.pilot.LINK
                                        }
                                data_in.append(temp)

                        if cud.input_staging is None:
                            cud.input_staging = data_in
                        else:
                            cud.input_staging += data_in
                        #------------------------------------------------------------------------------------------------------------------

                        #------------------------------------------------------------------------------------------------------------------
                        # copy_input_data
                        data_in = []
                        if sim_step._kernel._copy_input_data is not None:
                            if isinstance(sim_step._kernel._copy_input_data,list):
                                pass
                            else:
                                sim_step._kernel._copy_input_data = [sim_step._kernel._copy_input_data]
                            for i in range(0,len(sim_step._kernel._copy_input_data)):
                                var=resolve_placeholder_vars(working_dirs, s_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "simulation", sim_step._kernel._copy_input_data[i])
                                if len(var.split('>')) > 1:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': var.split('>')[1].strip(),
                                            'action': radical.pilot.COPY
                                        }
                                else:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': os.path.basename(var.split('>')[0].strip()),
                                            'action': radical.pilot.COPY
                                        }
                                data_in.append(temp)

                        if cud.input_staging is None:
                            cud.input_staging = data_in
                        else:
                            cud.input_staging += data_in
                        #------------------------------------------------------------------------------------------------------------------

                        #------------------------------------------------------------------------------------------------------------------
                        # download input data
                        if sim_step.download_input_data is not None:
                            data_in  = sim_step.download_input_data
                            if cud.input_staging is None:
                                cud.input_staging = data_in
                            else:
                                cud.input_staging += data_in
                        #------------------------------------------------------------------------------------------------------------------

                        # OUTPUT DATA:
                        #------------------------------------------------------------------------------------------------------------------
                        # copy_output_data
                        data_out = []
                        if sim_step._kernel._copy_output_data is not None:
                            if isinstance(sim_step._kernel._copy_output_data,list):
                                pass
                            else:
                                sim_step._kernel._copy_output_data = [sim_step._kernel._copy_output_data]
                            for i in range(0,len(sim_step._kernel._copy_output_data)):
                                var=resolve_placeholder_vars(working_dirs, s_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "simulation", sim_step._kernel._copy_output_data[i])
                                if len(var.split('>')) > 1:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': var.split('>')[1].strip(),
                                            'action': radical.pilot.COPY
                                        }
                                else:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': os.path.basename(var.split('>')[0].strip()),
                                            'action': radical.pilot.COPY
                                        }
                                data_out.append(temp)

                        if cud.output_staging is None:
                            cud.output_staging = data_out
                        else:
                            cud.output_staging += data_out
                        #------------------------------------------------------------------------------------------------------------------

                        #------------------------------------------------------------------------------------------------------------------
                        # download_output_data
                        data_out = []
                        if sim_step._kernel._download_output_data is not None:
                            if isinstance(sim_step._kernel._download_output_data,list):
                                pass
                            else:
                                sim_step._kernel._download_output_data = [sim_step._kernel._download_output_data]
                            for i in range(0,len(sim_step._kernel._download_output_data)):
                                var=resolve_placeholder_vars(working_dirs, s_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "simulation", sim_step._kernel._download_output_data[i])
                                if len(var.split('>')) > 1:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': var.split('>')[1].strip()
                                        }
                                else:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': os.path.basename(var.split('>')[0].strip())
                                        }
                                data_out.append(temp)

                        if cud.output_staging is None:
                            cud.output_staging = data_out
                        else:
                            cud.output_staging += data_out
                        #------------------------------------------------------------------------------------------------------------------


                        if sim_step.cores is not None:
                            cud.cores = sim_step.cores

                        s_units.append(cud)

                        if sim_step.get_instance_type() == 'single':
                            break
                        
                    self.get_logger().debug("Created simulation CU: {0}.".format(cud.as_dict()))
                    s_cus = resource._umgr.submit_units(s_units)
                    all_cus.extend(s_cus)
                    all_sim_cus.extend(s_cus)

                    self.get_logger().info("Submitted tasks for simulation iteration {0}.".format(iteration))
                    self.get_logger().info("Waiting for simulations in iteration {0}/ kernel {1}: {2} to complete.".format(iteration,kern_step+1,sim_step.name))
                    resource._umgr.wait_units()
                    self.get_logger().info("Simulations in iteration {0}/ kernel {1}: {2} completed.".format(iteration,kern_step+1,sim_step.name))

                    failed_units = ""
                    for unit in s_cus:
                        if unit.state != radical.pilot.DONE:
                            failed_units += " * Simulation task {0} failed with an error: {1}\n".format(unit.uid, unit.stderr)

                step_end_time_abs = datetime.datetime.now()

                # TODO: ensure working_dir <-> instance mapping
                i = 0
                for cu in s_cus:
                    i += 1
                    working_dirs['iteration_{0}'.format(iteration)]['simulation_{0}'.format(i)] = saga.Url(cu.working_directory).path
       
                # Process CU information and append it to the dictionary
                tinfo = extract_timing_info(all_sim_cus, pattern_start_time, step_start_time_abs, step_end_time_abs)
                for key, val in tinfo.iteritems():
                    step_timings['timings'][key] = val

                # Write the whole thing to the profiling dict
                pattern._execution_profile.append(step_timings)


                ################################################################
                # EXECUTE ANALYSIS STEPS
                step_timings = {
                    "name": "analysis_iteration_{0}".format(iteration),
                    "timings": {}
                }
                step_start_time_abs = datetime.datetime.now()

                if isinstance(pattern.analysis_step(iteration=1, instance=1),list):
                    num_ana_kerns = len(pattern.analysis_step(iteration=1, instance=1))
                else:
                    num_ana_kerns = 1
                #print num_ana_kerns

                all_ana_cus = []

                for kern_step in range(0,num_ana_kerns):

                    a_units = []
                    for a_instance in range(1, pattern._analysis_instances+1):

                        if isinstance(pattern.analysis_step(iteration=iteration, instance=a_instance),list):
                            ana_step = pattern.analysis_step(iteration=iteration, instance=a_instance)[kern_step]
                        else:
                            ana_step = pattern.analysis_step(iteration=iteration, instance=a_instance)

                        ana_step._bind_to_resource(resource._resource_key)

                        # Resolve all placeholders
                        #if ana_step.link_input_data is not None:
                        #    for i in range(len(ana_step.link_input_data)):
                        #        ana_step.link_input_data[i] = resolve_placeholder_vars(working_dirs, a_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "analysis", ana_step.link_input_data[i])

                        cud = radical.pilot.ComputeUnitDescription()
                        cud.name = "ana ; {iteration}; {instance}".format(iteration=iteration, instance=a_instance)

                        cud.pre_exec       = ana_step._cu_def_pre_exec
                        cud.executable     = ana_step._cu_def_executable
                        cud.arguments      = ana_step.arguments
                        cud.mpi            = ana_step.uses_mpi
                        cud.input_staging  = None
                        cud.output_staging = None

                        #------------------------------------------------------------------------------------------------------------------
                        # upload_input_data
                        data_in = []
                        if ana_step._kernel._upload_input_data is not None:
                            if isinstance(ana_step._kernel._upload_input_data,list):
                                pass
                            else:
                                ana_step._kernel._upload_input_data = [ana_step._kernel._upload_input_data]
                            for i in range(0,len(ana_step._kernel._upload_input_data)):
                                var=resolve_placeholder_vars(working_dirs, a_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "analysis", ana_step._kernel._upload_input_data[i])
                                if len(var.split('>')) > 1:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': var.split('>')[1].strip()
                                        }
                                else:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': os.path.basename(var.split('>')[0].strip())
                                        }
                                data_in.append(temp)

                        if cud.input_staging is None:
                            cud.input_staging = data_in
                        else:
                            cud.input_staging += data_in
                        #------------------------------------------------------------------------------------------------------------------

                        #------------------------------------------------------------------------------------------------------------------
                        # link_input_data
                        data_in = []
                        if ana_step._kernel._link_input_data is not None:
                            if isinstance(ana_step._kernel._link_input_data,list):
                                pass
                            else:
                                ana_step._kernel._link_input_data = [ana_step._kernel._link_input_data]
                            for i in range(0,len(ana_step._kernel._link_input_data)):
                                var=resolve_placeholder_vars(working_dirs, a_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "analysis", ana_step._kernel._link_input_data[i])
                                if len(var.split('>')) > 1:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': var.split('>')[1].strip(),
                                            'action': radical.pilot.LINK
                                        }
                                else:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': os.path.basename(var.split('>')[0].strip()),
                                            'action': radical.pilot.LINK
                                        }
                                data_in.append(temp)

                        if cud.input_staging is None:
                            cud.input_staging = data_in
                        else:
                            cud.input_staging += data_in
                        #------------------------------------------------------------------------------------------------------------------

                        #------------------------------------------------------------------------------------------------------------------
                        # copy_input_data
                        data_in = []
                        if ana_step._kernel._copy_input_data is not None:
                            if isinstance(ana_step._kernel._copy_input_data,list):
                                pass
                            else:
                                ana_step._kernel._copy_input_data = [ana_step._kernel._copy_input_data]
                            for i in range(0,len(ana_step._kernel._copy_input_data)):
                                var=resolve_placeholder_vars(working_dirs, a_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "analysis", ana_step._kernel._copy_input_data[i])
                                if len(var.split('>')) > 1:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': var.split('>')[1].strip(),
                                            'action': radical.pilot.COPY
                                        }
                                else:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': os.path.basename(var.split('>')[0].strip()),
                                            'action': radical.pilot.COPY
                                        }
                                data_in.append(temp)

                        if cud.input_staging is None:
                            cud.input_staging = data_in
                        else:
                            cud.input_staging += data_in
                        #------------------------------------------------------------------------------------------------------------------

                        #------------------------------------------------------------------------------------------------------------------
                        # download input data
                        if ana_step.download_input_data is not None:
                            data_in  = ana_step.download_input_data
                            if cud.input_staging is None:
                                cud.input_staging = data_in
                            else:
                                cud.input_staging += data_in
                        #------------------------------------------------------------------------------------------------------------------


                        #------------------------------------------------------------------------------------------------------------------
                        # copy_output_data
                        data_out = []
                        if ana_step._kernel._copy_output_data is not None:
                            if isinstance(ana_step._kernel._copy_output_data,list):
                                pass
                            else:
                                ana_step._kernel._copy_output_data = [ana_step._kernel._copy_output_data]
                            for i in range(0,len(ana_step._kernel._copy_output_data)):
                                var=resolve_placeholder_vars(working_dirs, a_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "analysis", ana_step._kernel._copy_output_data[i])
                                if len(var.split('>')) > 1:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': var.split('>')[1].strip(),
                                            'action': radical.pilot.COPY
                                        }
                                else:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': os.path.basename(var.split('>')[0].strip()),
                                            'action': radical.pilot.COPY
                                        }
                                data_out.append(temp)

                        if cud.output_staging is None:
                            cud.output_staging = data_out
                        else:
                            cud.output_staging += data_out
                        #------------------------------------------------------------------------------------------------------------------

                        #------------------------------------------------------------------------------------------------------------------
                        # download_output_data
                        data_out = []
                        if ana_step._kernel._download_output_data is not None:
                            if isinstance(ana_step._kernel._download_output_data,list):
                                pass
                            else:
                                ana_step._kernel._download_output_data = [ana_step._kernel._download_output_data]
                            for i in range(0,len(ana_step._kernel._download_output_data)):
                                var=resolve_placeholder_vars(working_dirs, a_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "analysis", ana_step._kernel._download_output_data[i])
                                if len(var.split('>')) > 1:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': var.split('>')[1].strip()
                                        }
                                else:
                                    temp = {
                                            'source': var.split('>')[0].strip(),
                                            'target': os.path.basename(var.split('>')[0].strip())
                                        }
                                data_out.append(temp)

                        if cud.output_staging is None:
                            cud.output_staging = data_out
                        else:
                            cud.output_staging += data_out
                        #------------------------------------------------------------------------------------------------------------------


                        if ana_step.cores is not None:
                            cud.cores = ana_step.cores

                        a_units.append(cud)

                        if ana_step.get_instance_type == 'single':
                            break

                    self.get_logger().debug("Created analysis CU: {0}.".format(cud.as_dict()))
                    a_cus = resource._umgr.submit_units(a_units)
                    all_cus.extend(a_cus)
                    all_ana_cus.extend(a_cus)

                    self.get_logger().info("Submitted tasks for analysis iteration {0}.".format(iteration))
                    self.get_logger().info("Waiting for analysis tasks in iteration {0}/kernel {1}: {2} to complete.".format(iteration,kern_step+1,ana_step.name))
                    resource._umgr.wait_units()
                    self.get_logger().info("Analysis in iteration {0}/kernel {1}: {2} completed.".format(iteration,kern_step+1,ana_step.name))

                    failed_units = ""
                    for unit in a_cus:
                        if unit.state != radical.pilot.DONE:
                            failed_units += " * Analysis task {0} failed with an error: {1}\n".format(unit.uid, unit.stderr)

                
                step_end_time_abs = datetime.datetime.now()

                i = 0
                for cu in a_cus:
                    i += 1
                    working_dirs['iteration_{0}'.format(iteration)]['analysis_{0}'.format(i)] = saga.Url(cu.working_directory).path

                # Process CU information and append it to the dictionary
                tinfo = extract_timing_info(all_ana_cus, pattern_start_time, step_start_time_abs, step_end_time_abs)

                for key, val in tinfo.iteritems():
                    step_timings['timings'][key] = val

                # Write the whole thing to the profiling dict
                pattern._execution_profile.append(step_timings)

        except KeyboardInterrupt:
            traceback.print_exc()
Esempio n. 4
0
    def execute_pattern(self, pattern, resource):

        pattern_start_time = datetime.datetime.now()

        #-----------------------------------------------------------------------
        #
        def unit_state_cb(unit, state):
            """Log an error when a ComputeUnit reaches the FAILED state.

            Registered on the unit manager as a callback; any state other
            than ``radical.pilot.FAILED`` is ignored.

            :param unit:  the ComputeUnit whose state changed; its ``stderr``
                          and ``stdout`` attributes are included in the log.
            :param state: the state the unit has transitioned to.
            """
            if state == radical.pilot.FAILED:
                # {0} is stderr and {1} is stdout -- each stream needs its own
                # positional index, otherwise stdout is never reported.
                self.get_logger().error("ComputeUnit error: STDERR: {0}, STDOUT: {1}".format(unit.stderr, unit.stdout))
                self.get_logger().error("Pattern execution FAILED.")


        self.get_logger().info("Executing simulation-analysis loop with {0} iterations on {1} allocated core(s) on '{2}'".format(pattern.iterations, resource._cores, resource._resource_key))

        working_dirs = {}
        all_cus = []

        pattern._execution_profile = []

        try:
            resource._umgr.register_callback(unit_state_cb)

            ########################################################################
            # execute pre_loop
            #
            try:
                pre_loop = pattern.pre_loop()
                pre_loop._bind_to_resource(resource._resource_key)

                cu = radical.pilot.ComputeUnitDescription()
                cu.name = "pre_loop"

                cu.pre_exec       = pre_loop._cu_def_pre_exec
                cu.executable     = pre_loop._cu_def_executable
                cu.arguments      = pre_loop.arguments
                cu.mpi            = pre_loop.uses_mpi
                cu.input_staging  = pre_loop._cu_def_input_data
                cu.output_staging = pre_loop._cu_def_output_data

                self.get_logger().debug("Created pre_loop CU: {0}.".format(cu.as_dict()))

                unit = resource._umgr.submit_units(cu)
                all_cus.append(unit)

                self.get_logger().info("Submitted ComputeUnit(s) for pre_loop step.")
                self.get_logger().info("Waiting for ComputeUnit(s) in pre_loop step to complete.")
                resource._umgr.wait_units()
                self.get_logger().info("Pre_loop completed.")

                if unit.state != radical.pilot.DONE:
                    raise EnsemblemdError("Pre-loop CU failed with error: {0}".format(unit.stdout))

                working_dirs["pre_loop"] = saga.Url(unit.working_directory).path

            except Exception:
                # Doesn't exist. That's fine as it is not mandatory.
                self.get_logger().info("pre_loop() not defined. Skipping.")
                pass

            ########################################################################
            # execute simulation analysis loop
            #
            for iteration in range(1, pattern.iterations+1):

                working_dirs['iteration_{0}'.format(iteration)] = {}

                ################################################################
                # EXECUTE SIMULATION STEPS
                step_timings = {
                    "name": "simulation_iteration_{0}".format(iteration),
                    "timings": {}
                }
                step_start_time_abs = datetime.datetime.now()

                s_units = []
                for s_instance in range(1, pattern._simulation_instances+1):

                    sim_step = pattern.simulation_step(iteration=iteration, instance=s_instance)

                    sim_step._bind_to_resource(resource._resource_key)

                    # Resolve all placeholders
                    if sim_step.link_input_data is not None:
                        for i in range(len(sim_step.link_input_data)):
                            sim_step.link_input_data[i] = resolve_placeholder_vars(working_dirs, s_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "simulation", sim_step.link_input_data[i])

                    cud = radical.pilot.ComputeUnitDescription()
                    cud.name = "sim ;{iteration} ;{instance}".format(iteration=iteration, instance=s_instance)

                    cud.pre_exec       = sim_step._cu_def_pre_exec
                    cud.executable     = sim_step._cu_def_executable
                    cud.arguments      = sim_step.arguments
                    cud.mpi            = sim_step.uses_mpi
                    cud.input_staging  = sim_step._cu_def_input_data
                    cud.output_staging = sim_step._cu_def_output_data

                    # This is a good time to replace all placeholders in the
                    # pre_exec list.

                    try:
                        cud.cores = sim_step.cores
                    except:
                        pass

                    s_units.append(cud)
                    self.get_logger().debug("Created simulation CU: {0}.".format(cud.as_dict()))

                s_cus = resource._umgr.submit_units(s_units)
                all_cus.extend(s_cus)

                self.get_logger().info("Submitted tasks for simulation iteration {0}.".format(iteration))
                self.get_logger().info("Waiting for simulations in iteration {0} to complete.".format(iteration))
                resource._umgr.wait_units()
                self.get_logger().info("Simulations in iteration {0} completed.".format(iteration))


                failed_units = ""
                for unit in s_cus:
                    if unit.state != radical.pilot.DONE:
                        failed_units += " * Simulation task {0} failed with an error: {1}\n".format(unit.uid, unit.stderr)

                # TODO: ensure working_dir <-> instance mapping
                i = 0
                for cu in s_cus:
                    i += 1
                    working_dirs['iteration_{0}'.format(iteration)]['simulation_{0}'.format(i)] = saga.Url(cu.working_directory).path

                step_end_time_abs = datetime.datetime.now()

                # Process CU information and append it to the dictionary
                tinfo = extract_timing_info(s_cus, pattern_start_time, step_start_time_abs, step_end_time_abs)
                for key, val in tinfo.iteritems():
                    step_timings['timings'][key] = val

                # Write the whole thing to the profiling dict
                pattern._execution_profile.append(step_timings)


                ################################################################
                # EXECUTE ANALYSIS STEPS
                step_timings = {
                    "name": "analysis_iteration_{0}".format(iteration),
                    "timings": {}
                }
                step_start_time_abs = datetime.datetime.now()

                a_units = []
                analysis_list = None
                for a_instance in range(1, pattern._analysis_instances+1):

                    analysis_list = pattern.analysis_step(iteration=iteration, instance=a_instance)

                    if not isinstance(analysis_list,list):
                        analysis_list = [analysis_list]

                    if len(analysis_list) > 1:

                        kernel_wd = ""
                        cur_kernel = 1

                        for ana_step in analysis_list:

                            a_units = []
                            ana_step._bind_to_resource(resource._resource_key)

                            # Resolve all placeholders
                            if ana_step.link_input_data is not None:
                                for i in range(len(ana_step.link_input_data)):
                                    ana_step.link_input_data[i] = resolve_placeholder_vars(working_dirs, a_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "analysis", ana_step.link_input_data[i])

                            cud = radical.pilot.ComputeUnitDescription()
                            cud.name = "ana ; {iteration}; {instance}".format(iteration=iteration, instance=a_instance)

                            cud.pre_exec       = ana_step._cu_def_pre_exec
                            if cur_kernel > 1:
                                cud.pre_exec.append('cp -n %s/*.* .'%kernel_wd)

                            cud.executable     = ana_step._cu_def_executable
                            cud.arguments      = ana_step.arguments
                            cud.mpi            = ana_step.uses_mpi
                            cud.input_staging  = ana_step._cu_def_input_data
                            cud.output_staging = ana_step._cu_def_output_data

                            try:
                                cud.cores = ana_step.cores
                            except:
                                pass

                            a_units.append(cud)
                            self.get_logger().debug("Created analysis CU: {0}.".format(cud.as_dict()))

                            a_cus = resource._umgr.submit_units(a_units)
                            all_cus.extend(a_cus)

                            self.get_logger().info("Submitted tasks for analysis iteration {0}/ kernel {1}.".format(iteration,cur_kernel))
                            self.get_logger().info("Waiting for analysis tasks in iteration {0}/kernel {1} to complete.".format(iteration,cur_kernel))
                            resource._umgr.wait_units()
                            self.get_logger().info("Analysis in iteration {0}/kernel {1}:{2} completed.".format(iteration,cur_kernel,ana_step.name))

                            failed_units = ""
                            for unit in a_cus:
                                if unit.state != radical.pilot.DONE:
                                    failed_units += " * Analysis task {0} failed with an error: {1}\n".format(unit.uid, unit.stderr)
                                else:
                                    kernel_wd = saga.Url(unit.working_directory).path
                                    cur_kernel += 1
                                    working_dirs['iteration_{0}'.format(iteration)]['analysis_1'] = saga.Url(unit.working_directory).path

                    else:
                        analysis_step = analysis_list[0]
                        analysis_step._bind_to_resource(resource._resource_key)

                        # Resolve all placeholders
                        if analysis_step.link_input_data is not None:
                            for i in range(len(analysis_step.link_input_data)):
                                analysis_step.link_input_data[i] = resolve_placeholder_vars(working_dirs, a_instance, iteration, pattern._simulation_instances, pattern._analysis_instances, "analysis", analysis_step.link_input_data[i])

                        cud = radical.pilot.ComputeUnitDescription()
                        cud.name = "ana; {iteration};{instance}".format(iteration=iteration, instance=a_instance)

                        cud.pre_exec       = analysis_step._cu_def_pre_exec
                        cud.executable     = analysis_step._cu_def_executable
                        cud.arguments      = analysis_step.arguments
                        cud.mpi            = analysis_step.uses_mpi
                        cud.input_staging  = analysis_step._cu_def_input_data
                        cud.output_staging = analysis_step._cu_def_output_data

                        a_units.append(cud)

                        self.get_logger().debug("Created analysis CU: {0}.".format(cud.as_dict()))

                if len(analysis_list)==1:
                    a_cus = resource._umgr.submit_units(a_units)
                    all_cus.extend(a_cus)


                    self.get_logger().info("Submitted tasks for analysis iteration {0}.".format(iteration))
                    self.get_logger().info("Waiting for analysis tasks in iteration {0} to complete.".format(iteration))
                    resource._umgr.wait_units()
                    self.get_logger().info("Analysis in iteration {0} completed.".format(iteration))

                    failed_units = ""
                    for unit in a_cus:
                        if unit.state != radical.pilot.DONE:
                            failed_units += " * Analysis task {0} failed with an error: {1}\n".format(unit.uid, unit.stderr)

                     # TODO: ensure working_dir <-> instance mapping
                        i = 0
                        for cu in a_cus:
                            i += 1
                            working_dirs['iteration_{0}'.format(iteration)]['analysis_{0}'.format(i)] = saga.Url(cu.working_directory).path

                step_end_time_abs = datetime.datetime.now()

                # Process CU information and append it to the dictionary
                tinfo = extract_timing_info(a_cus, pattern_start_time, step_start_time_abs, step_end_time_abs)

                for key, val in tinfo.iteritems():
                    step_timings['timings'][key] = val

                # Write the whole thing to the profiling dict
                pattern._execution_profile.append(step_timings)

        except Exception, ex:
            self.get_logger().error("Fatal error during execution: {0}.".format(str(ex)))
            raise
    def execute_pattern(self, pattern, resource):
        """Run a replica-exchange pattern: per cycle, an MD step then an exchange step.

        The shared input files of the pattern are staged to the pilot once.
        Then, for every cycle ``c`` in ``1 .. pattern.nr_cycles``:

        1. one MD compute unit per replica is submitted and awaited;
        2. one exchange compute unit per replica is submitted and awaited;
        3. the whitespace-split stdout of every exchange unit becomes one row
           of ``matrix_columns``, which is written to the local file
           ``matrix_columns_<c>`` and passed to ``pattern.get_swap_matrix``;
        4. ``pattern.exchange`` is queried for every replica and
           ``pattern.perform_swap`` is called when a different partner is
           returned.

        If the environment variable ``RADICAL_ENMD_PROFILING`` equals ``'1'``,
        ``pattern._execution_profile`` is reset to a list and receives one
        timing record per MD, exchange and post-processing step.

        :param pattern: replica-exchange pattern object (provides
            ``nr_cycles``, ``prepare_shared_data``, ``get_replicas``, the
            ``prepare_replica_for_*`` factories and the swap methods).
        :param resource: execution context; its ``_pilot``, ``_pmgr``,
            ``_umgr`` and ``_resource_key`` attributes are used.
        :raises SystemExit: when a unit of an MD or exchange step does not
            end in the DONE state. The exit carries a report of the failed
            units (the previous bare ``sys.exit()`` dropped the report and
            exited with status 0).
        :raises Exception: any other error is logged and re-raised.
        """

        def copy_to_staging(items):
            # One COPY directive per item, from the unit's working directory
            # into 'staging:///'. The list is optional: None/empty yields [].
            return [{'source': item,
                     'target': 'staging:///%s' % item,
                     'action': radical.pilot.COPY}
                    for item in (items or [])]

        def describe_unit(label, cycle, replica, kernel, staged_in, staged_out):
            # Translate a bound kernel into a compute unit description.
            cud                = radical.pilot.ComputeUnitDescription()
            cud.name           = "{0} ;{1} ;{2}".format(label, cycle, replica.id)
            cud.pre_exec       = kernel._cu_def_pre_exec
            cud.executable     = kernel._cu_def_executable
            cud.arguments      = kernel.arguments
            cud.mpi            = kernel.uses_mpi
            cud.cores          = kernel.cores
            cud.input_staging  = staged_in
            cud.output_staging = staged_out
            return cud

        def abort_on_failed_units(units, label, cycle):
            # Terminate with a non-zero status and a readable report as soon
            # as a step contains a unit that did not finish in state DONE.
            failures = [" * {0} step: Unit {1} failed with an error: {2}\n"
                        .format(label, unit.uid, unit.stderr)
                        for unit in units
                        if unit.state != radical.pilot.DONE]
            if failures:
                sys.exit("Ensemble MD Toolkit Error: {0} step of cycle {1} failed:\n{2}"
                         .format(label, cycle, "".join(failures)))

        def record_unit_timings(units, timings, pattern_start, step_start, step_end):
            # Validate the three time stamps (same order and messages as
            # before), then merge the per-unit timing info into the record.
            if not isinstance(pattern_start, datetime.datetime):
                sys.exit("Ensemble MD Toolkit Error: pattern_start_time is not datetime.datetime instance.")
            if not isinstance(step_start, datetime.datetime):
                sys.exit("Ensemble MD Toolkit Error: step_start_time_abs for {0} is not datetime.datetime instance.".format(timings["name"]))
            if not isinstance(step_end, datetime.datetime):
                sys.exit("Ensemble MD Toolkit Error: step_end_time_abs for {0} is not datetime.datetime instance.".format(timings["name"]))

            tinfo = extract_timing_info(units, pattern_start, step_start, step_end)
            for key, val in tinfo.items():
                timings['timings'][key] = val

        try:
            try:
                cycles = pattern.nr_cycles + 1
            except Exception:
                self.get_logger().exception("Number of cycles (nr_cycles) must be defined for pattern ReplicaExchange!")
                raise

            profiling = os.getenv('RADICAL_ENMD_PROFILING', '0') == '1'
            if profiling:
                pattern._execution_profile = []

            # Shared data: transfer every file to the pilot's staging area
            # once, and remember the directive that copies it from there
            # into each MD unit.
            pattern.prepare_shared_data()

            shared_input_file_urls = pattern.shared_urls
            shared_input_files = pattern.shared_files
            sd_shared_list = []

            for idx, shared_file in enumerate(shared_input_files):
                staged = 'staging:///%s' % shared_file

                resource._pilot.stage_in({'source': shared_input_file_urls[idx],
                                          'target': staged,
                                          'action': radical.pilot.TRANSFER})

                sd_shared_list.append({'source': staged,
                                       'target': shared_file,
                                       'action': radical.pilot.COPY})

            # Pilot must be active
            resource._pmgr.wait_pilots(resource._pilot.uid, 'Active')

            pattern_start_time = datetime.datetime.now() if profiling else None

            replicas = pattern.get_replicas()

            for c in range(1, cycles):
                #-----------------------------------------------------------
                # MD step
                if profiling:
                    step_timings = {
                        "name": "md_run_{0}".format(c),
                        "timings": {}
                    }
                    step_start_time_abs = datetime.datetime.now()

                md_units = []
                for r in replicas:
                    self.get_logger().info("Cycle %d: Building input files for replica %d" % ((c), r.id) )
                    pattern.build_input_file(r)
                    self.get_logger().info("Cycle %d: Preparing replica %d for MD run" % ((c), r.id) )
                    r_kernel = pattern.prepare_replica_for_md(r)

                    # The amber kernel is bound with the pattern name as an
                    # additional argument.
                    if r_kernel._kernel.get_name() == "md.amber":
                        r_kernel._bind_to_resource(resource._resource_key, pattern.name)
                    else:
                        r_kernel._bind_to_resource(resource._resource_key)

                    copy_out = copy_to_staging(r_kernel._kernel._copy_output_data)

                    cu = describe_unit("md", c, r, r_kernel,
                                       sd_shared_list + r_kernel._cu_def_input_data,
                                       copy_out + r_kernel._cu_def_output_data)
                    md_units.append(resource._umgr.submit_units(cu))

                self.get_logger().info("Cycle %d: Performing MD step for replicas" % (c) )
                resource._umgr.wait_units()

                if profiling:
                    step_end_time_abs = datetime.datetime.now()

                abort_on_failed_units(md_units, "MD", c)

                if profiling:
                    record_unit_timings(md_units, step_timings, pattern_start_time,
                                        step_start_time_abs, step_end_time_abs)
                    pattern._execution_profile.append(step_timings)

                #-----------------------------------------------------------
                # Exchange step. The former 'if (c < cycles)' guard was
                # always true inside range(1, cycles) and has been dropped.
                if profiling:
                    step_timings = {
                        "name": "ex_run_{0}".format(c),
                        "timings": {}
                    }
                    step_start_time_abs = datetime.datetime.now()

                ex_units = []
                for r in replicas:
                    self.get_logger().info("Cycle %d: Preparing replica %d for Exchange run" % ((c), r.id) )
                    ex_kernel = pattern.prepare_replica_for_exchange(r)
                    ex_kernel._bind_to_resource(resource._resource_key)

                    cu = describe_unit("ex", c, r, ex_kernel,
                                       ex_kernel._cu_def_input_data,
                                       ex_kernel._cu_def_output_data)
                    ex_units.append(resource._umgr.submit_units(cu))

                self.get_logger().info("Cycle %d: Performing Exchange step for replicas" % (c) )
                resource._umgr.wait_units()

                if profiling:
                    step_end_time_abs = datetime.datetime.now()

                abort_on_failed_units(ex_units, "EX", c)

                if profiling:
                    record_unit_timings(ex_units, step_timings, pattern_start_time,
                                        step_start_time_abs, step_end_time_abs)
                    pattern._execution_profile.append(step_timings)

                    # Everything from here to the end of the cycle is
                    # accounted for as post-processing.
                    step_timings = {
                        "name": "post_processing_{0}".format(c),
                        "timings": {}
                    }
                    step_start_time_abs = datetime.datetime.now()

                #-----------------------------------------------------------
                # Post-processing: one row per exchange unit, made of the
                # whitespace-separated tokens of its stdout.
                matrix_columns = [str(unit.stdout).split() for unit in ex_units]

                # writing swap matrix out (best effort; a failure is only logged)
                sw_file = "matrix_columns_" + str(c)
                try:
                    with open(sw_file, "w") as w_file:
                        for row in matrix_columns:
                            w_file.write("".join("%s " % cell for cell in row) + "\n")
                except IOError:
                    self.get_logger().info('Warning: unable to access file %s' % sw_file)

                # computing swap matrix
                self.get_logger().info("Cycle %d: Composing swap matrix" % (c) )
                swap_matrix = pattern.get_swap_matrix(replicas, matrix_columns)

                # this is actual exchange
                for r_i in replicas:
                    r_j = pattern.exchange(r_i, replicas, swap_matrix)
                    if r_j != r_i:
                        self.get_logger().info("Performing exchange of parameters between replica %d and replica %d" % ( r_j.id, r_i.id ))
                        # swap parameters
                        pattern.perform_swap(r_i, r_j)

                if profiling:
                    step_end_time_abs = datetime.datetime.now()

                    # No compute units are involved here, so only the
                    # absolute and pattern-relative step boundaries are kept.
                    step_timings['timings'].update({
                        "step_start_time": {
                            "abs": step_start_time_abs,
                            "rel": step_start_time_abs - pattern_start_time
                        },
                        "step_end_time": {
                            "abs": step_end_time_abs,
                            "rel": step_end_time_abs - pattern_start_time
                        }
                    })

                    # Write the whole thing to the profiling dict
                    pattern._execution_profile.append(step_timings)

            # End of simulation loop
            #------------------------

        except Exception as ex:
            self.get_logger().exception("Fatal error during execution: {0}.".format(str(ex)))
            raise
    def execute_pattern(self, pattern, resource):
        """Run a replica-exchange pattern with a single global exchange unit.

        The shared input files of the pattern are transferred to the pilot's
        staging area once. Then, for every cycle ``c`` in
        ``1 .. pattern.nr_cycles``:

        1. one MD compute unit per replica is described, and all of them are
           submitted in bulk and awaited;
        2. one compute unit built from ``pattern.prepare_global_ex_calc`` is
           submitted and awaited;
        3. ``pattern.do_exchange(c, replicas)`` is called.

        If the environment variable ``RADICAL_ENMD_PROFILING`` equals ``'1'``,
        ``pattern._execution_profile`` is reset to a list and receives one
        timing record per MD, exchange and post-processing step.

        :param pattern: replica-exchange pattern object (provides
            ``nr_cycles``, ``prepare_shared_data``, ``get_replicas``,
            ``prepare_replica_for_md``, ``prepare_global_ex_calc`` and
            ``do_exchange``).
        :param resource: execution context; its ``_pilot``, ``_pmgr``,
            ``_umgr`` and ``_resource_key`` attributes are used.
        :raises SystemExit: when a unit of an MD or exchange step does not
            end in the DONE state. The exit carries a report of the failed
            units (the previous bare ``sys.exit()`` dropped the report and
            exited with status 0).
        """

        def staging_copies(items, into_staging):
            # One COPY directive per item between the unit's working
            # directory and 'staging:///'; direction is picked by
            # `into_staging`. The list is optional: None/empty yields [].
            directives = []
            for item in (items or []):
                staged = 'staging:///%s' % item
                if into_staging:
                    source, target = item, staged
                else:
                    source, target = staged, item
                directives.append({'source': source,
                                   'target': target,
                                   'action': radical.pilot.COPY})
            return directives

        def describe_unit(kernel, name=None):
            # Translate a bound kernel into a compute unit description whose
            # staging lists are the kernel's own directives followed by the
            # copies derived from its copy_input/copy_output data.
            copy_in = staging_copies(kernel._kernel._copy_input_data, False)
            copy_out = staging_copies(kernel._kernel._copy_output_data, True)

            cud = radical.pilot.ComputeUnitDescription()
            if name is not None:
                cud.name       = name
            cud.pre_exec       = kernel._cu_def_pre_exec
            cud.executable     = kernel._cu_def_executable
            cud.post_exec      = kernel._cu_def_post_exec
            cud.arguments      = kernel.arguments
            cud.mpi            = kernel.uses_mpi
            cud.cores          = kernel.cores
            cud.input_staging  = (kernel._cu_def_input_data or []) + copy_in
            cud.output_staging = (kernel._cu_def_output_data or []) + copy_out
            return cud

        def abort_on_failed_units(units, label, cycle):
            # Terminate with a non-zero status and a readable report as soon
            # as a step contains a unit that did not finish in state DONE.
            failures = [" * {0} step: Unit {1} failed with an error: {2}\n"
                        .format(label, unit.uid, unit.stderr)
                        for unit in units
                        if unit.state != radical.pilot.DONE]
            if failures:
                sys.exit("Ensemble MD Toolkit Error: {0} step of cycle {1} failed:\n{2}"
                         .format(label, cycle, "".join(failures)))

        def record_unit_timings(units, timings, pattern_start, step_start, step_end):
            # Validate the three time stamps (same order and messages as
            # before), then merge the per-unit timing info into the record.
            if not isinstance(pattern_start, datetime.datetime):
                sys.exit("Ensemble MD Toolkit Error: pattern_start_time is not datetime.datetime instance.")
            if not isinstance(step_start, datetime.datetime):
                sys.exit("Ensemble MD Toolkit Error: step_start_time_abs for {0} is not datetime.datetime instance.".format(timings["name"]))
            if not isinstance(step_end, datetime.datetime):
                sys.exit("Ensemble MD Toolkit Error: step_end_time_abs for {0} is not datetime.datetime instance.".format(timings["name"]))

            tinfo = extract_timing_info(units, pattern_start, step_start, step_end)
            for key, val in tinfo.items():
                timings['timings'][key] = val

        try:
            try:
                cycles = pattern.nr_cycles + 1
            except Exception:
                # The message used to be split with a backslash inside the
                # literal, which embedded the source indentation in the log.
                self.get_logger().exception("Number of cycles (nr_cycles) must be defined for pattern ReplicaExchange!")
                raise

            profiling = os.getenv('RADICAL_ENMD_PROFILING', '0') == '1'
            if profiling:
                pattern._execution_profile = []

            # Shared data: transfer every file to the pilot's staging area.
            # Units pull what they need from there through the kernel's
            # copy_input_data entries (see describe_unit).
            pattern.prepare_shared_data()

            shared_input_file_urls = pattern.shared_urls
            shared_input_files = pattern.shared_files

            for idx, shared_file in enumerate(shared_input_files):
                resource._pilot.stage_in({'source': shared_input_file_urls[idx],
                                          'target': 'staging:///%s' % shared_file,
                                          'action': radical.pilot.TRANSFER})

            # Pilot must be active
            resource._pmgr.wait_pilots(resource._pilot.uid, 'Active')

            pattern_start_time = datetime.datetime.now() if profiling else None

            replicas = pattern.get_replicas()

            #-------------------------------------------------------------------
            # GL = 0: submit global calculator before
            # GL = 1: submit global calculator after
            GL = 0

            for c in range(1, cycles):
                #---------------------------------------------------------------
                # MD step
                if profiling:
                    step_timings = {
                        "name": "md_run_{0}".format(c),
                        "timings": {}
                    }
                    step_start_time_abs = datetime.datetime.now()

                cus = []
                for r in replicas:
                    self.get_logger().info("Cycle %d: Preparing replica %d for MD-step" % ((c), r.id) )
                    r_kernel = pattern.prepare_replica_for_md(r)

                    # The amber kernel is bound with the pattern name as an
                    # additional argument.
                    if r_kernel._kernel.get_name() == "md.amber":
                        r_kernel._bind_to_resource(resource._resource_key, pattern.name)
                    else:
                        r_kernel._bind_to_resource(resource._resource_key)

                    cus.append(describe_unit(
                        r_kernel,
                        name="md ;{cycle} ;{replica}".format(cycle=c, replica=r.id)))

                # bulk submission
                md_units = list(resource._umgr.submit_units(cus))

                self.get_logger().info("Cycle %d: Performing MD-step for replicas" % (c) )
                resource._umgr.wait_units()

                if profiling:
                    step_end_time_abs = datetime.datetime.now()

                abort_on_failed_units(md_units, "MD", c)

                if profiling:
                    record_unit_timings(md_units, step_timings, pattern_start_time,
                                        step_start_time_abs, step_end_time_abs)
                    pattern._execution_profile.append(step_timings)

                #---------------------------------------------------------------
                # Exchange step (global calculation). The former
                # 'if (c <= cycles)' guard was always true inside
                # range(1, cycles) and has been dropped.
                if profiling:
                    step_timings = {
                        "name": "ex_run_{0}".format(c),
                        "timings": {}
                    }
                    step_start_time_abs = datetime.datetime.now()

                self.get_logger().info("Cycle %d: Preparing replicas for Exchange-Step" % (c) )

                gl_ex_kernel = pattern.prepare_global_ex_calc(GL, c, replicas)
                gl_ex_kernel._bind_to_resource(resource._resource_key)

                sub_replica = resource._umgr.submit_units(describe_unit(gl_ex_kernel))
                resource._umgr.wait_units()

                ex_units = [sub_replica]

                if profiling:
                    step_end_time_abs = datetime.datetime.now()

                abort_on_failed_units(ex_units, "EX", c)

                if profiling:
                    record_unit_timings(ex_units, step_timings, pattern_start_time,
                                        step_start_time_abs, step_end_time_abs)
                    pattern._execution_profile.append(step_timings)

                    # Everything from here to the end of the cycle is
                    # accounted for as post-processing.
                    step_timings = {
                        "name": "post_processing_{0}".format(c),
                        "timings": {}
                    }
                    step_start_time_abs = datetime.datetime.now()

                #---------------------------------------------------------------
                pattern.do_exchange(c, replicas)

                if profiling:
                    step_end_time_abs = datetime.datetime.now()

                    # No compute units are involved here, so only the
                    # absolute and pattern-relative step boundaries are kept.
                    step_timings['timings'].update({
                        "step_start_time": {
                            "abs": step_start_time_abs,
                            "rel": step_start_time_abs - pattern_start_time
                        },
                        "step_end_time": {
                            "abs": step_end_time_abs,
                            "rel": step_end_time_abs - pattern_start_time
                        }
                    })

                    # Write the whole thing to the profiling dict
                    pattern._execution_profile.append(step_timings)

            # End of simulation loop
            #-------------------------------------------------------------------
            self.get_logger().info("Replica Exchange simulation finished successfully!")

        except KeyboardInterrupt:
            traceback.print_exc()