def simulation_step(self, iteration, instance):
        '''
        function : Build the two Amber kernels that make up the simulation stage
        for one (iteration, instance) pair.

        kernel 1 ("md.amber", minimization input) :-

                Arguments : --mininfile = minimization input filename
                            --topfile   = topology filename
                            --crdfile   = initial coordinate filename
                            --cycle     = current iteration number
                            --instance  = current instance number

                Staging   : links the minimization input, topology and initial
                            coordinate files from $PRE_LOOP. In iteration 1 the
                            initial coordinate file is additionally linked as
                            min1.crd; afterwards
                            $PRE_LOOP/iter<iteration>/min<iteration-1><instance-1>.crd
                            is linked as min<iteration>.crd. The md<iteration>.crd
                            output is copied to
                            $PRE_LOOP/iter<iteration>/md_<iteration>_<instance>.crd.

        kernel 2 ("md.amber", MD input) :-

                Arguments : --mdinfile, --topfile, --cycle, --instance

                Staging   : links the MD input and topology from $PRE_LOOP and
                            the file copied by kernel 1 as md<iteration>.crd.

        Both kernels request a single core. Returns the list [kernel 1, kernel 2].
        '''
        min_input = os.path.basename(Kconfig.minimization_input_file)
        topology = os.path.basename(Kconfig.top_file)
        start_crd = os.path.basename(Kconfig.initial_crd_file)

        # Starting coordinates: the initial structure in the first iteration,
        # otherwise a per-iteration file found under $PRE_LOOP.
        if iteration == 1:
            start_link = '$PRE_LOOP/{0} > min1.crd'.format(start_crd)
        else:
            start_link = '$PRE_LOOP/iter{2}/min{0}{1}.crd > min{2}.crd'.format(iteration-1, instance-1, iteration)

        minimize = Kernel(name="md.amber")
        minimize.arguments = [
            "--mininfile={0}".format(min_input),
            "--topfile={0}".format(topology),
            "--crdfile={0}".format(start_crd),
            "--cycle=%s" % iteration,
            "--instance=%s" % instance,
        ]
        minimize.link_input_data = [
            '$PRE_LOOP/{0}'.format(min_input),
            '$PRE_LOOP/{0}'.format(topology),
            '$PRE_LOOP/{0}'.format(start_crd),
            start_link,
        ]
        minimize.cores = 1
        minimize.copy_output_data = ['md{0}.crd > $PRE_LOOP/iter{0}/md_{0}_{1}.crd'.format(iteration, instance)]

        md_input = os.path.basename(Kconfig.md_input_file)

        dynamics = Kernel(name="md.amber")
        dynamics.arguments = [
            "--mdinfile={0}".format(md_input),
            "--topfile={0}".format(topology),
            "--cycle=%s" % iteration,
            "--instance=%s" % instance,
        ]
        dynamics.link_input_data = [
            "$PRE_LOOP/{0}".format(md_input),
            "$PRE_LOOP/{0}".format(topology),
            "$PRE_LOOP/iter{0}/md_{0}_{1}.crd > md{0}.crd".format(iteration, instance),
        ]
        dynamics.cores = 1

        return [minimize, dynamics]
    def simulation_step(self, iteration, instance):
        '''
        function : Build the two "custom.amber" kernels of the simulation stage
        for one (iteration, instance) pair. All shared files live in $SHARED.

        kernel 1 (minimization input) :-

                Arguments : --mininfile = minimization input filename
                            --topfile   = topology filename
                            --crdfile   = initial coordinate filename
                            --cycle     = current iteration number

                Staging   : iteration 1 links the initial coordinate file as
                            min1.rst7 and copies min1.rst7 to
                            $SHARED/md_<iteration>_<instance>.rst; later
                            iterations link
                            $SHARED/min_<iteration-1>_<instance-1>.rst7 as
                            min<iteration>.rst7 and copy md<iteration>.rst to
                            $SHARED/md_<iteration>_<instance>.rst.

        kernel 2 (MD input) :-

                Arguments : --mdinfile = MD input filename
                            --topfile  = topology filename
                            --cycle    = current iteration number

                Staging   : links $SHARED/md_<iteration>_<instance>.rst as
                            md<iteration>.rst; every Kconfig.nsave-th iteration
                            the md<iteration>.nc output is downloaded to
                            output/iter<iteration>/.

        Both kernels use Kconfig.num_cores_per_sim_cu cores.
        Returns the list [kernel 1, kernel 2].
        '''
        min_input = os.path.basename(Kconfig.minimization_input_file)
        topology = os.path.basename(Kconfig.top_file)
        start_crd = os.path.basename(Kconfig.initial_crd_file)

        k1 = Kernel(name="custom.amber")
        k1.arguments = ["--mininfile={0}".format(min_input),
                        "--topfile={0}".format(topology),
                        "--crdfile={0}".format(start_crd),
                        "--cycle=%s" % (iteration)]
        k1.link_input_data = ['$SHARED/{0}'.format(min_input),
                              '$SHARED/{0}'.format(topology),
                              '$SHARED/{0}'.format(start_crd)]
        k1.cores = Kconfig.num_cores_per_sim_cu
        if iteration == 1:
            # First iteration: start from the initial coordinates.
            k1.link_input_data = k1.link_input_data + ['$SHARED/{0} > min1.rst7'.format(start_crd)]
            k1.copy_output_data = ['min1.rst7 > $SHARED/md_{0}_{1}.rst'.format(iteration, instance)]
        else:
            # Later iterations: start from a min_<iteration-1>_<instance-1>.rst7 file in $SHARED.
            k1.link_input_data = k1.link_input_data + ['$SHARED/min_{0}_{1}.rst7 > min{2}.rst7'.format(iteration-1, instance-1, iteration)]
            k1.copy_output_data = ['md{0}.rst > $SHARED/md_{0}_{1}.rst'.format(iteration, instance)]

        md_input = os.path.basename(Kconfig.md_input_file)

        k2 = Kernel(name="custom.amber")
        k2.arguments = ["--mdinfile={0}".format(md_input),
                        "--topfile={0}".format(topology),
                        "--cycle=%s" % (iteration)]
        k2.link_input_data = ["$SHARED/{0}".format(md_input),
                              "$SHARED/{0}".format(topology),
                              "$SHARED/md_{0}_{1}.rst > md{0}.rst".format(iteration, instance)]
        # This line was tab-indented in a space-indented body (TabError on Python 3).
        k2.cores = Kconfig.num_cores_per_sim_cu
        if iteration % Kconfig.nsave == 0:
            k2.download_output_data = ['md{0}.nc > output/iter{0}/md_{0}_{1}.nc'.format(iteration, instance)]

        return [k1, k2]
    def analysis_step(self, iteration, instance):
        '''
        function : Build the "md.coco" analysis kernel for the given iteration.
        The kernel is given the md<iter>.ncdf file of every simulation instance
        (1..Kconfig.num_CUs) of every iteration from 1 up to the current one.

        coco :-

                Arguments : --grid          = Kconfig.grid
                            --dims          = Kconfig.dims
                            --frontpoints   = Kconfig.num_CUs
                            --topfile       = Topology filename
                            --mdfile        = "*.ncdf"
                            --output        = "pentaopt<iteration>"
                            --cycle         = Current iteration number

                Staging   : links the topology file and postexec.py from
                            $PRE_LOOP, plus each
                            $SIMULATION_ITERATION_<iter>_INSTANCE_<i>/md<iter>.ncdf
                            as md_<iter>_<i>.ncdf.

        The kernel requests RPconfig.PILOTSIZE cores. Returns the kernel.
        '''
        topology = os.path.basename(Kconfig.top_file)

        coco = Kernel(name="md.coco")
        coco.arguments = [
            "--grid={0}".format(Kconfig.grid),
            "--dims={0}".format(Kconfig.dims),
            "--frontpoints={0}".format(Kconfig.num_CUs),
            "--topfile={0}".format(topology),
            "--mdfile=*.ncdf",
            "--output=pentaopt%s" % iteration,
            "--cycle=%s" % iteration,
        ]

        # Fixed inputs first, then one trajectory per (iteration, instance) pair.
        staged = ['$PRE_LOOP/{0}'.format(topology), '$PRE_LOOP/postexec.py']
        staged.extend(
            '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.ncdf > md_{0}_{1}.ncdf'.format(cycle, sim)
            for cycle in range(1, iteration + 1)
            for sim in range(1, Kconfig.num_CUs + 1)
        )
        coco.link_input_data = staged
        coco.cores = RPconfig.PILOTSIZE
        return coco
    def stage_1(self, instance):
        """Return the "md.gromacs" kernel that runs `gmx grompp`.

        The kernel uploads in.gro, in.top, *.itp and in.mdp and runs grompp
        on them with in.tpr as the output file, using one core. `instance`
        is not used by this stage.
        """
        k1 = Kernel(name="md.gromacs")
        # This line was tab-indented in a space-indented body (TabError on Python 3).
        k1.upload_input_data = ['in.gro', 'in.top', '*.itp', 'in.mdp']
        k1.executable = ['path/to/gromacs/gmx']
        k1.arguments = ['grompp', '-f', 'in.mdp', '-c', 'in.gro', '-o', 'in.tpr', '-p', 'in.top']
        k1.cores = 1

        return k1
# Example #5
# 0
    def stage_2(self, instance):
        """Return the "md.gromacs" kernel that runs `gmx mdrun`.

        in.tpr is linked from the $STAGE_1 working directory and passed to
        mdrun with `-deffnm out`. The kernel uses one core; `instance` is
        not used by this stage.
        """
        gmx_command = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out']

        mdrun = Kernel(name="md.gromacs")
        mdrun.link_input_data = ['$STAGE_1/in.tpr > in.tpr']
        mdrun.executable = ['path/to/gromacs/gmx']
        mdrun.arguments = gmx_command
        mdrun.cores = 1
        return mdrun
 def stage_2(self, instance):
     """Build the second-stage kernel: a single-core `gmx mdrun`.

     The run input in.tpr is linked from $STAGE_1 and all mdrun output
     files use the default name `out`. `instance` is unused here.
     """
     runner = Kernel(name="md.gromacs")
     runner.cores = 1
     runner.executable = ['path/to/gromacs/gmx']
     runner.link_input_data = ['$STAGE_1/in.tpr > in.tpr']
     runner.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out']
     return runner
 def stage_2(self, instance):
     """Build the "misc.ccount" kernel for one pipeline instance.

     The kernel links asciifile-<instance>.dat from $STEP_1, passes it as
     --inputfile, and downloads cfreqs-<instance>.dat, which is passed as
     --outputfile. One core is requested.
     """
     input_arg = "--inputfile=asciifile-{0}.dat".format(instance)
     output_arg = "--outputfile=cfreqs-{0}.dat".format(instance)

     counter = Kernel(name="misc.ccount")
     counter.arguments = [input_arg, output_arg]
     counter.link_input_data = "$STEP_1/asciifile-{0}.dat".format(instance)
     counter.download_output_data = "cfreqs-{0}.dat".format(instance)
     counter.cores = 1
     return counter
    def analysis_step(self, iteration, instance):
        '''
        function : Build the analysis stage for one analysis instance: a CoCo
        kernel followed by a tleap kernel. Each analysis instance works on a
        fixed batch of 64 simulation instances, numbered
        (instance-1)*64 + 1 .. instance*64.

        coco ("custom.coco") :-

                Arguments : --grid           = Kconfig.grid
                            --dims           = Kconfig.dims
                            --frontpoints    = 64 (the batch size)
                            --topfile        = Topology filename
                            --mdfile         = "*.ncdf"
                            --output         = "pdbs"
                            --atom_selection = Kconfig.atom_selection

                Staging   : links the topology from $SHARED and, for every
                            iteration 1..iteration, the md<iter>.ncdf file of
                            each simulation instance in this batch. Copies
                            pdbs<i>.pdb (i = 0..63) to
                            $SHARED/pentaopt<iteration><(instance-1)*64 + i>.pdb.
                            Every Kconfig.nsave-th iteration coco.log is
                            downloaded to output/iter<iteration>/.

        tleap ("custom.tleap", instance_type='single') :-

                Arguments : --numofsims = Kconfig.num_CUs
                            --cycle     = Current iteration number

                Staging   : links postexec.py and
                            pentaopt<iteration><i>.pdb for i = 0..num_CUs-1
                            from $SHARED.

        Returns the list [coco kernel, tleap kernel].
        '''
        # Number of simulation instances handled by one CoCo instance; it is also
        # the MPI core count and the number of frontier points requested.
        batch = 64
        first = (instance - 1) * batch

        k1 = Kernel(name="custom.coco")
        k1.arguments = ["--grid={0}".format(Kconfig.grid),
                        "--dims={0}".format(Kconfig.dims),
                        "--frontpoints={0}".format(batch),
                        "--topfile={0}".format(os.path.basename(Kconfig.top_file)),
                        "--mdfile=*.ncdf",
                        "--output=pdbs",
                        "--atom_selection={0}".format(Kconfig.atom_selection)]
        k1.cores = batch
        k1.uses_mpi = True

        trajectories = ['$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.ncdf > md_{0}_{1}.ncdf'.format(cycle, sim)
                        for cycle in range(1, iteration + 1)
                        for sim in range(first + 1, first + batch + 1)]
        k1.link_input_data = ['$SHARED/{0}'.format(os.path.basename(Kconfig.top_file))] + trajectories

        # Renumber this batch's local outputs (0..63) into the global index space.
        k1.copy_output_data = ['pdbs{1}.pdb > $SHARED/pentaopt{0}{2}.pdb'.format(iteration, i, first + i)
                               for i in range(batch)]

        if iteration % Kconfig.nsave == 0:
            k1.download_output_data = ['coco.log > output/iter{0}/coco.log'.format(iteration)]

        k2 = Kernel(name="custom.tleap", instance_type='single')
        k2.arguments = ["--numofsims={0}".format(Kconfig.num_CUs),
                        "--cycle={0}".format(iteration)]
        k2.link_input_data = ['$SHARED/postexec.py > postexec.py'] + [
            '$SHARED/pentaopt{0}{1}.pdb > pentaopt{0}{1}.pdb'.format(iteration, i)
            for i in range(Kconfig.num_CUs)]

        return [k1, k2]
    def analysis_step(self, iteration, instance):
        '''
        function : Build the CoCo analysis kernel for the given iteration. The
        kernel is given the .xtc trajectory of every simulation instance of
        every iteration so far and copies Kconfig.num_CUs .gro files to
        $PRE_LOOP.

        coco ("md.coco") :-

                Arguments : --grid           = Kconfig.grid
                            --dims           = Kconfig.dims
                            --frontpoints    = Kconfig.num_CUs
                            --topfile        = md-<iteration-1>_0.gro
                            --mdfile         = "*.xtc"
                            --output         = <outbase>_<iteration-1>.<ext>
                            --atom_selection = Kconfig.sel

                Staging   : links the topology file from $PRE_LOOP,
                            md-<iteration-1>_0.gro from instance 1 of the
                            current simulation iteration, and
                            md-<iter-1>_<i-1>.xtc from every simulation
                            instance i of every iteration iter. Copies
                            <outbase>_<iteration-1><i>.gro
                            (i = 0..num_CUs-1) to $PRE_LOOP and downloads
                            coco.log as output/coco-iter<iteration-1>.log.

        <outbase> and <ext> are the parts of the basename of Kconfig.output
        before and after its last dot; a name without a dot raises ValueError.

        The kernel runs on one core without MPI. Returns a one-element list
        holding the kernel.
        '''
        k1_ana_kernel = Kernel(name="md.coco")

        trajectories = ['$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md-{2}_{3}.xtc > md-{2}_{3}.xtc'.format(cycle, sim, cycle-1, sim-1)
                        for cycle in range(1, iteration + 1)
                        for sim in range(1, Kconfig.num_CUs + 1)]
        k1_ana_kernel.link_input_data = [
            '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.top_file)),
            '$SIMULATION_ITERATION_{0}_INSTANCE_1/md-{1}_0.gro > md-{1}_0.gro'.format(iteration, iteration-1),
        ] + trajectories

        k1_ana_kernel.cores = 1
        k1_ana_kernel.uses_mpi = False

        # Split on the last dot only, so a base name that itself contains dots
        # (e.g. "run.v2.gro") no longer fails to unpack.
        outbase, ext = os.path.basename(Kconfig.output).rsplit('.', 1)

        k1_ana_kernel.arguments = ["--grid={0}".format(Kconfig.grid),
                                   "--dims={0}".format(Kconfig.dims),
                                   "--frontpoints={0}".format(Kconfig.num_CUs),
                                   "--topfile=md-{0}_0.gro".format(iteration-1),
                                   "--mdfile=*.xtc",
                                   "--output={0}_{1}.{2}".format(outbase, iteration-1, ext),
                                   "--atom_selection={0}".format(Kconfig.sel)]

        # The copied files are always named *.gro, independent of `ext`.
        k1_ana_kernel.copy_output_data = ["{0}_{1}{2}.gro > $PRE_LOOP/{0}_{1}{2}.gro".format(outbase, iteration-1, i)
                                          for i in range(Kconfig.num_CUs)]

        k1_ana_kernel.download_output_data = ["coco.log > output/coco-iter{0}.log".format(iteration-1)]

        return [k1_ana_kernel]
# Example #10
# 0
    def stage_1(self, instance):
        """Build the first-stage kernel: a single-core `gmx grompp`.

        Uploads in.gro, in.top, *.itp and in.mdp, and asks grompp to write
        in.tpr. `instance` is not used by this stage.
        """
        uploads = ['in.gro', 'in.top', '*.itp', 'in.mdp']
        grompp_args = ['grompp', '-f', 'in.mdp', '-c', 'in.gro',
                       '-o', 'in.tpr', '-p', 'in.top']

        preprocess = Kernel(name="md.gromacs")
        preprocess.upload_input_data = uploads
        preprocess.executable = ['path/to/gromacs/gmx']
        preprocess.arguments = grompp_args
        preprocess.cores = 1
        return preprocess
    def analysis_step(self, iteration, instance):
        """Build the "misc.ccount" analysis kernel.

        asciifile.dat of every previous simulation instance
        (1..self.simulation_instances) is linked into the kernel's directory
        as asciifile-<n>.dat. The kernel is invoked with
        --inputfile=asciifile.dat and --outputfile=cfreqs.dat, downloads
        cfreqs.dat and uses one core. Neither `iteration` nor `instance` is
        read.
        """
        pattern = "$PREV_SIMULATION_INSTANCE_{instance}/asciifile.dat > asciifile-{instance}.dat"
        staged = [pattern.format(instance=n)
                  for n in range(1, self.simulation_instances + 1)]

        counter = Kernel(name="misc.ccount")
        counter.arguments = ["--inputfile=asciifile.dat", "--outputfile=cfreqs.dat"]
        counter.link_input_data = staged
        counter.download_output_data = "cfreqs.dat"
        counter.cores = 1
        return counter
# Example #12
# 0
    def analysis_step(self, iteration, instance):
        """Return the character-count analysis kernel ("misc.ccount").

        One link directive is created per simulation instance, numbered
        1..self.simulation_instances; each links that instance's
        asciifile.dat as asciifile-<number>.dat. The kernel receives
        --inputfile=asciifile.dat / --outputfile=cfreqs.dat, downloads
        cfreqs.dat and requests a single core. The `iteration` and
        `instance` arguments are not used.
        """
        last_instance = self.simulation_instances
        links = [
            "$PREV_SIMULATION_INSTANCE_{instance}/asciifile.dat > asciifile-{instance}.dat".format(instance=number)
            for number in range(1, last_instance + 1)
        ]

        ccount = Kernel(name="misc.ccount")
        ccount.cores = 1
        ccount.arguments = ["--inputfile=asciifile.dat", "--outputfile=cfreqs.dat"]
        ccount.link_input_data = links
        ccount.download_output_data = "cfreqs.dat"
        return ccount
    def step_2(self, instance):
        """Build the second pipeline step: a character-frequency count.

           The kernel ("misc.ccount") reads asciifile-<instance>.dat, linked
           from the ``$STEP_1`` directory, and writes cfreqs-<instance>.dat,
           which is listed in ``download_output_data``. One core is requested.

           ..note:: ``$STEP_1`` in ``link_input_data`` is a placeholder naming
                    step 1 of the same pipeline instance.
        """
        infile = "asciifile-{0}.dat".format(instance)
        outfile = "cfreqs-{0}.dat".format(instance)

        counter = Kernel(name="misc.ccount")
        counter.arguments = ["--inputfile={0}".format(infile),
                             "--outputfile={0}".format(outfile)]
        counter.link_input_data = "$STEP_1/{0}".format(infile)
        counter.download_output_data = outfile
        counter.cores = 1
        return counter
    def step_1(self, instance):
        """Return the first-step "md.amber" kernel (minimization input).

        Arguments passed to the kernel:
            --mininfile = basename of Kconfig.minimization_input_file
            --topfile   = basename of Kconfig.top_file
            --crdfile   = basename of Kconfig.initial_crd_file
            --cycle     = 1 (fixed)

        The minimization input, topology and initial coordinate files are
        uploaded from their configured paths; the initial coordinate file is
        additionally uploaded under the name min1.crd. The kernel uses one
        core. `instance` is not used.
        """
        # The first and last statements were tab-indented in a space-indented
        # body (TabError on Python 3); the body now uses spaces throughout.
        k1 = Kernel(name="md.amber")
        k1.arguments = ["--mininfile={0}".format(os.path.basename(Kconfig.minimization_input_file)),
                        "--topfile={0}".format(os.path.basename(Kconfig.top_file)),
                        "--crdfile={0}".format(os.path.basename(Kconfig.initial_crd_file)),
                        "--cycle=%s" % (1)]
        k1.upload_input_data = [Kconfig.minimization_input_file,
                                Kconfig.top_file,
                                Kconfig.initial_crd_file]
        k1.cores = 1
        k1.upload_input_data = k1.upload_input_data + ['{0} > min1.crd'.format(Kconfig.initial_crd_file)]

        return k1
    def prepare_replica_for_md(self, replica):
        """Create the kernel for one replica's current cycle.

        File names share the stem "<inp_basename>_<replica.id>_<replica.cycle>":
        "<stem>.md" is uploaded and passed as --inputfile, "<stem>.out" is
        passed as --outputfile and downloaded. The replica's cycle counter is
        advanced by one before returning.

        Arguments:
        replica - object whose `id` and `cycle` attributes name the files

        Returns the configured "misc.ccount" kernel (one core).
        """
        stem = "_".join([self.inp_basename, str(replica.id), str(replica.cycle)])
        md_input = stem + ".md"
        md_output = stem + ".out"

        kernel = Kernel(name="misc.ccount")
        kernel.arguments = ["--inputfile=" + md_input, "--outputfile=" + md_output]
        kernel.upload_input_data = md_input
        kernel.download_output_data = md_output
        kernel.cores = 1

        replica.cycle += 1
        return kernel
    def prepare_replica_for_md(self, replica):
        """Return a "misc.ccount" kernel for `replica` and bump its cycle.

        Input and output file names are built from self.inp_basename,
        replica.id and replica.cycle, joined by underscores, with the
        suffixes ".md" and ".out". The input file is uploaded, the output
        file is downloaded, and replica.cycle is increased by one.
        """
        name_parts = [self.inp_basename, str(replica.id), str(replica.cycle)]
        prefix = "_".join(name_parts)
        source_file, result_file = prefix + ".md", prefix + ".out"

        task = Kernel(name="misc.ccount")
        task.arguments = [
            "--inputfile=" + source_file,
            "--outputfile=" + result_file,
        ]
        task.upload_input_data = source_file
        task.download_output_data = result_file
        task.cores = 1

        replica.cycle += 1
        return task
    def prepare_replica_for_md(self, replica):
        """Set up the single-core "misc.ccount" kernel for one replica.

        The files handled are "<base>_<id>_<cycle>.md" (uploaded, given as
        --inputfile) and "<base>_<id>_<cycle>.out" (given as --outputfile,
        downloaded), where <base> is self.inp_basename and <id>/<cycle> come
        from `replica`. As a side effect replica.cycle is incremented.
        """
        common = self.inp_basename + "_" + str(replica.id) + "_" + str(replica.cycle)
        to_upload = common + ".md"
        to_download = common + ".out"

        ccount = Kernel(name="misc.ccount")
        ccount.cores = 1
        ccount.upload_input_data = to_upload
        ccount.download_output_data = to_download
        ccount.arguments = ["--inputfile=" + to_upload, "--outputfile=" + to_download]

        replica.cycle += 1
        return ccount
    def step_2(self,instance):
        """Return the second-step "md.amber" kernel (MD input).

        Arguments passed to the kernel:
            --mdinfile = basename of Kconfig.md_input_file
            --topfile  = basename of Kconfig.top_file
            --cycle    = 1 (fixed)

        The MD input file is uploaded from its configured path; the topology
        file and md1.crd are linked from the $STEP_1 directory. The kernel
        uses one core. `instance` is not used.
        """
        k2 = Kernel(name="md.amber")
        k2.arguments = [
            "--mdinfile={0}".format(os.path.basename(Kconfig.md_input_file)),
            "--topfile={0}".format(os.path.basename(Kconfig.top_file)),
            "--cycle=%s" % (1),
        ]

        # This line was tab-indented in a space-indented body (TabError on Python 3).
        k2.upload_input_data = [Kconfig.md_input_file]
        k2.link_input_data = [
            "$STEP_1/{0}".format(os.path.basename(Kconfig.top_file)),
            "$STEP_1/md{0}.crd > md{0}.crd".format(1),
        ]

        k2.cores = 1

        return k2
    def analysis_stage(self, iteration, instance):
        '''
        function : Build the "custom.coco" analysis kernel for the given
        iteration. The kernel is given the md<iter>.nc file of every
        simulation instance of every iteration so far and copies one .rst7
        file per simulation instance to $SHARED.

        coco :-

                Arguments : --grid           = Kconfig.grid
                            --dims           = Kconfig.dims
                            --frontpoints    = Kconfig.num_CUs
                            --topfile        = basename of Kconfig.ref_file
                            --mdfile         = "*.nc"
                            --output         = "coco.rst7"
                            --atom_selection = Kconfig.atom_selection

                Staging   : links the reference file from $SHARED and each
                            $SIMULATION_ITERATION_<iter>_INSTANCE_<i>/md<iter>.nc
                            as md_<iter>_<i>.nc. Copies coco<i>.rst7
                            (i = 0..num_CUs-1) to
                            $SHARED/min_<iteration>_<i>.rst7. Every
                            Kconfig.nsave-th iteration coco.log is downloaded
                            to output/iter<iteration>/.

        The kernel uses MPI on min(Kconfig.num_CUs, RPconfig.PILOTSIZE) cores.
        Returns the kernel.
        '''
        reference = os.path.basename(Kconfig.ref_file)

        coco = Kernel(name="custom.coco")
        coco.arguments = [
            "--grid={0}".format(Kconfig.grid),
            "--dims={0}".format(Kconfig.dims),
            "--frontpoints={0}".format(Kconfig.num_CUs),
            "--topfile={0}".format(reference),
            "--mdfile=*.nc",
            "--output=coco.rst7",
            "--atom_selection={0}".format(Kconfig.atom_selection),
        ]
        coco.cores = min(Kconfig.num_CUs, RPconfig.PILOTSIZE)
        coco.uses_mpi = True

        # One trajectory per (iteration, instance) pair seen so far.
        trajectories = [
            '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.nc > md_{0}_{1}.nc'.format(cycle, sim)
            for cycle in range(1, iteration + 1)
            for sim in range(1, Kconfig.num_CUs + 1)
        ]
        coco.link_input_data = ['$SHARED/{0}'.format(reference)] + trajectories

        coco.copy_output_data = [
            'coco{1}.rst7 > $SHARED/min_{0}_{1}.rst7'.format(iteration, idx)
            for idx in range(Kconfig.num_CUs)
        ]

        if iteration % Kconfig.nsave == 0:
            coco.download_output_data = ['coco.log > output/iter{0}/coco.log'.format(iteration)]

        return coco
# Example #20
# 0
    def stage_1(self, instance):
        """Return the kernel for task number `instance` of this stage.

        Instances 1..ENSEMBLE_SIZE are "simulation" tasks: a "sleep" kernel
        whose duration is INPUT_PAR_Q[instance - 1], using one core.

        Any higher instance number is the "analysis" task: after a 10 second
        sleep in this process, a "randval" kernel (upper limit 20) is
        returned that copies output.txt from every simulation task
        1..ENSEMBLE_SIZE of iteration ITER[instance - 1].

        ENSEMBLE_SIZE, INPUT_PAR_Q and ITER are module-level names that are
        only read here, so no `global` declaration is needed.
        """
        # "simulation" tasks
        if instance <= ENSEMBLE_SIZE:

            k1 = Kernel(name="sleep")
            k1.arguments = ["--file=output.txt", "--text=simulation",
                            "--duration={0}".format(INPUT_PAR_Q[instance - 1])]
            k1.cores = 1

            # File staging can be added using the following
            # k1.upload_input_data = []
            # k1.copy_input_data = []
            # k1.link_input_data = []
            # k1.copy_output_data = []
            # k1.download_output_data = []

            return k1

        # "analysis" task

        # Emulating some more analysis execution time
        sleep(10)

        # Analysis kernel produces a random integer (<20) to push into INPUT_PAR_Q
        m1 = Kernel(name="randval")
        m1.arguments = ["--upperlimit=20"]

        # Copy simulation output data
        m1.copy_input_data = [
            '$ITER_{0}_STAGE_1_TASK_{1}/output.txt'.format(ITER[instance - 1], inst)
            for inst in range(1, ENSEMBLE_SIZE + 1)
        ]

        return m1
    def prepare_replica_for_md(self, replica):
        """Configure the kernel that handles `replica` in its current cycle.

        Arguments:
        replica - object whose `id` and `cycle` attributes are used to build
                  the file names; its `cycle` is incremented by this call

        The kernel ("misc.ccount", one core) uploads
        "<inp_basename>_<id>_<cycle>.md" as its --inputfile and downloads
        "<inp_basename>_<id>_<cycle>.out", its --outputfile.
        """
        base = "_".join((self.inp_basename, str(replica.id), str(replica.cycle)))
        file_in = base + ".md"
        file_out = base + ".out"

        job = Kernel(name="misc.ccount")
        job.arguments = ["--inputfile=" + file_in, "--outputfile=" + file_out]
        job.upload_input_data = file_in
        job.download_output_data = file_out
        job.cores = 1

        replica.cycle += 1
        return job
    def simulation_step(self, iteration, instance):
        '''
        function : if iteration = 1, use coordinates file from pre_loop, else use coordinates output file from analysis generated
        in the previous iteration. 
        - Preprocess the simulation parameters, coordinates structure and topology file to generate the 
        portable binary run - .tpr - file to be used by the simulation run;
        - Run the simulations;
        - Apply gromacs to the trajectory and coordinate files to adjust the jumps of the molecular system
        in the periodic boundary conditions simulation box.

        md.grompp: -
        
                Purpose : Run gromacs preprocessing to obtain a portable binary run file (.tpr) that unifies information
                from the simulation parameters, topology file and the initial coordinates file.
                
                Arguments : --mdp  = simulation parameters file - input
                            --gro  = single coordinates file - input
                            --top  = topology filename - input
                            --ref  = single coordinates file to be used as a reference for position restraints - input
                            --tpr  = portable binary run file - output
        md.mdrun :-

                Purpose : Run gromacs on each of the coordinate files .gro that were given in input to the previous 
                grompp kernel, using as input the .tpr file generated by the previous grompp kernel.
                Among others generates a .xtc file in each instance, all of which will be used for further analysis.

                Arguments : -deffnm = basename that will be used for all generated files in output but also to determine
                the .tpr file in input.
        '''
        
        kernel_list = []
        
        if((iteration-1)!=0):

            outbase, ext = os.path.basename(Kconfig.output).split('.')
            if ext == '':
		    	ext = '.pdb'
            
            k1_prep_min_kernel = Kernel(name="md.grompp")
            k1_prep_min_kernel.link_input_data = ['$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.eminrestr_md)),
                                                  '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.top_file)),
                                                  '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.restr_file)),
                                                  '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.itp_file))]			
            k1_prep_min_kernel.link_input_data = k1_prep_min_kernel.link_input_data + ['$PREV_ANALYSIS_INSTANCE_1/{0}_{1}{2}.{3} > {0}_{1}{2}.{3}'.format(outbase,iteration-2,instance-1,ext)]
            k1_prep_min_kernel.arguments = ["--mdp={0}".format(os.path.basename(Kconfig.eminrestr_md)),
                                            "--ref={0}_{1}{2}.{3}".format(outbase,iteration-2,instance-1,ext),
                                            "--top={0}".format(os.path.basename(Kconfig.top_file)),
                                            "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
                                            "--tpr=min-{0}_{1}.tpr".format(iteration-1,instance-1)]
            k1_prep_min_kernel.copy_output_data = ['min-{0}_{1}.tpr > $PRE_LOOP/min-{0}_{1}.tpr'.format(iteration-1,instance-1)]    
            kernel_list.append(k1_prep_min_kernel)
            
            k2_min_kernel = Kernel(name="md.mdrun")
            k2_min_kernel.link_input_data = ['$PRE_LOOP/min-{0}_{1}.tpr > min-{0}_{1}.tpr'.format(iteration-1,instance-1)]
            k2_min_kernel.cores = Kconfig.num_cores_per_sim_cu
            k2_min_kernel.arguments = ["--deffnm=min-{0}_{1}".format(iteration-1,instance-1)]
            k2_min_kernel.copy_output_data = ['min-{0}_{1}.gro > $PRE_LOOP/min-{0}_{1}.gro'.format(iteration-1,instance-1)]
            kernel_list.append(k2_min_kernel)
            
            k3_prep_eq_kernel = Kernel(name="md.grompp")
            k3_prep_eq_kernel.link_input_data = ['$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.eeqrestr_md)),
                                                 '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.top_file)),
                                                 '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.restr_file)),
                                                 '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.itp_file))]
            k3_prep_eq_kernel.link_input_data = k3_prep_eq_kernel.link_input_data + ['$PRE_LOOP/min-{0}_{1}.gro > min-{0}_{1}.gro'.format(iteration-1,instance-1)]
            k3_prep_eq_kernel.arguments = ["--mdp={0}".format(os.path.basename(Kconfig.eeqrestr_md)),
                                           "--ref=min-{0}_{1}.gro".format(iteration-1,instance-1),
                                           "--top={0}".format(os.path.basename(Kconfig.top_file)),
                                           "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
                                           "--tpr=eq-{0}_{1}.tpr".format(iteration-1,instance-1)]
            k3_prep_eq_kernel.copy_output_data = ['eq-{0}_{1}.tpr > $PRE_LOOP/eq-{0}_{1}.tpr'.format(iteration-1,instance-1)]
            kernel_list.append(k3_prep_eq_kernel)

            k4_eq_kernel = Kernel(name="md.mdrun")
            k4_eq_kernel.link_input_data = ['$PRE_LOOP/eq-{0}_{1}.tpr > eq-{0}_{1}.tpr'.format(iteration-1,instance-1)]
            k4_eq_kernel.cores = Kconfig.num_cores_per_sim_cu
            k4_eq_kernel.arguments = ["--deffnm=eq-{0}_{1}".format(iteration-1,instance-1)]
            k4_eq_kernel.copy_output_data = ['eq-{0}_{1}.gro > $PRE_LOOP/eq-{0}_{1}.gro'.format(iteration-1,instance-1)]
            kernel_list.append(k4_eq_kernel)
			
        k5_prep_sim_kernel = Kernel(name="md.grompp")
        k5_prep_sim_kernel.link_input_data = ['$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.md_input_file)),
                                             '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.top_file))]
        if((iteration-1)==0):
            k5_prep_sim_kernel.link_input_data =  k5_prep_sim_kernel.link_input_data + ['$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.initial_crd_file))]
            k5_prep_sim_kernel.arguments = ["--mdp={0}".format(os.path.basename(Kconfig.md_input_file)),
                                           "--gro={0}".format(os.path.basename(Kconfig.initial_crd_file)),
                                           "--top={0}".format(os.path.basename(Kconfig.top_file)),
                                           "--tpr=md-{0}_{1}.tpr".format(iteration-1,instance-1)]  
        else:
            k5_prep_sim_kernel.link_input_data =  k5_prep_sim_kernel.link_input_data + ['$PRE_LOOP/eq-{0}_{1}.gro > eq-{0}_{1}.gro'.format(iteration-1,instance-1)]
            k5_prep_sim_kernel.arguments = ["--mdp={0}".format(os.path.basename(Kconfig.md_input_file)),
                                           "--gro=eq-{0}_{1}.gro".format(iteration-1,instance-1),
                                           "--top={0}".format(os.path.basename(Kconfig.top_file)),
                                           "--tpr=md-{0}_{1}.tpr".format(iteration-1,instance-1)]             
        k5_prep_sim_kernel.copy_output_data = ['md-{0}_{1}.tpr > $PRE_LOOP/md-{0}_{1}.tpr'.format(iteration-1,instance-1)]        
        kernel_list.append(k5_prep_sim_kernel)
        
        k6_sim_kernel = Kernel(name="md.mdrun")
        k6_sim_kernel.link_input_data = ['$PRE_LOOP/md-{0}_{1}.tpr > md-{0}_{1}.tpr'.format(iteration-1,instance-1)]
        k6_sim_kernel.cores = Kconfig.num_cores_per_sim_cu
        k6_sim_kernel.arguments = ["--deffnm=md-{0}_{1}".format(iteration-1,instance-1)]
        k6_sim_kernel.copy_output_data = ["md-{0}_{1}.gro > $PRE_LOOP/md-{0}_{1}.gro".format(iteration-1,instance-1),
                                          "md-{0}_{1}.xtc > $PRE_LOOP/md-{0}_{1}.xtc".format(iteration-1,instance-1)]
        kernel_list.append(k6_sim_kernel)

        k7_sim_kernel = Kernel(name="md.trjconv")
        k7_sim_kernel.link_input_data = ["$PRE_LOOP/md-{0}_{1}.gro > md-{0}_{1}.gro".format(iteration-1,instance-1),
                                         "$PRE_LOOP/md-{0}_{1}.xtc > md-{0}_{1}.xtc".format(iteration-1,instance-1),
                                         "$PRE_LOOP/md-{0}_{1}.tpr > md-{0}_{1}.tpr".format(iteration-1,instance-1)]
        k7_sim_kernel.arguments = ["--echo1=System",
                                   "--f1=md-{0}_{1}.gro".format(iteration-1,instance-1),
                                   "--s1=md-{0}_{1}.tpr".format(iteration-1,instance-1),
                                   "--o1=md-{0}_{1}_whole.gro".format(iteration-1,instance-1),
                                   "--pbc1=whole",
                                   "--echo2=System",
                                   "--f2=md-{0}_{1}.xtc".format(iteration-1,instance-1),
                                   "--s2=md-{0}_{1}.tpr".format(iteration-1,instance-1),
                                   "--o2=md-{0}_{1}_whole.xtc".format(iteration-1,instance-1),
                                   "--pbc2=whole"]
        if(iteration%Kconfig.nsave==0):
            k7_sim_kernel.download_output_data = ["md-{0}_{1}_whole.xtc > output/iter{0}/md-{0}_{1}_whole.xtc".format(iteration-1,instance-1)]	        
        kernel_list.append(k7_sim_kernel)              
        
        return kernel_list
    def simulation_stage(self, iteration, instance):
        '''
        function : build the list of kernels that one simulation instance runs
        in the given iteration. Every file is exchanged through a fixed staging
        directory and every file name uses the shifted iteration number
        iterMod = iteration + 48 (presumably so that the numbering continues
        the one of an earlier run - TODO confirm).

        With i = iterMod - 1 and j = instance - 1 the kernels are:

            only if (iterMod - 1) != 0, starting from the coordinate file
            '<outbase>_<iterMod-2>_<j>.<ext>' found in the staging directory
            (outbase and ext are taken from Kconfig.output):

            k1 custom.grompp  : grompp_1_mdp                 -> min-<i>_<j>.tpr
            k2 custom.mdrun   : min-<i>_<j>.tpr              -> min-<i>_<j>.gro
            k3 custom.grompp  : grompp_2_mdp, min-<i>_<j>.gro -> eq-<i>_<j>.tpr
            k4 custom.mdrun   : eq-<i>_<j>.tpr               -> eq-<i>_<j>.gro

            always:

            k5 custom.grompp  : grompp_3_mdp and either the initial coordinates
                                (if (iterMod - 1) == 0) or eq-<i>_<j>.gro
                                                             -> md-<i>_<j>.tpr
            k6 custom.mdrun   : md-<i>_<j>.tpr -> md-<i>_<j>.gro, md-<i>_<j>.xtc
            k7 custom.trjconv : md-<i>_<j>.gro with --pbc=whole; the result
                                replaces md-<i>_<j>.gro in the staging directory
            k8 custom.trjconv : md-<i>_<j>.xtc with --pbc=whole; the result
                                replaces md-<i>_<j>.xtc in the staging directory
                                and is downloaded to output/iter<i>/ whenever
                                iterMod % Kconfig.nsave == 0

        Arguments : iteration = current iteration number (the first one is
                                expected to be 1 - TODO confirm with the caller)
                    instance  = instance number; instance - 1 is used in all
                                file names

        Returns   : list of Kernel objects in the order they were appended.
        '''
        # NOTE: site-specific absolute path. The '$SHARED' placeholder was used
        # here before it was replaced by this fixed directory.
        share_dir = "/work/fbettenc/p14b01_pool/staging_area"
        prev_sim_last_iter_to_use = 48
        iter_mod = iteration + prev_sim_last_iter_to_use

        cur = iter_mod - 1
        inst = instance - 1

        # Split the configured output name on its LAST dot. Unlike a plain
        # split('.') this does not raise for names with no dot or with several
        # dots. The fallback extension carries no leading dot because the file
        # name template below already supplies one.
        output_name = os.path.basename(Kconfig.output)
        outbase, dot, ext = output_name.rpartition('.')
        if not dot:
            outbase, ext = output_name, ''
        if not ext:
            ext = 'pdb'

        min_name = 'min-{0}_{1}'.format(cur, inst)
        eq_name = 'eq-{0}_{1}'.format(cur, inst)
        md_name = 'md-{0}_{1}'.format(cur, inst)

        def in_share(path):
            # Directive linking a configured file from the staging directory.
            return share_dir + '/{0}'.format(os.path.basename(path))

        def pull(name):
            # Directive linking staging_dir/name into the unit as name.
            return share_dir + '/{0} > {0}'.format(name)

        def push(name, target=None):
            # Directive copying name to the staging directory, optionally
            # under a different target name.
            return ('{0} > '.format(name) + share_dir +
                    '/{0}'.format(target or name))

        kernel_list = []

        if cur != 0:
            start_crd = '{0}_{1}_{2}.{3}'.format(outbase, iter_mod - 2, inst,
                                                 ext)

            k1_prep_min_kernel = Kernel(name="custom.grompp")
            k1_prep_min_kernel.link_input_data = [
                in_share(Kconfig.grompp_1_mdp),
                in_share(Kconfig.top_file),
                in_share(Kconfig.restr_file),
                in_share(Kconfig.grompp_1_itp_file),
                pull(start_crd),
            ]
            k1_prep_min_kernel.arguments = [
                "--mdp={0}".format(os.path.basename(Kconfig.grompp_1_mdp)),
                "--ref={0}".format(start_crd),
                "--top={0}".format(os.path.basename(Kconfig.top_file)),
                "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
                "--tpr={0}.tpr".format(min_name),
            ]
            k1_prep_min_kernel.copy_output_data = [push(min_name + '.tpr')]
            kernel_list.append(k1_prep_min_kernel)

            k2_min_kernel = Kernel(name="custom.mdrun")
            k2_min_kernel.link_input_data = [pull(min_name + '.tpr')]
            k2_min_kernel.cores = Kconfig.num_cores_per_sim_cu
            k2_min_kernel.arguments = ["--deffnm={0}".format(min_name)]
            # This directive used to be written without the space after '>';
            # it now has the same 'src > dst' form as all the others.
            k2_min_kernel.copy_output_data = [push(min_name + '.gro')]
            kernel_list.append(k2_min_kernel)

            k3_prep_eq_kernel = Kernel(name="custom.grompp")
            k3_prep_eq_kernel.link_input_data = [
                in_share(Kconfig.grompp_2_mdp),
                in_share(Kconfig.top_file),
                in_share(Kconfig.restr_file),
                in_share(Kconfig.grompp_2_itp_file),
                pull(min_name + '.gro'),
                pull(start_crd),
            ]
            k3_prep_eq_kernel.arguments = [
                "--mdp={0}".format(os.path.basename(Kconfig.grompp_2_mdp)),
                "--ref={0}".format(start_crd),
                "--top={0}".format(os.path.basename(Kconfig.top_file)),
                "--gro={0}.gro".format(min_name),
                "--tpr={0}.tpr".format(eq_name),
            ]
            k3_prep_eq_kernel.copy_output_data = [push(eq_name + '.tpr')]
            kernel_list.append(k3_prep_eq_kernel)

            k4_eq_kernel = Kernel(name="custom.mdrun")
            k4_eq_kernel.link_input_data = [pull(eq_name + '.tpr')]
            k4_eq_kernel.cores = Kconfig.num_cores_per_sim_cu
            k4_eq_kernel.arguments = ["--deffnm={0}".format(eq_name)]
            k4_eq_kernel.copy_output_data = [push(eq_name + '.gro')]
            kernel_list.append(k4_eq_kernel)

        # The starting coordinates for the md-* run are the configured initial
        # file when (iterMod - 1) == 0, otherwise the eq-* output of k4.
        if cur == 0:
            k5_start_link = in_share(Kconfig.initial_crd_file)
            k5_start_gro = os.path.basename(Kconfig.initial_crd_file)
        else:
            k5_start_link = pull(eq_name + '.gro')
            k5_start_gro = eq_name + '.gro'

        k5_prep_sim_kernel = Kernel(name="custom.grompp")
        k5_prep_sim_kernel.link_input_data = [
            in_share(Kconfig.grompp_3_mdp),
            in_share(Kconfig.top_file),
            k5_start_link,
        ]
        k5_prep_sim_kernel.arguments = [
            "--mdp={0}".format(os.path.basename(Kconfig.grompp_3_mdp)),
            "--gro={0}".format(k5_start_gro),
            "--top={0}".format(os.path.basename(Kconfig.top_file)),
            "--tpr={0}.tpr".format(md_name),
        ]
        k5_prep_sim_kernel.copy_output_data = [push(md_name + '.tpr')]
        kernel_list.append(k5_prep_sim_kernel)

        k6_sim_kernel = Kernel(name="custom.mdrun")
        k6_sim_kernel.link_input_data = [pull(md_name + '.tpr')]
        k6_sim_kernel.cores = Kconfig.num_cores_per_sim_cu
        k6_sim_kernel.arguments = ["--deffnm={0}".format(md_name)]
        k6_sim_kernel.copy_output_data = [
            push(md_name + '.gro'),
            push(md_name + '.xtc'),
        ]
        kernel_list.append(k6_sim_kernel)

        k7_sim_kernel = Kernel(name="custom.trjconv")
        k7_sim_kernel.link_input_data = [
            pull(md_name + '.gro'),
            pull(md_name + '.tpr'),
        ]
        k7_sim_kernel.arguments = [
            "--echo=System",
            "--f={0}.gro".format(md_name),
            "--s={0}.tpr".format(md_name),
            "--o={0}_whole.gro".format(md_name),
            "--pbc=whole",
        ]
        # The converted file overwrites md-<i>_<j>.gro in the staging area.
        k7_sim_kernel.copy_output_data = [
            push(md_name + '_whole.gro', md_name + '.gro')
        ]
        kernel_list.append(k7_sim_kernel)

        k8_sim_kernel = Kernel(name="custom.trjconv")
        k8_sim_kernel.link_input_data = [
            pull(md_name + '.xtc'),
            pull(md_name + '.tpr'),
        ]
        k8_sim_kernel.arguments = [
            "--echo=System",
            "--f={0}.xtc".format(md_name),
            "--s={0}.tpr".format(md_name),
            "--o={0}_whole.xtc".format(md_name),
            "--pbc=whole",
        ]
        if iter_mod % Kconfig.nsave == 0:
            k8_sim_kernel.download_output_data = [
                "{0}_whole.xtc > output/iter{1}/{0}_whole.xtc".format(
                    md_name, cur)
            ]
        # The converted file overwrites md-<i>_<j>.xtc in the staging area.
        k8_sim_kernel.copy_output_data = [
            push(md_name + '_whole.xtc', md_name + '.xtc')
        ]
        kernel_list.append(k8_sim_kernel)

        return kernel_list
    def simulation_step(self, iteration, instance):
        '''
        function : if iteration = 1, use the coordinates file from pre_loop,
        else use the coordinates file generated by the analysis of the
        previous iteration.
        - Preprocess the simulation parameters, coordinates and topology into
        the portable binary run file (.tpr) used by the simulation run;
        - Run the simulations;
        - Post-process the resulting coordinate and trajectory files with
        trjconv (--pbc=whole) and optionally download the trajectory.

        From the second iteration on, the production run is preceded by two
        extra grompp/mdrun pairs (files min-* and eq-*) that start from the
        file '<outbase>_<iteration-2><instance-1>.<ext>' of the previous
        analysis; outbase and ext are taken from Kconfig.output.

        md.grompp :-

                Purpose : Run gromacs preprocessing to obtain a portable binary
                run file (.tpr) that unifies information from the simulation
                parameters, topology file and the initial coordinates file.

                Arguments : --mdp  = simulation parameters file - input
                            --gro  = single coordinates file - input
                            --top  = topology filename - input
                            --ref  = single coordinates file to be used as a
                                     reference for position restraints - input
                            --tpr  = portable binary run file - output

        md.mdrun :-

                Purpose : Run gromacs using as input the .tpr file generated by
                the preceding grompp kernel. Among others generates a .xtc file
                in each instance, which is used for further analysis.

                Arguments : --deffnm = basename used for all generated output
                            files and also to locate the .tpr input file.

        md.trjconv :-

                Purpose : Convert the .gro and .xtc files of the production run
                into *_whole.gro / *_whole.xtc.

                Arguments : --echo1/--f1/--s1/--o1/--pbc1 for the .gro file and
                            --echo2/--f2/--s2/--o2/--pbc2 for the .xtc file.

        Returns : list of Kernel objects in the order they were appended.
        '''
        cur = iteration - 1
        inst = instance - 1

        min_name = 'min-{0}_{1}'.format(cur, inst)
        eq_name = 'eq-{0}_{1}'.format(cur, inst)
        md_name = 'md-{0}_{1}'.format(cur, inst)

        def pre_loop(path):
            # Directive linking a configured file staged by pre_loop.
            return '$PRE_LOOP/{0}'.format(os.path.basename(path))

        def fetch(name):
            # Directive linking $PRE_LOOP/name into the unit as name.
            return '$PRE_LOOP/{0} > {0}'.format(name)

        def store(name):
            # Directive copying name from the unit back to $PRE_LOOP.
            return '{0} > $PRE_LOOP/{0}'.format(name)

        kernel_list = []

        if cur != 0:
            # Split the configured output name on its LAST dot. Unlike a plain
            # split('.') this does not raise for names with no dot or with
            # several dots. The fallback extension has no leading dot because
            # the template below already supplies one.
            output_name = os.path.basename(Kconfig.output)
            outbase, dot, ext = output_name.rpartition('.')
            if not dot:
                outbase, ext = output_name, ''
            if not ext:
                ext = 'pdb'
            # NOTE: there is no separator between the iteration and the
            # instance number in this file name.
            start_crd = '{0}_{1}{2}.{3}'.format(outbase, iteration - 2, inst,
                                                ext)

            k1_prep_min_kernel = Kernel(name="md.grompp")
            k1_prep_min_kernel.link_input_data = [
                pre_loop(Kconfig.eminrestr_md),
                pre_loop(Kconfig.top_file),
                pre_loop(Kconfig.restr_file),
                pre_loop(Kconfig.itp_file),
                '$PREV_ANALYSIS_INSTANCE_1/{0} > {0}'.format(start_crd),
            ]
            k1_prep_min_kernel.arguments = [
                "--mdp={0}".format(os.path.basename(Kconfig.eminrestr_md)),
                "--ref={0}".format(start_crd),
                "--top={0}".format(os.path.basename(Kconfig.top_file)),
                "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
                "--tpr={0}.tpr".format(min_name),
            ]
            k1_prep_min_kernel.copy_output_data = [store(min_name + '.tpr')]
            kernel_list.append(k1_prep_min_kernel)

            k2_min_kernel = Kernel(name="md.mdrun")
            k2_min_kernel.link_input_data = [fetch(min_name + '.tpr')]
            k2_min_kernel.cores = Kconfig.num_cores_per_sim_cu
            k2_min_kernel.arguments = ["--deffnm={0}".format(min_name)]
            k2_min_kernel.copy_output_data = [store(min_name + '.gro')]
            kernel_list.append(k2_min_kernel)

            k3_prep_eq_kernel = Kernel(name="md.grompp")
            k3_prep_eq_kernel.link_input_data = [
                pre_loop(Kconfig.eeqrestr_md),
                pre_loop(Kconfig.top_file),
                pre_loop(Kconfig.restr_file),
                pre_loop(Kconfig.itp_file),
                fetch(min_name + '.gro'),
            ]
            k3_prep_eq_kernel.arguments = [
                "--mdp={0}".format(os.path.basename(Kconfig.eeqrestr_md)),
                "--ref={0}.gro".format(min_name),
                "--top={0}".format(os.path.basename(Kconfig.top_file)),
                "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
                "--tpr={0}.tpr".format(eq_name),
            ]
            k3_prep_eq_kernel.copy_output_data = [store(eq_name + '.tpr')]
            kernel_list.append(k3_prep_eq_kernel)

            k4_eq_kernel = Kernel(name="md.mdrun")
            k4_eq_kernel.link_input_data = [fetch(eq_name + '.tpr')]
            k4_eq_kernel.cores = Kconfig.num_cores_per_sim_cu
            k4_eq_kernel.arguments = ["--deffnm={0}".format(eq_name)]
            k4_eq_kernel.copy_output_data = [store(eq_name + '.gro')]
            kernel_list.append(k4_eq_kernel)

        # Starting coordinates of the production run: the configured initial
        # file in the first iteration, otherwise the eq-* output of k4.
        if cur == 0:
            k5_start_link = pre_loop(Kconfig.initial_crd_file)
            k5_start_gro = os.path.basename(Kconfig.initial_crd_file)
        else:
            k5_start_link = fetch(eq_name + '.gro')
            k5_start_gro = eq_name + '.gro'

        k5_prep_sim_kernel = Kernel(name="md.grompp")
        k5_prep_sim_kernel.link_input_data = [
            pre_loop(Kconfig.md_input_file),
            pre_loop(Kconfig.top_file),
            k5_start_link,
        ]
        k5_prep_sim_kernel.arguments = [
            "--mdp={0}".format(os.path.basename(Kconfig.md_input_file)),
            "--gro={0}".format(k5_start_gro),
            "--top={0}".format(os.path.basename(Kconfig.top_file)),
            "--tpr={0}.tpr".format(md_name),
        ]
        k5_prep_sim_kernel.copy_output_data = [store(md_name + '.tpr')]
        kernel_list.append(k5_prep_sim_kernel)

        k6_sim_kernel = Kernel(name="md.mdrun")
        k6_sim_kernel.link_input_data = [fetch(md_name + '.tpr')]
        k6_sim_kernel.cores = Kconfig.num_cores_per_sim_cu
        k6_sim_kernel.arguments = ["--deffnm={0}".format(md_name)]
        k6_sim_kernel.copy_output_data = [
            store(md_name + '.gro'),
            store(md_name + '.xtc'),
        ]
        kernel_list.append(k6_sim_kernel)

        k7_sim_kernel = Kernel(name="md.trjconv")
        k7_sim_kernel.link_input_data = [
            fetch(md_name + '.gro'),
            fetch(md_name + '.xtc'),
            fetch(md_name + '.tpr'),
        ]
        k7_sim_kernel.arguments = [
            "--echo1=System",
            "--f1={0}.gro".format(md_name),
            "--s1={0}.tpr".format(md_name),
            "--o1={0}_whole.gro".format(md_name),
            "--pbc1=whole",
            "--echo2=System",
            "--f2={0}.xtc".format(md_name),
            "--s2={0}.tpr".format(md_name),
            "--o2={0}_whole.xtc".format(md_name),
            "--pbc2=whole",
        ]
        if iteration % Kconfig.nsave == 0:
            k7_sim_kernel.download_output_data = [
                "{0}_whole.xtc > output/iter{1}/{0}_whole.xtc".format(
                    md_name, cur)
            ]
        kernel_list.append(k7_sim_kernel)

        return kernel_list
    def analysis_step(self, iteration, instance):
        '''
        function : Build the two analysis kernels of the current iteration.

        md.coco :-

                Purpose : Runs CoCo on the .ncdf files of every simulation
                instance of every iteration up to and including the current
                one, and copies one 'pentaopt<iteration><k>.pdb' file per CU
                to $PRE_LOOP. coco.log is downloaded to output/iter<iteration>/
                whenever iteration % Kconfig.nsave == 0.

                Arguments : --grid           = Kconfig.grid
                            --dims           = Kconfig.dims
                            --frontpoints    = Number of CUs
                            --topfile        = Topology filename
                            --mdfile         = '*.ncdf'
                            --output         = 'pdbs'
                            --atom_selection = Kconfig.atom_selection

        md.tleap :-

                Purpose : Receives postexec.py and the pentaopt*.pdb files of
                the current iteration from $PRE_LOOP.

                Arguments : --numofsims = Number of CUs
                            --cycle     = Current iteration number

        Returns : [coco kernel, tleap kernel]. The instance argument does not
        influence the result.
        '''
        cu_count = Kconfig.num_CUs
        top_name = os.path.basename(Kconfig.top_file)

        coco = Kernel(name="md.coco")
        coco.arguments = [
            "--grid={0}".format(Kconfig.grid),
            "--dims={0}".format(Kconfig.dims),
            "--frontpoints={0}".format(cu_count),
            "--topfile={0}".format(top_name),
            "--mdfile=*.ncdf",
            "--output=pdbs",
            "--atom_selection={0}".format(Kconfig.atom_selection),
        ]
        coco.cores = min(cu_count, RPconfig.PILOTSIZE)
        coco.uses_mpi = True

        # One trajectory per (iteration, instance) pair seen so far; both
        # counters are 1-based in the placeholder and in the local file name.
        trajectories = [
            '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.ncdf > md_{0}_{1}.ncdf'
            .format(past_iteration, cu)
            for past_iteration in range(1, iteration + 1)
            for cu in range(1, cu_count + 1)
        ]
        coco.link_input_data = ['$PRE_LOOP/{0}'.format(top_name)
                                ] + trajectories

        coco.copy_output_data = [
            'pdbs{1}.pdb > $PRE_LOOP/pentaopt{0}{1}.pdb'.format(iteration, idx)
            for idx in range(cu_count)
        ]

        if iteration % Kconfig.nsave == 0:
            coco.download_output_data = [
                'coco.log > output/iter{0}/coco.log'.format(iteration)
            ]

        tleap = Kernel(name="md.tleap")
        tleap.arguments = [
            "--numofsims={0}".format(cu_count),
            "--cycle={0}".format(iteration),
        ]
        tleap.link_input_data = ['$PRE_LOOP/postexec.py > postexec.py'] + [
            '$PRE_LOOP/pentaopt{0}{1}.pdb > pentaopt{0}{1}.pdb'.format(
                iteration, idx) for idx in range(cu_count)
        ]

        return [coco, tleap]
    def analysis_stage(self, iteration, instance):
        '''
        function : Perform CoCo analysis on the output of the simulations up to
        the current iterMod (= iteration + 48). Using the .xtc files found in
        the staging directory for every iteration index 0 .. iterMod-1 and
        every CU, generate .gro files (as many as num_CUs) to be used in the
        next simulations.

        custom.coco :-

                Purpose : Runs CoCo analysis on a set of MD trajectory files,
                in this case .xtc files, and generates several coordinate
                files to be used as starting points of the next simulations.
                The files '<outbase>_<iterMod-1>_<k>.gro' are copied to the
                staging directory and coco.log is downloaded to
                output/coco-iter<iterMod-1>.log.

                Arguments : --grid           = Number of points along each dimension of the CoCo histogram
                            --dims           = The number of projections to consider from the input pcz file
                            --frontpoints    = Number of CUs
                            --topfile        = md-<iterMod-1>_0.gro from the staging directory
                            --mdfile         = '*.xtc'
                            --output         = '<outbase>_<iterMod-1>_.gro'
                            --atom_selection = Selection of the biological part of the system we want to consider for analysis

        Returns : list holding the single coco kernel. The instance argument
        does not influence the result.
        '''
        # NOTE: site-specific absolute path; '$SHARED' was used here before.
        share_dir = "/work/fbettenc/p14b01_pool/staging_area"

        prev_sim_last_iter_to_use = 48
        iter_mod = iteration + prev_sim_last_iter_to_use
        cur = iter_mod - 1

        k1_ana_kernel = Kernel(name="custom.coco")

        # Only the base name of Kconfig.output is needed: every file produced
        # here is a .gro file. Splitting on the last dot keeps names with no
        # dot or with several dots from raising.
        output_name = os.path.basename(Kconfig.output)
        outbase, dot, _ = output_name.rpartition('.')
        if not dot:
            outbase = output_name

        k1_ana_kernel.arguments = [
            "--grid={0}".format(Kconfig.grid),
            "--dims={0}".format(Kconfig.dims),
            "--frontpoints={0}".format(Kconfig.num_CUs),
            "--topfile=md-{0}_0.gro".format(cur),
            "--mdfile=*.xtc",
            "--output={0}_{1}_.gro".format(outbase, cur),
            "--atom_selection={0}".format(Kconfig.sel),
        ]
        # Original author's note: iterMod + 1 is used because at the first
        # iteration coco already analyses the k8 output, so coco is "an
        # iteration ahead".
        k1_ana_kernel.cores = min(Kconfig.num_CUs * (iter_mod + 1),
                                  RPconfig.PILOTSIZE)

        # Single-string print calls emit the same text as the former
        # multi-argument Python 2 print statements and also parse on Python 3.
        print(" ")
        print("iter,iterMod,AnaCUcores =  {0} ,  {1} ,  {2}".format(
            iteration, iter_mod, k1_ana_kernel.cores))
        print(" ")

        k1_ana_kernel.uses_mpi = True

        inputs = [share_dir + '/md-{0}_0.gro > md-{0}_0.gro'.format(cur)]
        # Trajectories are indexed from 0 for both iteration and CU.
        inputs += [
            share_dir + '/md-{0}_{1}.xtc > md-{0}_{1}.xtc'.format(it_idx, cu)
            for it_idx in range(iter_mod)
            for cu in range(Kconfig.num_CUs)
        ]
        k1_ana_kernel.link_input_data = inputs

        outputs = []
        for cu in range(Kconfig.num_CUs):
            gro_name = "{0}_{1}_{2}.gro".format(outbase, cur, cu)
            outputs.append("{0} > ".format(gro_name) + share_dir +
                           "/{0}".format(gro_name))
        k1_ana_kernel.copy_output_data = outputs

        k1_ana_kernel.download_output_data = [
            "coco.log > output/coco-iter{0}.log".format(cur)
        ]

        return [k1_ana_kernel]
    def simulation_stage(self, iteration, instance):
        '''
        function : Build the chain of GROMACS kernels that make up one
        simulation instance of one iteration.

        In the first iteration only the production MD is set up, starting
        from Kconfig.initial_crd_file. In every other iteration the production
        MD is preceded by a restrained energy minimisation and a restrained MD
        run; the minimisation uses a file linked from
        $PREV_ANALYSIS_INSTANCE_1 as its --ref structure.

        Kernels (1-4 are skipped in the first iteration) :-

            1. custom.grompp  : preprocessing for the restrained energy min
            2. custom.mdrun   : restrained energy minimisation
            3. custom.grompp  : preprocessing for the restrained MD
            4. custom.mdrun   : restrained MD
            5. custom.grompp  : preprocessing for the production MD
            6. custom.mdrun   : production MD
            7. custom.trjconv : --pbc=whole on the output structure (.gro)
            8. custom.trjconv : --pbc=whole on the output trajectory (.xtc)

        Files are exchanged between kernels through $SHARED and are named
        with the zero-based indices (iteration - 1) and (instance - 1).

        Arguments : iteration = current iteration number (first one is 1)
                    instance  = current simulation instance (first one is 1)

        Returns   : list of Kernel objects in the order they were created
        '''

        kernel_list = []
        iter1 = iteration - 1
        inst1 = instance - 1
        shrd = '$SHARED/{0}'

        # Split the configured output name at its last dot. Unpacking
        # split('.') raised ValueError for names with no dot or with several
        # dots, which made the empty-extension fallback below unreachable for
        # extensionless names.
        outname = opb(Kconfig.output)
        outbase, dot, ext = outname.rpartition('.')
        if not dot:
            outbase, ext = outname, ''
        if ext == '':
            # The templates below insert the '.' themselves, so the fallback
            # extension must not carry one (it used to yield 'name..pdb').
            ext = 'pdb'

        if iter1 != 0:
            # Kernel 1: Grompp before energy min step.
            k1 = Kernel(name="custom.grompp")
            k1.link_input_data = [
                shrd.format(opb(Kconfig.grompp_1_mdp)),
                shrd.format(opb(Kconfig.top_file)),
                shrd.format(opb(Kconfig.restr_file)),
                shrd.format(opb(Kconfig.grompp_1_itp_file))
            ]
            # Reference structure, indexed by the previous iteration
            # (iteration - 2 in zero-based numbering).
            prev = '$PREV_ANALYSIS_INSTANCE_1/{0}_{1}{2}.{3} > {0}_{1}{2}.{3}'
            k1.link_input_data += [
                prev.format(outbase, iteration - 2, inst1, ext)
            ]
            k1.arguments = [
                "--mdp={0}".format(opb(Kconfig.grompp_1_mdp)),
                "--ref={0}_{1}{2}.{3}".format(outbase, iteration - 2, inst1,
                                              ext),
                "--top={0}".format(opb(Kconfig.top_file)),
                "--gro={0}".format(opb(Kconfig.restr_file)),
                "--tpr=min-{0}_{1}.tpr".format(iter1, inst1)
            ]
            cout = 'min-{0}_{1}.tpr > $SHARED/min-{0}_{1}.tpr'
            k1.copy_output_data = [cout.format(iter1, inst1)]
            kernel_list.append(k1)

            # Kernel 2: Restrained energy min step.
            k2 = Kernel(name="custom.mdrun")
            lind = '$SHARED/min-{0}_{1}.tpr > min-{0}_{1}.tpr'
            k2.link_input_data = [lind.format(iter1, inst1)]
            k2.cores = Kconfig.num_cores_per_sim_cu
            k2.arguments = ["--deffnm=min-{0}_{1}".format(iter1, inst1)]
            cout = 'min-{0}_{1}.gro > $SHARED/min-{0}_{1}.gro'
            k2.copy_output_data = [cout.format(iter1, inst1)]
            kernel_list.append(k2)

            # Kernel 3: Grompp before restrained MD step
            k3 = Kernel(name="custom.grompp")
            k3.link_input_data = [
                shrd.format(opb(Kconfig.grompp_2_mdp)),
                shrd.format(opb(Kconfig.top_file)),
                shrd.format(opb(Kconfig.restr_file)),
                shrd.format(opb(Kconfig.grompp_2_itp_file))
            ]
            lind = '$SHARED/min-{0}_{1}.gro > min-{0}_{1}.gro'
            k3.link_input_data += [lind.format(iter1, inst1)]
            k3.arguments = [
                "--mdp={0}".format(opb(Kconfig.grompp_2_mdp)),
                "--ref=min-{0}_{1}.gro".format(iter1, inst1),
                "--top={0}".format(opb(Kconfig.top_file)),
                "--gro={0}".format(opb(Kconfig.restr_file)),
                "--tpr=eq-{0}_{1}.tpr".format(iter1, inst1)
            ]
            cout = 'eq-{0}_{1}.tpr > $SHARED/eq-{0}_{1}.tpr'
            k3.copy_output_data = [cout.format(iter1, inst1)]
            kernel_list.append(k3)

            # Kernel 4: Restrained MD step.
            k4 = Kernel(name="custom.mdrun")
            lind = '$SHARED/eq-{0}_{1}.tpr > eq-{0}_{1}.tpr'
            k4.link_input_data = [lind.format(iter1, inst1)]
            k4.cores = Kconfig.num_cores_per_sim_cu
            k4.arguments = ["--deffnm=eq-{0}_{1}".format(iter1, inst1)]
            cout = 'eq-{0}_{1}.gro > $SHARED/eq-{0}_{1}.gro'
            k4.copy_output_data = [cout.format(iter1, inst1)]
            kernel_list.append(k4)

        # Kernel 5: Grompp before unrestrained (production) MD.
        k5 = Kernel(name="custom.grompp")
        k5.link_input_data = [
            shrd.format(opb(Kconfig.grompp_3_mdp)),
            shrd.format(opb(Kconfig.top_file))
        ]
        if iter1 == 0:
            # First iteration: start from the configured initial coordinates.
            k5.link_input_data += [shrd.format(opb(Kconfig.initial_crd_file))]
            k5.arguments = [
                "--mdp={0}".format(opb(Kconfig.grompp_3_mdp)),
                "--gro={0}".format(opb(Kconfig.initial_crd_file)),
                "--top={0}".format(opb(Kconfig.top_file)),
                "--tpr=md-{0}_{1}.tpr".format(iter1, inst1)
            ]
        else:
            # Later iterations: start from the output of kernel 4.
            lind = '$SHARED/eq-{0}_{1}.gro > eq-{0}_{1}.gro'
            k5.link_input_data += [lind.format(iter1, inst1)]
            k5.arguments = [
                "--mdp={0}".format(opb(Kconfig.grompp_3_mdp)),
                "--gro=eq-{0}_{1}.gro".format(iter1, inst1),
                "--top={0}".format(opb(Kconfig.top_file)),
                "--tpr=md-{0}_{1}.tpr".format(iter1, inst1)
            ]
        cout = 'md-{0}_{1}.tpr > $SHARED/md-{0}_{1}.tpr'
        k5.copy_output_data = [cout.format(iter1, inst1)]
        kernel_list.append(k5)

        # Kernel 6: Production MD step.
        k6 = Kernel(name="custom.mdrun")
        lind = '$SHARED/md-{0}_{1}.tpr > md-{0}_{1}.tpr'
        k6.link_input_data = [lind.format(iter1, inst1)]
        k6.cores = Kconfig.num_cores_per_sim_cu
        k6.arguments = ["--deffnm=md-{0}_{1}".format(iter1, inst1)]
        cout = 'md-{0}_{1}.gro > $SHARED/md-{0}_{1}.gro'
        xout = 'md-{0}_{1}.xtc > $SHARED/md-{0}_{1}.xtc'
        k6.copy_output_data = [
            cout.format(iter1, inst1),
            xout.format(iter1, inst1)
        ]
        kernel_list.append(k6)

        # Kernel 7: Post-processing of output structure file to correct
        #           PBC effects. The corrected file replaces the .gro that
        #           kernel 6 copied to $SHARED.
        k7 = Kernel(name="custom.trjconv")
        lind = '$SHARED/md-{0}_{1}.gro > md-{0}_{1}.gro'
        tpin = '$SHARED/md-{0}_{1}.tpr > md-{0}_{1}.tpr'
        k7.link_input_data = [
            lind.format(iter1, inst1),
            tpin.format(iter1, inst1)
        ]
        k7.arguments = [
            "--echo=System", "--f=md-{0}_{1}.gro".format(iter1, inst1),
            "--s=md-{0}_{1}.tpr".format(iter1, inst1),
            "--o=md-{0}_{1}_whole.gro".format(iter1, inst1), "--pbc=whole"
        ]
        cout = 'md-{0}_{1}_whole.gro > $SHARED/md-{0}_{1}.gro'
        k7.copy_output_data = [cout.format(iter1, inst1)]
        kernel_list.append(k7)

        # Kernel 8: Post-processing of output trajectory file to correct
        #           PBC effects. The corrected file replaces the .xtc that
        #           kernel 6 copied to $SHARED.
        k8 = Kernel(name="custom.trjconv")
        lind = '$SHARED/md-{0}_{1}.xtc > md-{0}_{1}.xtc'
        tpin = '$SHARED/md-{0}_{1}.tpr > md-{0}_{1}.tpr'
        k8.link_input_data = [
            lind.format(iter1, inst1),
            tpin.format(iter1, inst1)
        ]
        k8.arguments = [
            "--echo=System", "--f=md-{0}_{1}.xtc".format(iter1, inst1),
            "--s=md-{0}_{1}.tpr".format(iter1, inst1),
            "--o=md-{0}_{1}_whole.xtc".format(iter1, inst1), "--pbc=whole"
        ]
        # Download the trajectory only every Kconfig.nsave iterations.
        if iteration % Kconfig.nsave == 0:
            dout = "md-{0}_{1}_whole.xtc > output/iter{0}/md-{0}_{1}_whole.xtc"
            k8.download_output_data = [dout.format(iter1, inst1)]
        xout = 'md-{0}_{1}_whole.xtc > $SHARED/md-{0}_{1}.xtc'
        k8.copy_output_data = [xout.format(iter1, inst1)]
        kernel_list.append(k8)

        return kernel_list
    def simulation_step(self, iteration, instance):
        '''
        function : Create the two Amber kernels of one simulation instance.
        In iteration 1 the starting coordinates are the initial .crd file
        from $PRE_LOOP; in any other iteration they are linked from
        $PREV_ANALYSIS_INSTANCE_1.

        amber (first kernel, minimization) :-

                Purpose : Run Amber with the minimization input on one core
                            and copy md<iteration>.crd to $PRE_LOOP as
                            md_<iteration>_<instance>.crd.

                Arguments : --mininfile = minimization filename
                            --topfile   = Topology filename
                            --crdfile   = initial coordinate filename
                            --cycle     = current iteration number

        amber (second kernel, MD) :-

                Purpose : Run Amber with the MD input on one core, using the
                            .crd file copied to $PRE_LOOP by the first
                            kernel. Every Kconfig.nsave iterations the
                            md<iteration>.ncdf file is downloaded.

                Arguments : --mdinfile  = MD input filename
                            --topfile   = Topology filename
                            --cycle     = current iteration number
        '''
        basename = os.path.basename
        cycle_arg = "--cycle=%s" % (iteration)

        # First kernel: minimization.
        k1 = Kernel(name="md.amber")
        min_name = basename(Kconfig.minimization_input_file)
        top_name = basename(Kconfig.top_file)
        crd_name = basename(Kconfig.initial_crd_file)
        k1.arguments = [
            "--mininfile={0}".format(min_name),
            "--topfile={0}".format(top_name),
            "--crdfile={0}".format(crd_name),
            cycle_arg,
        ]
        k1.cores = 1

        min_links = [
            '$PRE_LOOP/{0}'.format(name)
            for name in (min_name, top_name, crd_name)
        ]
        # Pick the starting coordinates for this iteration.
        if iteration == 1:
            start_crd = '$PRE_LOOP/{0} > min1.crd'.format(crd_name)
        else:
            start_crd = (
                '$PREV_ANALYSIS_INSTANCE_1/min{0}{1}.crd > min{2}.crd'.format(
                    iteration - 1, instance - 1, iteration))
        min_links.append(start_crd)
        k1.link_input_data = min_links
        k1.copy_output_data = [
            'md{0}.crd > $PRE_LOOP/md_{0}_{1}.crd'.format(iteration, instance)
        ]

        # Second kernel: MD on the coordinates produced above.
        k2 = Kernel(name="md.amber")
        md_name = basename(Kconfig.md_input_file)
        k2.arguments = [
            "--mdinfile={0}".format(md_name),
            "--topfile={0}".format(top_name),
            cycle_arg,
        ]
        k2.link_input_data = [
            "$PRE_LOOP/{0}".format(md_name),
            "$PRE_LOOP/{0}".format(top_name),
            "$PRE_LOOP/md_{0}_{1}.crd > md{0}.crd".format(iteration, instance),
        ]
        if iteration % Kconfig.nsave == 0:
            k2.download_output_data = [
                'md{0}.ncdf > output/iter{0}/md_{0}_{1}.ncdf'.format(
                    iteration, instance)
            ]
        k2.cores = 1

        return [k1, k2]
    def analysis_step(self, iteration, instance):
        '''
        function : Merge the results of each of the simulation instances and run LSDMap analysis to generate the
        new coordinate file. Split this new coordinate file into smaller files to be used by the simulation stage
        in the next iteration.

        If a step has multiple kernels (say k1, k2), data generated in k1 is implicitly moved to k2 (if k2 requires).
        Data which needs to be moved between the various steps (pre_loop, simulation_step, analysis_step) needs to
        be mentioned by the user.

        pre_lsdmap :-

                Purpose : The output of each gromacs instance in the simulation_step is a small coordinate file. Concatenate
                            such files from each of the gromacs instances to form a larger file. There is one instance of pre_lsdmap per
                            iteration.

                Arguments : --numCUs = number of simulation instances / number of small files to be concatenated

        lsdmap :-

                Purpose : Perform LSDMap on the large coordinate file to generate weights and eigen values. There is one instance
                            of lsdmap per iteration (MSSA : Multiple Simulation Single Analysis model).

                Arguments : --config = name of the config file to be used during LSDMap

        post_lsdmap :-


                Purpose : Use the weights, eigen values generated in lsdmap along with other parameter files from pre_loop
                            to generate the new coordinate file to be used by the simulation_step in the next iteration. There is one
                            instance of post_lsdmap per iteration.

                Arguments : --num_runs              = number of configurations to be generated in the new coordinate file
                            --out                   = output filename
                            --cycle                 = iteration number minus one
                            --max_dead_neighbors    = max dead neighbors to be considered
                            --max_alive_neighbors   = max alive neighbors to be considered
                            --numCUs                = number of simulation instances/ number of smaller files

        Returns : [pre_lsdmap kernel, lsdmap kernel, post_lsdmap kernel]
        '''

        # pre_lsdmap: gather out.gro of every simulation instance of this
        # iteration as out0.gro, out1.gro, ...
        pre_ana = Kernel(name="md.pre_lsdmap")
        pre_ana.arguments = ["--numCUs={0}".format(Kconfig.num_CUs)]
        pre_ana.link_input_data = ["$PRE_LOOP/pre_analyze.py > pre_analyze.py"]
        for i in range(1, Kconfig.num_CUs + 1):
            pre_ana.link_input_data += [
                "$SIMULATION_ITERATION_{2}_INSTANCE_{0}/out.gro > out{1}.gro".
                format(i, i - 1, iteration)
            ]
        pre_ana.copy_output_data = [
            'tmpha.gro > $PRE_LOOP/tmpha.gro', 'tmp.gro > $PRE_LOOP/tmp.gro'
        ]

        lsdmap = Kernel(name="md.lsdmap")
        lsdmap.arguments = [
            "--config={0}".format(os.path.basename(Kconfig.lsdm_config_file))
        ]
        lsdmap.link_input_data = [
            '$PRE_LOOP/{0} > {0}'.format(
                os.path.basename(Kconfig.lsdm_config_file)),
            '$PRE_LOOP/tmpha.gro > tmpha.gro'
        ]
        lsdmap.cores = 1
        lsdmap.copy_output_data = [
            'tmpha.ev > $PRE_LOOP/tmpha.ev', 'out.nn > $PRE_LOOP/out.nn'
        ]
        if iteration > 1:
            lsdmap.link_input_data += [
                '$ANALYSIS_ITERATION_{0}_INSTANCE_1/weight.w > weight.w'.
                format(iteration - 1)
            ]
            # Append instead of assign: the weight.w directive used to be set
            # first and then overwritten by the unconditional assignment of
            # the tmpha.ev / out.nn list, so it never took effect.
            lsdmap.copy_output_data += ['weight.w > $PRE_LOOP/weight.w']

        # Keep a copy of the LSDMap log every Kconfig.nsave iterations.
        if iteration % Kconfig.nsave == 0:
            lsdmap.download_output_data = [
                'lsdmap.log > backup/iter{0}/lsdmap.log'.format(iteration)
            ]

        post_ana = Kernel(name="md.post_lsdmap")
        post_ana.link_input_data = [
            "$PRE_LOOP/post_analyze.py > post_analyze.py",
            "$PRE_LOOP/selection.py > selection.py",
            "$PRE_LOOP/reweighting.py > reweighting.py",
            "$PRE_LOOP/spliter.py > spliter.py", "$PRE_LOOP/gro.py > gro.py",
            "$PRE_LOOP/tmp.gro > tmp.gro", "$PRE_LOOP/tmpha.ev > tmpha.ev",
            "$PRE_LOOP/out.nn > out.nn", "$PRE_LOOP/input.gro > input.gro"
        ]

        post_ana.arguments = [
            "--num_runs={0}".format(Kconfig.num_runs), "--out=out.gro",
            "--cycle={0}".format(iteration - 1),
            "--max_dead_neighbors={0}".format(Kconfig.max_dead_neighbors),
            "--max_alive_neighbors={0}".format(Kconfig.max_alive_neighbors),
            "--numCUs={0}".format(Kconfig.num_CUs)
        ]

        # From the second iteration on, the previous iteration's weights are
        # linked in under the name weight_new.w.
        if iteration > 1:
            post_ana.link_input_data += [
                '$ANALYSIS_ITERATION_{0}_INSTANCE_1/weight.w > weight_new.w'.
                format(iteration - 1)
            ]

        if iteration % Kconfig.nsave == 0:
            post_ana.download_output_data = [
                'out.gro > backup/iter{0}/out.gro'.format(iteration),
                'weight.w > backup/iter{0}/weight.w'.format(iteration)
            ]

        return [pre_ana, lsdmap, post_ana]
    def analysis_step(self, iteration, instance):
        '''
        function : Set up the CoCo analysis for the current iteration. The
        .xtc files of every simulation instance from iteration 1 up to the
        current one are linked in, and one .gro file per CU is copied to
        $PRE_LOOP for the next simulations.

        coco :-

                Purpose : Runs CoCo analysis on a set of MD trajectory files
                            (here .xtc files) and generates several coordinate
                            files, one per CU.

                Arguments : --grid           = Number of points along each dimension of the CoCo histogram
                            --dims           = The number of projections to consider from the input pcz file
                            --frontpoints    = Number of CUs
                            --topfile        = Topology filename
                            --mdfile         = MD Input filename
                            --output         = Output filename
                            --atom_selection = Selection of the biological part of the system we want to consider for analysis
        '''
        prev_iter = iteration - 1
        coco = Kernel(name="md.coco")

        # Topology and reference structure first, then every trajectory
        # produced so far.
        staged = [
            '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.top_file)),
            '$SIMULATION_ITERATION_{0}_INSTANCE_1/md-{1}_0.gro > md-{1}_0.gro'.
            format(iteration, prev_iter),
        ]
        xtc_link = ('$SIMULATION_ITERATION_{0}_INSTANCE_{1}'
                    '/md-{2}_{3}.xtc > md-{2}_{3}.xtc')
        for cycle in range(1, iteration + 1):
            for cu in range(1, Kconfig.num_CUs + 1):
                staged.append(xtc_link.format(cycle, cu, cycle - 1, cu - 1))
        coco.link_input_data = staged

        coco.cores = 1
        coco.uses_mpi = False

        # Expects an output name with exactly one '.' (base and extension).
        outbase, ext = os.path.basename(Kconfig.output).split('.')

        coco.arguments = [
            "--grid={0}".format(Kconfig.grid),
            "--dims={0}".format(Kconfig.dims),
            "--frontpoints={0}".format(Kconfig.num_CUs),
            "--topfile=md-{0}_0.gro".format(prev_iter),
            "--mdfile=*.xtc",
            "--output={0}_{1}.{2}".format(outbase, prev_iter, ext),
            "--atom_selection={0}".format(Kconfig.sel),
        ]

        # One generated structure per CU goes back to $PRE_LOOP.
        gro_copy = "{0}_{1}{2}.gro > $PRE_LOOP/{0}_{1}{2}.gro"
        coco.copy_output_data = [
            gro_copy.format(outbase, prev_iter, cu)
            for cu in range(0, Kconfig.num_CUs)
        ]

        coco.download_output_data = [
            "coco.log > output/coco-iter{0}.log".format(prev_iter)
        ]

        return [coco]
    def analysis_step(self, iteration, instance):
        """
        function : Merge the results of each of the simulation instances and run LSDMap analysis to generate the
        new coordinate file. Split this new coordinate file into smaller files to be used by the simulation stage
        in the next iteration.

        If a step has multiple kernels (say k1, k2), data generated in k1 is implicitly moved to k2 (if k2 requires).
        Data which needs to be moved between the various steps (pre_loop, simulation_step, analysis_step) needs to
        be mentioned by the user.

        pre_lsdmap :-

                Purpose : The output of each gromacs instance in the simulation_step is a small coordinate file. Concatenate
                            such files from each of the gromacs instances to form a larger file. There is one instance of pre_lsdmap per
                            iteration.

                Arguments : --numCUs = number of simulation instances / number of small files to be concatenated

        lsdmap :-

                Purpose : Perform LSDMap on the large coordinate file to generate weights and eigen values. There is one instance
                            of lsdmap per iteration (MSSA : Multiple Simulation Single Analysis model). It is given
                            RPconfig.PILOTSIZE cores.

                Arguments : --config = name of the config file to be used during LSDMap

        post_lsdmap :-


                Purpose : Use the weights, eigen values generated in lsdmap along with other parameter files from pre_loop
                            to generate the new coordinate file to be used by the simulation_step in the next iteration. There is one
                            instance of post_lsdmap per iteration.

                Arguments : --num_runs              = number of configurations to be generated in the new coordinate file
                            --out                   = output filename
                            --cycle                 = iteration number minus one
                            --max_dead_neighbors    = max dead neighbors to be considered
                            --max_alive_neighbors   = max alive neighbors to be considered
                            --numCUs                = number of simulation instances/ number of smaller files

        Returns : [pre_lsdmap kernel, lsdmap kernel, post_lsdmap kernel]
        """

        # pre_lsdmap: gather out.gro of every simulation instance of this
        # iteration as out0.gro, out1.gro, ...
        pre_ana = Kernel(name="md.pre_lsdmap")
        pre_ana.arguments = ["--numCUs={0}".format(Kconfig.num_CUs)]
        pre_ana.link_input_data = ["$PRE_LOOP/pre_analyze.py > pre_analyze.py"]
        for i in range(1, Kconfig.num_CUs + 1):
            pre_ana.link_input_data += [
                "$SIMULATION_ITERATION_{2}_INSTANCE_{0}/out.gro > out{1}.gro".format(i, i - 1, iteration)
            ]
        pre_ana.copy_output_data = ["tmpha.gro > $PRE_LOOP/tmpha.gro", "tmp.gro > $PRE_LOOP/tmp.gro"]

        lsdmap = Kernel(name="md.lsdmap")
        lsdmap.arguments = ["--config={0}".format(os.path.basename(Kconfig.lsdm_config_file))]
        lsdmap.link_input_data = [
            "$PRE_LOOP/{0} > {0}".format(os.path.basename(Kconfig.lsdm_config_file)),
            "$PRE_LOOP/lsdm.py > lsdm.py",
            "$PRE_LOOP/tmpha.gro > tmpha.gro",
        ]
        lsdmap.cores = RPconfig.PILOTSIZE
        lsdmap.copy_output_data = ["tmpha.ev > $PRE_LOOP/tmpha.ev", "out.nn > $PRE_LOOP/out.nn"]
        if iteration > 1:
            lsdmap.link_input_data += ["$ANALYSIS_ITERATION_{0}_INSTANCE_1/weight.w > weight.w".format(iteration - 1)]
            # Append instead of assign: the weight.w directive used to be set
            # first and then overwritten by the unconditional assignment of
            # the tmpha.ev / out.nn list, so it never took effect.
            lsdmap.copy_output_data += ["weight.w > $PRE_LOOP/weight.w"]

        # Keep a copy of the LSDMap log every Kconfig.nsave iterations.
        if iteration % Kconfig.nsave == 0:
            lsdmap.download_output_data = ["lsdmap.log > backup/iter{0}/lsdmap.log".format(iteration)]

        post_ana = Kernel(name="md.post_lsdmap")
        post_ana.link_input_data = [
            "$PRE_LOOP/post_analyze.py > post_analyze.py",
            "$PRE_LOOP/select.py > select.py",
            "$PRE_LOOP/reweighting.py > reweighting.py",
            "$PRE_LOOP/spliter.py > spliter.py",
            "$PRE_LOOP/gro.py > gro.py",
            "$PRE_LOOP/tmp.gro > tmp.gro",
            "$PRE_LOOP/tmpha.ev > tmpha.ev",
            "$PRE_LOOP/out.nn > out.nn",
            "$PRE_LOOP/input.gro > input.gro",
        ]

        post_ana.arguments = [
            "--num_runs={0}".format(Kconfig.num_runs),
            "--out=out.gro",
            "--cycle={0}".format(iteration - 1),
            "--max_dead_neighbors={0}".format(Kconfig.max_dead_neighbors),
            "--max_alive_neighbors={0}".format(Kconfig.max_alive_neighbors),
            "--numCUs={0}".format(Kconfig.num_CUs),
        ]

        # From the second iteration on, the previous iteration's weights are
        # linked in under the name weight_new.w.
        if iteration > 1:
            post_ana.link_input_data += [
                "$ANALYSIS_ITERATION_{0}_INSTANCE_1/weight.w > weight_new.w".format(iteration - 1)
            ]

        if iteration % Kconfig.nsave == 0:
            post_ana.download_output_data = [
                "out.gro > backup/iter{0}/out.gro".format(iteration),
                "weight.w > backup/iter{0}/weight.w".format(iteration),
            ]

        return [pre_ana, lsdmap, post_ana]
    def analysis_stage(self, iteration, instance):
        '''
        function : Perform CoCo Analysis on the output of the simulation from 
        the current iteration. Using the .xtc files of all instances from 
        every iteration so far, generate .gro files (as many as the num_CUs) 
        to be used in the next simulations.

        coco :-

            Purpose : Runs CoCo analysis on a set of MD trajectory files 
                      in this case xtc files and generates several 
                      coordinates file to be used in next cycle

            Arguments : --grid           = Number of points along each dimension
                                           of the CoCo histogram
                        --dims           = The number of projections to 
                                           consider from the input pcz file
                        --frontpoints    = Number of CUs
                        --topfile        = Topology filename
                        --mdfile         = MD Input filename
                        --output         = Output filename
                        --atom_selection = Selection of the biological part of 
                                           the system we want to consider for 
                                           analysis

        Returns : list holding the single custom.coco Kernel
        '''

        k1 = Kernel(name="custom.coco")
        iter1 = iteration - 1

        # Only the base name is needed: every output file is written as .gro.
        # rpartition replaces unpacking split('.'), which raised ValueError
        # for output names without exactly one dot.
        outname = opb(Kconfig.output)
        outbase, dot, _ = outname.rpartition('.')
        if not dot:
            outbase = outname

        k1.arguments = [
            "--grid={0}".format(Kconfig.grid),
            "--dims={0}".format(Kconfig.dims),
            "--frontpoints={0}".format(Kconfig.num_CUs),
            "--topfile=md-{0}_0.gro".format(iter1), "--mdfile=*.xtc",
            "--output={0}_{1}.gro".format(outbase, iter1),
            "--atom_selection={0}".format(Kconfig.sel)
        ]
        # Never request more cores than the pilot provides.
        k1.cores = min(Kconfig.num_CUs, RPconfig.PILOTSIZE)
        k1.uses_mpi = True

        # Structure of instance 0 of this iteration (passed as --topfile),
        # followed by the trajectories of all instances of all iterations
        # so far.
        lind = '$SHARED/md-{0}_0.gro > md-{0}_0.gro'
        k1.link_input_data = [lind.format(iter1)]
        lind = '$SHARED/md-{0}_{1}.xtc > md-{0}_{1}.xtc'
        for past_iter in range(iteration):
            for i in range(Kconfig.num_CUs):
                k1.link_input_data += [lind.format(past_iter, i)]

        # One generated structure per CU is copied to $SHARED.
        cout = '{0}_{1}{2}.gro > $SHARED/{0}_{1}{2}.gro'
        k1.copy_output_data = [
            cout.format(outbase, iter1, i) for i in range(Kconfig.num_CUs)
        ]

        dod = "coco.log > output/coco-iter{0}.log"
        k1.download_output_data = [dod.format(iter1)]

        return [k1]