def analysis_step(self, iteration, instance):
        '''
        function : Build the CoCo and tleap kernels for one analysis instance of
        the given iteration.

        Every analysis instance handles a fixed batch of 64 simulation
        instances: analysis instance n links the md{iter}.ncdf trajectories of
        simulation instances (n-1)*64+1 .. n*64, for every iteration from 1 up
        to and including the current one.

        coco :-

                Purpose : Runs CoCo analysis on the linked .ncdf files. Its outputs
                            pdbs0.pdb .. pdbs63.pdb are copied to $SHARED as
                            pentaopt{iteration}{global index}.pdb.

                Arguments : --grid           = Number of points along each dimension of the CoCo histogram
                            --dims           = The number of projections to consider from the input pcz file
                            --frontpoints    = Batch size (fixed at 64)
                            --topfile        = Topology filename
                            --mdfile         = MD input filename pattern
                            --output         = Output filename prefix
                            --atom_selection = Kconfig.atom_selection

        tleap :-

                Purpose : Created with instance_type='single'; links postexec.py and
                            the pentaopt .pdb files of all Kconfig.num_CUs simulations.

                Arguments : --numofsims = Kconfig.num_CUs
                            --cycle     = Current iteration number

        Returns : [coco kernel, tleap kernel]
        '''
        # Simulations handled by (and cores assigned to) one analysis instance.
        # The value was hard-coded in several places; it is kept unchanged here.
        batch_size = 64
        first_index = (instance - 1) * batch_size
        top_name = os.path.basename(Kconfig.top_file)

        k1 = Kernel(name="custom.coco")
        k1.arguments = ["--grid={0}".format(Kconfig.grid),
                        "--dims={0}".format(Kconfig.dims),
                        "--frontpoints={0}".format(batch_size),
                        "--topfile={0}".format(top_name),
                        "--mdfile=*.ncdf",
                        "--output=pdbs",
                        "--atom_selection={0}".format(Kconfig.atom_selection)]
        k1.cores = batch_size
        k1.uses_mpi = True

        # Trajectories of this instance's batch, from every iteration so far.
        trajectories = [
            '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.ncdf > md_{0}_{1}.ncdf'.format(past_iter, sim)
            for past_iter in range(1, iteration + 1)
            for sim in range(first_index + 1, first_index + batch_size + 1)
        ]
        k1.link_input_data = ['$SHARED/{0}'.format(top_name)] + trajectories

        # Local output index -> global index across all analysis instances.
        k1.copy_output_data = [
            'pdbs{1}.pdb > $SHARED/pentaopt{0}{2}.pdb'.format(iteration, local, first_index + local)
            for local in range(batch_size)
        ]

        if iteration % Kconfig.nsave == 0:
            k1.download_output_data = ['coco.log > output/iter{0}/coco.log'.format(iteration)]

        k2 = Kernel(name="custom.tleap", instance_type='single')
        k2.arguments = ["--numofsims={0}".format(Kconfig.num_CUs),
                        "--cycle={0}".format(iteration)]
        k2.link_input_data = ['$SHARED/postexec.py > postexec.py'] + [
            '$SHARED/pentaopt{0}{1}.pdb > pentaopt{0}{1}.pdb'.format(iteration, sim)
            for sim in range(Kconfig.num_CUs)
        ]

        return [k1, k2]
    def analysis_step(self, iteration, instance):
        '''
        function : Build the CoCo and tleap kernels that analyse the simulation
        output of all iterations so far and stage the .crd files for the next
        iteration.

        coco :-

                Purpose : Runs CoCo analysis on the md{iter}.ncdf files of every
                            simulation instance (1..Kconfig.num_CUs) of every iteration
                            up to the current one. The pentaopt .pdb outputs are copied
                            to $PRE_LOOP/iter{iteration}/.

                Arguments : --grid          = Number of points along each dimension of the CoCo histogram
                            --dims          = The number of projections to consider from the input pcz file
                            --frontpoints   = Number of CUs
                            --topfile       = Topology filename
                            --mdfile        = MD input filename pattern
                            --output        = Output filename prefix (pentaopt + iteration)

        tleap :-

                Purpose : Links the pentaopt .pdb files and postexec.py; its
                            min{iteration}{i}.crd outputs are copied to
                            $PRE_LOOP/iter{iteration+1}/.

                Arguments : --numofsims = Number of CUs
                            --cycle     = Current iteration number

        Returns : [coco kernel, tleap kernel]
        '''
        num_sims = Kconfig.num_CUs
        top_name = os.path.basename(Kconfig.top_file)

        k1 = Kernel(name="md.coco")
        k1.arguments = ["--grid={0}".format(Kconfig.grid),
                        "--dims={0}".format(Kconfig.dims),
                        "--frontpoints={0}".format(num_sims),
                        "--topfile={0}".format(top_name),
                        "--mdfile=*.ncdf",
                        "--output=pentaopt%s" % (iteration)]
        k1.cores = RPconfig.PILOTSIZE

        trajectories = [
            '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.ncdf > md_{0}_{1}.ncdf'.format(past_iter, sim)
            for past_iter in range(1, iteration + 1)
            for sim in range(1, num_sims + 1)
        ]
        k1.copy_input_data = ['$PRE_LOOP/{0}'.format(top_name),
                              '$PRE_LOOP/pycoco.py > pycoco.py'] + trajectories

        k1.copy_output_data = [
            'pentaopt{0}{1}.pdb > $PRE_LOOP/iter{0}/pentaopt{0}{1}.pdb'.format(iteration, idx)
            for idx in range(num_sims)
        ]

        k2 = Kernel(name="md.tleap")
        k2.arguments = ["--numofsims={0}".format(num_sims),
                        "--cycle={0}".format(iteration)]

        k2.link_input_data = [
            '$PRE_LOOP/iter{0}/pentaopt{0}{1}.pdb > pentaopt{0}{1}.pdb'.format(iteration, idx)
            for idx in range(num_sims)
        ] + ['$PRE_LOOP/postexec.py']

        # Outputs go into the directory of the NEXT iteration.
        k2.copy_output_data = [
            'min{0}{1}.crd > $PRE_LOOP/iter{2}/min{0}{1}.crd'.format(iteration, idx, iteration + 1)
            for idx in range(num_sims)
        ]

        return [k1, k2]
    def simulation_step(self, iteration, instance):
        '''
        function : Build the two Amber kernels (minimization, then MD) for one
        simulation instance. In iteration 1 the initial coordinate file from
        $SHARED is used; otherwise the min_{iteration-1}_{instance-1}.rst7 file
        found in $SHARED is used.

        amber (kernel 1) :-

                Purpose : Minimization run. Its restart output is copied to
                            $SHARED/md_{iteration}_{instance}.rst.

                Arguments : --mininfile = minimization filename
                            --topfile   = Topology filename
                            --crdfile   = initial coordinate filename
                            --cycle     = current iteration number

        amber (kernel 2) :-

                Purpose : MD run starting from the restart file written by kernel 1.
                            md{iteration}.nc is downloaded when the iteration is a
                            multiple of Kconfig.nsave.

                Arguments : --mdinfile  = MD input filename
                            --topfile   = Topology filename
                            --cycle     = current iteration number

        Returns : [minimization kernel, MD kernel]
        '''
        min_name = os.path.basename(Kconfig.minimization_input_file)
        md_name = os.path.basename(Kconfig.md_input_file)
        top_name = os.path.basename(Kconfig.top_file)
        crd_name = os.path.basename(Kconfig.initial_crd_file)

        k1 = Kernel(name="custom.amber")
        k1.arguments = ["--mininfile={0}".format(min_name),
                        "--topfile={0}".format(top_name),
                        "--crdfile={0}".format(crd_name),
                        "--cycle=%s" % (iteration)]
        k1.cores = Kconfig.num_cores_per_sim_cu

        common_inputs = ['$SHARED/{0}'.format(min_name),
                         '$SHARED/{0}'.format(top_name),
                         '$SHARED/{0}'.format(crd_name)]
        if (iteration - 1) == 0:
            # First iteration: start from the initial coordinates.
            k1.link_input_data = common_inputs + ['$SHARED/{0} > min1.rst7'.format(crd_name)]
            k1.copy_output_data = ['min1.rst7 > $SHARED/md_{0}_{1}.rst'.format(iteration, instance)]
        else:
            # Later iterations: start from the previous analysis output
            # (zero-based instance index in the file name).
            k1.link_input_data = common_inputs + [
                '$SHARED/min_{0}_{1}.rst7 > min{2}.rst7'.format(iteration - 1, instance - 1, iteration)
            ]
            k1.copy_output_data = ['md{0}.rst > $SHARED/md_{0}_{1}.rst'.format(iteration, instance)]

        k2 = Kernel(name="custom.amber")
        k2.arguments = ["--mdinfile={0}".format(md_name),
                        "--topfile={0}".format(top_name),
                        "--cycle=%s" % (iteration)]
        k2.link_input_data = ["$SHARED/{0}".format(md_name),
                              "$SHARED/{0}".format(top_name),
                              "$SHARED/md_{0}_{1}.rst > md{0}.rst".format(iteration, instance)]
        k2.cores = Kconfig.num_cores_per_sim_cu
        if iteration % Kconfig.nsave == 0:
            k2.download_output_data = ['md{0}.nc > output/iter{0}/md_{0}_{1}.nc'.format(iteration, instance)]

        return [k1, k2]
    def simulation_step(self, iteration, instance):
        '''
        function : Build the grompp and mdrun kernels for one simulation
        instance. The starting structure comes from $PRE_LOOP/temp in the first
        iteration, and from instance 1 of the previous analysis iteration
        afterwards.

        grompp :-

                Purpose : Produces topol.tpr from the .mdp file, the topology and
                            start.gro; the .tpr is copied to $SHARED/iter_<n>/.

                Arguments : --mdp = gromacs parameters filename
                            --gro = starting structure (start.gro)
                            --top = topology filename
                            --tpr = output run-input file (topol.tpr)

        mdrun :-

                Purpose : Runs on the topol.tpr file written by grompp.

                Arguments : --size = 1
                            --tpr  = topol.tpr
                            --out  = out.gro

        Returns : [grompp kernel, mdrun kernel]
        '''
        # File names on the staging side are zero-based.
        prev_iter = iteration - 1
        sim_index = instance - 1
        mdp_name = os.path.basename(Kconfig.mdp_file)
        top_name = os.path.basename(Kconfig.top_file)

        if prev_iter == 0:
            start_gro = '$PRE_LOOP/temp/start{0}.gro > start.gro'.format(sim_index)
        else:
            start_gro = ('$ANALYSIS_ITERATION_{0}_INSTANCE_1/temp/start{1}.gro > start.gro'
                         .format(prev_iter, sim_index))

        # GROMPP kernel
        grompp = Kernel(name="custom.grompp")
        grompp.arguments = ["--mdp={0}".format(mdp_name),
                            "--gro=start.gro",
                            "--top={0}".format(top_name),
                            "--tpr=topol.tpr"]
        grompp.link_input_data = ['$SHARED/{0} > {0}'.format(mdp_name),
                                  '$SHARED/{0} > {0}'.format(top_name),
                                  start_gro]
        grompp.copy_output_data = [
            'topol.tpr > $SHARED/iter_{1}/topol_{0}.tpr'.format(sim_index, prev_iter)]

        # MDRUN kernel
        mdrun = Kernel(name="custom.mdrun")
        mdrun.arguments = ["--size=1", "--tpr=topol.tpr", "--out=out.gro"]
        mdrun.link_input_data = [
            '$SHARED/iter_{1}/topol_{0}.tpr > topol.tpr'.format(sim_index, prev_iter)]

        return [grompp, mdrun]
    def analysis_stage(self, iteration, instance):
        '''
        function : Build the CoCo kernel that analyses the md{iter}.nc
        trajectories of every simulation instance from iteration 1 up to the
        current iteration, and stages the resulting restart files in $SHARED.

        coco :-

                Purpose : Runs CoCo analysis on the linked .nc files. Outputs
                            coco{i}.rst7 are copied to
                            $SHARED/min_{iteration}_{i}.rst7; coco.log is downloaded
                            when the iteration is a multiple of Kconfig.nsave.

                Arguments : --grid           = Number of points along each dimension of the CoCo histogram
                            --dims           = The number of projections to consider from the input pcz file
                            --frontpoints    = Number of CUs
                            --topfile        = Reference filename (Kconfig.ref_file)
                            --mdfile         = MD input filename pattern
                            --output         = Output filename
                            --atom_selection = Kconfig.atom_selection

        Returns : the CoCo kernel itself (not wrapped in a list)
        '''
        num_sims = Kconfig.num_CUs
        ref_name = os.path.basename(Kconfig.ref_file)

        coco = Kernel(name="custom.coco")
        coco.arguments = ["--grid={0}".format(Kconfig.grid),
                          "--dims={0}".format(Kconfig.dims),
                          "--frontpoints={0}".format(num_sims),
                          "--topfile={0}".format(ref_name),
                          "--mdfile=*.nc",
                          "--output=coco.rst7",
                          "--atom_selection={0}".format(Kconfig.atom_selection)]
        coco.cores = min(num_sims, RPconfig.PILOTSIZE)
        coco.uses_mpi = True

        inputs = ['$SHARED/{0}'.format(ref_name)]
        for past_iter in range(1, iteration + 1):
            inputs.extend(
                '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.nc > md_{0}_{1}.nc'.format(past_iter, sim)
                for sim in range(1, num_sims + 1))
        coco.link_input_data = inputs

        coco.copy_output_data = [
            'coco{1}.rst7 > $SHARED/min_{0}_{1}.rst7'.format(iteration, idx)
            for idx in range(num_sims)]

        if iteration % Kconfig.nsave == 0:
            coco.download_output_data = ['coco.log > output/iter{0}/coco.log'.format(iteration)]

        return coco
    def analysis_step(self, iteration, instance):
        '''
        function : Build the CoCo kernel that analyses the .xtc trajectories of
        every simulation instance from iteration 1 up to the current iteration
        and stages one .gro file per CU in $PRE_LOOP for the next simulations.

        coco :-

                Purpose : Runs CoCo analysis on a set of MD trajectory (.xtc) files
                            and generates several coordinate files.

                Arguments : --grid           = Number of points along each dimension of the CoCo histogram
                            --dims           = The number of projections to consider from the input pcz file
                            --frontpoints    = Number of CUs
                            --topfile        = md-{iteration-1}_0.gro from simulation instance 1
                            --mdfile         = MD input filename pattern
                            --output         = Output filename, derived from Kconfig.output
                            --atom_selection = Selection of the biological part of the system we want to consider for analysis

        Returns : [coco kernel]
        '''
        num_sims = Kconfig.num_CUs
        prev_iter = iteration - 1

        # splitext copes with names that contain no dot or several dots, where
        # the former `outbase, ext = name.split('.')` raised ValueError.
        outbase, dotted_ext = os.path.splitext(os.path.basename(Kconfig.output))
        ext = dotted_ext[1:]

        k1_ana_kernel = Kernel(name="md.coco")

        # Trajectory file names use zero-based iteration/instance indices.
        trajectories = [
            '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md-{2}_{3}.xtc > md-{2}_{3}.xtc'.format(
                past_iter, sim, past_iter - 1, sim - 1)
            for past_iter in range(1, iteration + 1)
            for sim in range(1, num_sims + 1)
        ]
        k1_ana_kernel.link_input_data = [
            '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.top_file)),
            '$SIMULATION_ITERATION_{0}_INSTANCE_1/md-{1}_0.gro > md-{1}_0.gro'.format(iteration, prev_iter),
        ] + trajectories

        k1_ana_kernel.cores = 1
        k1_ana_kernel.uses_mpi = False

        k1_ana_kernel.arguments = ["--grid={0}".format(Kconfig.grid),
                                   "--dims={0}".format(Kconfig.dims),
                                   "--frontpoints={0}".format(num_sims),
                                   "--topfile=md-{0}_0.gro".format(prev_iter),
                                   "--mdfile=*.xtc",
                                   "--output={0}_{1}.{2}".format(outbase, prev_iter, ext),
                                   "--atom_selection={0}".format(Kconfig.sel)]

        # The staged files are always named .gro, whatever the extension of
        # Kconfig.output is.
        k1_ana_kernel.copy_output_data = [
            "{0}_{1}{2}.gro > $PRE_LOOP/{0}_{1}{2}.gro".format(outbase, prev_iter, idx)
            for idx in range(num_sims)
        ]

        k1_ana_kernel.download_output_data = ["coco.log > output/coco-iter{0}.log".format(prev_iter)]

        return [k1_ana_kernel]
    def simulation_step(self, iteration, instance):
        '''
        function : Build the grompp and mdrun kernels for one simulation
        instance. In the first iteration start{n}.gro is taken from
        $PRE_LOOP/temp; in later iterations it is taken from instance 1 of the
        previous analysis iteration.

        grompp :-

                Purpose : Produces topol.tpr from the .mdp file, the topology and
                            start.gro; the .tpr is copied to $SHARED/iter_<n>/.

                Arguments : --mdp = gromacs parameters filename
                            --gro = starting structure (start.gro)
                            --top = topology filename
                            --tpr = output run-input file (topol.tpr)

        mdrun :-

                Purpose : Runs on the topol.tpr file written by grompp.

                Arguments : --size = 1
                            --tpr  = topol.tpr
                            --out  = out.gro

        Returns : [grompp kernel, mdrun kernel]
        '''
        # Staged file names are zero-based.
        iter0 = iteration - 1
        inst0 = instance - 1
        mdp_name = os.path.basename(Kconfig.mdp_file)
        top_name = os.path.basename(Kconfig.top_file)
        tpr_in_shared = '$SHARED/iter_{1}/topol_{0}.tpr'.format(inst0, iter0)

        # GROMPP kernel
        grompp = Kernel(name="custom.grompp")
        grompp.arguments = ["--mdp={0}".format(mdp_name),
                            "--gro=start.gro",
                            "--top={0}".format(top_name),
                            "--tpr=topol.tpr"]

        inputs = ['$SHARED/{0} > {0}'.format(mdp_name),
                  '$SHARED/{0} > {0}'.format(top_name)]
        if iter0 == 0:
            inputs.append('$PRE_LOOP/temp/start{0}.gro > start.gro'.format(inst0))
        else:
            inputs.append(
                '$ANALYSIS_ITERATION_{0}_INSTANCE_1/temp/start{1}.gro > start.gro'.format(iter0, inst0))
        grompp.link_input_data = inputs
        grompp.copy_output_data = ['topol.tpr > ' + tpr_in_shared]

        # MDRUN kernel
        mdrun = Kernel(name="custom.mdrun")
        mdrun.arguments = ["--size=1", "--tpr=topol.tpr", "--out=out.gro"]
        mdrun.link_input_data = [tpr_in_shared + ' > topol.tpr']

        return [grompp, mdrun]
예제 #8
0
    def simulation_stage(self, iteration, instance):
        '''
        function : Build the atom_dist kernel for one instance. The instance
        number (1-based) selects an entry of the module-level window_list; the
        kernel receives that entry's two indices, each incremented by one, as
        --row and --column.

        The result file distances_<row>_<col>.npz.npy (named with the
        unincremented indices) is copied to $SHARED.

        Returns : the kernel itself (not wrapped in a list)
        '''
        window = window_list[instance - 1]
        row_index = window[0]
        col_index = window[1]
        result_file = 'distances_{0}_{1}.npz.npy'.format(str(row_index), str(col_index))

        kernel = Kernel(name="atom_dist")
        kernel.arguments = [
            '--python-script=atom_distances.py',
            '--traj-count={0}'.format(str(traj_count)),
            '--window-size={0}'.format(str(window_size)),
            '--cutoff=15',
            '--row={0}'.format(str(row_index + 1)),
            '--column={0}'.format(str(col_index + 1)),
        ]
        kernel.link_input_data = ['$SHARED/input.txt > input.txt']
        kernel.upload_input_data = ['atom_distances.py']
        kernel.copy_output_data = ['{0} > $SHARED/{0}'.format(result_file)]
        return kernel
    def simulation_step(self, iteration, instance):
        '''
        function : Build the two Amber kernels (minimization, then MD) for one
        simulation instance. In iteration 1 the initial .crd file from $PRE_LOOP
        is used; otherwise the min{iteration-1}{instance-1}.crd file staged in
        $PRE_LOOP/iter{iteration} is used.

        amber (kernel 1) :-

                Purpose : Minimization run; md{iteration}.crd is copied to
                            $PRE_LOOP/iter{iteration}/md_{iteration}_{instance}.crd.

                Arguments : --mininfile = minimization filename
                            --topfile   = Topology filename
                            --crdfile   = initial coordinate filename
                            --cycle     = current iteration number
                            --instance  = current instance number

        amber (kernel 2) :-

                Purpose : MD run starting from the .crd file written by kernel 1.

                Arguments : --mdinfile  = MD input filename
                            --topfile   = Topology filename
                            --cycle     = current iteration number
                            --instance  = current instance number

        Returns : [minimization kernel, MD kernel]
        '''
        min_name = os.path.basename(Kconfig.minimization_input_file)
        md_name = os.path.basename(Kconfig.md_input_file)
        top_name = os.path.basename(Kconfig.top_file)
        crd_name = os.path.basename(Kconfig.initial_crd_file)

        if (iteration - 1) == 0:
            start_crd = '$PRE_LOOP/{0} > min1.crd'.format(crd_name)
        else:
            start_crd = '$PRE_LOOP/iter{2}/min{0}{1}.crd > min{2}.crd'.format(
                iteration - 1, instance - 1, iteration)

        minimize = Kernel(name="md.amber")
        minimize.arguments = ["--mininfile={0}".format(min_name),
                              "--topfile={0}".format(top_name),
                              "--crdfile={0}".format(crd_name),
                              "--cycle=%s" % (iteration),
                              "--instance=%s" % (instance)]
        minimize.link_input_data = ['$PRE_LOOP/{0}'.format(min_name),
                                    '$PRE_LOOP/{0}'.format(top_name),
                                    '$PRE_LOOP/{0}'.format(crd_name),
                                    start_crd]
        minimize.cores = 1
        minimize.copy_output_data = [
            'md{0}.crd > $PRE_LOOP/iter{0}/md_{0}_{1}.crd'.format(iteration, instance)]

        dynamics = Kernel(name="md.amber")
        dynamics.arguments = ["--mdinfile={0}".format(md_name),
                              "--topfile={0}".format(top_name),
                              "--cycle=%s" % (iteration),
                              "--instance=%s" % (instance)]
        dynamics.link_input_data = [
            "$PRE_LOOP/{0}".format(md_name),
            "$PRE_LOOP/{0}".format(top_name),
            "$PRE_LOOP/iter{0}/md_{0}_{1}.crd > md{0}.crd".format(iteration, instance),
        ]
        dynamics.cores = 1

        return [minimize, dynamics]
    def prepare_replica_for_md(self, replica):
        """Specifies input and output files and passes them to NAMD kernel

        Builds the replica's input file via self.build_input_file, names it
        "<basename>_<replica id>_<cycle>.namd" (basename is self.inp_basename
        without its last five characters), and increments replica.cycle before
        returning.

        Arguments:
        replica - object representing a given replica and its attributes

        Returns:
        k - an instance of Kernel class
        """

        self.build_input_file(replica)
        input_file = "%s_%d_%d.namd" % (self.inp_basename[:-5],
                                        replica.id,
                                        replica.cycle)

        new_history = replica.new_history

        k = Kernel(name="md.namd")
        k.arguments = [input_file]
        k.upload_input_data = [input_file]
        k.copy_output_data = [new_history,
                              replica.new_coor,
                              replica.new_vel,
                              replica.new_ext_system]
        # Data directives are lists everywhere else; wrap the single file name
        # so this directive is not a bare string.
        k.download_output_data = [new_history]

        replica.cycle += 1
        return k
예제 #11
0
    def prepare_replica_for_md(self, replica):
        """Specifies input and output files and passes them to NAMD kernel

        The input file is generated by self.build_input_file and is named
        "<basename>_<replica id>_<cycle>.namd", where basename is
        self.inp_basename minus its last five characters. replica.cycle is
        incremented as a side effect.

        Arguments:
        replica - object representing a given replica and its attributes

        Returns:
        k - an instance of Kernel class
        """

        self.build_input_file(replica)
        input_file = "%s_%d_%d.namd" % (
            self.inp_basename[:-5], replica.id, replica.cycle)

        # Files produced by this MD cycle that are kept after the run.
        copy_out = [
            replica.new_history,
            replica.new_coor,
            replica.new_vel,
            replica.new_ext_system,
        ]

        k = Kernel(name="md.namd")
        k.arguments = [input_file]
        k.upload_input_data = [input_file]
        k.copy_output_data = copy_out
        # Wrapped in a list, like every other data directive, instead of
        # assigning the bare file-name string.
        k.download_output_data = [replica.new_history]

        replica.cycle += 1
        return k
예제 #12
0
 def step_1(self, instance):
     """Build the misc.chksum kernel for this step.

     /etc/passwd is staged as input.txt, the kernel is told to write
     checksum.txt, and the output copy directives are taken from
     self._output_copy_directives.
     """
     chksum = Kernel(name="misc.chksum")
     chksum.copy_input_data = ["/etc/passwd > input.txt"]
     chksum.arguments = [
         "--inputfile=input.txt",
         "--outputfile=checksum.txt",
     ]
     chksum.copy_output_data = self._output_copy_directives
     return chksum
    def analysis_stage(self, iteration, instance):
        '''
        function : Build the CoCo kernel for a run that continues an earlier
        simulation. The iteration number is shifted by a fixed offset (48) to
        give iterMod, and all .xtc files for shifted iterations 1..iterMod are
        linked from a fixed staging directory.

        coco :-

                Purpose : Runs CoCo analysis on a set of MD trajectory (.xtc) files
                            and generates several coordinate files, which are copied
                            back to the staging directory.

                Arguments : --grid           = Number of points along each dimension of the CoCo histogram
                            --dims           = The number of projections to consider from the input pcz file
                            --frontpoints    = Number of CUs
                            --topfile        = md-{iterMod-1}_0.gro
                            --mdfile         = MD input filename pattern
                            --output         = Output filename, derived from Kconfig.output
                            --atom_selection = Selection of the biological part of the system we want to consider for analysis

        Returns : [coco kernel]
        '''
        # NOTE(review): absolute, user-specific staging path used in place of
        # "$SHARED" -- confirm it is still valid before reusing this stage.
        shareDir = "/work/fbettenc/p14b01_pool/staging_area"

        # Last iteration of the earlier simulation whose output is reused.
        prev_sim_last_iter_to_use = 48
        iterMod = iteration + prev_sim_last_iter_to_use
        last_step = iterMod - 1
        num_sims = Kconfig.num_CUs

        # Only the base name is needed: every file handled here is a .gro file.
        # splitext also accepts names with no dot or several dots, where the
        # former two-way unpack of split('.') raised ValueError.
        outbase = os.path.splitext(os.path.basename(Kconfig.output))[0]

        k1_ana_kernel = Kernel(name="custom.coco")
        k1_ana_kernel.arguments = [
            "--grid={0}".format(Kconfig.grid),
            "--dims={0}".format(Kconfig.dims),
            "--frontpoints={0}".format(num_sims),
            "--topfile=md-{0}_0.gro".format(last_step),
            "--mdfile=*.xtc",
            "--output={0}_{1}_.gro".format(outbase, last_step),
            "--atom_selection={0}".format(Kconfig.sel),
        ]
        # iterMod+1 because at the first iteration CoCo analyses existing
        # output, so the analysis is effectively one iteration ahead.
        k1_ana_kernel.cores = min(num_sims * (iterMod + 1), RPconfig.PILOTSIZE)

        # Single-argument print calls behave the same on Python 2 and 3; the
        # text matches what the former print statements emitted.
        print(" ")
        print("iter,iterMod,AnaCUcores =  {0} ,  {1} ,  {2}".format(
            iteration, iterMod, k1_ana_kernel.cores))
        print(" ")

        k1_ana_kernel.uses_mpi = True

        # Trajectory file names use zero-based iteration/instance indices.
        trajectories = [
            shareDir + '/md-{0}_{1}.xtc > md-{0}_{1}.xtc'.format(step, sim)
            for step in range(iterMod)
            for sim in range(num_sims)
        ]
        k1_ana_kernel.link_input_data = [
            shareDir + '/md-{0}_0.gro > md-{0}_0.gro'.format(last_step)
        ] + trajectories

        k1_ana_kernel.copy_output_data = [
            "{0}_{1}_{2}.gro > ".format(outbase, last_step, idx) +
            shareDir +
            "/{0}_{1}_{2}.gro".format(outbase, last_step, idx)
            for idx in range(num_sims)
        ]

        k1_ana_kernel.download_output_data = [
            "coco.log > output/coco-iter{0}.log".format(last_step)
        ]

        return [k1_ana_kernel]
    def analysis_step(self, iteration, instance):
        '''
        function : Merge the results of each of the simulation instances and run LSDMap analysis to generate the
        new coordinate file. Split this new coordinate file into smaller files to be used by the simulation stage
        in the next iteration.

        Data which needs to be moved between the kernels and steps is staged through $PRE_LOOP by the
        copy/link directives below.

        pre_lsdmap :-

                Purpose : Links out.gro of every simulation instance of this iteration (as out0.gro, out1.gro, ...)
                            and copies the resulting tmpha.gro and tmp.gro to $PRE_LOOP.

                Arguments : --numCUs = number of simulation instances / number of small files to be concatenated

        lsdmap :-

                Purpose : Perform LSDMap on tmpha.gro. tmpha.ev and out.nn are copied to $PRE_LOOP. From the second
                            iteration on, weight.w of the previous analysis iteration is linked in and also copied
                            to $PRE_LOOP.

                Arguments : --config = name of the config file to be used during LSDMap

        post_lsdmap :-

                Purpose : Use the files staged in $PRE_LOOP to generate the new coordinate file to be used by the
                            simulation_step in the next iteration.

                Arguments : --num_runs              = number of configurations to be generated in the new coordinate file
                            --out                   = output filename
                            --cycle                 = iteration number minus one
                            --max_dead_neighbors    = max dead neighbors to be considered
                            --max_alive_neighbors   = max alive neighbors to be considered
                            --numCUs                = number of simulation instances/ number of smaller files

        Returns : [pre_lsdmap kernel, lsdmap kernel, post_lsdmap kernel]
        '''
        num_sims = Kconfig.num_CUs
        config_name = os.path.basename(Kconfig.lsdm_config_file)
        save_this_iteration = (iteration % Kconfig.nsave == 0)

        pre_ana = Kernel(name="md.pre_lsdmap")
        pre_ana.arguments = ["--numCUs={0}".format(num_sims)]
        pre_ana.link_input_data = ["$PRE_LOOP/pre_analyze.py > pre_analyze.py"] + [
            "$SIMULATION_ITERATION_{2}_INSTANCE_{0}/out.gro > out{1}.gro".format(sim, sim - 1, iteration)
            for sim in range(1, num_sims + 1)
        ]
        pre_ana.copy_output_data = [
            'tmpha.gro > $PRE_LOOP/tmpha.gro', 'tmp.gro > $PRE_LOOP/tmp.gro'
        ]

        lsdmap = Kernel(name="md.lsdmap")
        lsdmap.arguments = ["--config={0}".format(config_name)]
        lsdmap.link_input_data = [
            '$PRE_LOOP/{0} > {0}'.format(config_name),
            '$PRE_LOOP/tmpha.gro > tmpha.gro'
        ]
        lsdmap.cores = 1
        lsdmap.copy_output_data = [
            'tmpha.ev > $PRE_LOOP/tmpha.ev', 'out.nn > $PRE_LOOP/out.nn'
        ]
        if iteration > 1:
            lsdmap.link_input_data += [
                '$ANALYSIS_ITERATION_{0}_INSTANCE_1/weight.w > weight.w'.format(iteration - 1)
            ]
            # Appended AFTER the base list is set: previously this directive
            # was assigned first and then unconditionally overwritten, so
            # weight.w was never copied.
            lsdmap.copy_output_data += ['weight.w > $PRE_LOOP/weight.w']

        if save_this_iteration:
            lsdmap.download_output_data = [
                'lsdmap.log > backup/iter{0}/lsdmap.log'.format(iteration)
            ]

        post_ana = Kernel(name="md.post_lsdmap")
        post_ana.link_input_data = [
            "$PRE_LOOP/post_analyze.py > post_analyze.py",
            "$PRE_LOOP/selection.py > selection.py",
            "$PRE_LOOP/reweighting.py > reweighting.py",
            "$PRE_LOOP/spliter.py > spliter.py",
            "$PRE_LOOP/gro.py > gro.py",
            "$PRE_LOOP/tmp.gro > tmp.gro",
            "$PRE_LOOP/tmpha.ev > tmpha.ev",
            "$PRE_LOOP/out.nn > out.nn",
            "$PRE_LOOP/input.gro > input.gro"
        ]
        post_ana.arguments = [
            "--num_runs={0}".format(Kconfig.num_runs),
            "--out=out.gro",
            "--cycle={0}".format(iteration - 1),
            "--max_dead_neighbors={0}".format(Kconfig.max_dead_neighbors),
            "--max_alive_neighbors={0}".format(Kconfig.max_alive_neighbors),
            "--numCUs={0}".format(num_sims)
        ]

        if iteration > 1:
            post_ana.link_input_data += [
                '$ANALYSIS_ITERATION_{0}_INSTANCE_1/weight.w > weight_new.w'.format(iteration - 1)
            ]

        if save_this_iteration:
            post_ana.download_output_data = [
                'out.gro > backup/iter{0}/out.gro'.format(iteration),
                'weight.w > backup/iter{0}/weight.w'.format(iteration)
            ]

        return [pre_ana, lsdmap, post_ana]
    def simulation_stage(self, iteration, instance):
        '''
        function : Build the list of kernels that make up one simulation stage
        for the given iteration and instance.

        For every iteration after the first, four preparatory kernels are
        created ahead of the production run:
            1. custom.grompp  - grompp before the energy minimisation step
            2. custom.mdrun   - restrained energy minimisation step
            3. custom.grompp  - grompp before the restrained MD step
            4. custom.mdrun   - restrained MD step
        Every iteration then gets:
            5. custom.grompp  - grompp before the unrestrained (production) MD
            6. custom.mdrun   - production MD step
            7. custom.trjconv - PBC correction of the output structure (.gro)
            8. custom.trjconv - PBC correction of the output trajectory (.xtc)

        Kernels hand files to each other through $SHARED. Intermediate files
        are named "<min|eq|md>-<iteration-1>_<instance-1>.<suffix>".

        Arguments : iteration = current iteration number; iteration 1 starts
                                from Kconfig.initial_crd_file, later iterations
                                start from the previous analysis output
                    instance  = current instance number (file names use
                                instance - 1)

        Returns   : list of Kernel objects, in the order they were built
        '''
        kernel_list = []
        iter1 = iteration - 1
        inst1 = instance - 1

        # Split the configured output name into base and extension. A plain
        # split('.') fails unless there is exactly one dot, and the fallback
        # must not carry its own dot because the name templates below already
        # insert one between base and extension.
        out_name = opb(Kconfig.output)
        outbase, dot, ext = out_name.rpartition('.')
        if not dot:
            # No dot at all: rpartition leaves the whole name in the last slot.
            outbase, ext = out_name, ''
        if ext == '':
            ext = 'pdb'

        min_stem = 'min-{0}_{1}'.format(iter1, inst1)
        eq_stem = 'eq-{0}_{1}'.format(iter1, inst1)
        md_stem = 'md-{0}_{1}'.format(iter1, inst1)

        def shared(path):
            # Link directive for a configuration file kept in $SHARED.
            return '$SHARED/{0}'.format(opb(path))

        def stage_in(fname):
            # Link directive bringing fname from $SHARED under the same name.
            return '$SHARED/{0} > {0}'.format(fname)

        def stage_out(fname):
            # Copy directive sending fname to $SHARED under the same name.
            return '{0} > $SHARED/{0}'.format(fname)

        if iter1 != 0:
            # Structure produced by the previous analysis for this instance.
            ref_crd = '{0}_{1}{2}.{3}'.format(outbase, iteration - 2, inst1,
                                              ext)

            # Kernel 1: Grompp before energy min step.
            prep_min = Kernel(name="custom.grompp")
            prep_min.link_input_data = [
                shared(Kconfig.grompp_1_mdp),
                shared(Kconfig.top_file),
                shared(Kconfig.restr_file),
                shared(Kconfig.grompp_1_itp_file),
                '$PREV_ANALYSIS_INSTANCE_1/{0} > {0}'.format(ref_crd),
            ]
            prep_min.arguments = [
                "--mdp={0}".format(opb(Kconfig.grompp_1_mdp)),
                "--ref={0}".format(ref_crd),
                "--top={0}".format(opb(Kconfig.top_file)),
                "--gro={0}".format(opb(Kconfig.restr_file)),
                "--tpr={0}.tpr".format(min_stem),
            ]
            prep_min.copy_output_data = [stage_out(min_stem + '.tpr')]
            kernel_list.append(prep_min)

            # Kernel 2: Restrained energy min step.
            run_min = Kernel(name="custom.mdrun")
            run_min.link_input_data = [stage_in(min_stem + '.tpr')]
            run_min.cores = Kconfig.num_cores_per_sim_cu
            run_min.arguments = ["--deffnm={0}".format(min_stem)]
            run_min.copy_output_data = [stage_out(min_stem + '.gro')]
            kernel_list.append(run_min)

            # Kernel 3: Grompp before restrained MD step.
            prep_eq = Kernel(name="custom.grompp")
            prep_eq.link_input_data = [
                shared(Kconfig.grompp_2_mdp),
                shared(Kconfig.top_file),
                shared(Kconfig.restr_file),
                shared(Kconfig.grompp_2_itp_file),
                stage_in(min_stem + '.gro'),
            ]
            prep_eq.arguments = [
                "--mdp={0}".format(opb(Kconfig.grompp_2_mdp)),
                "--ref={0}.gro".format(min_stem),
                "--top={0}".format(opb(Kconfig.top_file)),
                "--gro={0}".format(opb(Kconfig.restr_file)),
                "--tpr={0}.tpr".format(eq_stem),
            ]
            prep_eq.copy_output_data = [stage_out(eq_stem + '.tpr')]
            kernel_list.append(prep_eq)

            # Kernel 4: Restrained MD step.
            run_eq = Kernel(name="custom.mdrun")
            run_eq.link_input_data = [stage_in(eq_stem + '.tpr')]
            run_eq.cores = Kconfig.num_cores_per_sim_cu
            run_eq.arguments = ["--deffnm={0}".format(eq_stem)]
            run_eq.copy_output_data = [stage_out(eq_stem + '.gro')]
            kernel_list.append(run_eq)

        # Kernel 5: Grompp before unrestrained (production) MD.
        if iter1 == 0:
            # First iteration: start from the initial coordinates file.
            start_gro = opb(Kconfig.initial_crd_file)
            start_link = '$SHARED/{0}'.format(start_gro)
        else:
            # Later iterations: start from the restrained MD result.
            start_gro = eq_stem + '.gro'
            start_link = stage_in(start_gro)
        prep_md = Kernel(name="custom.grompp")
        prep_md.link_input_data = [
            shared(Kconfig.grompp_3_mdp),
            shared(Kconfig.top_file),
            start_link,
        ]
        prep_md.arguments = [
            "--mdp={0}".format(opb(Kconfig.grompp_3_mdp)),
            "--gro={0}".format(start_gro),
            "--top={0}".format(opb(Kconfig.top_file)),
            "--tpr={0}.tpr".format(md_stem),
        ]
        prep_md.copy_output_data = [stage_out(md_stem + '.tpr')]
        kernel_list.append(prep_md)

        # Kernel 6: Production MD step.
        run_md = Kernel(name="custom.mdrun")
        run_md.link_input_data = [stage_in(md_stem + '.tpr')]
        run_md.cores = Kconfig.num_cores_per_sim_cu
        run_md.arguments = ["--deffnm={0}".format(md_stem)]
        run_md.copy_output_data = [
            stage_out(md_stem + '.gro'),
            stage_out(md_stem + '.xtc'),
        ]
        kernel_list.append(run_md)

        # Kernel 7: Post-processing of output structure file to correct
        #           PBC effects.
        whole_gro = md_stem + '_whole.gro'
        fix_gro = Kernel(name="custom.trjconv")
        fix_gro.link_input_data = [
            stage_in(md_stem + '.gro'),
            stage_in(md_stem + '.tpr'),
        ]
        fix_gro.arguments = [
            "--echo=System",
            "--f={0}.gro".format(md_stem),
            "--s={0}.tpr".format(md_stem),
            "--o={0}".format(whole_gro),
            "--pbc=whole",
        ]
        # The corrected file goes back to $SHARED under the un-suffixed name,
        # i.e. the same destination kernel 6 copied the raw .gro to.
        fix_gro.copy_output_data = [
            '{0} > $SHARED/{1}.gro'.format(whole_gro, md_stem)
        ]
        kernel_list.append(fix_gro)

        # Kernel 8: Post-processing of output trajectory file to correct
        #           PBC effects.
        whole_xtc = md_stem + '_whole.xtc'
        fix_xtc = Kernel(name="custom.trjconv")
        fix_xtc.link_input_data = [
            stage_in(md_stem + '.xtc'),
            stage_in(md_stem + '.tpr'),
        ]
        fix_xtc.arguments = [
            "--echo=System",
            "--f={0}.xtc".format(md_stem),
            "--s={0}.tpr".format(md_stem),
            "--o={0}".format(whole_xtc),
            "--pbc=whole",
        ]
        if iteration % Kconfig.nsave == 0:
            # Download every nsave-th iteration; the target directory is
            # tagged with iteration - 1, matching the file names.
            fix_xtc.download_output_data = [
                "{0} > output/iter{1}/{0}".format(whole_xtc, iter1)
            ]
        fix_xtc.copy_output_data = [
            '{0} > $SHARED/{1}.xtc'.format(whole_xtc, md_stem)
        ]
        kernel_list.append(fix_xtc)

        return kernel_list
    def simulation_step(self, iteration, instance):
        '''
        function : if iteration = 1, use coordinates file from pre_loop, else use the coordinates output file
        generated by the analysis in the previous iteration.
        - Preprocess the simulation parameters, coordinates structure and topology file to generate the
        portable binary run - .tpr - file to be used by the simulation run;
        - Run the simulations;
        - Apply gromacs to the trajectory and coordinate files to adjust the jumps of the molecular system
        in the periodic boundary conditions simulation box.

        md.grompp :-

                Purpose : Run gromacs preprocessing to obtain a portable binary run file (.tpr) that unifies information
                from the simulation parameters, topology file and the initial coordinates file.

                Arguments : --mdp  = simulation parameters file - input
                            --gro  = single coordinates file - input
                            --top  = topology filename - input
                            --ref  = single coordinates file to be used as a reference for position restraints - input
                            --tpr  = portable binary run file - output

        md.mdrun :-

                Purpose : Run gromacs on each of the coordinate files .gro that were given in input to the previous
                grompp kernel, using as input the .tpr file generated by the previous grompp kernel.
                Among others generates a .xtc file in each instance, all of which will be used for further analysis.

                Arguments : --deffnm = basename that will be used for all generated files in output but also to
                determine the .tpr file in input.

        md.trjconv :-

                Purpose : Post-process the production .gro (argument set 1) and .xtc (argument set 2) with
                pbc=whole.

                Arguments : --echoN = selection passed to the tool ("System")
                            --fN    = file to convert - input
                            --sN    = .tpr file of the production run - input
                            --oN    = converted "_whole" file - output
                            --pbcN  = PBC treatment ("whole")

        Parameters : iteration = current iteration number (1 for the first iteration)
                     instance  = current instance number (file names use instance - 1)

        Returns    : list of Kernel objects, in the order they were built. Four preparatory kernels
                     (minimisation and restrained equilibration) are only included for iteration > 1.
        '''

        kernel_list = []
        first_iteration = (iteration - 1) == 0

        # Every intermediate file is tagged "<iteration-1>_<instance-1>".
        tag = '{0}_{1}'.format(iteration - 1, instance - 1)
        min_stem = 'min-' + tag
        eq_stem = 'eq-' + tag
        md_stem = 'md-' + tag

        def pre_loop_file(path):
            # Link directive for a configuration file kept in $PRE_LOOP.
            return '$PRE_LOOP/{0}'.format(os.path.basename(path))

        def from_pre_loop(fname):
            # Link directive bringing fname from $PRE_LOOP under the same name.
            return '$PRE_LOOP/{0} > {0}'.format(fname)

        def to_pre_loop(fname):
            # Copy directive sending fname to $PRE_LOOP under the same name.
            return '{0} > $PRE_LOOP/{0}'.format(fname)

        if not first_iteration:
            # Split the configured output name into base and extension. A
            # plain split('.') fails unless there is exactly one dot, and the
            # fallback must not carry its own dot because the template below
            # already inserts one between base and extension.
            out_name = os.path.basename(Kconfig.output)
            outbase, dot, ext = out_name.rpartition('.')
            if not dot:
                # No dot at all: rpartition leaves the name in the last slot.
                outbase, ext = out_name, ''
            if ext == '':
                ext = 'pdb'

            # Structure produced by the previous analysis for this instance.
            ref_crd = '{0}_{1}{2}.{3}'.format(outbase, iteration - 2,
                                              instance - 1, ext)

            # Grompp before the restrained energy minimisation.
            prep_min = Kernel(name="md.grompp")
            prep_min.link_input_data = [
                pre_loop_file(Kconfig.eminrestr_md),
                pre_loop_file(Kconfig.top_file),
                pre_loop_file(Kconfig.restr_file),
                pre_loop_file(Kconfig.itp_file),
                '$PREV_ANALYSIS_INSTANCE_1/{0} > {0}'.format(ref_crd),
            ]
            prep_min.arguments = [
                "--mdp={0}".format(os.path.basename(Kconfig.eminrestr_md)),
                "--ref={0}".format(ref_crd),
                "--top={0}".format(os.path.basename(Kconfig.top_file)),
                "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
                "--tpr={0}.tpr".format(min_stem),
            ]
            prep_min.copy_output_data = [to_pre_loop(min_stem + '.tpr')]
            kernel_list.append(prep_min)

            # Restrained energy minimisation.
            run_min = Kernel(name="md.mdrun")
            run_min.link_input_data = [from_pre_loop(min_stem + '.tpr')]
            run_min.cores = Kconfig.num_cores_per_sim_cu
            run_min.arguments = ["--deffnm={0}".format(min_stem)]
            run_min.copy_output_data = [to_pre_loop(min_stem + '.gro')]
            kernel_list.append(run_min)

            # Grompp before the restrained equilibration, using the
            # minimised structure as the restraint reference.
            prep_eq = Kernel(name="md.grompp")
            prep_eq.link_input_data = [
                pre_loop_file(Kconfig.eeqrestr_md),
                pre_loop_file(Kconfig.top_file),
                pre_loop_file(Kconfig.restr_file),
                pre_loop_file(Kconfig.itp_file),
                from_pre_loop(min_stem + '.gro'),
            ]
            prep_eq.arguments = [
                "--mdp={0}".format(os.path.basename(Kconfig.eeqrestr_md)),
                "--ref={0}.gro".format(min_stem),
                "--top={0}".format(os.path.basename(Kconfig.top_file)),
                "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
                "--tpr={0}.tpr".format(eq_stem),
            ]
            prep_eq.copy_output_data = [to_pre_loop(eq_stem + '.tpr')]
            kernel_list.append(prep_eq)

            # Restrained equilibration.
            run_eq = Kernel(name="md.mdrun")
            run_eq.link_input_data = [from_pre_loop(eq_stem + '.tpr')]
            run_eq.cores = Kconfig.num_cores_per_sim_cu
            run_eq.arguments = ["--deffnm={0}".format(eq_stem)]
            run_eq.copy_output_data = [to_pre_loop(eq_stem + '.gro')]
            kernel_list.append(run_eq)

        # Grompp before the production run.
        if first_iteration:
            # First iteration: start from the initial coordinates file.
            start_gro = os.path.basename(Kconfig.initial_crd_file)
            start_link = '$PRE_LOOP/{0}'.format(start_gro)
        else:
            # Later iterations: start from the equilibrated structure.
            start_gro = eq_stem + '.gro'
            start_link = from_pre_loop(start_gro)
        prep_md = Kernel(name="md.grompp")
        prep_md.link_input_data = [
            pre_loop_file(Kconfig.md_input_file),
            pre_loop_file(Kconfig.top_file),
            start_link,
        ]
        prep_md.arguments = [
            "--mdp={0}".format(os.path.basename(Kconfig.md_input_file)),
            "--gro={0}".format(start_gro),
            "--top={0}".format(os.path.basename(Kconfig.top_file)),
            "--tpr={0}.tpr".format(md_stem),
        ]
        prep_md.copy_output_data = [to_pre_loop(md_stem + '.tpr')]
        kernel_list.append(prep_md)

        # Production run.
        run_md = Kernel(name="md.mdrun")
        run_md.link_input_data = [from_pre_loop(md_stem + '.tpr')]
        run_md.cores = Kconfig.num_cores_per_sim_cu
        run_md.arguments = ["--deffnm={0}".format(md_stem)]
        run_md.copy_output_data = [
            to_pre_loop(md_stem + '.gro'),
            to_pre_loop(md_stem + '.xtc'),
        ]
        kernel_list.append(run_md)

        # PBC post-processing of both the structure (set 1) and the
        # trajectory (set 2) in a single kernel.
        whole_xtc = md_stem + '_whole.xtc'
        make_whole = Kernel(name="md.trjconv")
        make_whole.link_input_data = [
            from_pre_loop(md_stem + '.gro'),
            from_pre_loop(md_stem + '.xtc'),
            from_pre_loop(md_stem + '.tpr'),
        ]
        make_whole.arguments = [
            "--echo1=System",
            "--f1={0}.gro".format(md_stem),
            "--s1={0}.tpr".format(md_stem),
            "--o1={0}_whole.gro".format(md_stem),
            "--pbc1=whole",
            "--echo2=System",
            "--f2={0}.xtc".format(md_stem),
            "--s2={0}.tpr".format(md_stem),
            "--o2={0}".format(whole_xtc),
            "--pbc2=whole",
        ]
        if iteration % Kconfig.nsave == 0:
            # Download every nsave-th iteration; the target directory is
            # tagged with iteration - 1, matching the file names.
            make_whole.download_output_data = [
                "{0} > output/iter{1}/{0}".format(whole_xtc, iteration - 1)
            ]
        kernel_list.append(make_whole)

        return kernel_list
 def step_1(self, instance):
     '''
     function : Build the "misc.chksum" kernel for this step.

     /etc/passwd is copied in as input.txt and the kernel is asked to write
     checksum.txt. Output staging is taken from
     self._output_copy_directives.

     Arguments : instance = instance number (not used here)

     Returns   : the configured Kernel object
     '''
     chksum_kernel = Kernel(name="misc.chksum")
     chksum_kernel.arguments = [
         "--inputfile=input.txt",
         "--outputfile=checksum.txt",
     ]
     chksum_kernel.copy_input_data = ["/etc/passwd > input.txt"]
     chksum_kernel.copy_output_data = self._output_copy_directives
     return chksum_kernel
    def simulation_step(self, iteration, instance):
        '''
        function : if iteration = 1, use coordinates file from pre_loop, else use the coordinates output file
        generated by the analysis in the previous iteration.
        - Preprocess the simulation parameters, coordinates structure and topology file to generate the
        portable binary run - .tpr - file to be used by the simulation run;
        - Run the simulations;
        - Apply gromacs to the trajectory and coordinate files to adjust the jumps of the molecular system
        in the periodic boundary conditions simulation box.

        md.grompp :-

                Purpose : Run gromacs preprocessing to obtain a portable binary run file (.tpr) that unifies information
                from the simulation parameters, topology file and the initial coordinates file.

                Arguments : --mdp  = simulation parameters file - input
                            --gro  = single coordinates file - input
                            --top  = topology filename - input
                            --ref  = single coordinates file to be used as a reference for position restraints - input
                            --tpr  = portable binary run file - output

        md.mdrun :-

                Purpose : Run gromacs on each of the coordinate files .gro that were given in input to the previous
                grompp kernel, using as input the .tpr file generated by the previous grompp kernel.
                Among others generates a .xtc file in each instance, all of which will be used for further analysis.

                Arguments : --deffnm = basename that will be used for all generated files in output but also to
                determine the .tpr file in input.

        md.trjconv :-

                Purpose : Post-process the production .gro (argument set 1) and .xtc (argument set 2) with
                pbc=whole.

                Arguments : --echoN = selection passed to the tool ("System")
                            --fN    = file to convert - input
                            --sN    = .tpr file of the production run - input
                            --oN    = converted "_whole" file - output
                            --pbcN  = PBC treatment ("whole")

        Parameters : iteration = current iteration number (1 for the first iteration)
                     instance  = current instance number (file names use instance - 1)

        Returns    : list of Kernel objects, in the order they were built. Four preparatory kernels
                     (minimisation and restrained equilibration) are only included for iteration > 1.
        '''

        kernel_list = []
        first_iteration = (iteration - 1) == 0

        # Every intermediate file is tagged "<iteration-1>_<instance-1>".
        tag = '{0}_{1}'.format(iteration - 1, instance - 1)
        min_stem = 'min-' + tag
        eq_stem = 'eq-' + tag
        md_stem = 'md-' + tag

        def pre_loop_file(path):
            # Link directive for a configuration file kept in $PRE_LOOP.
            return '$PRE_LOOP/{0}'.format(os.path.basename(path))

        def from_pre_loop(fname):
            # Link directive bringing fname from $PRE_LOOP under the same name.
            return '$PRE_LOOP/{0} > {0}'.format(fname)

        def to_pre_loop(fname):
            # Copy directive sending fname to $PRE_LOOP under the same name.
            return '{0} > $PRE_LOOP/{0}'.format(fname)

        if not first_iteration:
            # Split the configured output name into base and extension. A
            # plain split('.') fails unless there is exactly one dot, and the
            # fallback must not carry its own dot because the template below
            # already inserts one between base and extension.
            out_name = os.path.basename(Kconfig.output)
            outbase, dot, ext = out_name.rpartition('.')
            if not dot:
                # No dot at all: rpartition leaves the name in the last slot.
                outbase, ext = out_name, ''
            if ext == '':
                ext = 'pdb'

            # Structure produced by the previous analysis for this instance.
            ref_crd = '{0}_{1}{2}.{3}'.format(outbase, iteration - 2,
                                              instance - 1, ext)

            # Grompp before the restrained energy minimisation.
            prep_min = Kernel(name="md.grompp")
            prep_min.link_input_data = [
                pre_loop_file(Kconfig.eminrestr_md),
                pre_loop_file(Kconfig.top_file),
                pre_loop_file(Kconfig.restr_file),
                pre_loop_file(Kconfig.itp_file),
                '$PREV_ANALYSIS_INSTANCE_1/{0} > {0}'.format(ref_crd),
            ]
            prep_min.arguments = [
                "--mdp={0}".format(os.path.basename(Kconfig.eminrestr_md)),
                "--ref={0}".format(ref_crd),
                "--top={0}".format(os.path.basename(Kconfig.top_file)),
                "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
                "--tpr={0}.tpr".format(min_stem),
            ]
            prep_min.copy_output_data = [to_pre_loop(min_stem + '.tpr')]
            kernel_list.append(prep_min)

            # Restrained energy minimisation.
            run_min = Kernel(name="md.mdrun")
            run_min.link_input_data = [from_pre_loop(min_stem + '.tpr')]
            run_min.cores = Kconfig.num_cores_per_sim_cu
            run_min.arguments = ["--deffnm={0}".format(min_stem)]
            run_min.copy_output_data = [to_pre_loop(min_stem + '.gro')]
            kernel_list.append(run_min)

            # Grompp before the restrained equilibration, using the
            # minimised structure as the restraint reference.
            prep_eq = Kernel(name="md.grompp")
            prep_eq.link_input_data = [
                pre_loop_file(Kconfig.eeqrestr_md),
                pre_loop_file(Kconfig.top_file),
                pre_loop_file(Kconfig.restr_file),
                pre_loop_file(Kconfig.itp_file),
                from_pre_loop(min_stem + '.gro'),
            ]
            prep_eq.arguments = [
                "--mdp={0}".format(os.path.basename(Kconfig.eeqrestr_md)),
                "--ref={0}.gro".format(min_stem),
                "--top={0}".format(os.path.basename(Kconfig.top_file)),
                "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
                "--tpr={0}.tpr".format(eq_stem),
            ]
            prep_eq.copy_output_data = [to_pre_loop(eq_stem + '.tpr')]
            kernel_list.append(prep_eq)

            # Restrained equilibration.
            run_eq = Kernel(name="md.mdrun")
            run_eq.link_input_data = [from_pre_loop(eq_stem + '.tpr')]
            run_eq.cores = Kconfig.num_cores_per_sim_cu
            run_eq.arguments = ["--deffnm={0}".format(eq_stem)]
            run_eq.copy_output_data = [to_pre_loop(eq_stem + '.gro')]
            kernel_list.append(run_eq)

        # Grompp before the production run.
        if first_iteration:
            # First iteration: start from the initial coordinates file.
            start_gro = os.path.basename(Kconfig.initial_crd_file)
            start_link = '$PRE_LOOP/{0}'.format(start_gro)
        else:
            # Later iterations: start from the equilibrated structure.
            start_gro = eq_stem + '.gro'
            start_link = from_pre_loop(start_gro)
        prep_md = Kernel(name="md.grompp")
        prep_md.link_input_data = [
            pre_loop_file(Kconfig.md_input_file),
            pre_loop_file(Kconfig.top_file),
            start_link,
        ]
        prep_md.arguments = [
            "--mdp={0}".format(os.path.basename(Kconfig.md_input_file)),
            "--gro={0}".format(start_gro),
            "--top={0}".format(os.path.basename(Kconfig.top_file)),
            "--tpr={0}.tpr".format(md_stem),
        ]
        prep_md.copy_output_data = [to_pre_loop(md_stem + '.tpr')]
        kernel_list.append(prep_md)

        # Production run.
        run_md = Kernel(name="md.mdrun")
        run_md.link_input_data = [from_pre_loop(md_stem + '.tpr')]
        run_md.cores = Kconfig.num_cores_per_sim_cu
        run_md.arguments = ["--deffnm={0}".format(md_stem)]
        run_md.copy_output_data = [
            to_pre_loop(md_stem + '.gro'),
            to_pre_loop(md_stem + '.xtc'),
        ]
        kernel_list.append(run_md)

        # PBC post-processing of both the structure (set 1) and the
        # trajectory (set 2) in a single kernel.
        whole_xtc = md_stem + '_whole.xtc'
        make_whole = Kernel(name="md.trjconv")
        make_whole.link_input_data = [
            from_pre_loop(md_stem + '.gro'),
            from_pre_loop(md_stem + '.xtc'),
            from_pre_loop(md_stem + '.tpr'),
        ]
        make_whole.arguments = [
            "--echo1=System",
            "--f1={0}.gro".format(md_stem),
            "--s1={0}.tpr".format(md_stem),
            "--o1={0}_whole.gro".format(md_stem),
            "--pbc1=whole",
            "--echo2=System",
            "--f2={0}.xtc".format(md_stem),
            "--s2={0}.tpr".format(md_stem),
            "--o2={0}".format(whole_xtc),
            "--pbc2=whole",
        ]
        if iteration % Kconfig.nsave == 0:
            # Download every nsave-th iteration; the target directory is
            # tagged with iteration - 1, matching the file names.
            make_whole.download_output_data = [
                "{0} > output/iter{1}/{0}".format(whole_xtc, iteration - 1)
            ]
        kernel_list.append(make_whole)

        return kernel_list
    def analysis_step(self, iteration, instance):
        '''
        function : Set up the CoCo analysis for the current iteration. The kernel is linked against the .xtc
         file of every instance (1..num_CUs) of every iteration from 1 up to the current one, and its .gro
         outputs (one per CU) are copied to $PRE_LOOP.

        coco :-

                Purpose : Runs CoCo analysis on a set of MD trajectory files (here .xtc files) and generates
                several coordinate files.

                Arguments : --grid           = Number of points along each dimension of the CoCo histogram
                            --dims           = The number of projections to consider from the input pcz file
                            --frontpoints    = Number of CUs
                            --topfile        = Topology filename (here the instance-1 .gro of this iteration)
                            --mdfile         = MD Input filename (here the glob "*.xtc")
                            --output         = Output filename
                            --atom_selection = Selection of the biological part of the system we want to
                                               consider for analysis

        Parameters : iteration = current iteration number
                     instance  = not used by this step

        Returns    : single-element list holding the configured Kernel
        '''

        coco = Kernel(name="md.coco")
        prev = iteration - 1

        # Topology file plus the first instance's structure of this
        # iteration, which is what --topfile points at below.
        staged = [
            '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.top_file)),
            '$SIMULATION_ITERATION_{0}_INSTANCE_1/md-{1}_0.gro > md-{1}_0.gro'.
            format(iteration, prev),
        ]
        # Simulation directories are numbered from 1, file tags from 0.
        xtc_link = '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md-{2}_{3}.xtc > md-{2}_{3}.xtc'
        staged += [
            xtc_link.format(past, cu, past - 1, cu - 1)
            for past in range(1, iteration + 1)
            for cu in range(1, Kconfig.num_CUs + 1)
        ]
        coco.link_input_data = staged

        coco.uses_mpi = False
        coco.cores = 1

        # Requires an output name of the form "<base>.<ext>".
        outbase, ext = os.path.basename(Kconfig.output).split('.')

        coco.arguments = [
            "--grid={0}".format(Kconfig.grid),
            "--dims={0}".format(Kconfig.dims),
            "--frontpoints={0}".format(Kconfig.num_CUs),
            "--topfile=md-{0}_0.gro".format(prev),
            "--mdfile=*.xtc",
            "--output={0}_{1}.{2}".format(outbase, prev, ext),
            "--atom_selection={0}".format(Kconfig.sel),
        ]

        # The staged-out names always end in ".gro"; the configured
        # extension is not part of this template.
        gro_copy = "{0}_{1}{2}.gro > $PRE_LOOP/{0}_{1}{2}.gro"
        coco.copy_output_data = [
            gro_copy.format(outbase, prev, idx)
            for idx in range(Kconfig.num_CUs)
        ]

        coco.download_output_data = [
            "coco.log > output/coco-iter{0}.log".format(prev)
        ]

        return [coco]
    def simulation_stage(self, iteration, instance):
        '''
        function : Assemble, in execution order, the GROMACS kernels that one
        simulation instance runs in the given iteration. All files are staged
        through the absolute directory held in shareDir, and every file name
        carries the tag "<iterMod - 1>_<instance - 1>".

        Kernels returned :-

                custom.grompp / custom.mdrun ("min-" files)
                custom.grompp / custom.mdrun ("eq-" files)

                    Purpose : Only created when iterMod - 1 is non-zero. They
                              start from "<outbase>_<iterMod - 2>_<instance - 1>.<ext>"
                              found in shareDir and leave "eq-<tag>.gro" there.

                custom.grompp / custom.mdrun ("md-" files)

                    Purpose : Prepare and run the "md-<tag>" job, starting from
                              the initial coordinate file when iterMod - 1 is
                              zero and from "eq-<tag>.gro" otherwise.

                custom.trjconv (twice)

                    Purpose : Called with --pbc=whole on "md-<tag>.gro" and on
                              "md-<tag>.xtc"; the "_whole" results overwrite the
                              original files in shareDir. The .xtc result is
                              also downloaded when iterMod is a multiple of
                              Kconfig.nsave.

        Arguments : iteration = current iteration number (shifted by
                                prev_sim_last_iter_to_use to obtain iterMod)
                    instance  = 1-based index of the simulation instance

        Returns   : list of Kernel objects
        '''
        # NOTE(review): site-specific absolute staging directory, used here in
        # place of the "$SHARED" placeholder; it must exist on the resource.
        shareDir = "/work/fbettenc/p14b01_pool/staging_area"
        # NOTE(review): iteration numbers are shifted by this constant,
        # presumably so the run continues from files of an earlier run that
        # already sit in shareDir -- confirm before reusing this elsewhere.
        prev_sim_last_iter_to_use = 48
        iterMod = iteration + prev_sim_last_iter_to_use

        # Split the configured output name into base and extension. Names
        # without a dot (or with several) no longer fail on unpacking, and the
        # fallback extension has no leading dot because every user below
        # already inserts the "." itself.
        output_name = os.path.basename(Kconfig.output)
        if '.' in output_name:
            outbase, ext = output_name.rsplit('.', 1)
        else:
            outbase, ext = output_name, ''
        if ext == '':
            ext = 'pdb'

        cycle = iterMod - 1
        tag = "{0}_{1}".format(cycle, instance - 1)
        top_name = os.path.basename(Kconfig.top_file)

        def shared(name):
            # Path of `name` inside the staging directory.
            return shareDir + '/' + name

        def stage_in(name):
            # Directive: take `name` from the staging directory, same name.
            return shared(name) + ' > ' + name

        def stage_out(local_name, shared_name=None):
            # Directive: copy `local_name` into the staging directory,
            # optionally under a different name.
            if shared_name is None:
                shared_name = local_name
            return local_name + ' > ' + shared(shared_name)

        kernel_list = []

        if cycle != 0:
            restr_name = os.path.basename(Kconfig.restr_file)
            # Start structure expected in shareDir for this instance.
            start_name = "{0}_{1}_{2}.{3}".format(outbase, iterMod - 2,
                                                  instance - 1, ext)
            min_tpr = "min-" + tag + ".tpr"
            min_gro = "min-" + tag + ".gro"
            eq_tpr = "eq-" + tag + ".tpr"
            eq_gro = "eq-" + tag + ".gro"

            min_mdp = os.path.basename(Kconfig.grompp_1_mdp)
            k1_prep_min_kernel = Kernel(name="custom.grompp")
            k1_prep_min_kernel.link_input_data = [
                shared(min_mdp),
                shared(top_name),
                shared(restr_name),
                shared(os.path.basename(Kconfig.grompp_1_itp_file)),
                stage_in(start_name),
            ]
            k1_prep_min_kernel.arguments = [
                "--mdp=" + min_mdp,
                "--ref=" + start_name,
                "--top=" + top_name,
                "--gro=" + restr_name,
                "--tpr=" + min_tpr,
            ]
            k1_prep_min_kernel.copy_output_data = [stage_out(min_tpr)]
            kernel_list.append(k1_prep_min_kernel)

            k2_min_kernel = Kernel(name="custom.mdrun")
            k2_min_kernel.link_input_data = [stage_in(min_tpr)]
            k2_min_kernel.cores = Kconfig.num_cores_per_sim_cu
            k2_min_kernel.arguments = ["--deffnm=min-" + tag]
            # Uses the same " > " separator as every other directive (the
            # previous text was missing the space after ">").
            k2_min_kernel.copy_output_data = [stage_out(min_gro)]
            kernel_list.append(k2_min_kernel)

            eq_mdp = os.path.basename(Kconfig.grompp_2_mdp)
            k3_prep_eq_kernel = Kernel(name="custom.grompp")
            k3_prep_eq_kernel.link_input_data = [
                shared(eq_mdp),
                shared(top_name),
                shared(restr_name),
                shared(os.path.basename(Kconfig.grompp_2_itp_file)),
                stage_in(min_gro),
                stage_in(start_name),
            ]
            k3_prep_eq_kernel.arguments = [
                "--mdp=" + eq_mdp,
                "--ref=" + start_name,
                "--top=" + top_name,
                "--gro=" + min_gro,
                "--tpr=" + eq_tpr,
            ]
            k3_prep_eq_kernel.copy_output_data = [stage_out(eq_tpr)]
            kernel_list.append(k3_prep_eq_kernel)

            k4_eq_kernel = Kernel(name="custom.mdrun")
            k4_eq_kernel.link_input_data = [stage_in(eq_tpr)]
            k4_eq_kernel.cores = Kconfig.num_cores_per_sim_cu
            k4_eq_kernel.arguments = ["--deffnm=eq-" + tag]
            k4_eq_kernel.copy_output_data = [stage_out(eq_gro)]
            kernel_list.append(k4_eq_kernel)

        md_tpr = "md-" + tag + ".tpr"
        md_gro = "md-" + tag + ".gro"
        md_xtc = "md-" + tag + ".xtc"
        md_mdp = os.path.basename(Kconfig.grompp_3_mdp)

        k5_prep_sim_kernel = Kernel(name="custom.grompp")
        if cycle == 0:
            # No earlier cycle: start from the initial coordinate file.
            start_gro = os.path.basename(Kconfig.initial_crd_file)
            start_link = shared(start_gro)
        else:
            start_gro = "eq-" + tag + ".gro"
            start_link = stage_in(start_gro)
        k5_prep_sim_kernel.link_input_data = [
            shared(md_mdp),
            shared(top_name),
            start_link,
        ]
        k5_prep_sim_kernel.arguments = [
            "--mdp=" + md_mdp,
            "--gro=" + start_gro,
            "--top=" + top_name,
            "--tpr=" + md_tpr,
        ]
        k5_prep_sim_kernel.copy_output_data = [stage_out(md_tpr)]
        kernel_list.append(k5_prep_sim_kernel)

        k6_sim_kernel = Kernel(name="custom.mdrun")
        k6_sim_kernel.link_input_data = [stage_in(md_tpr)]
        k6_sim_kernel.cores = Kconfig.num_cores_per_sim_cu
        k6_sim_kernel.arguments = ["--deffnm=md-" + tag]
        k6_sim_kernel.copy_output_data = [stage_out(md_gro), stage_out(md_xtc)]
        kernel_list.append(k6_sim_kernel)

        # The two trjconv kernels write "*_whole" files that replace the
        # original .gro / .xtc in the staging directory.
        whole_gro = "md-" + tag + "_whole.gro"
        k7_sim_kernel = Kernel(name="custom.trjconv")
        k7_sim_kernel.link_input_data = [stage_in(md_gro), stage_in(md_tpr)]
        k7_sim_kernel.arguments = [
            "--echo=System",
            "--f=" + md_gro,
            "--s=" + md_tpr,
            "--o=" + whole_gro,
            "--pbc=whole",
        ]
        k7_sim_kernel.copy_output_data = [stage_out(whole_gro, md_gro)]
        kernel_list.append(k7_sim_kernel)

        whole_xtc = "md-" + tag + "_whole.xtc"
        k8_sim_kernel = Kernel(name="custom.trjconv")
        k8_sim_kernel.link_input_data = [stage_in(md_xtc), stage_in(md_tpr)]
        k8_sim_kernel.arguments = [
            "--echo=System",
            "--f=" + md_xtc,
            "--s=" + md_tpr,
            "--o=" + whole_xtc,
            "--pbc=whole",
        ]
        if iterMod % Kconfig.nsave == 0:
            k8_sim_kernel.download_output_data = [
                whole_xtc + " > output/iter{0}/".format(cycle) + whole_xtc
            ]
        k8_sim_kernel.copy_output_data = [stage_out(whole_xtc, md_xtc)]
        kernel_list.append(k8_sim_kernel)

        return kernel_list
    def analysis_step(self, iteration, instance):
        '''
        function : Build the two analysis kernels of one iteration: a CoCo
        kernel that reads the .ncdf files of every simulation instance from
        iteration 1 up to the current one, followed by a tleap kernel that
        receives the .pdb files CoCo produced. The instance argument is not
        used.

        coco :-

                Purpose : Runs "md.coco" on all linked .ncdf files. Its outputs
                            pdbs<i>.pdb are copied to $PRE_LOOP as
                            pentaopt<iteration><i>.pdb, and coco.log is
                            downloaded when iteration is a multiple of
                            Kconfig.nsave.

                Arguments : --grid           = Kconfig.grid
                            --dims           = Kconfig.dims
                            --frontpoints    = Kconfig.num_CUs
                            --topfile        = basename of Kconfig.top_file
                            --mdfile         = *.ncdf
                            --output         = pdbs
                            --atom_selection = Kconfig.atom_selection

        tleap :-

                Purpose : Runs "md.tleap" with postexec.py and the
                            pentaopt<iteration><i>.pdb files linked from
                            $PRE_LOOP.

                Arguments : --numofsims = Kconfig.num_CUs
                            --cycle     = current iteration number

        Returns : [coco kernel, tleap kernel]
        '''
        num_cus = Kconfig.num_CUs
        top_name = os.path.basename(Kconfig.top_file)

        coco = Kernel(name="md.coco")
        coco.arguments = [
            "--grid={0}".format(Kconfig.grid),
            "--dims={0}".format(Kconfig.dims),
            "--frontpoints={0}".format(num_cus),
            "--topfile={0}".format(top_name),
            "--mdfile=*.ncdf",
            "--output=pdbs",
            "--atom_selection={0}".format(Kconfig.atom_selection),
        ]
        coco.cores = min(num_cus, RPconfig.PILOTSIZE)
        coco.uses_mpi = True

        # Topology first, then one trajectory per (iteration, instance) pair,
        # both counted from 1.
        ncdf_link = ('$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.ncdf'
                     ' > md_{0}_{1}.ncdf')
        coco.link_input_data = ['$PRE_LOOP/{0}'.format(top_name)] + [
            ncdf_link.format(cycle, cu)
            for cycle in range(1, iteration + 1)
            for cu in range(1, num_cus + 1)
        ]

        # NOTE(review): iteration and index are concatenated without a
        # separator in the pentaopt file names.
        coco.copy_output_data = [
            'pdbs{1}.pdb > $PRE_LOOP/pentaopt{0}{1}.pdb'.format(iteration, idx)
            for idx in range(0, num_cus)
        ]

        if iteration % Kconfig.nsave == 0:
            coco.download_output_data = [
                'coco.log > output/iter{0}/coco.log'.format(iteration)
            ]

        tleap = Kernel(name="md.tleap")
        tleap.arguments = [
            "--numofsims={0}".format(num_cus),
            "--cycle={0}".format(iteration),
        ]
        tleap.link_input_data = ['$PRE_LOOP/postexec.py > postexec.py'] + [
            '$PRE_LOOP/pentaopt{0}{1}.pdb > pentaopt{0}{1}.pdb'.format(
                iteration, idx)
            for idx in range(0, num_cus)
        ]

        return [coco, tleap]
    def simulation_step(self, iteration, instance):
        '''
        function : Build the two "md.amber" kernels run by one simulation
        instance. The first takes the minimization input, the second the MD
        input; the first one's md<iteration>.crd is handed to the second via
        $PRE_LOOP/md_<iteration>_<instance>.crd.

        amber (first kernel) :-

                Purpose : In iteration 1 the initial coordinate file is linked
                            as min1.crd; in later iterations
                            min<iteration-1><instance-1>.crd is linked from
                            $PREV_ANALYSIS_INSTANCE_1 as min<iteration>.crd.

                Arguments : --mininfile = minimization filename
                            --topfile   = Topology filename
                            --crdfile   = initial coordinate filename
                            --cycle     = current iteration number

        amber (second kernel) :-

                Purpose : Its md<iteration>.ncdf is downloaded when iteration
                            is a multiple of Kconfig.nsave.

                Arguments : --mdinfile  = MD input filename
                            --topfile   = Topology filename
                            --cycle     = current iteration number

        Returns : [first kernel, second kernel], each using 1 core
        '''
        cycle_arg = "--cycle=" + str(iteration)
        top_name = os.path.basename(Kconfig.top_file)
        min_input = os.path.basename(Kconfig.minimization_input_file)
        crd_name = os.path.basename(Kconfig.initial_crd_file)
        # File through which the first kernel passes coordinates to the second.
        handover = 'md_{0}_{1}.crd'.format(iteration, instance)

        if iteration == 1:
            start_crd = '$PRE_LOOP/{0} > min1.crd'.format(crd_name)
        else:
            start_crd = (
                '$PREV_ANALYSIS_INSTANCE_1/min{0}{1}.crd > min{2}.crd'.format(
                    iteration - 1, instance - 1, iteration))

        min_kernel = Kernel(name="md.amber")
        min_kernel.arguments = [
            "--mininfile=" + min_input,
            "--topfile=" + top_name,
            "--crdfile=" + crd_name,
            cycle_arg,
        ]
        min_kernel.cores = 1
        min_kernel.link_input_data = [
            '$PRE_LOOP/' + min_input,
            '$PRE_LOOP/' + top_name,
            '$PRE_LOOP/' + crd_name,
            start_crd,
        ]
        min_kernel.copy_output_data = [
            'md{0}.crd > $PRE_LOOP/'.format(iteration) + handover
        ]

        md_input = os.path.basename(Kconfig.md_input_file)
        md_kernel = Kernel(name="md.amber")
        md_kernel.arguments = [
            "--mdinfile=" + md_input,
            "--topfile=" + top_name,
            cycle_arg,
        ]
        md_kernel.link_input_data = [
            "$PRE_LOOP/" + md_input,
            "$PRE_LOOP/" + top_name,
            "$PRE_LOOP/" + handover + " > md{0}.crd".format(iteration),
        ]
        if iteration % Kconfig.nsave == 0:
            md_kernel.download_output_data = [
                'md{0}.ncdf > output/iter{0}/md_{0}_{1}.ncdf'.format(
                    iteration, instance)
            ]
        md_kernel.cores = 1

        return [min_kernel, md_kernel]
    def analysis_step(self, iteration, instance):
        """
        function : Merge the results of each of the simulation instances and run LSDMap analysis to generate the
        new coordinate file. Split this new coordinate file into smaller files to be used by the simulation stage
        in the next iteration. The instance argument is not used.

        If a step has multiple kernels (say k1, k2), data generated in k1 is implicitly moved to k2 (if k2 requires).
        Data which needs to be moved between the various steps (pre_loop, simulation_step, analysis_step) needs to
        be mentioned by the user.

        pre_lsdmap :-

                Purpose : The output of each gromacs instance in the simulation_step is a small coordinate file. Concatenate
                            such files from each of the gromacs instances to form a larger file. There is one instance of pre_lsdmap per
                            iteration. tmpha.gro and tmp.gro are copied to $PRE_LOOP.

                Arguments : --numCUs = number of simulation instances / number of small files to be concatenated

        lsdmap :-

                Purpose : Perform LSDMap on the large coordinate file to generate weights and eigen values. There is one instance
                            of lsdmap per iteration (MSSA : Multiple Simulation Single Analysis model). tmpha.ev and out.nn are
                            copied to $PRE_LOOP; from the second iteration on, weight.w of the previous analysis is linked in
                            and weight.w is copied to $PRE_LOOP as well.

                Arguments : --config = name of the config file to be used during LSDMap

        post_lsdmap :-

                Purpose : Use the weights, eigen values generated in lsdmap along with other parameter files from pre_loop
                            to generate the new coordinate file to be used by the simulation_step in the next iteration. There is one
                            instance of post_lsdmap per iteration.

                Arguments : --num_runs              = number of configurations to be generated in the new coordinate file
                            --out                   = output filename
                            --cycle                 = iteration number minus one
                            --max_dead_neighbors    = max dead neighbors to be considered
                            --max_alive_neighbors   = max alive neighbors to be considered
                            --numCUs                = number of simulation instances/ number of smaller files

        Returns : [pre_lsdmap kernel, lsdmap kernel, post_lsdmap kernel]
        """

        pre_ana = Kernel(name="md.pre_lsdmap")
        pre_ana.arguments = ["--numCUs={0}".format(Kconfig.num_CUs)]
        # Simulation instances are numbered from 1, the linked files from 0.
        pre_ana.link_input_data = ["$PRE_LOOP/pre_analyze.py > pre_analyze.py"] + [
            "$SIMULATION_ITERATION_{2}_INSTANCE_{0}/out.gro > out{1}.gro".format(i, i - 1, iteration)
            for i in range(1, Kconfig.num_CUs + 1)
        ]
        pre_ana.copy_output_data = ["tmpha.gro > $PRE_LOOP/tmpha.gro", "tmp.gro > $PRE_LOOP/tmp.gro"]

        config_name = os.path.basename(Kconfig.lsdm_config_file)
        # Weights of the previous analysis; only available after iteration 1.
        prev_weights = "$ANALYSIS_ITERATION_{0}_INSTANCE_1/weight.w".format(iteration - 1)

        lsdmap = Kernel(name="md.lsdmap")
        lsdmap.arguments = ["--config={0}".format(config_name)]
        lsdmap_inputs = [
            "$PRE_LOOP/{0} > {0}".format(config_name),
            "$PRE_LOOP/lsdm.py > lsdm.py",
            "$PRE_LOOP/tmpha.gro > tmpha.gro",
        ]
        lsdmap_outputs = ["tmpha.ev > $PRE_LOOP/tmpha.ev", "out.nn > $PRE_LOOP/out.nn"]
        if iteration > 1:
            lsdmap_inputs.append(prev_weights + " > weight.w")
            # The weight.w copy used to be assigned first and then overwritten
            # by the unconditional assignment that followed, so it was never
            # registered. It is now added to the common outputs.
            lsdmap_outputs.append("weight.w > $PRE_LOOP/weight.w")
        lsdmap.link_input_data = lsdmap_inputs
        lsdmap.cores = RPconfig.PILOTSIZE
        lsdmap.copy_output_data = lsdmap_outputs

        if iteration % Kconfig.nsave == 0:
            lsdmap.download_output_data = ["lsdmap.log > backup/iter{0}/lsdmap.log".format(iteration)]

        post_ana = Kernel(name="md.post_lsdmap")
        post_inputs = [
            "$PRE_LOOP/post_analyze.py > post_analyze.py",
            "$PRE_LOOP/select.py > select.py",
            "$PRE_LOOP/reweighting.py > reweighting.py",
            "$PRE_LOOP/spliter.py > spliter.py",
            "$PRE_LOOP/gro.py > gro.py",
            "$PRE_LOOP/tmp.gro > tmp.gro",
            "$PRE_LOOP/tmpha.ev > tmpha.ev",
            "$PRE_LOOP/out.nn > out.nn",
            "$PRE_LOOP/input.gro > input.gro",
        ]
        if iteration > 1:
            post_inputs.append(prev_weights + " > weight_new.w")
        post_ana.link_input_data = post_inputs

        post_ana.arguments = [
            "--num_runs={0}".format(Kconfig.num_runs),
            "--out=out.gro",
            "--cycle={0}".format(iteration - 1),
            "--max_dead_neighbors={0}".format(Kconfig.max_dead_neighbors),
            "--max_alive_neighbors={0}".format(Kconfig.max_alive_neighbors),
            "--numCUs={0}".format(Kconfig.num_CUs),
        ]

        if iteration % Kconfig.nsave == 0:
            post_ana.download_output_data = [
                "out.gro > backup/iter{0}/out.gro".format(iteration),
                "weight.w > backup/iter{0}/weight.w".format(iteration),
            ]

        return [pre_ana, lsdmap, post_ana]
    def analysis_stage(self, iteration, instance):
        '''
        function : Build the single CoCo kernel that analyses the .xtc files
        staged in $SHARED for cycles 0 .. iteration - 1 and for every CU, and
        that leaves one .gro file per CU in $SHARED. The instance argument is
        not used.

        coco :-

            Purpose : Runs "custom.coco" on the linked xtc trajectories,
                      using md-<iteration - 1>_0.gro as topology. The files
                      <outbase>_<iteration - 1><i>.gro are copied to $SHARED
                      and coco.log is downloaded as
                      output/coco-iter<iteration - 1>.log.

            Arguments : --grid           = Kconfig.grid
                        --dims           = Kconfig.dims
                        --frontpoints    = Kconfig.num_CUs
                        --topfile        = md-<iteration - 1>_0.gro
                        --mdfile         = *.xtc
                        --output         = <outbase>_<iteration - 1>.gro
                        --atom_selection = Kconfig.sel

        Returns : one-element list holding the CoCo kernel
        '''
        coco = Kernel(name="custom.coco")
        last_cycle = iteration - 1
        num_cus = Kconfig.num_CUs

        # Only the base name is used; the extension part is discarded. The
        # configured name must contain exactly one "." for this to unpack.
        outbase, _unused_ext = opb(Kconfig.output).split('.')

        topology = "md-{0}_0.gro".format(last_cycle)
        coco.arguments = [
            "--grid={0}".format(Kconfig.grid),
            "--dims={0}".format(Kconfig.dims),
            "--frontpoints={0}".format(num_cus),
            "--topfile=" + topology,
            "--mdfile=*.xtc",
            "--output={0}_{1}.gro".format(outbase, last_cycle),
            "--atom_selection={0}".format(Kconfig.sel),
        ]
        coco.cores = min(num_cus, RPconfig.PILOTSIZE)
        coco.uses_mpi = True

        # Topology first, then every trajectory of every cycle so far.
        coco.link_input_data = ['$SHARED/' + topology + ' > ' + topology] + [
            '$SHARED/md-{0}_{1}.xtc > md-{0}_{1}.xtc'.format(cycle, cu)
            for cycle in range(iteration)
            for cu in range(num_cus)
        ]

        coco.copy_output_data = [
            '{0}_{1}{2}.gro > $SHARED/{0}_{1}{2}.gro'.format(
                outbase, last_cycle, cu)
            for cu in range(num_cus)
        ]

        coco.download_output_data = [
            "coco.log > output/coco-iter{0}.log".format(last_cycle)
        ]

        return [coco]