def simulation_step(self, iteration, instance):
    '''
    Configure the two ``md.amber`` kernels used by one simulation instance.

    The first kernel receives the minimization input, the second the MD
    input. Static inputs are linked from ``$PRE_LOOP``. The first kernel's
    ``md<iteration>.crd`` is copied to
    ``$PRE_LOOP/iter<iteration>/md_<iteration>_<instance>.crd`` and the
    second kernel links that file back in as ``md<iteration>.crd``.

    Starting coordinates of the first kernel :
        iteration == 1 : the initial coordinate file, linked as ``min1.crd``
        otherwise      : ``$PRE_LOOP/iter<iteration>/min<iteration-1><instance-1>.crd``
                         linked as ``min<iteration>.crd``

    Kernel arguments :
        --mininfile = minimization input filename (first kernel)
        --mdinfile  = MD input filename (second kernel)
        --topfile   = topology filename
        --crdfile   = initial coordinate filename (first kernel)
        --cycle     = current iteration number
        --instance  = current instance number

    Returns the list ``[first_kernel, second_kernel]``.
    '''
    minimizer = Kernel(name="md.amber")

    min_name = os.path.basename(Kconfig.minimization_input_file)
    top_name = os.path.basename(Kconfig.top_file)
    crd_name = os.path.basename(Kconfig.initial_crd_file)

    minimizer.arguments = [
        "--mininfile={0}".format(min_name),
        "--topfile={0}".format(top_name),
        "--crdfile={0}".format(crd_name),
        "--cycle=%s" % (iteration),
        "--instance=%s" % (instance),
    ]

    # Pick the coordinate file the minimization starts from.
    if iteration == 1:
        start_coords = '$PRE_LOOP/{0} > min1.crd'.format(crd_name)
    else:
        start_coords = '$PRE_LOOP/iter{2}/min{0}{1}.crd > min{2}.crd'.format(
            iteration - 1, instance - 1, iteration)

    minimizer.link_input_data = [
        '$PRE_LOOP/{0}'.format(min_name),
        '$PRE_LOOP/{0}'.format(top_name),
        '$PRE_LOOP/{0}'.format(crd_name),
        start_coords,
    ]
    minimizer.cores = 1
    minimizer.copy_output_data = [
        'md{0}.crd > $PRE_LOOP/iter{0}/md_{0}_{1}.crd'.format(iteration, instance)
    ]

    md_runner = Kernel(name="md.amber")
    md_name = os.path.basename(Kconfig.md_input_file)
    md_runner.arguments = [
        "--mdinfile={0}".format(md_name),
        "--topfile={0}".format(top_name),
        "--cycle=%s" % (iteration),
        "--instance=%s" % (instance),
    ]
    md_runner.link_input_data = [
        "$PRE_LOOP/{0}".format(md_name),
        "$PRE_LOOP/{0}".format(top_name),
        "$PRE_LOOP/iter{0}/md_{0}_{1}.crd > md{0}.crd".format(iteration, instance),
    ]
    # No trajectory download is configured here (the original kept that
    # code commented out).
    md_runner.cores = 1

    return [minimizer, md_runner]
def simulation_step(self, iteration, instance):
    '''
    Configure the two ``custom.amber`` kernels used by one simulation instance.

    The first kernel receives the minimization input, the second the MD
    input. Static inputs are linked from ``$SHARED``. The first kernel
    publishes ``$SHARED/md_<iteration>_<instance>.rst`` and the second
    kernel links that file in as ``md<iteration>.rst``.

    First kernel, depending on the iteration :
        iteration == 1 : links the initial coordinate file as ``min1.rst7``
                         and copies ``min1.rst7`` to the shared ``.rst`` file
        otherwise      : links ``$SHARED/min_<iteration-1>_<instance-1>.rst7``
                         as ``min<iteration>.rst7`` and copies
                         ``md<iteration>.rst`` to the shared ``.rst`` file

    Kernel arguments :
        --mininfile = minimization input filename (first kernel)
        --mdinfile  = MD input filename (second kernel)
        --topfile   = topology filename
        --crdfile   = initial coordinate filename (first kernel)
        --cycle     = current iteration number

    When ``iteration`` is a multiple of ``Kconfig.nsave`` the second kernel
    also downloads ``md<iteration>.nc`` to
    ``output/iter<iteration>/md_<iteration>_<instance>.nc``.

    Returns the list ``[first_kernel, second_kernel]``.
    '''
    minimizer = Kernel(name="custom.amber")

    min_name = os.path.basename(Kconfig.minimization_input_file)
    top_name = os.path.basename(Kconfig.top_file)
    crd_name = os.path.basename(Kconfig.initial_crd_file)
    shared_rst = '$SHARED/md_{0}_{1}.rst'.format(iteration, instance)

    minimizer.arguments = [
        "--mininfile={0}".format(min_name),
        "--topfile={0}".format(top_name),
        "--crdfile={0}".format(crd_name),
        "--cycle=%s" % (iteration),
    ]

    static_links = ['$SHARED/{0}'.format(name)
                    for name in (min_name, top_name, crd_name)]
    minimizer.cores = Kconfig.num_cores_per_sim_cu

    if iteration == 1:
        minimizer.link_input_data = static_links + [
            '$SHARED/{0} > min1.rst7'.format(crd_name)]
        minimizer.copy_output_data = ['min1.rst7 > ' + shared_rst]
    else:
        minimizer.link_input_data = static_links + [
            '$SHARED/min_{0}_{1}.rst7 > min{2}.rst7'.format(
                iteration - 1, instance - 1, iteration)]
        minimizer.copy_output_data = [
            'md{0}.rst > '.format(iteration) + shared_rst]

    md_runner = Kernel(name="custom.amber")
    md_name = os.path.basename(Kconfig.md_input_file)
    md_runner.arguments = [
        "--mdinfile={0}".format(md_name),
        "--topfile={0}".format(top_name),
        "--cycle=%s" % (iteration),
    ]
    md_runner.link_input_data = [
        "$SHARED/{0}".format(md_name),
        "$SHARED/{0}".format(top_name),
        shared_rst + " > md{0}.rst".format(iteration),
    ]
    md_runner.cores = Kconfig.num_cores_per_sim_cu

    # Trajectories are only downloaded every Kconfig.nsave iterations.
    if iteration % Kconfig.nsave == 0:
        md_runner.download_output_data = [
            'md{0}.nc > output/iter{0}/md_{0}_{1}.nc'.format(iteration, instance)
        ]

    return [minimizer, md_runner]
def analysis_step(self, iteration, instance):
    '''
    Configure the ``md.coco`` kernel that analyses all trajectories so far.

    The topology file and ``postexec.py`` are linked from ``$PRE_LOOP``.
    For every iteration ``1..iteration`` and every instance
    ``1..Kconfig.num_CUs`` the file ``md<iter>.ncdf`` of that simulation is
    linked in as ``md_<iter>_<instance>.ncdf``.

    Kernel arguments :
        --grid        = Kconfig.grid
        --dims        = Kconfig.dims
        --frontpoints = Kconfig.num_CUs
        --topfile     = topology filename
        --mdfile      = ``*.ncdf``
        --output      = ``pentaopt<iteration>``
        --cycle       = current iteration number

    The kernel is given ``RPconfig.PILOTSIZE`` cores. Returns the kernel.
    '''
    coco = Kernel(name="md.coco")
    top_name = os.path.basename(Kconfig.top_file)

    coco.arguments = [
        "--grid={0}".format(Kconfig.grid),
        "--dims={0}".format(Kconfig.dims),
        "--frontpoints={0}".format(Kconfig.num_CUs),
        "--topfile={0}".format(top_name),
        "--mdfile=*.ncdf",
        "--output=pentaopt%s" % (iteration),
        "--cycle=%s" % (iteration),
    ]

    # One trajectory per (past iteration, simulation instance) pair.
    trajectories = [
        '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.ncdf > md_{0}_{1}.ncdf'.format(past, member)
        for past in range(1, iteration + 1)
        for member in range(1, Kconfig.num_CUs + 1)
    ]

    coco.cores = RPconfig.PILOTSIZE
    coco.link_input_data = [
        '$PRE_LOOP/{0}'.format(top_name),
        '$PRE_LOOP/postexec.py',
    ] + trajectories

    return coco
def stage_1(self, instance):
    """Configure the ``md.gromacs`` kernel that runs ``grompp``.

    Uploads ``in.gro``, ``in.top``, ``*.itp`` and ``in.mdp`` and asks the
    executable to write ``in.tpr``. ``instance`` is not used.

    Returns the configured single-core kernel.
    """
    mdp, gro, top, tpr = 'in.mdp', 'in.gro', 'in.top', 'in.tpr'

    grompp = Kernel(name="md.gromacs")
    grompp.upload_input_data = [gro, top, '*.itp', mdp]
    grompp.executable = ['path/to/gromacs/gmx']
    grompp.arguments = ['grompp', '-f', mdp, '-c', gro, '-o', tpr, '-p', top]
    grompp.cores = 1
    return grompp
def stage_2(self, instance):
    """Configure the ``md.gromacs`` kernel that runs ``mdrun``.

    Links ``in.tpr`` from ``$STAGE_1`` and runs ``mdrun`` on it with
    ``-deffnm out``. ``instance`` is not used.

    Returns the configured single-core kernel.
    """
    run_input = 'in.tpr'

    mdrun = Kernel(name="md.gromacs")
    mdrun.link_input_data = ['$STAGE_1/{0} > {0}'.format(run_input)]
    mdrun.executable = ['path/to/gromacs/gmx']
    mdrun.arguments = ['mdrun', '-s', run_input, '-deffnm', 'out']
    mdrun.cores = 1
    return mdrun
def stage_2(self, instance):
    """Configure the ``misc.ccount`` kernel for one instance.

    Links ``asciifile-<instance>.dat`` via the ``$STEP_1`` placeholder,
    passes it as ``--inputfile`` and downloads the resulting
    ``cfreqs-<instance>.dat``.

    NOTE(review): this method is named ``stage_2`` but the placeholder is
    ``$STEP_1`` -- confirm that is the name the framework expects.

    Returns the configured single-core kernel.
    """
    source_name = "asciifile-{0}.dat".format(instance)
    result_name = "cfreqs-{0}.dat".format(instance)

    counter = Kernel(name="misc.ccount")
    counter.arguments = ["--inputfile=" + source_name,
                         "--outputfile=" + result_name]
    counter.link_input_data = "$STEP_1/" + source_name
    counter.download_output_data = result_name
    counter.cores = 1
    return counter
def analysis_step(self, iteration, instance):
    '''
    Configure the CoCo analysis kernel and the follow-up tleap kernel.

    custom.coco :-
        Each analysis instance handles a fixed group of 64 simulations:
        instance ``n`` links ``md<iter>.ncdf`` of simulation instances
        ``(n-1)*64 + 1 .. n*64`` for every iteration ``1..iteration`` as
        ``md_<iter>_<sim>.ncdf``. The group size is hard-coded; it is NOT
        taken from ``Kconfig.num_CUs``.

        Arguments : --grid           = Kconfig.grid
                    --dims           = Kconfig.dims
                    --frontpoints    = 64 (the group size)
                    --topfile        = topology filename (linked from $SHARED)
                    --mdfile         = ``*.ncdf``
                    --output         = ``pdbs``
                    --atom_selection = Kconfig.atom_selection

        Outputs   : ``pdbs<i>.pdb`` (i = 0..63) is copied to
                    ``$SHARED/pentaopt<iteration><(n-1)*64 + i>.pdb``.
                    When ``iteration`` is a multiple of ``Kconfig.nsave``,
                    ``coco.log`` is downloaded to
                    ``output/iter<iteration>/coco.log``.

    custom.tleap (instance_type='single') :-
        Arguments : --numofsims = Kconfig.num_CUs
                    --cycle     = current iteration number
        Inputs    : ``postexec.py`` and
                    ``pentaopt<iteration><i>.pdb`` for i = 0..num_CUs-1,
                    all linked from $SHARED.

    Returns the list ``[coco_kernel, tleap_kernel]``.
    '''
    # Number of simulations (and MPI cores) handled by one analysis instance.
    group_size = 64
    # Zero-based index of the first simulation in this instance's group.
    group_offset = (instance - 1) * group_size

    k1 = Kernel(name="custom.coco")
    k1.arguments = ["--grid={0}".format(Kconfig.grid),
                    "--dims={0}".format(Kconfig.dims),
                    "--frontpoints={0}".format(group_size),
                    "--topfile={0}".format(os.path.basename(Kconfig.top_file)),
                    "--mdfile=*.ncdf",
                    "--output=pdbs",
                    "--atom_selection={0}".format(Kconfig.atom_selection)]
    k1.cores = group_size
    k1.uses_mpi = True

    # Simulation instances are 1-based, hence the +1 on both bounds.
    trajectories = [
        '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.ncdf > md_{0}_{1}.ncdf'.format(past, sim)
        for past in range(1, iteration + 1)
        for sim in range(group_offset + 1, group_offset + group_size + 1)
    ]
    k1.link_input_data = (
        ['$SHARED/{0}'.format(os.path.basename(Kconfig.top_file))] + trajectories
    )

    # Local output index i maps to global structure index group_offset + i.
    k1.copy_output_data = [
        'pdbs{1}.pdb > $SHARED/pentaopt{0}{2}.pdb'.format(iteration, i, group_offset + i)
        for i in range(group_size)
    ]

    if iteration % Kconfig.nsave == 0:
        k1.download_output_data = [
            'coco.log > output/iter{0}/coco.log'.format(iteration)
        ]

    k2 = Kernel(name="custom.tleap", instance_type='single')
    k2.arguments = ["--numofsims={0}".format(Kconfig.num_CUs),
                    "--cycle={0}".format(iteration)]
    k2.link_input_data = ['$SHARED/postexec.py > postexec.py'] + [
        '$SHARED/pentaopt{0}{1}.pdb > pentaopt{0}{1}.pdb'.format(iteration, i)
        for i in range(Kconfig.num_CUs)
    ]

    return [k1, k2]
def analysis_step(self, iteration, instance):
    '''
    Configure the ``md.coco`` kernel that analyses the ``.xtc`` trajectories.

    Inputs linked into the kernel :
        - the topology file, from ``$PRE_LOOP``
        - ``md-<iteration-1>_0.gro`` from instance 1 of the current
          simulation iteration (passed as ``--topfile``)
        - ``md-<iter-1>_<i-1>.xtc`` of every simulation instance
          ``i = 1..Kconfig.num_CUs`` of every iteration ``1..iteration``

    Arguments : --grid           = Kconfig.grid
                --dims           = Kconfig.dims
                --frontpoints    = Kconfig.num_CUs
                --topfile        = ``md-<iteration-1>_0.gro``
                --mdfile         = ``*.xtc``
                --output         = ``<outbase>_<iteration-1>.<ext>``
                --atom_selection = Kconfig.sel

    ``<outbase>`` and ``<ext>`` come from splitting the basename of
    ``Kconfig.output`` on ``'.'``; the basename must contain exactly one
    dot, otherwise the unpacking raises ``ValueError``.

    Outputs   : ``<outbase>_<iteration-1><i>.gro`` (i = 0..num_CUs-1) is
                copied to ``$PRE_LOOP``; ``coco.log`` is downloaded to
                ``output/coco-iter<iteration-1>.log``.

    Returns a one-element list holding the kernel.
    '''
    cycle = iteration - 1
    reference_gro = 'md-{0}_0.gro'.format(cycle)

    coco = Kernel(name="md.coco")

    trajectories = [
        '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md-{2}_{3}.xtc > md-{2}_{3}.xtc'.format(
            past, member, past - 1, member - 1)
        for past in range(1, iteration + 1)
        for member in range(1, Kconfig.num_CUs + 1)
    ]
    coco.link_input_data = [
        '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.top_file)),
        '$SIMULATION_ITERATION_{0}_INSTANCE_1/{1} > {1}'.format(iteration, reference_gro),
    ] + trajectories

    coco.cores = 1
    coco.uses_mpi = False

    # Two-way unpacking: deliberately fails on names without exactly one dot.
    outbase, ext = os.path.basename(Kconfig.output).split('.')

    coco.arguments = [
        "--grid={0}".format(Kconfig.grid),
        "--dims={0}".format(Kconfig.dims),
        "--frontpoints={0}".format(Kconfig.num_CUs),
        "--topfile={0}".format(reference_gro),
        "--mdfile=*.xtc",
        "--output={0}_{1}.{2}".format(outbase, cycle, ext),
        "--atom_selection={0}".format(Kconfig.sel),
    ]

    # The copied structures always carry the .gro suffix, whatever ext is.
    coco.copy_output_data = [
        "{0}_{1}{2}.gro > $PRE_LOOP/{0}_{1}{2}.gro".format(outbase, cycle, idx)
        for idx in range(0, Kconfig.num_CUs)
    ]
    coco.download_output_data = [
        "coco.log > output/coco-iter{0}.log".format(cycle)
    ]

    return [coco]
def stage_1(self, instance):
    """Configure the ``md.gromacs`` kernel for the ``grompp`` preprocessing run.

    The kernel uploads ``in.gro``, ``in.top``, ``*.itp`` and ``in.mdp`` and
    is told to produce ``in.tpr``. ``instance`` is not used.

    Returns the configured single-core kernel.
    """
    uploads = ['in.gro', 'in.top', '*.itp', 'in.mdp']
    command_line = [
        'grompp',
        '-f', 'in.mdp',
        '-c', 'in.gro',
        '-o', 'in.tpr',
        '-p', 'in.top',
    ]

    preprocess = Kernel(name="md.gromacs")
    preprocess.upload_input_data = uploads
    preprocess.executable = ['path/to/gromacs/gmx']
    preprocess.arguments = command_line
    preprocess.cores = 1
    return preprocess
def analysis_step(self, iteration, instance):
    """Configure the ``misc.ccount`` analysis kernel.

    Links ``asciifile.dat`` of every previous simulation instance
    ``1..self.simulation_instances`` in as ``asciifile-<n>.dat`` and
    downloads ``cfreqs.dat``. ``iteration`` and ``instance`` are not used.

    NOTE(review): ``--inputfile`` is ``asciifile.dat`` while the linked
    files are named ``asciifile-<n>.dat`` -- confirm this is intended.

    Returns the configured single-core kernel.
    """
    template = ("$PREV_SIMULATION_INSTANCE_{instance}/asciifile.dat"
                " > asciifile-{instance}.dat")
    staged = [template.format(instance=n)
              for n in range(1, self.simulation_instances + 1)]

    counter = Kernel(name="misc.ccount")
    counter.arguments = ["--inputfile=asciifile.dat",
                         "--outputfile=cfreqs.dat"]
    counter.link_input_data = staged
    counter.download_output_data = "cfreqs.dat"
    counter.cores = 1
    return counter
def analysis_step(self, iteration, instance):
    """Build the ``misc.ccount`` kernel run in the analysis step.

    For each simulation instance ``1..self.simulation_instances`` the
    previous simulation's ``asciifile.dat`` is linked in under the name
    ``asciifile-<n>.dat``. The kernel downloads ``cfreqs.dat``.
    Neither ``iteration`` nor ``instance`` is read.

    NOTE(review): the kernel is pointed at ``asciifile.dat`` although the
    links are named ``asciifile-<n>.dat`` -- verify against the kernel.

    Returns the configured single-core kernel.
    """
    last_instance = self.simulation_instances
    inputs = []
    for number in range(1, last_instance + 1):
        source = "$PREV_SIMULATION_INSTANCE_{instance}/asciifile.dat".format(
            instance=number)
        target = "asciifile-{instance}.dat".format(instance=number)
        inputs.append(source + " > " + target)

    kernel = Kernel(name="misc.ccount")
    kernel.arguments = ["--inputfile=asciifile.dat",
                        "--outputfile=cfreqs.dat"]
    kernel.link_input_data = inputs
    kernel.download_output_data = "cfreqs.dat"
    kernel.cores = 1
    return kernel
def step_2(self, instance):
    """Second pipeline step: character frequency count on the step-1 file.

    The ``misc.ccount`` kernel reads ``asciifile-<instance>.dat`` and writes
    ``cfreqs-<instance>.dat``, which is set as the kernel's download output.

    ..note:: ``$STEP_1`` in ``link_input_data`` is a placeholder that refers
             to step 1; ``$STEP_<n>`` can be used analogously to reference
             other steps.

    Returns the configured single-core kernel.
    """
    ascii_file = "asciifile-{0}.dat".format(instance)
    freq_file = "cfreqs-{0}.dat".format(instance)

    counter = Kernel(name="misc.ccount")
    counter.arguments = [
        "--inputfile=" + ascii_file,
        "--outputfile=" + freq_file,
    ]
    counter.link_input_data = "$STEP_1/" + ascii_file
    counter.download_output_data = freq_file
    counter.cores = 1
    return counter
def step_1(self, instance):
    """Configure the ``md.amber`` minimization kernel for cycle 1.

    Uploads the minimization input, topology and initial coordinate files
    named in ``Kconfig``; the initial coordinate file is additionally
    uploaded under the name ``min1.crd``. ``instance`` is not used.

    Kernel arguments : --mininfile, --topfile, --crdfile (basenames of the
    uploaded files) and ``--cycle=1``.

    Returns the configured single-core kernel.
    """
    cycle = 1
    min_path = Kconfig.minimization_input_file
    top_path = Kconfig.top_file
    crd_path = Kconfig.initial_crd_file

    amber = Kernel(name="md.amber")
    amber.arguments = [
        "--mininfile={0}".format(os.path.basename(min_path)),
        "--topfile={0}".format(os.path.basename(top_path)),
        "--crdfile={0}".format(os.path.basename(crd_path)),
        "--cycle=%s" % (cycle),
    ]
    amber.cores = 1
    amber.upload_input_data = [
        min_path,
        top_path,
        crd_path,
        # Same coordinate file again, renamed for the minimization run.
        '{0} > min1.crd'.format(crd_path),
    ]
    return amber
def prepare_replica_for_md(self, replica):
    """Build the kernel for one replica and advance the replica's cycle.

    The input file ``<inp_basename>_<id>_<cycle>.md`` is uploaded and the
    output file ``<inp_basename>_<id>_<cycle>.out`` is downloaded, both
    named with the replica's cycle *before* it is incremented.

    Arguments:
    replica - object providing ``id`` and ``cycle``; its ``cycle`` is
    increased by one as a side effect

    Returns the configured single-core ``misc.ccount`` kernel.
    """
    stem = self.inp_basename + "_" + str(replica.id) + "_" + str(replica.cycle)
    md_input = stem + ".md"
    md_output = stem + ".out"

    kernel = Kernel(name="misc.ccount")
    kernel.arguments = ["--inputfile=" + md_input,
                        "--outputfile=" + md_output]
    kernel.upload_input_data = md_input
    kernel.download_output_data = md_output
    kernel.cores = 1

    replica.cycle += 1
    return kernel
def prepare_replica_for_md(self, replica):
    """Create the ``misc.ccount`` kernel for ``replica``'s current cycle.

    File names follow ``<inp_basename>_<replica.id>_<replica.cycle>`` with
    the suffix ``.md`` (uploaded input) or ``.out`` (downloaded output).
    After the kernel is configured ``replica.cycle`` is incremented by one.

    Returns the configured single-core kernel.
    """
    name_parts = [self.inp_basename, str(replica.id), str(replica.cycle)]
    prefix = name_parts[0] + "_" + name_parts[1] + "_" + name_parts[2]
    infile, outfile = prefix + ".md", prefix + ".out"

    task = Kernel(name="misc.ccount")
    task.arguments = ["--inputfile=" + infile, "--outputfile=" + outfile]
    task.upload_input_data = infile
    task.download_output_data = outfile
    task.cores = 1

    # Advance only after the file names for this cycle have been fixed.
    replica.cycle = 1 + replica.cycle
    return task
def prepare_replica_for_md(self, replica):
    """Set up one ``misc.ccount`` run for the given replica.

    Uploads ``<inp_basename>_<id>_<cycle>.md`` and downloads
    ``<inp_basename>_<id>_<cycle>.out``, where ``id`` and ``cycle`` are
    read from ``replica``. ``replica.cycle`` is then bumped by one.

    Returns the configured single-core kernel.
    """
    base = self.inp_basename + "_" + str(replica.id) + "_" + str(replica.cycle)

    job = Kernel(name="misc.ccount")
    job.arguments = [
        "--inputfile=" + base + ".md",
        "--outputfile=" + base + ".out",
    ]
    job.upload_input_data = base + ".md"
    job.download_output_data = base + ".out"
    job.cores = 1

    replica.cycle += 1
    return job
def step_2(self, instance):
    """Configure the ``md.amber`` MD kernel for cycle 1.

    Uploads the MD input file named in ``Kconfig`` and links the topology
    file and ``md1.crd`` via the ``$STEP_1`` placeholder. ``instance`` is
    not used.

    Kernel arguments : --mdinfile and --topfile (file basenames) and
    ``--cycle=1``.

    Returns the configured single-core kernel.
    """
    cycle = 1
    top_name = os.path.basename(Kconfig.top_file)

    amber = Kernel(name="md.amber")
    amber.arguments = [
        "--mdinfile={0}".format(os.path.basename(Kconfig.md_input_file)),
        "--topfile={0}".format(top_name),
        "--cycle=%s" % (cycle),
    ]
    amber.upload_input_data = [Kconfig.md_input_file]
    amber.link_input_data = [
        "$STEP_1/{0}".format(top_name),
        "$STEP_1/md{0}.crd > md{0}.crd".format(cycle),
    ]
    amber.cores = 1
    return amber
def analysis_stage(self, iteration, instance):
    '''
    Configure the MPI ``custom.coco`` kernel that analyses all trajectories.

    Inputs    : the reference file (``Kconfig.ref_file``) from ``$SHARED``
                and ``md<iter>.nc`` of every simulation instance
                ``1..Kconfig.num_CUs`` of every iteration ``1..iteration``,
                linked as ``md_<iter>_<instance>.nc``.

    Arguments : --grid           = Kconfig.grid
                --dims           = Kconfig.dims
                --frontpoints    = Kconfig.num_CUs
                --topfile        = reference filename
                --mdfile         = ``*.nc``
                --output         = ``coco.rst7``
                --atom_selection = Kconfig.atom_selection

    Outputs   : ``coco<i>.rst7`` (i = 0..num_CUs-1) is copied to
                ``$SHARED/min_<iteration>_<i>.rst7``. When ``iteration`` is
                a multiple of ``Kconfig.nsave``, ``coco.log`` is downloaded
                to ``output/iter<iteration>/coco.log``.

    Cores are the smaller of ``Kconfig.num_CUs`` and ``RPconfig.PILOTSIZE``.
    ``instance`` does not influence the result. Returns the kernel.
    '''
    coco = Kernel(name="custom.coco")
    ref_name = os.path.basename(Kconfig.ref_file)

    coco.arguments = [
        "--grid={0}".format(Kconfig.grid),
        "--dims={0}".format(Kconfig.dims),
        "--frontpoints={0}".format(Kconfig.num_CUs),
        "--topfile={0}".format(ref_name),
        "--mdfile=*.nc",
        "--output=coco.rst7",
        "--atom_selection={0}".format(Kconfig.atom_selection),
    ]
    coco.cores = min(Kconfig.num_CUs, RPconfig.PILOTSIZE)
    coco.uses_mpi = True

    trajectories = [
        '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.nc > md_{0}_{1}.nc'.format(past, member)
        for past in range(1, iteration + 1)
        for member in range(1, Kconfig.num_CUs + 1)
    ]
    coco.link_input_data = ['$SHARED/{0}'.format(ref_name)] + trajectories

    coco.copy_output_data = [
        'coco{1}.rst7 > $SHARED/min_{0}_{1}.rst7'.format(iteration, idx)
        for idx in range(0, Kconfig.num_CUs)
    ]

    if iteration % Kconfig.nsave == 0:
        coco.download_output_data = [
            'coco.log > output/iter{0}/coco.log'.format(iteration)
        ]

    return coco
def stage_1(self, instance):
    """Return a "simulation" or an "analysis" kernel depending on ``instance``.

    Instances ``1..ENSEMBLE_SIZE`` get a single-core ``sleep`` kernel whose
    duration is ``INPUT_PAR_Q[instance - 1]``. Any higher instance first
    calls ``sleep(10)`` in this process and then gets a ``randval`` kernel
    (upper limit 20) that copies in ``output.txt`` of tasks
    ``1..ENSEMBLE_SIZE`` of iteration ``ITER[instance - 1]``.
    """
    global INPUT_PAR, ENSEMBLE_SIZE

    if not (instance <= ENSEMBLE_SIZE):
        # "analysis" task -- emulate some extra analysis execution time
        sleep(10)

        # The analysis kernel produces a random integer (<20) that is
        # pushed into INPUT_PAR_Q.
        analysis = Kernel(name="randval")
        analysis.arguments = ["--upperlimit=20"]
        # Copy the simulation output data of every ensemble member.
        analysis.copy_input_data = [
            '$ITER_{0}_STAGE_1_TASK_{1}/output.txt'.format(
                ITER[instance - 1], member)
            for member in range(1, ENSEMBLE_SIZE + 1)
        ]
        return analysis

    # "simulation" task
    simulation = Kernel(name="sleep")
    simulation.arguments = [
        "--file=output.txt",
        "--text=simulation",
        "--duration={0}".format(INPUT_PAR_Q[instance - 1]),
    ]
    simulation.cores = 1
    # File staging can be added through upload_input_data, copy_input_data,
    # link_input_data, copy_output_data and download_output_data.
    return simulation
def prepare_replica_for_md(self, replica):
    """Specify the input and output files of a replica and pass them to a kernel.

    Arguments:
    replica - object representing a given replica; its ``id`` and ``cycle``
    are used to name the files ``<inp_basename>_<id>_<cycle>.md`` (uploaded)
    and ``<inp_basename>_<id>_<cycle>.out`` (downloaded). ``replica.cycle``
    is incremented by one before returning.

    Returns the configured single-core ``misc.ccount`` kernel.
    """
    common = self.inp_basename + "_" + str(replica.id) + "_" + str(replica.cycle)
    files = {"in": common + ".md", "out": common + ".out"}

    ccount = Kernel(name="misc.ccount")
    ccount.arguments = ["--inputfile=" + files["in"],
                        "--outputfile=" + files["out"]]
    ccount.upload_input_data = files["in"]
    ccount.download_output_data = files["out"]
    ccount.cores = 1

    replica.cycle += 1
    return ccount
def simulation_step(self, iteration, instance):
    '''
    Build the chain of gromacs kernels run by one simulation instance.

    All files are named with the zero-based pair
    ``<iteration-1>_<instance-1>`` (``<tag>`` below) and are exchanged
    between kernels through ``$PRE_LOOP``.

    iteration > 1 only (restart from the previous analysis output
    ``<outbase>_<iteration-2><instance-1>.<ext>``, where outbase/ext come
    from splitting the basename of ``Kconfig.output`` on '.'):
        1. md.grompp  : Kconfig.eminrestr_md          -> min-<tag>.tpr
        2. md.mdrun   : --deffnm=min-<tag>            -> min-<tag>.gro
        3. md.grompp  : Kconfig.eeqrestr_md           -> eq-<tag>.tpr
        4. md.mdrun   : --deffnm=eq-<tag>             -> eq-<tag>.gro

    every iteration:
        5. md.grompp  : Kconfig.md_input_file, starting from the initial
                        coordinate file (iteration 1) or eq-<tag>.gro
                                                       -> md-<tag>.tpr
        6. md.mdrun   : --deffnm=md-<tag>             -> md-<tag>.gro/.xtc
        7. md.trjconv : ``--pbc1/--pbc2=whole`` on the .gro and the .xtc
                        -> md-<tag>_whole.gro / md-<tag>_whole.xtc; the
                        whole .xtc is downloaded when ``iteration`` is a
                        multiple of ``Kconfig.nsave``

    md.grompp arguments :
        --mdp = simulation parameters file
        --gro = coordinates file
        --top = topology file
        --ref = reference coordinates file (restrained runs only)
        --tpr = run file to produce

    md.mdrun arguments :
        --deffnm = basename of the run file and of all generated files

    NOTE(review): when ext is empty it is replaced by '.pdb', which yields
    a double dot in ``<name>.<ext>`` -- confirm this is intended.

    Returns the kernels as a list, in the order listed above.
    '''
    def pre_loop(name):
        # Reference a $PRE_LOOP file under its own name.
        return '$PRE_LOOP/{0}'.format(name)

    def pull(name):
        # Link a $PRE_LOOP file into the kernel, keeping its name.
        return '$PRE_LOOP/{0} > {0}'.format(name)

    def push(name):
        # Copy a kernel output into $PRE_LOOP, keeping its name.
        return '{0} > $PRE_LOOP/{0}'.format(name)

    first_pass = (iteration - 1) == 0
    tag = '{0}_{1}'.format(iteration - 1, instance - 1)
    min_base, eq_base, md_base = 'min-' + tag, 'eq-' + tag, 'md-' + tag

    stages = []

    if not first_pass:
        outbase, ext = os.path.basename(Kconfig.output).split('.')
        if ext == '':
            ext = '.pdb'
        restart = '{0}_{1}{2}.{3}'.format(outbase, iteration - 2, instance - 1, ext)

        # 1. preprocess the restrained minimization
        grompp_min = Kernel(name="md.grompp")
        grompp_min.link_input_data = [
            pre_loop(os.path.basename(Kconfig.eminrestr_md)),
            pre_loop(os.path.basename(Kconfig.top_file)),
            pre_loop(os.path.basename(Kconfig.restr_file)),
            pre_loop(os.path.basename(Kconfig.itp_file)),
            '$PREV_ANALYSIS_INSTANCE_1/{0} > {0}'.format(restart),
        ]
        grompp_min.arguments = [
            "--mdp={0}".format(os.path.basename(Kconfig.eminrestr_md)),
            "--ref={0}".format(restart),
            "--top={0}".format(os.path.basename(Kconfig.top_file)),
            "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
            "--tpr={0}.tpr".format(min_base),
        ]
        grompp_min.copy_output_data = [push(min_base + '.tpr')]
        stages.append(grompp_min)

        # 2. run the minimization
        mdrun_min = Kernel(name="md.mdrun")
        mdrun_min.link_input_data = [pull(min_base + '.tpr')]
        mdrun_min.cores = Kconfig.num_cores_per_sim_cu
        mdrun_min.arguments = ["--deffnm={0}".format(min_base)]
        mdrun_min.copy_output_data = [push(min_base + '.gro')]
        stages.append(mdrun_min)

        # 3. preprocess the restrained equilibration
        grompp_eq = Kernel(name="md.grompp")
        grompp_eq.link_input_data = [
            pre_loop(os.path.basename(Kconfig.eeqrestr_md)),
            pre_loop(os.path.basename(Kconfig.top_file)),
            pre_loop(os.path.basename(Kconfig.restr_file)),
            pre_loop(os.path.basename(Kconfig.itp_file)),
            pull(min_base + '.gro'),
        ]
        grompp_eq.arguments = [
            "--mdp={0}".format(os.path.basename(Kconfig.eeqrestr_md)),
            "--ref={0}.gro".format(min_base),
            "--top={0}".format(os.path.basename(Kconfig.top_file)),
            "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
            "--tpr={0}.tpr".format(eq_base),
        ]
        grompp_eq.copy_output_data = [push(eq_base + '.tpr')]
        stages.append(grompp_eq)

        # 4. run the equilibration
        mdrun_eq = Kernel(name="md.mdrun")
        mdrun_eq.link_input_data = [pull(eq_base + '.tpr')]
        mdrun_eq.cores = Kconfig.num_cores_per_sim_cu
        mdrun_eq.arguments = ["--deffnm={0}".format(eq_base)]
        mdrun_eq.copy_output_data = [push(eq_base + '.gro')]
        stages.append(mdrun_eq)

    # 5. preprocess the production run
    grompp_md = Kernel(name="md.grompp")
    production_inputs = [
        pre_loop(os.path.basename(Kconfig.md_input_file)),
        pre_loop(os.path.basename(Kconfig.top_file)),
    ]
    if first_pass:
        start_gro = os.path.basename(Kconfig.initial_crd_file)
        grompp_md.link_input_data = production_inputs + [pre_loop(start_gro)]
    else:
        start_gro = eq_base + '.gro'
        grompp_md.link_input_data = production_inputs + [pull(start_gro)]
    grompp_md.arguments = [
        "--mdp={0}".format(os.path.basename(Kconfig.md_input_file)),
        "--gro={0}".format(start_gro),
        "--top={0}".format(os.path.basename(Kconfig.top_file)),
        "--tpr={0}.tpr".format(md_base),
    ]
    grompp_md.copy_output_data = [push(md_base + '.tpr')]
    stages.append(grompp_md)

    # 6. run the production simulation
    mdrun_md = Kernel(name="md.mdrun")
    mdrun_md.link_input_data = [pull(md_base + '.tpr')]
    mdrun_md.cores = Kconfig.num_cores_per_sim_cu
    mdrun_md.arguments = ["--deffnm={0}".format(md_base)]
    mdrun_md.copy_output_data = [push(md_base + '.gro'),
                                 push(md_base + '.xtc')]
    stages.append(mdrun_md)

    # 7. make structure and trajectory whole again
    trjconv = Kernel(name="md.trjconv")
    trjconv.link_input_data = [pull(md_base + '.gro'),
                               pull(md_base + '.xtc'),
                               pull(md_base + '.tpr')]
    trjconv.arguments = [
        "--echo1=System",
        "--f1={0}.gro".format(md_base),
        "--s1={0}.tpr".format(md_base),
        "--o1={0}_whole.gro".format(md_base),
        "--pbc1=whole",
        "--echo2=System",
        "--f2={0}.xtc".format(md_base),
        "--s2={0}.tpr".format(md_base),
        "--o2={0}_whole.xtc".format(md_base),
        "--pbc2=whole",
    ]
    if iteration % Kconfig.nsave == 0:
        trjconv.download_output_data = [
            "{0}_whole.xtc > output/iter{1}/{0}_whole.xtc".format(
                md_base, iteration - 1)
        ]
    stages.append(trjconv)

    return stages
def simulation_stage(self, iteration, instance):
    """Build the chain of gromacs kernels run by one simulation instance.

    This variant continues an earlier run: the effective cycle number is
    ``iteration + 48`` and every file is exchanged through a hard-coded
    staging directory instead of the ``$SHARED`` placeholder. Files are
    named with ``<tag> = <cycle-1>_<instance-1>``.

    cycle > 1 (restart from ``<outbase>_<cycle-2>_<instance-1>.<ext>``,
    where outbase/ext come from splitting the basename of
    ``Kconfig.output`` on '.'):
        1. custom.grompp  : Kconfig.grompp_1_mdp     -> min-<tag>.tpr
        2. custom.mdrun   : --deffnm=min-<tag>       -> min-<tag>.gro
        3. custom.grompp  : Kconfig.grompp_2_mdp     -> eq-<tag>.tpr
        4. custom.mdrun   : --deffnm=eq-<tag>        -> eq-<tag>.gro

    always:
        5. custom.grompp  : Kconfig.grompp_3_mdp, starting from the initial
                            coordinate file (cycle 1) or eq-<tag>.gro
                                                      -> md-<tag>.tpr
        6. custom.mdrun   : --deffnm=md-<tag>        -> md-<tag>.gro/.xtc
        7. custom.trjconv : ``--pbc=whole`` on the .gro; the result
                            replaces md-<tag>.gro in the staging directory
        8. custom.trjconv : ``--pbc=whole`` on the .xtc; the result
                            replaces md-<tag>.xtc in the staging directory
                            and is downloaded when the cycle number is a
                            multiple of ``Kconfig.nsave``

    NOTE(review): when ext is empty it is replaced by '.pdb', which yields
    a double dot in ``<name>.<ext>`` -- confirm this is intended.

    Returns the kernels as a list, in the order listed above.
    """
    # Hard-coded replacement for the "$SHARED" placeholder. It must be an
    # absolute path; the original author noted that dropping the leading
    # "/" failed.
    share_dir = "/work/fbettenc/p14b01_pool/staging_area"
    # Last iteration of the earlier run that this one continues from.
    prev_sim_last_iter_to_use = 48
    cycle = iteration + prev_sim_last_iter_to_use

    def staged(name):
        # Reference a staged file under its own name.
        return share_dir + '/{0}'.format(name)

    def pull(name):
        # Link a staged file into the kernel, keeping its name.
        return share_dir + '/{0} > {0}'.format(name)

    def push(local, remote=None):
        # Copy a kernel output to the staging directory (optionally renamed).
        # Always emits the " > " separator; one directive in the original
        # was missing the space after ">".
        return '{0} > '.format(local) + share_dir + '/{0}'.format(
            local if remote is None else remote)

    tag = '{0}_{1}'.format(cycle - 1, instance - 1)
    min_base, eq_base, md_base = 'min-' + tag, 'eq-' + tag, 'md-' + tag

    kernel_list = []

    outbase, ext = os.path.basename(Kconfig.output).split('.')
    if ext == '':
        ext = '.pdb'

    if (cycle - 1) != 0:
        restart = '{0}_{1}_{2}.{3}'.format(outbase, cycle - 2, instance - 1, ext)

        # 1. preprocess the restrained minimization
        k1_prep_min_kernel = Kernel(name="custom.grompp")
        k1_prep_min_kernel.link_input_data = [
            staged(os.path.basename(Kconfig.grompp_1_mdp)),
            staged(os.path.basename(Kconfig.top_file)),
            staged(os.path.basename(Kconfig.restr_file)),
            staged(os.path.basename(Kconfig.grompp_1_itp_file)),
            pull(restart),
        ]
        k1_prep_min_kernel.arguments = [
            "--mdp={0}".format(os.path.basename(Kconfig.grompp_1_mdp)),
            "--ref={0}".format(restart),
            "--top={0}".format(os.path.basename(Kconfig.top_file)),
            "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
            "--tpr={0}.tpr".format(min_base),
        ]
        k1_prep_min_kernel.copy_output_data = [push(min_base + '.tpr')]
        kernel_list.append(k1_prep_min_kernel)

        # 2. run the minimization
        k2_min_kernel = Kernel(name="custom.mdrun")
        k2_min_kernel.link_input_data = [pull(min_base + '.tpr')]
        k2_min_kernel.cores = Kconfig.num_cores_per_sim_cu
        k2_min_kernel.arguments = ["--deffnm={0}".format(min_base)]
        k2_min_kernel.copy_output_data = [push(min_base + '.gro')]
        kernel_list.append(k2_min_kernel)

        # 3. preprocess the restrained equilibration
        k3_prep_eq_kernel = Kernel(name="custom.grompp")
        k3_prep_eq_kernel.link_input_data = [
            staged(os.path.basename(Kconfig.grompp_2_mdp)),
            staged(os.path.basename(Kconfig.top_file)),
            staged(os.path.basename(Kconfig.restr_file)),
            staged(os.path.basename(Kconfig.grompp_2_itp_file)),
            pull(min_base + '.gro'),
            pull(restart),
        ]
        k3_prep_eq_kernel.arguments = [
            "--mdp={0}".format(os.path.basename(Kconfig.grompp_2_mdp)),
            "--ref={0}".format(restart),
            "--top={0}".format(os.path.basename(Kconfig.top_file)),
            "--gro={0}.gro".format(min_base),
            "--tpr={0}.tpr".format(eq_base),
        ]
        k3_prep_eq_kernel.copy_output_data = [push(eq_base + '.tpr')]
        kernel_list.append(k3_prep_eq_kernel)

        # 4. run the equilibration
        k4_eq_kernel = Kernel(name="custom.mdrun")
        k4_eq_kernel.link_input_data = [pull(eq_base + '.tpr')]
        k4_eq_kernel.cores = Kconfig.num_cores_per_sim_cu
        k4_eq_kernel.arguments = ["--deffnm={0}".format(eq_base)]
        k4_eq_kernel.copy_output_data = [push(eq_base + '.gro')]
        kernel_list.append(k4_eq_kernel)

    # 5. preprocess the production run
    k5_prep_sim_kernel = Kernel(name="custom.grompp")
    production_inputs = [
        staged(os.path.basename(Kconfig.grompp_3_mdp)),
        staged(os.path.basename(Kconfig.top_file)),
    ]
    if (cycle - 1) == 0:
        start_gro = os.path.basename(Kconfig.initial_crd_file)
        k5_prep_sim_kernel.link_input_data = production_inputs + [staged(start_gro)]
    else:
        start_gro = eq_base + '.gro'
        k5_prep_sim_kernel.link_input_data = production_inputs + [pull(start_gro)]
    k5_prep_sim_kernel.arguments = [
        "--mdp={0}".format(os.path.basename(Kconfig.grompp_3_mdp)),
        "--gro={0}".format(start_gro),
        "--top={0}".format(os.path.basename(Kconfig.top_file)),
        "--tpr={0}.tpr".format(md_base),
    ]
    k5_prep_sim_kernel.copy_output_data = [push(md_base + '.tpr')]
    kernel_list.append(k5_prep_sim_kernel)

    # 6. run the production simulation
    k6_sim_kernel = Kernel(name="custom.mdrun")
    k6_sim_kernel.link_input_data = [pull(md_base + '.tpr')]
    k6_sim_kernel.cores = Kconfig.num_cores_per_sim_cu
    k6_sim_kernel.arguments = ["--deffnm={0}".format(md_base)]
    k6_sim_kernel.copy_output_data = [push(md_base + '.gro'),
                                      push(md_base + '.xtc')]
    kernel_list.append(k6_sim_kernel)

    # 7. make the final structure whole; overwrites the staged .gro
    k7_sim_kernel = Kernel(name="custom.trjconv")
    k7_sim_kernel.link_input_data = [pull(md_base + '.gro'),
                                     pull(md_base + '.tpr')]
    k7_sim_kernel.arguments = [
        "--echo=System",
        "--f={0}.gro".format(md_base),
        "--s={0}.tpr".format(md_base),
        "--o={0}_whole.gro".format(md_base),
        "--pbc=whole",
    ]
    k7_sim_kernel.copy_output_data = [
        push(md_base + '_whole.gro', md_base + '.gro')
    ]
    kernel_list.append(k7_sim_kernel)

    # 8. make the trajectory whole; overwrites the staged .xtc
    k8_sim_kernel = Kernel(name="custom.trjconv")
    k8_sim_kernel.link_input_data = [pull(md_base + '.xtc'),
                                     pull(md_base + '.tpr')]
    k8_sim_kernel.arguments = [
        "--echo=System",
        "--f={0}.xtc".format(md_base),
        "--s={0}.tpr".format(md_base),
        "--o={0}_whole.xtc".format(md_base),
        "--pbc=whole",
    ]
    if cycle % Kconfig.nsave == 0:
        k8_sim_kernel.download_output_data = [
            "{0}_whole.xtc > output/iter{1}/{0}_whole.xtc".format(
                md_base, cycle - 1)
        ]
    k8_sim_kernel.copy_output_data = [
        push(md_base + '_whole.xtc', md_base + '.xtc')
    ]
    kernel_list.append(k8_sim_kernel)

    return kernel_list
def simulation_step(self, iteration, instance):
    """
    Build the chain of GROMACS kernels for one simulation instance.

    If iteration == 1 the production run starts from
    Kconfig.initial_crd_file staged in $PRE_LOOP.  On later iterations a
    restrained minimisation and a restrained equilibration are put in
    front of it, seeded with the structure linked from
    $PREV_ANALYSIS_INSTANCE_1.

    Kernels returned, in order (<c> = iteration - 1, <r> = instance - 1):

        md.grompp   -> min-<c>_<r>.tpr            (iteration > 1 only)
        md.mdrun    -> min-<c>_<r>.gro            (iteration > 1 only)
        md.grompp   -> eq-<c>_<r>.tpr             (iteration > 1 only)
        md.mdrun    -> eq-<c>_<r>.gro             (iteration > 1 only)
        md.grompp   -> md-<c>_<r>.tpr
        md.mdrun    -> md-<c>_<r>.gro, md-<c>_<r>.xtc
        md.trjconv  -> md-<c>_<r>_whole.gro, md-<c>_<r>_whole.xtc

    md.grompp arguments:
        --mdp = simulation parameters file - input
        --gro = single coordinates file - input
        --top = topology filename - input
        --ref = coordinates file used as reference for position
                restraints - input
        --tpr = portable binary run file - output

    md.mdrun arguments:
        --deffnm = basename used for every generated file and for
                   locating the input .tpr file

    Intermediate files are handed from kernel to kernel through $PRE_LOOP.

    :param iteration: 1-based iteration number.
    :param instance: 1-based simulation instance number.
    :return: list of Kernel objects in execution order.
    """
    cycle = iteration - 1
    replica = instance - 1

    def staged(path):
        # Reference to a file that lives in the $PRE_LOOP staging directory.
        return '$PRE_LOOP/{0}'.format(os.path.basename(path))

    kernel_list = []

    if cycle != 0:
        # Split "<base>.<ext>" on the LAST dot.  The previous
        # ``split('.')`` unpacking raised ValueError for names with no dot
        # or with more than one dot.
        output_name = os.path.basename(Kconfig.output)
        outbase, dot, ext = output_name.rpartition('.')
        if not dot:
            outbase = output_name
            ext = ''
        if not ext:
            # The templates below already contain the dot, so the fallback
            # must not repeat it (the old '.pdb' produced "name..pdb").
            ext = 'pdb'

        seed = '{0}_{1}{2}.{3}'.format(outbase, iteration - 2, replica, ext)

        # Preprocess the restrained energy minimisation.
        k1_prep_min_kernel = Kernel(name="md.grompp")
        k1_prep_min_kernel.link_input_data = [
            staged(Kconfig.eminrestr_md),
            staged(Kconfig.top_file),
            staged(Kconfig.restr_file),
            staged(Kconfig.itp_file),
            '$PREV_ANALYSIS_INSTANCE_1/{0} > {0}'.format(seed),
        ]
        k1_prep_min_kernel.arguments = [
            "--mdp={0}".format(os.path.basename(Kconfig.eminrestr_md)),
            "--ref={0}".format(seed),
            "--top={0}".format(os.path.basename(Kconfig.top_file)),
            "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
            "--tpr=min-{0}_{1}.tpr".format(cycle, replica),
        ]
        k1_prep_min_kernel.copy_output_data = [
            'min-{0}_{1}.tpr > $PRE_LOOP/min-{0}_{1}.tpr'.format(
                cycle, replica)
        ]
        kernel_list.append(k1_prep_min_kernel)

        # Run the restrained energy minimisation.
        k2_min_kernel = Kernel(name="md.mdrun")
        k2_min_kernel.link_input_data = [
            '$PRE_LOOP/min-{0}_{1}.tpr > min-{0}_{1}.tpr'.format(
                cycle, replica)
        ]
        k2_min_kernel.cores = Kconfig.num_cores_per_sim_cu
        k2_min_kernel.arguments = [
            "--deffnm=min-{0}_{1}".format(cycle, replica)
        ]
        k2_min_kernel.copy_output_data = [
            'min-{0}_{1}.gro > $PRE_LOOP/min-{0}_{1}.gro'.format(
                cycle, replica)
        ]
        kernel_list.append(k2_min_kernel)

        # Preprocess the restrained equilibration.
        k3_prep_eq_kernel = Kernel(name="md.grompp")
        k3_prep_eq_kernel.link_input_data = [
            staged(Kconfig.eeqrestr_md),
            staged(Kconfig.top_file),
            staged(Kconfig.restr_file),
            staged(Kconfig.itp_file),
            '$PRE_LOOP/min-{0}_{1}.gro > min-{0}_{1}.gro'.format(
                cycle, replica),
        ]
        k3_prep_eq_kernel.arguments = [
            "--mdp={0}".format(os.path.basename(Kconfig.eeqrestr_md)),
            "--ref=min-{0}_{1}.gro".format(cycle, replica),
            "--top={0}".format(os.path.basename(Kconfig.top_file)),
            "--gro={0}".format(os.path.basename(Kconfig.restr_file)),
            "--tpr=eq-{0}_{1}.tpr".format(cycle, replica),
        ]
        k3_prep_eq_kernel.copy_output_data = [
            'eq-{0}_{1}.tpr > $PRE_LOOP/eq-{0}_{1}.tpr'.format(
                cycle, replica)
        ]
        kernel_list.append(k3_prep_eq_kernel)

        # Run the restrained equilibration.
        k4_eq_kernel = Kernel(name="md.mdrun")
        k4_eq_kernel.link_input_data = [
            '$PRE_LOOP/eq-{0}_{1}.tpr > eq-{0}_{1}.tpr'.format(
                cycle, replica)
        ]
        k4_eq_kernel.cores = Kconfig.num_cores_per_sim_cu
        k4_eq_kernel.arguments = [
            "--deffnm=eq-{0}_{1}".format(cycle, replica)
        ]
        k4_eq_kernel.copy_output_data = [
            'eq-{0}_{1}.gro > $PRE_LOOP/eq-{0}_{1}.gro'.format(
                cycle, replica)
        ]
        kernel_list.append(k4_eq_kernel)

    # Preprocess the production run.  Its starting structure is the initial
    # coordinate file on the first iteration, the equilibrated one afterwards.
    k5_prep_sim_kernel = Kernel(name="md.grompp")
    k5_inputs = [
        staged(Kconfig.md_input_file),
        staged(Kconfig.top_file),
    ]
    if cycle == 0:
        k5_inputs.append(staged(Kconfig.initial_crd_file))
        start_gro = os.path.basename(Kconfig.initial_crd_file)
    else:
        k5_inputs.append(
            '$PRE_LOOP/eq-{0}_{1}.gro > eq-{0}_{1}.gro'.format(
                cycle, replica))
        start_gro = "eq-{0}_{1}.gro".format(cycle, replica)
    k5_prep_sim_kernel.link_input_data = k5_inputs
    k5_prep_sim_kernel.arguments = [
        "--mdp={0}".format(os.path.basename(Kconfig.md_input_file)),
        "--gro={0}".format(start_gro),
        "--top={0}".format(os.path.basename(Kconfig.top_file)),
        "--tpr=md-{0}_{1}.tpr".format(cycle, replica),
    ]
    k5_prep_sim_kernel.copy_output_data = [
        'md-{0}_{1}.tpr > $PRE_LOOP/md-{0}_{1}.tpr'.format(cycle, replica)
    ]
    kernel_list.append(k5_prep_sim_kernel)

    # Production run.
    k6_sim_kernel = Kernel(name="md.mdrun")
    k6_sim_kernel.link_input_data = [
        '$PRE_LOOP/md-{0}_{1}.tpr > md-{0}_{1}.tpr'.format(cycle, replica)
    ]
    k6_sim_kernel.cores = Kconfig.num_cores_per_sim_cu
    k6_sim_kernel.arguments = [
        "--deffnm=md-{0}_{1}".format(cycle, replica)
    ]
    k6_sim_kernel.copy_output_data = [
        "md-{0}_{1}.gro > $PRE_LOOP/md-{0}_{1}.gro".format(cycle, replica),
        "md-{0}_{1}.xtc > $PRE_LOOP/md-{0}_{1}.xtc".format(cycle, replica),
    ]
    kernel_list.append(k6_sim_kernel)

    # Post-process the structure (set 1) and the trajectory (set 2) with
    # trjconv using "pbc whole".
    k7_sim_kernel = Kernel(name="md.trjconv")
    k7_sim_kernel.link_input_data = [
        "$PRE_LOOP/md-{0}_{1}.gro > md-{0}_{1}.gro".format(cycle, replica),
        "$PRE_LOOP/md-{0}_{1}.xtc > md-{0}_{1}.xtc".format(cycle, replica),
        "$PRE_LOOP/md-{0}_{1}.tpr > md-{0}_{1}.tpr".format(cycle, replica),
    ]
    k7_sim_kernel.arguments = [
        "--echo1=System",
        "--f1=md-{0}_{1}.gro".format(cycle, replica),
        "--s1=md-{0}_{1}.tpr".format(cycle, replica),
        "--o1=md-{0}_{1}_whole.gro".format(cycle, replica),
        "--pbc1=whole",
        "--echo2=System",
        "--f2=md-{0}_{1}.xtc".format(cycle, replica),
        "--s2=md-{0}_{1}.tpr".format(cycle, replica),
        "--o2=md-{0}_{1}_whole.xtc".format(cycle, replica),
        "--pbc2=whole",
    ]
    # Download the processed trajectory only every Kconfig.nsave iterations.
    if iteration % Kconfig.nsave == 0:
        k7_sim_kernel.download_output_data = [
            "md-{0}_{1}_whole.xtc > output/iter{0}/md-{0}_{1}_whole.xtc".
            format(cycle, replica)
        ]
    kernel_list.append(k7_sim_kernel)

    return kernel_list
def analysis_step(self, iteration, instance):
    """
    Build the CoCo analysis kernel and the tleap kernel for this iteration.

    md.coco:
        Runs on the md<iter>.ncdf files of every simulation instance of
        every iteration up to and including the current one, and copies one
        pentaopt<iteration><i>.pdb per CU into $PRE_LOOP.

        Arguments:
            --grid           = number of points along each dimension of the
                               CoCo histogram
            --dims           = number of projections to consider from the
                               input pcz file
            --frontpoints    = number of CUs
            --topfile        = topology filename
            --mdfile         = MD trajectory file pattern
            --output         = output filename prefix
            --atom_selection = atom selection passed to CoCo

    md.tleap:
        Receives postexec.py and the pentaopt*.pdb files from $PRE_LOOP.

        Arguments:
            --numofsims = number of CUs
            --cycle     = current iteration number

    :param iteration: 1-based iteration number.
    :param instance: analysis instance number (not used for any file name).
    :return: [coco kernel, tleap kernel]
    """
    topology = os.path.basename(Kconfig.top_file)

    coco = Kernel(name="md.coco")
    coco.arguments = [
        "--grid={0}".format(Kconfig.grid),
        "--dims={0}".format(Kconfig.dims),
        "--frontpoints={0}".format(Kconfig.num_CUs),
        "--topfile={0}".format(topology),
        "--mdfile=*.ncdf",
        "--output=pdbs",
        "--atom_selection={0}".format(Kconfig.atom_selection),
    ]
    coco.cores = min(Kconfig.num_CUs, RPconfig.PILOTSIZE)
    coco.uses_mpi = True

    # Topology first, then every trajectory produced so far.
    trajectory_link = (
        '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.ncdf > md_{0}_{1}.ncdf')
    coco_inputs = ['$PRE_LOOP/{0}'.format(topology)]
    for past_iteration in range(1, iteration + 1):
        for cu in range(1, Kconfig.num_CUs + 1):
            coco_inputs.append(trajectory_link.format(past_iteration, cu))
    coco.link_input_data = coco_inputs

    coco.copy_output_data = [
        'pdbs{1}.pdb > $PRE_LOOP/pentaopt{0}{1}.pdb'.format(iteration, cu)
        for cu in range(0, Kconfig.num_CUs)
    ]

    # The CoCo log is downloaded only every Kconfig.nsave iterations.
    if iteration % Kconfig.nsave == 0:
        coco.download_output_data = [
            'coco.log > output/iter{0}/coco.log'.format(iteration, instance)
        ]

    tleap = Kernel(name="md.tleap")
    tleap.arguments = [
        "--numofsims={0}".format(Kconfig.num_CUs),
        "--cycle={0}".format(iteration),
    ]
    tleap_inputs = ['$PRE_LOOP/postexec.py > postexec.py']
    for cu in range(0, Kconfig.num_CUs):
        tleap_inputs.append(
            '$PRE_LOOP/pentaopt{0}{1}.pdb > pentaopt{0}{1}.pdb'.format(
                iteration, cu))
    tleap.link_input_data = tleap_inputs

    return [coco, tleap]
def analysis_stage(self, iteration, instance):
    """
    Build the CoCo analysis kernel for a run that continues an earlier one.

    The iteration counter is shifted by a fixed offset (48) so that file
    names continue the numbering of the earlier run; the shifted value is
    called iterMod below.  All data is exchanged through a fixed staging
    directory rather than $SHARED.

    custom.coco:
        Runs CoCo on the md-<it>_<cu>.xtc trajectories for
        it = 0 .. iterMod - 1 and cu = 0 .. num_CUs - 1, and copies one
        <outbase>_<iterMod - 1>_<i>.gro file per CU back to the staging
        directory.

        Arguments:
            --grid           = number of points along each dimension of the
                               CoCo histogram
            --dims           = number of projections to consider from the
                               input pcz file
            --frontpoints    = number of CUs
            --topfile        = structure file used as topology
            --mdfile         = MD trajectory file pattern
            --output         = output filename
            --atom_selection = selection of the part of the system to
                               consider for analysis

    :param iteration: 1-based iteration number of the current run.
    :param instance: analysis instance number (unused).
    :return: single-element list holding the CoCo kernel.
    """
    # NOTE(review): site-specific absolute path and restart offset are
    # hard-coded here; "$SHARED" was the generic value this replaced.
    shareDir = "/work/fbettenc/p14b01_pool/staging_area"
    prev_sim_last_iter_to_use = 48
    iterMod = iteration + prev_sim_last_iter_to_use
    last = iterMod - 1

    # Only the base name is needed: every output is written as .gro.
    # rpartition never raises, unlike the previous ``split('.')`` unpacking
    # which failed on names with no dot or with more than one dot.
    output_name = os.path.basename(Kconfig.output)
    outbase, dot, _ = output_name.rpartition('.')
    if not dot:
        outbase = output_name

    k1_ana_kernel = Kernel(name="custom.coco")
    k1_ana_kernel.arguments = [
        "--grid={0}".format(Kconfig.grid),
        "--dims={0}".format(Kconfig.dims),
        "--frontpoints={0}".format(Kconfig.num_CUs),
        "--topfile=md-{0}_0.gro".format(last),
        "--mdfile=*.xtc",
        "--output={0}_{1}_.gro".format(outbase, last),
        "--atom_selection={0}".format(Kconfig.sel),
    ]

    # Scaled with iterMod + 1 because, per the original author's note, the
    # analysis is one iteration ahead at the first iteration; capped at the
    # pilot size.
    k1_ana_kernel.cores = min(Kconfig.num_CUs * (iterMod + 1),
                              RPconfig.PILOTSIZE)

    # Single-argument print() calls behave the same on Python 2 and 3 and
    # reproduce the text the former print statements emitted.
    print(" ")
    print("iter,iterMod,AnaCUcores =  {0} ,  {1} ,  {2}".format(
        iteration, iterMod, k1_ana_kernel.cores))
    print(" ")

    k1_ana_kernel.uses_mpi = True

    inputs = [shareDir + '/md-{0}_0.gro > md-{0}_0.gro'.format(last)]
    for past_iteration in range(0, iterMod):
        for cu in range(0, Kconfig.num_CUs):
            inputs.append(
                shareDir + '/md-{0}_{1}.xtc > md-{0}_{1}.xtc'.format(
                    past_iteration, cu))
    k1_ana_kernel.link_input_data = inputs

    outputs = []
    for cu in range(0, Kconfig.num_CUs):
        gro_name = "{0}_{1}_{2}.gro".format(outbase, last, cu)
        outputs.append(gro_name + " > " + shareDir + "/" + gro_name)
    k1_ana_kernel.copy_output_data = outputs

    k1_ana_kernel.download_output_data = [
        "coco.log > output/coco-iter{0}.log".format(last)
    ]

    return [k1_ana_kernel]
def simulation_stage(self, iteration, instance):
    """
    Build the chain of custom GROMACS kernels for one simulation instance.

    On the first iteration only kernels 5-8 are created and the production
    run starts from Kconfig.initial_crd_file.  On later iterations kernels
    1-4 (restrained minimisation and restrained MD, seeded with the
    structure linked from $PREV_ANALYSIS_INSTANCE_1) run first.

    Kernels, in order (<c> = iteration - 1, <r> = instance - 1):

        1. custom.grompp  -> min-<c>_<r>.tpr     (iteration > 1 only)
        2. custom.mdrun   -> min-<c>_<r>.gro     (iteration > 1 only)
        3. custom.grompp  -> eq-<c>_<r>.tpr      (iteration > 1 only)
        4. custom.mdrun   -> eq-<c>_<r>.gro      (iteration > 1 only)
        5. custom.grompp  -> md-<c>_<r>.tpr
        6. custom.mdrun   -> md-<c>_<r>.gro, md-<c>_<r>.xtc
        7. custom.trjconv -> PBC-corrected md-<c>_<r>.gro
        8. custom.trjconv -> PBC-corrected md-<c>_<r>.xtc

    Files are handed from kernel to kernel through $SHARED; kernels 7 and 8
    overwrite the raw .gro/.xtc there with their "_whole" output.

    :param iteration: 1-based iteration number.
    :param instance: 1-based simulation instance number.
    :return: list of Kernel objects in execution order.
    """
    kernel_list = []
    iter1 = iteration - 1
    inst1 = instance - 1

    # Split "<base>.<ext>" on the LAST dot.  The previous ``split('.')``
    # unpacking raised ValueError for names with no dot or with more than
    # one dot, even on the first iteration where the result is not used.
    output_name = opb(Kconfig.output)
    outbase, dot, ext = output_name.rpartition('.')
    if not dot:
        outbase = output_name
        ext = ''
    if not ext:
        # The templates below already contain the dot, so the fallback must
        # not repeat it (the old '.pdb' produced "name..pdb").
        ext = 'pdb'

    def shared(path):
        # Reference to a file staged in the $SHARED directory.
        return '$SHARED/{0}'.format(opb(path))

    if iter1 != 0:
        seed = '{0}_{1}{2}.{3}'.format(outbase, iteration - 2, inst1, ext)

        # Kernel 1: Grompp before energy min step.
        k1 = Kernel(name="custom.grompp")
        k1.link_input_data = [
            shared(Kconfig.grompp_1_mdp),
            shared(Kconfig.top_file),
            shared(Kconfig.restr_file),
            shared(Kconfig.grompp_1_itp_file),
            '$PREV_ANALYSIS_INSTANCE_1/{0} > {0}'.format(seed),
        ]
        k1.arguments = [
            "--mdp={0}".format(opb(Kconfig.grompp_1_mdp)),
            "--ref={0}".format(seed),
            "--top={0}".format(opb(Kconfig.top_file)),
            "--gro={0}".format(opb(Kconfig.restr_file)),
            "--tpr=min-{0}_{1}.tpr".format(iter1, inst1),
        ]
        k1.copy_output_data = [
            'min-{0}_{1}.tpr > $SHARED/min-{0}_{1}.tpr'.format(iter1, inst1)
        ]
        kernel_list.append(k1)

        # Kernel 2: Restrained energy min step.
        k2 = Kernel(name="custom.mdrun")
        k2.link_input_data = [
            '$SHARED/min-{0}_{1}.tpr > min-{0}_{1}.tpr'.format(iter1, inst1)
        ]
        k2.cores = Kconfig.num_cores_per_sim_cu
        k2.arguments = ["--deffnm=min-{0}_{1}".format(iter1, inst1)]
        k2.copy_output_data = [
            'min-{0}_{1}.gro > $SHARED/min-{0}_{1}.gro'.format(iter1, inst1)
        ]
        kernel_list.append(k2)

        # Kernel 3: Grompp before restrained MD step.
        k3 = Kernel(name="custom.grompp")
        k3.link_input_data = [
            shared(Kconfig.grompp_2_mdp),
            shared(Kconfig.top_file),
            shared(Kconfig.restr_file),
            shared(Kconfig.grompp_2_itp_file),
            '$SHARED/min-{0}_{1}.gro > min-{0}_{1}.gro'.format(iter1, inst1),
        ]
        k3.arguments = [
            "--mdp={0}".format(opb(Kconfig.grompp_2_mdp)),
            "--ref=min-{0}_{1}.gro".format(iter1, inst1),
            "--top={0}".format(opb(Kconfig.top_file)),
            "--gro={0}".format(opb(Kconfig.restr_file)),
            "--tpr=eq-{0}_{1}.tpr".format(iter1, inst1),
        ]
        k3.copy_output_data = [
            'eq-{0}_{1}.tpr > $SHARED/eq-{0}_{1}.tpr'.format(iter1, inst1)
        ]
        kernel_list.append(k3)

        # Kernel 4: Restrained MD step.
        k4 = Kernel(name="custom.mdrun")
        k4.link_input_data = [
            '$SHARED/eq-{0}_{1}.tpr > eq-{0}_{1}.tpr'.format(iter1, inst1)
        ]
        k4.cores = Kconfig.num_cores_per_sim_cu
        k4.arguments = ["--deffnm=eq-{0}_{1}".format(iter1, inst1)]
        k4.copy_output_data = [
            'eq-{0}_{1}.gro > $SHARED/eq-{0}_{1}.gro'.format(iter1, inst1)
        ]
        kernel_list.append(k4)

    # Kernel 5: Grompp before unrestrained (production) MD.  The starting
    # structure is the initial coordinate file on the first iteration and
    # the equilibrated structure afterwards.
    k5 = Kernel(name="custom.grompp")
    k5_inputs = [
        shared(Kconfig.grompp_3_mdp),
        shared(Kconfig.top_file),
    ]
    if iter1 == 0:
        k5_inputs.append(shared(Kconfig.initial_crd_file))
        start_gro = opb(Kconfig.initial_crd_file)
    else:
        k5_inputs.append(
            '$SHARED/eq-{0}_{1}.gro > eq-{0}_{1}.gro'.format(iter1, inst1))
        start_gro = "eq-{0}_{1}.gro".format(iter1, inst1)
    k5.link_input_data = k5_inputs
    k5.arguments = [
        "--mdp={0}".format(opb(Kconfig.grompp_3_mdp)),
        "--gro={0}".format(start_gro),
        "--top={0}".format(opb(Kconfig.top_file)),
        "--tpr=md-{0}_{1}.tpr".format(iter1, inst1),
    ]
    k5.copy_output_data = [
        'md-{0}_{1}.tpr > $SHARED/md-{0}_{1}.tpr'.format(iter1, inst1)
    ]
    kernel_list.append(k5)

    # Kernel 6: Production MD step.
    k6 = Kernel(name="custom.mdrun")
    k6.link_input_data = [
        '$SHARED/md-{0}_{1}.tpr > md-{0}_{1}.tpr'.format(iter1, inst1)
    ]
    k6.cores = Kconfig.num_cores_per_sim_cu
    k6.arguments = ["--deffnm=md-{0}_{1}".format(iter1, inst1)]
    k6.copy_output_data = [
        'md-{0}_{1}.gro > $SHARED/md-{0}_{1}.gro'.format(iter1, inst1),
        'md-{0}_{1}.xtc > $SHARED/md-{0}_{1}.xtc'.format(iter1, inst1),
    ]
    kernel_list.append(k6)

    # Kernel 7: Post-processing of output structure file to correct
    # PBC effects.
    k7 = Kernel(name="custom.trjconv")
    k7.link_input_data = [
        '$SHARED/md-{0}_{1}.gro > md-{0}_{1}.gro'.format(iter1, inst1),
        '$SHARED/md-{0}_{1}.tpr > md-{0}_{1}.tpr'.format(iter1, inst1),
    ]
    k7.arguments = [
        "--echo=System",
        "--f=md-{0}_{1}.gro".format(iter1, inst1),
        "--s=md-{0}_{1}.tpr".format(iter1, inst1),
        "--o=md-{0}_{1}_whole.gro".format(iter1, inst1),
        "--pbc=whole",
    ]
    k7.copy_output_data = [
        'md-{0}_{1}_whole.gro > $SHARED/md-{0}_{1}.gro'.format(iter1, inst1)
    ]
    kernel_list.append(k7)

    # Kernel 8: Post-processing of output trajectory file to correct
    # PBC effects.
    k8 = Kernel(name="custom.trjconv")
    k8.link_input_data = [
        '$SHARED/md-{0}_{1}.xtc > md-{0}_{1}.xtc'.format(iter1, inst1),
        '$SHARED/md-{0}_{1}.tpr > md-{0}_{1}.tpr'.format(iter1, inst1),
    ]
    k8.arguments = [
        "--echo=System",
        "--f=md-{0}_{1}.xtc".format(iter1, inst1),
        "--s=md-{0}_{1}.tpr".format(iter1, inst1),
        "--o=md-{0}_{1}_whole.xtc".format(iter1, inst1),
        "--pbc=whole",
    ]
    # Download the corrected trajectory only every Kconfig.nsave iterations.
    if iteration % Kconfig.nsave == 0:
        k8.download_output_data = [
            "md-{0}_{1}_whole.xtc > output/iter{0}/md-{0}_{1}_whole.xtc".
            format(iter1, inst1)
        ]
    k8.copy_output_data = [
        'md-{0}_{1}_whole.xtc > $SHARED/md-{0}_{1}.xtc'.format(iter1, inst1)
    ]
    kernel_list.append(k8)

    return kernel_list
def simulation_step(self, iteration, instance):
    """
    Build the two Amber kernels (minimisation, then MD) for one instance.

    The minimisation kernel starts from Kconfig.initial_crd_file when
    iteration == 1, otherwise from the min<iteration-1><instance-1>.crd
    file linked from $PREV_ANALYSIS_INSTANCE_1.  Its md<iteration>.crd
    output is handed to the MD kernel through $PRE_LOOP.

    md.amber (minimisation) arguments:
        --mininfile = minimization filename
        --topfile   = topology filename
        --crdfile   = initial coordinate filename
        --cycle     = current iteration number

    md.amber (MD) arguments:
        --mdinfile  = MD input filename
        --topfile   = topology filename
        --cycle     = current iteration number

    :param iteration: 1-based iteration number.
    :param instance: 1-based simulation instance number.
    :return: [minimisation kernel, MD kernel]
    """
    min_input = os.path.basename(Kconfig.minimization_input_file)
    md_input = os.path.basename(Kconfig.md_input_file)
    topology = os.path.basename(Kconfig.top_file)
    start_crd = os.path.basename(Kconfig.initial_crd_file)

    # Which coordinate file seeds the minimisation depends on the iteration.
    if iteration - 1 == 0:
        seed_link = '$PRE_LOOP/{0} > min1.crd'.format(start_crd)
    else:
        seed_link = (
            '$PREV_ANALYSIS_INSTANCE_1/min{0}{1}.crd > min{2}.crd'.format(
                iteration - 1, instance - 1, iteration))

    minimise = Kernel(name="md.amber")
    minimise.arguments = [
        "--mininfile={0}".format(min_input),
        "--topfile={0}".format(topology),
        "--crdfile={0}".format(start_crd),
        "--cycle=%s" % (iteration),
    ]
    minimise.link_input_data = [
        '$PRE_LOOP/{0}'.format(min_input),
        '$PRE_LOOP/{0}'.format(topology),
        '$PRE_LOOP/{0}'.format(start_crd),
        seed_link,
    ]
    minimise.cores = 1
    minimise.copy_output_data = [
        'md{0}.crd > $PRE_LOOP/md_{0}_{1}.crd'.format(iteration, instance)
    ]

    dynamics = Kernel(name="md.amber")
    dynamics.arguments = [
        "--mdinfile={0}".format(md_input),
        "--topfile={0}".format(topology),
        "--cycle=%s" % (iteration),
    ]
    dynamics.link_input_data = [
        "$PRE_LOOP/{0}".format(md_input),
        "$PRE_LOOP/{0}".format(topology),
        "$PRE_LOOP/md_{0}_{1}.crd > md{0}.crd".format(iteration, instance),
    ]
    # The trajectory is downloaded only every Kconfig.nsave iterations.
    if iteration % Kconfig.nsave == 0:
        dynamics.download_output_data = [
            'md{0}.ncdf > output/iter{0}/md_{0}_{1}.ncdf'.format(
                iteration, instance)
        ]
    dynamics.cores = 1

    return [minimise, dynamics]
def analysis_step(self, iteration, instance):
    """
    Build the three LSDMap analysis kernels for this iteration.

    Merge the results of each simulation instance, run LSDMap on the merged
    file and generate the new coordinate file for the next iteration.
    Files are passed between the kernels through $PRE_LOOP.

    pre_lsdmap :-
        Purpose   : Concatenate the out.gro file of every simulation
                    instance of this iteration into one larger file.
        Arguments : --numCUs = number of simulation instances / number of
                               small files to be concatenated

    lsdmap :-
        Purpose   : Perform LSDMap on the large coordinate file to generate
                    weights and eigen values.
        Arguments : --config = name of the config file to be used during
                               LSDMap

    post_lsdmap :-
        Purpose   : Use the LSDMap output together with parameter files
                    from pre_loop to generate the new coordinate file.
        Arguments : --num_runs            = number of configurations to be
                                            generated in the new coordinate
                                            file
                    --out                 = output filename
                    --cycle               = iteration number (0-based)
                    --max_dead_neighbors  = max dead neighbors to be
                                            considered
                    --max_alive_neighbors = max alive neighbors to be
                                            considered
                    --numCUs              = number of simulation instances /
                                            number of smaller files

    :param iteration: 1-based iteration number.
    :param instance: analysis instance number (unused).
    :return: [pre_lsdmap kernel, lsdmap kernel, post_lsdmap kernel]
    """
    not_first = iteration > 1
    save_now = iteration % Kconfig.nsave == 0
    prev_weights = '$ANALYSIS_ITERATION_{0}_INSTANCE_1/weight.w'.format(
        iteration - 1)

    pre_ana = Kernel(name="md.pre_lsdmap")
    pre_ana.arguments = ["--numCUs={0}".format(Kconfig.num_CUs)]
    pre_ana.link_input_data = (
        ["$PRE_LOOP/pre_analyze.py > pre_analyze.py"] + [
            "$SIMULATION_ITERATION_{2}_INSTANCE_{0}/out.gro > out{1}.gro".
            format(cu, cu - 1, iteration)
            for cu in range(1, Kconfig.num_CUs + 1)
        ])
    pre_ana.copy_output_data = [
        'tmpha.gro > $PRE_LOOP/tmpha.gro',
        'tmp.gro > $PRE_LOOP/tmp.gro',
    ]

    config_name = os.path.basename(Kconfig.lsdm_config_file)
    lsdmap = Kernel(name="md.lsdmap")
    lsdmap.arguments = ["--config={0}".format(config_name)]
    lsdmap_inputs = [
        '$PRE_LOOP/{0} > {0}'.format(config_name),
        '$PRE_LOOP/tmpha.gro > tmpha.gro',
    ]
    if not_first:
        # Weights of the previous analysis iteration.
        lsdmap_inputs.append(prev_weights + ' > weight.w')
    lsdmap.link_input_data = lsdmap_inputs
    lsdmap.cores = 1
    # NOTE(review): the original also assigned
    # ['weight.w > $PRE_LOOP/weight.w'] to copy_output_data for
    # iteration > 1, but the assignment below overwrote it immediately, so
    # weight.w was never copied.  The dead assignment is dropped here;
    # effective behavior is unchanged.  Confirm whether the copy was wanted.
    lsdmap.copy_output_data = [
        'tmpha.ev > $PRE_LOOP/tmpha.ev',
        'out.nn > $PRE_LOOP/out.nn',
    ]
    if save_now:
        lsdmap.download_output_data = [
            'lsdmap.log > backup/iter{0}/lsdmap.log'.format(iteration)
        ]

    post_ana = Kernel(name="md.post_lsdmap")
    post_inputs = [
        "$PRE_LOOP/post_analyze.py > post_analyze.py",
        "$PRE_LOOP/selection.py > selection.py",
        "$PRE_LOOP/reweighting.py > reweighting.py",
        "$PRE_LOOP/spliter.py > spliter.py",
        "$PRE_LOOP/gro.py > gro.py",
        "$PRE_LOOP/tmp.gro > tmp.gro",
        "$PRE_LOOP/tmpha.ev > tmpha.ev",
        "$PRE_LOOP/out.nn > out.nn",
        "$PRE_LOOP/input.gro > input.gro",
    ]
    if not_first:
        post_inputs.append(prev_weights + ' > weight_new.w')
    post_ana.link_input_data = post_inputs
    post_ana.arguments = [
        "--num_runs={0}".format(Kconfig.num_runs),
        "--out=out.gro",
        "--cycle={0}".format(iteration - 1),
        "--max_dead_neighbors={0}".format(Kconfig.max_dead_neighbors),
        "--max_alive_neighbors={0}".format(Kconfig.max_alive_neighbors),
        "--numCUs={0}".format(Kconfig.num_CUs),
    ]
    if save_now:
        post_ana.download_output_data = [
            'out.gro > backup/iter{0}/out.gro'.format(iteration),
            'weight.w > backup/iter{0}/weight.w'.format(iteration),
        ]

    return [pre_ana, lsdmap, post_ana]
def analysis_step(self, iteration, instance):
    """
    Build the CoCo analysis kernel that runs on GROMACS trajectories.

    md.coco:
        Runs CoCo on the md-<it>_<cu>.xtc files of every simulation
        instance of every iteration up to and including the current one,
        and copies one <outbase>_<iteration-1><i>.gro file per CU into
        $PRE_LOOP for the next round of simulations.

        Arguments:
            --grid           = number of points along each dimension of the
                               CoCo histogram
            --dims           = number of projections to consider from the
                               input pcz file
            --frontpoints    = number of CUs
            --topfile        = structure file used as topology
            --mdfile         = MD trajectory file pattern
            --output         = output filename
            --atom_selection = selection of the part of the system to
                               consider for analysis

    :param iteration: 1-based iteration number.
    :param instance: analysis instance number (unused).
    :return: single-element list holding the CoCo kernel.
    """
    cycle = iteration - 1
    coco = Kernel(name="md.coco")

    # Topology, the reference structure of instance 1, then every
    # trajectory produced so far.
    trajectory_link = ('$SIMULATION_ITERATION_{0}_INSTANCE_{1}/'
                       'md-{2}_{3}.xtc > md-{2}_{3}.xtc')
    inputs = [
        '$PRE_LOOP/{0}'.format(os.path.basename(Kconfig.top_file)),
        '$SIMULATION_ITERATION_{0}_INSTANCE_1/md-{1}_0.gro > md-{1}_0.gro'.
        format(iteration, cycle),
    ]
    for past_iteration in range(1, iteration + 1):
        for cu in range(1, Kconfig.num_CUs + 1):
            inputs.append(
                trajectory_link.format(past_iteration, cu,
                                       past_iteration - 1, cu - 1))
    coco.link_input_data = inputs

    coco.cores = 1
    coco.uses_mpi = False

    # The output name is expected to look like "<base>.<ext>"; anything
    # else makes this unpacking raise ValueError.
    outbase, ext = os.path.basename(Kconfig.output).split('.')

    coco.arguments = [
        "--grid={0}".format(Kconfig.grid),
        "--dims={0}".format(Kconfig.dims),
        "--frontpoints={0}".format(Kconfig.num_CUs),
        "--topfile=md-{0}_0.gro".format(cycle),
        "--mdfile=*.xtc",
        "--output={0}_{1}.{2}".format(outbase, cycle, ext),
        "--atom_selection={0}".format(Kconfig.sel),
    ]

    # The copied files are always named .gro, whatever ext is.
    coco.copy_output_data = [
        "{0}_{1}{2}.gro > $PRE_LOOP/{0}_{1}{2}.gro".format(
            outbase, cycle, cu)
        for cu in range(0, Kconfig.num_CUs)
    ]

    coco.download_output_data = [
        "coco.log > output/coco-iter{0}.log".format(cycle)
    ]

    return [coco]
def analysis_step(self, iteration, instance):
    """
    Build the three LSDMap analysis kernels for this iteration.

    Merge the results of each simulation instance, run LSDMap on the merged
    file and generate the new coordinate file for the next iteration.
    Files are passed between the kernels through $PRE_LOOP.

    pre_lsdmap :-
        Purpose   : Concatenate the out.gro file of every simulation
                    instance of this iteration into one larger file.
        Arguments : --numCUs = number of simulation instances / number of
                               small files to be concatenated

    lsdmap :-
        Purpose   : Perform LSDMap on the large coordinate file to generate
                    weights and eigen values.  Requests
                    RPconfig.PILOTSIZE cores.
        Arguments : --config = name of the config file to be used during
                               LSDMap

    post_lsdmap :-
        Purpose   : Use the LSDMap output together with parameter files
                    from pre_loop to generate the new coordinate file.
        Arguments : --num_runs            = number of configurations to be
                                            generated in the new coordinate
                                            file
                    --out                 = output filename
                    --cycle               = iteration number (0-based)
                    --max_dead_neighbors  = max dead neighbors to be
                                            considered
                    --max_alive_neighbors = max alive neighbors to be
                                            considered
                    --numCUs              = number of simulation instances /
                                            number of smaller files

    :param iteration: 1-based iteration number.
    :param instance: analysis instance number (unused).
    :return: [pre_lsdmap kernel, lsdmap kernel, post_lsdmap kernel]
    """
    not_first = iteration > 1
    save_now = iteration % Kconfig.nsave == 0
    prev_weights = "$ANALYSIS_ITERATION_{0}_INSTANCE_1/weight.w".format(
        iteration - 1)

    pre_ana = Kernel(name="md.pre_lsdmap")
    pre_ana.arguments = ["--numCUs={0}".format(Kconfig.num_CUs)]
    pre_ana.link_input_data = (
        ["$PRE_LOOP/pre_analyze.py > pre_analyze.py"] + [
            "$SIMULATION_ITERATION_{2}_INSTANCE_{0}/out.gro > out{1}.gro".
            format(cu, cu - 1, iteration)
            for cu in range(1, Kconfig.num_CUs + 1)
        ])
    pre_ana.copy_output_data = [
        "tmpha.gro > $PRE_LOOP/tmpha.gro",
        "tmp.gro > $PRE_LOOP/tmp.gro",
    ]

    config_name = os.path.basename(Kconfig.lsdm_config_file)
    lsdmap = Kernel(name="md.lsdmap")
    lsdmap.arguments = ["--config={0}".format(config_name)]
    lsdmap_inputs = [
        "$PRE_LOOP/{0} > {0}".format(config_name),
        "$PRE_LOOP/lsdm.py > lsdm.py",
        "$PRE_LOOP/tmpha.gro > tmpha.gro",
    ]
    if not_first:
        # Weights of the previous analysis iteration.
        lsdmap_inputs.append(prev_weights + " > weight.w")
    lsdmap.link_input_data = lsdmap_inputs
    lsdmap.cores = RPconfig.PILOTSIZE
    # NOTE(review): the original also assigned
    # ["weight.w > $PRE_LOOP/weight.w"] to copy_output_data for
    # iteration > 1, but the assignment below overwrote it immediately, so
    # weight.w was never copied.  The dead assignment is dropped here;
    # effective behavior is unchanged.  Confirm whether the copy was wanted.
    lsdmap.copy_output_data = [
        "tmpha.ev > $PRE_LOOP/tmpha.ev",
        "out.nn > $PRE_LOOP/out.nn",
    ]
    if save_now:
        lsdmap.download_output_data = [
            "lsdmap.log > backup/iter{0}/lsdmap.log".format(iteration)
        ]

    post_ana = Kernel(name="md.post_lsdmap")
    post_inputs = [
        "$PRE_LOOP/post_analyze.py > post_analyze.py",
        "$PRE_LOOP/select.py > select.py",
        "$PRE_LOOP/reweighting.py > reweighting.py",
        "$PRE_LOOP/spliter.py > spliter.py",
        "$PRE_LOOP/gro.py > gro.py",
        "$PRE_LOOP/tmp.gro > tmp.gro",
        "$PRE_LOOP/tmpha.ev > tmpha.ev",
        "$PRE_LOOP/out.nn > out.nn",
        "$PRE_LOOP/input.gro > input.gro",
    ]
    if not_first:
        post_inputs.append(prev_weights + " > weight_new.w")
    post_ana.link_input_data = post_inputs
    post_ana.arguments = [
        "--num_runs={0}".format(Kconfig.num_runs),
        "--out=out.gro",
        "--cycle={0}".format(iteration - 1),
        "--max_dead_neighbors={0}".format(Kconfig.max_dead_neighbors),
        "--max_alive_neighbors={0}".format(Kconfig.max_alive_neighbors),
        "--numCUs={0}".format(Kconfig.num_CUs),
    ]
    if save_now:
        post_ana.download_output_data = [
            "out.gro > backup/iter{0}/out.gro".format(iteration),
            "weight.w > backup/iter{0}/weight.w".format(iteration),
        ]

    return [pre_ana, lsdmap, post_ana]
def analysis_stage(self, iteration, instance):
    """
    Build the custom CoCo analysis kernel for this iteration.

    custom.coco:
        Runs CoCo on the md-<it>_<cu>.xtc trajectories found in $SHARED for
        it = 0 .. iteration - 1 and cu = 0 .. num_CUs - 1, and copies one
        <outbase>_<iteration-1><i>.gro file per CU back to $SHARED for use
        in the next cycle.

        Arguments:
            --grid           = number of points along each dimension of the
                               CoCo histogram
            --dims           = number of projections to consider from the
                               input pcz file
            --frontpoints    = number of CUs
            --topfile        = structure file used as topology
            --mdfile         = MD trajectory file pattern
            --output         = output filename
            --atom_selection = selection of the part of the system to
                               consider for analysis

    :param iteration: 1-based iteration number.
    :param instance: analysis instance number (unused).
    :return: single-element list holding the CoCo kernel.
    """
    cycle = iteration - 1

    # Only the base name is used; the unpacking still requires the output
    # name to contain exactly one dot and raises ValueError otherwise.
    outbase, _unused_ext = opb(Kconfig.output).split('.')

    coco = Kernel(name="custom.coco")
    coco.arguments = [
        "--grid={0}".format(Kconfig.grid),
        "--dims={0}".format(Kconfig.dims),
        "--frontpoints={0}".format(Kconfig.num_CUs),
        "--topfile=md-{0}_0.gro".format(cycle),
        "--mdfile=*.xtc",
        "--output={0}_{1}.gro".format(outbase, cycle),
        "--atom_selection={0}".format(Kconfig.sel),
    ]
    coco.cores = min(Kconfig.num_CUs, RPconfig.PILOTSIZE)
    coco.uses_mpi = True

    # Reference structure first, then every trajectory produced so far.
    inputs = ['$SHARED/md-{0}_0.gro > md-{0}_0.gro'.format(cycle)]
    for past_cycle in range(iteration):
        for cu in range(Kconfig.num_CUs):
            inputs.append(
                '$SHARED/md-{0}_{1}.xtc > md-{0}_{1}.xtc'.format(
                    past_cycle, cu))
    coco.link_input_data = inputs

    coco.copy_output_data = [
        '{0}_{1}{2}.gro > $SHARED/{0}_{1}{2}.gro'.format(outbase, cycle, cu)
        for cu in range(Kconfig.num_CUs)
    ]

    coco.download_output_data = [
        "coco.log > output/coco-iter{0}.log".format(cycle)
    ]

    return [coco]