def simulation_step(self, iteration, instance):
    """Build the GROMPP and MDRUN kernels for one simulation instance.

    The starting structure is linked from the pre_loop working directory
    in the first iteration, and from instance 1 of the previous
    iteration's analysis step afterwards.

    Arguments:
        iteration - iteration number (1 for the first iteration)
        instance  - instance number; ``instance - 1`` is the index used
                    in the staged file names

    Returns:
        [grompp, mdrun] - the two configured kernels, in that order
    """
    mdp_name = os.path.basename(Kconfig.mdp_file)
    top_name = os.path.basename(Kconfig.top_file)

    # File names in the staging directives use the shifted indices.
    inst_idx = instance - 1
    iter_idx = iteration - 1

    if iter_idx == 0:
        start_link = '$PRE_LOOP/temp/start{0}.gro > start.gro'.format(inst_idx)
    else:
        start_link = (
            '$ANALYSIS_ITERATION_{0}_INSTANCE_1/temp/start{1}.gro > start.gro'
            .format(iter_idx, inst_idx)
        )

    # The .tpr file is handed from grompp to mdrun through $SHARED.
    shared_tpr = '$SHARED/iter_{1}/topol_{0}.tpr'.format(inst_idx, iter_idx)

    # GROMPP kernel
    grompp = Kernel(name="custom.grompp")
    grompp.arguments = [
        "--mdp={0}".format(mdp_name),
        "--gro=start.gro",
        "--top={0}".format(top_name),
        "--tpr=topol.tpr",
    ]
    grompp.link_input_data = [
        '$SHARED/{0} > {0}'.format(mdp_name),
        '$SHARED/{0} > {0}'.format(top_name),
        start_link,
    ]
    grompp.copy_output_data = ['topol.tpr > {0}'.format(shared_tpr)]

    # MDRUN kernel
    mdrun = Kernel(name="custom.mdrun")
    mdrun.arguments = ["--size=1", "--tpr=topol.tpr", "--out=out.gro"]
    mdrun.link_input_data = ['{0} > topol.tpr'.format(shared_tpr)]

    return [grompp, mdrun]
def analysis_step(self, iteration, instance):
    """Build the CoCo and tleap kernels for one analysis instance.

    The CoCo kernel links the md{iteration}.ncdf output of a block of 64
    simulation instances, for every iteration up to the current one, and
    copies 64 resulting pdb files into $SHARED.  The tleap kernel links
    ``Kconfig.num_CUs`` of those pdb files for the current iteration.

    Arguments:
        iteration - current iteration number
        instance  - analysis instance number; selects which block of 64
                    simulation instances is analysed

    Returns:
        [coco, tleap] - the two configured kernels, in that order
    """
    # The same literal 64 was used for --frontpoints, the core count, the
    # simulation block size and the number of output pdb files.
    # NOTE(review): presumably these must stay equal - confirm before
    # making any of them configurable.
    block_size = 64
    block_start = (instance - 1) * block_size
    top_name = os.path.basename(Kconfig.top_file)

    coco = Kernel(name="custom.coco")
    coco.arguments = [
        "--grid={0}".format(Kconfig.grid),
        "--dims={0}".format(Kconfig.dims),
        "--frontpoints={0}".format(block_size),
        "--topfile={0}".format(top_name),
        "--mdfile=*.ncdf",
        "--output=pdbs",
        "--atom_selection={0}".format(Kconfig.atom_selection),
    ]
    coco.cores = block_size
    coco.uses_mpi = True

    coco_inputs = ['$SHARED/{0}'.format(top_name)]
    for past_iteration in range(1, iteration + 1):
        for sim in range(block_start + 1, block_start + block_size + 1):
            coco_inputs.append(
                '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.ncdf > md_{0}_{1}.ncdf'
                .format(past_iteration, sim)
            )
    coco.link_input_data = coco_inputs

    # Local pdb index i maps to global index block_start + i in $SHARED.
    coco.copy_output_data = [
        'pdbs{1}.pdb > $SHARED/pentaopt{0}{2}.pdb'.format(
            iteration, i, block_start + i)
        for i in range(block_size)
    ]

    if iteration % Kconfig.nsave == 0:
        coco.download_output_data = [
            'coco.log > output/iter{0}/coco.log'.format(iteration)
        ]

    tleap = Kernel(name="custom.tleap", instance_type='single')
    tleap.arguments = [
        "--numofsims={0}".format(Kconfig.num_CUs),
        "--cycle={0}".format(iteration),
    ]
    tleap_inputs = ['$SHARED/postexec.py > postexec.py']
    for i in range(Kconfig.num_CUs):
        tleap_inputs.append(
            '$SHARED/pentaopt{0}{1}.pdb > pentaopt{0}{1}.pdb'.format(iteration, i)
        )
    tleap.link_input_data = tleap_inputs

    return [coco, tleap]
def analysis_step(self, iteration, instance):
    """Return two `misc.randval` kernels.

    The first is passed an upper limit of 3 and the file name iters.dat;
    the second an upper limit of 16 and the file name sims.dat.  Each
    kernel downloads the file it was given.
    """
    kernels = []
    for upper_limit, filename in ((3, "iters.dat"), (16, "sims.dat")):
        randval = Kernel(name="misc.randval")
        randval.arguments = [
            "--upperlimit={0}".format(upper_limit),
            "--filename={0}".format(filename),
        ]
        randval.download_output_data = filename
        kernels.append(randval)
    return kernels
def analysis_step(self, iteration, instance):
    """Return a pair of `misc.randval` kernels (iters.dat and sims.dat).

    Returns:
        [iters_kernel, sims_kernel] - upper limits 3 and 16 respectively;
        each downloads its own output file
    """
    iters_file = "iters.dat"
    iters_kernel = Kernel(name="misc.randval")
    iters_kernel.arguments = [
        "--upperlimit=3",
        "--filename={0}".format(iters_file),
    ]
    iters_kernel.download_output_data = iters_file

    sims_file = "sims.dat"
    sims_kernel = Kernel(name="misc.randval")
    sims_kernel.arguments = [
        "--upperlimit=16",
        "--filename={0}".format(sims_file),
    ]
    sims_kernel.download_output_data = sims_file

    return [iters_kernel, sims_kernel]
def simulation_step(self, iteration, instance):
    """Return the GROMPP and MDRUN kernels for a single instance.

    GROMPP links the .mdp and topology files from $SHARED plus a start
    structure, and copies topol.tpr into $SHARED; MDRUN links that same
    .tpr file back in.  The start structure comes from pre_loop when
    ``iteration`` is 1 and from instance 1 of the previous analysis
    iteration otherwise.

    Arguments:
        iteration - iteration number (1 for the first iteration)
        instance  - instance number (file indices use ``instance - 1``)

    Returns:
        [grompp_kernel, mdrun_kernel]
    """
    file_index = instance - 1
    previous_iteration = iteration - 1
    mdp_basename = os.path.basename(Kconfig.mdp_file)
    top_basename = os.path.basename(Kconfig.top_file)

    # GROMPP kernel
    grompp_kernel = Kernel(name="custom.grompp")
    grompp_kernel.arguments = [
        "--mdp={0}".format(mdp_basename),
        "--gro=start.gro",
        "--top={0}".format(top_basename),
        "--tpr=topol.tpr",
    ]

    grompp_links = [
        '$SHARED/{0} > {0}'.format(name)
        for name in (mdp_basename, top_basename)
    ]
    if previous_iteration != 0:
        grompp_links.append(
            '$ANALYSIS_ITERATION_{0}_INSTANCE_1/temp/start{1}.gro > start.gro'
            .format(previous_iteration, file_index)
        )
    else:
        grompp_links.append(
            '$PRE_LOOP/temp/start{0}.gro > start.gro'.format(file_index)
        )
    grompp_kernel.link_input_data = grompp_links
    grompp_kernel.copy_output_data = [
        'topol.tpr > $SHARED/iter_{1}/topol_{0}.tpr'.format(
            file_index, previous_iteration)
    ]

    # MDRUN kernel
    mdrun_kernel = Kernel(name="custom.mdrun")
    mdrun_kernel.arguments = ["--size=1", "--tpr=topol.tpr", "--out=out.gro"]
    mdrun_kernel.link_input_data = [
        '$SHARED/iter_{1}/topol_{0}.tpr > topol.tpr'.format(
            file_index, previous_iteration)
    ]

    return [grompp_kernel, mdrun_kernel]
def analysis_step(self, iteration, instance):
    """Build the `md.coco` and `md.tleap` kernels for one iteration.

    The CoCo kernel copies in the md{iteration}.ncdf file of every
    simulation instance for every iteration so far and copies its pdb
    outputs into $PRE_LOOP/iter{iteration}.  The tleap kernel links those
    pdb files and copies its .crd outputs into the directory of the next
    iteration.

    Arguments:
        iteration - current iteration number
        instance  - analysis instance number (not used here)

    Returns:
        [coco, tleap] - the two configured kernels, in that order
    """
    sim_count = Kconfig.num_CUs
    top_name = os.path.basename(Kconfig.top_file)

    coco = Kernel(name="md.coco")
    coco.arguments = [
        "--grid={0}".format(Kconfig.grid),
        "--dims={0}".format(Kconfig.dims),
        "--frontpoints={0}".format(sim_count),
        "--topfile={0}".format(top_name),
        "--mdfile=*.ncdf",
        "--output=pentaopt%s" % (iteration),
    ]
    coco.cores = RPconfig.PILOTSIZE

    coco_inputs = [
        '$PRE_LOOP/{0}'.format(top_name),
        '$PRE_LOOP/pycoco.py > pycoco.py',
    ]
    for past_iteration in range(1, iteration + 1):
        for sim in range(1, sim_count + 1):
            coco_inputs.append(
                '$SIMULATION_ITERATION_{0}_INSTANCE_{1}/md{0}.ncdf > md_{0}_{1}.ncdf'
                .format(past_iteration, sim)
            )
    coco.copy_input_data = coco_inputs

    coco.copy_output_data = [
        'pentaopt{0}{1}.pdb > $PRE_LOOP/iter{0}/pentaopt{0}{1}.pdb'.format(
            iteration, idx)
        for idx in range(0, sim_count)
    ]

    tleap = Kernel(name="md.tleap")
    tleap.arguments = [
        "--numofsims={0}".format(sim_count),
        "--cycle={0}".format(iteration),
    ]

    pdb_links = [
        '$PRE_LOOP/iter{0}/pentaopt{0}{1}.pdb > pentaopt{0}{1}.pdb'.format(
            iteration, idx)
        for idx in range(0, sim_count)
    ]
    pdb_links.append('$PRE_LOOP/postexec.py')
    tleap.link_input_data = pdb_links

    # The .crd files are stored under the directory of iteration + 1.
    tleap.copy_output_data = [
        'min{0}{1}.crd > $PRE_LOOP/iter{2}/min{0}{1}.crd'.format(
            iteration, idx, iteration + 1)
        for idx in range(0, sim_count)
    ]

    return [coco, tleap]
def simulation_step(self, iteration, instance):
    """Build two `md.amber` kernels for one instance.

    The first kernel is given the minimization input file and copies
    md{iteration}.crd into $PRE_LOOP/iter{iteration}; the second is given
    the MD input file and links that .crd file back in.  In the first
    iteration the initial coordinate file is linked as min1.crd; later
    iterations link the matching min*.crd file from
    $PRE_LOOP/iter{iteration}.

    Arguments:
        iteration - iteration number (1 for the first iteration)
        instance  - instance number

    Returns:
        [minimization, dynamics] - the two configured kernels, in that
        order
    """
    min_input = os.path.basename(Kconfig.minimization_input_file)
    md_input = os.path.basename(Kconfig.md_input_file)
    top_name = os.path.basename(Kconfig.top_file)
    crd_name = os.path.basename(Kconfig.initial_crd_file)

    cycle_arg = "--cycle=%s" % (iteration)
    instance_arg = "--instance=%s" % (instance)
    staged_crd = '$PRE_LOOP/iter{0}/md_{0}_{1}.crd'.format(iteration, instance)

    if (iteration - 1) == 0:
        start_link = '$PRE_LOOP/{0} > min1.crd'.format(crd_name)
    else:
        start_link = '$PRE_LOOP/iter{2}/min{0}{1}.crd > min{2}.crd'.format(
            iteration - 1, instance - 1, iteration)

    minimization = Kernel(name="md.amber")
    minimization.arguments = [
        "--mininfile={0}".format(min_input),
        "--topfile={0}".format(top_name),
        "--crdfile={0}".format(crd_name),
        cycle_arg,
        instance_arg,
    ]
    minimization.link_input_data = [
        '$PRE_LOOP/{0}'.format(min_input),
        '$PRE_LOOP/{0}'.format(top_name),
        '$PRE_LOOP/{0}'.format(crd_name),
        start_link,
    ]
    minimization.cores = 1
    minimization.copy_output_data = [
        'md{0}.crd > {1}'.format(iteration, staged_crd)
    ]

    dynamics = Kernel(name="md.amber")
    dynamics.arguments = [
        "--mdinfile={0}".format(md_input),
        "--topfile={0}".format(top_name),
        cycle_arg,
        instance_arg,
    ]
    dynamics.link_input_data = [
        "$PRE_LOOP/{0}".format(md_input),
        "$PRE_LOOP/{0}".format(top_name),
        "{0} > md{1}.crd".format(staged_crd, iteration),
    ]
    # NOTE: the periodic download of md{iteration}.ncdf is disabled here.
    dynamics.cores = 1

    return [minimization, dynamics]
def simulation_step(self, iteration, instance):
    """Build two `custom.amber` kernels for one instance.

    The first kernel is given the minimization input and copies a restart
    file into $SHARED; the second is given the MD input and links that
    restart file back in.  Every ``Kconfig.nsave`` iterations the second
    kernel also downloads md{iteration}.nc.

    Arguments:
        iteration - iteration number (1 for the first iteration)
        instance  - instance number

    Returns:
        [minimization, dynamics] - the two configured kernels, in that
        order
    """
    min_input = os.path.basename(Kconfig.minimization_input_file)
    md_input = os.path.basename(Kconfig.md_input_file)
    top_name = os.path.basename(Kconfig.top_file)
    crd_name = os.path.basename(Kconfig.initial_crd_file)
    cycle_arg = "--cycle=%s" % (iteration)
    shared_rst = '$SHARED/md_{0}_{1}.rst'.format(iteration, instance)

    first_iteration = (iteration - 1) == 0
    if first_iteration:
        # The first iteration starts from the initial coordinates and
        # passes min1.rst7 on as the restart file.
        start_link = '$SHARED/{0} > min1.rst7'.format(crd_name)
        produced = 'min1.rst7'
    else:
        start_link = '$SHARED/min_{0}_{1}.rst7 > min{2}.rst7'.format(
            iteration - 1, instance - 1, iteration)
        produced = 'md{0}.rst'.format(iteration)

    minimization = Kernel(name="custom.amber")
    minimization.arguments = [
        "--mininfile={0}".format(min_input),
        "--topfile={0}".format(top_name),
        "--crdfile={0}".format(crd_name),
        cycle_arg,
    ]
    minimization.link_input_data = [
        '$SHARED/{0}'.format(min_input),
        '$SHARED/{0}'.format(top_name),
        '$SHARED/{0}'.format(crd_name),
        start_link,
    ]
    minimization.cores = Kconfig.num_cores_per_sim_cu
    minimization.copy_output_data = ['{0} > {1}'.format(produced, shared_rst)]

    dynamics = Kernel(name="custom.amber")
    dynamics.arguments = [
        "--mdinfile={0}".format(md_input),
        "--topfile={0}".format(top_name),
        cycle_arg,
    ]
    dynamics.link_input_data = [
        "$SHARED/{0}".format(md_input),
        "$SHARED/{0}".format(top_name),
        "{0} > md{1}.rst".format(shared_rst, iteration),
    ]
    dynamics.cores = Kconfig.num_cores_per_sim_cu

    if iteration % Kconfig.nsave == 0:
        dynamics.download_output_data = [
            'md{0}.nc > output/iter{0}/md_{0}_{1}.nc'.format(iteration, instance)
        ]

    return [minimization, dynamics]
def pre_loop(self):
    """Return the `md.pre_grlsd_loop` kernel run before the main loop.

    The kernel uploads the input/config files named in ``Kconfig`` and
    the helper scripts from ``Kconfig.misc_loc``, and downloads lsdm.py
    from the LSDMap repository.

    Kernel arguments:
        --inputfile = base name of the MD input file
        --numCUs    = ``Kconfig.num_CUs``

    Returns:
        the configured kernel
    """
    helper_scripts = (
        "spliter.py",
        "gro.py",
        "run.py",
        "pre_analyze.py",
        "post_analyze.py",
        "select.py",
        "reweighting.py",
    )

    uploads = [
        Kconfig.md_input_file,
        Kconfig.lsdm_config_file,
        Kconfig.top_file,
        Kconfig.mdp_file,
    ]
    for script in helper_scripts:
        uploads.append("{0}/{1}".format(Kconfig.misc_loc, script))

    pre_kernel = Kernel(name="md.pre_grlsd_loop")
    pre_kernel.upload_input_data = uploads
    pre_kernel.download_input_data = [
        "http://sourceforge.net/p/lsdmap/git/ci/extasy-0.1-rc2/tree/lsdmap/lsdm.py?format=raw > lsdm.py"
    ]
    pre_kernel.arguments = [
        "--inputfile={0}".format(os.path.basename(Kconfig.md_input_file)),
        "--numCUs={0}".format(Kconfig.num_CUs),
    ]
    return pre_kernel
def simulation_step(self, iteration, instance):
    """Return the `md.gromacs` kernel for one simulation instance.

    The .mdp file, topology file and run.py are linked from pre_loop.
    The start structure comes from pre_loop in the first iteration and
    from instance 1 of the previous analysis iteration otherwise.

    Arguments:
        iteration - iteration number (1 for the first iteration)
        instance  - instance number (file indices use ``instance - 1``)

    Returns:
        the configured kernel
    """
    mdp_name = os.path.basename(Kconfig.mdp_file)
    top_name = os.path.basename(Kconfig.top_file)
    file_index = instance - 1
    previous_iteration = iteration - 1

    if previous_iteration == 0:
        start_link = "$PRE_LOOP/temp/start{0}.gro > start.gro".format(file_index)
    else:
        start_link = (
            "$ANALYSIS_ITERATION_{0}_INSTANCE_1/temp/start{1}.gro > start.gro"
            .format(previous_iteration, file_index)
        )

    md_kernel = Kernel(name="md.gromacs")
    md_kernel.arguments = [
        "--grompp={0}".format(mdp_name),
        "--topol={0}".format(top_name),
    ]
    md_kernel.link_input_data = [
        "$PRE_LOOP/{0} > {0}".format(mdp_name),
        "$PRE_LOOP/{0} > {0}".format(top_name),
        "$PRE_LOOP/run.py > run.py",
        start_link,
    ]
    return md_kernel
def element_comparison(self, elements1, elements2):
    """Return a `my.hausdorff` kernel comparing two groups of elements.

    Each element number is mapped to a traj_flat<N>.npz.npy file name.
    The output file is named after the first element of each group and
    is downloaded.

    Arguments:
        elements1 - element numbers of the first group (non-empty)
        elements2 - element numbers of the second group (non-empty)

    Returns:
        the configured kernel
    """
    name_template = "traj_flat%d.npz.npy"
    first_inputs = [name_template % (element) for element in elements1]
    second_inputs = [name_template % (element) for element in elements2]
    result_file = "comparison-%03d-%03d.dat" % (elements1[0], elements2[0])

    print("Element Comparison {0} - {1}".format(elements1, elements2))

    hausdorff = Kernel(name="my.hausdorff")
    # The two input lists are passed as their list representation.
    hausdorff.arguments = [
        "--dist_file=hausdorff_kernel.py",
        "--inputfile1={0}".format(first_inputs),
        "--inputfile2={0}".format(second_inputs),
        "--outputfile={0}".format(result_file),
    ]
    # The script is uploaded from the local machine.  Alternatives are
    # download_input_data (web server) or link_input_data (a folder on
    # the target machine).
    hausdorff.upload_input_data = ["hausdorff_kernel.py"]

    # The comparison-x-y.dat result file is downloaded.
    hausdorff.download_output_data = result_file

    print("Element Comparison Finished {0} - {1}".format(elements1, elements2))
    return hausdorff
def stage_1(self, instance):
    """Return a `synapse.sample` kernel in sample mode.

    The kernel is given a fixed path, 1000 flops and 1 sample;
    ``instance`` is not used.
    """
    sampler = Kernel(name="synapse.sample")
    sampler.arguments = [
        "--path=$HOME/ves/synapse_local",
        "--mode=sample",
        "--flops=1000",
        "--samples=1",
    ]
    return sampler
def prepare_replica_for_exchange(self, replica):
    """Return the `md.re_exchange` kernel for one replica.

    The kernel is configured to use namd_matrix_calculator.py, which is
    uploaded, and to download the replica's matrix column file for the
    previous cycle.

    Arguments:
        replica - object exposing ``id`` and ``cycle``

    Returns:
        the configured kernel
    """
    previous_cycle = replica.cycle - 1
    # The last five characters of the input base name are dropped.
    replica_basename = self.inp_basename[:-5]
    column_file = "matrix_column_{cycle}_{replica}.dat".format(
        cycle=previous_cycle, replica=replica.id)

    exchange = Kernel(name="md.re_exchange")
    exchange.arguments = [
        "--calculator=namd_matrix_calculator.py",
        "--replica_id=" + str(replica.id),
        "--replica_cycle=" + str(previous_cycle),
        "--replicas=" + str(self.replicas),
        "--replica_basename=" + str(replica_basename),
    ]
    exchange.upload_input_data = "namd_matrix_calculator.py"
    exchange.download_output_data = column_file
    return exchange
def set2element_initialization(self, element):
    """Return a `misc.mkfile` kernel for one element of the set.

    The kernel is given a size of 10000 and the file name
    newfile_<element>.dat.  A progress message is printed first.
    """
    print("Creating Element {0}".format(element))

    element_file = "newfile_{0}.dat".format(element)
    mkfile = Kernel(name="misc.mkfile")
    mkfile.arguments = [
        "--size=10000",
        "--filename={0}".format(element_file),
    ]
    return mkfile
def simulation_step(self, iteration, instance):
    """Return a one-element list with a `misc.mkfile` kernel.

    The kernel is given a size of 1000 and the file name asciifile.dat,
    which is also listed in ``exists_remote``.
    """
    target = "asciifile.dat"
    mkfile = Kernel(name="misc.mkfile")
    mkfile.arguments = ["--size=1000", "--filename={0}".format(target)]
    mkfile.exists_remote = [target]
    return [mkfile]
def prepare_replica_for_md(self, replica):
    """Return the `misc.ccount` kernel for a replica and advance its cycle.

    Input and output names are built from ``self.inp_basename``, the
    replica id and the replica's current cycle.  The input file is
    uploaded and the output file downloaded.

    Arguments:
        replica - object exposing ``id`` and ``cycle``; its ``cycle`` is
                  incremented by one as a side effect

    Returns:
        the configured kernel
    """
    stem = "_".join((self.inp_basename, str(replica.id), str(replica.cycle)))
    md_input = stem + ".md"
    md_output = stem + ".out"

    counter = Kernel(name="misc.ccount")
    counter.arguments = [
        "--inputfile=" + " ".join((md_input, self.sh_file)),
        "--outputfile=" + md_output,
    ]
    # Shared data is not listed here; the original notes say that
    # everything in the shared_files list is staged in separately.
    counter.upload_input_data = [md_input]
    counter.download_output_data = md_output

    replica.cycle += 1
    return counter
def element_comparison(self, elements1, elements2):
    """Configure the `my.hausdorff` kernel for two groups of elements.

    Arguments:
        elements1 - element numbers of the first group (non-empty)
        elements2 - element numbers of the second group (non-empty)

    Returns:
        the configured kernel; it uploads hausdorff_kernel.py and
        downloads comparison-<a>-<b>.dat, where a and b are the first
        element of each group
    """
    def trajectory_files(elements):
        # One flattened-trajectory file name per element number.
        return ["traj_flat%d.npz.npy" % (number) for number in elements]

    files_a = trajectory_files(elements1)
    files_b = trajectory_files(elements2)
    outfile = "comparison-%03d-%03d.dat" % (elements1[0], elements2[0])

    print("Element Comparison {0} - {1}".format(elements1, elements2))

    kernel = Kernel(name="my.hausdorff")
    kernel.arguments = [
        "--dist_file=hausdorff_kernel.py",
        "--inputfile1={0}".format(files_a),
        "--inputfile2={0}".format(files_b),
        "--outputfile={0}".format(outfile),
    ]
    # Upload the script from the local machine.  Use download_input_data
    # if it is on a web server, or link_input_data if it is already in a
    # folder on the target machine.
    kernel.upload_input_data = ["hausdorff_kernel.py"]
    # The comparison-x-y.dat result file is downloaded.
    kernel.download_output_data = outfile

    print("Element Comparison Finished {0} - {1}".format(elements1, elements2))
    return kernel
def simulation_step(self, iteration, instance):
    """Return a `misc.mkfile` kernel for simulation-<iteration>-<instance>.dat.

    The kernel is given a size of 1000.
    """
    sim_file = "simulation-{0}-{1}.dat".format(iteration, instance)
    mkfile = Kernel(name="misc.mkfile")
    mkfile.arguments = ["--size=1000", "--filename={0}".format(sim_file)]
    return mkfile
def step_1(self, instance):
    """First pipeline step: a `misc.mkfile` kernel with a download.

    The kernel is given a size of 1000000 and the file name
    asciifile-<instance>.dat, which is downloaded afterwards.
    """
    ascii_file = "asciifile-{0}.dat".format(instance)
    mkfile = Kernel(name="misc.mkfile")
    mkfile.arguments = [
        "--size=1000000",
        "--filename={0}".format(ascii_file),
    ]
    mkfile.download_output_data = [ascii_file]
    return mkfile
def prepare_replica_for_exchange(self, replica):
    """Configure `md.re_exchange` to use namd_matrix_calculator.py.

    Arguments:
        replica - object exposing ``id`` and ``cycle``

    Returns:
        k - the configured kernel; it uploads the calculator script and
            downloads matrix_column_<cycle - 1>_<id>.dat
    """
    finished_cycle = replica.cycle - 1

    option_values = [
        ("--calculator=", "namd_matrix_calculator.py"),
        ("--replica_id=", str(replica.id)),
        ("--replica_cycle=", str(finished_cycle)),
        ("--replicas=", str(self.replicas)),
        # The base name is passed without its last five characters.
        ("--replica_basename=", str(self.inp_basename[:-5])),
    ]

    k = Kernel(name="md.re_exchange")
    k.arguments = [flag + value for flag, value in option_values]
    k.upload_input_data = "namd_matrix_calculator.py"
    k.download_output_data = "matrix_column_{cycle}_{replica}.dat".format(
        cycle=finished_cycle, replica=replica.id)
    return k
def simulation_step(self, iteration, instance):
    """Return a one-element list with a `misc.mkfile` kernel.

    The kernel is given a size of 1000 and the file name
    asciifile-<instance>.dat; that file is downloaded into the local
    directory iter<iteration>.
    """
    ascii_file = "asciifile-{0}.dat".format(instance)
    mkfile = Kernel(name="misc.mkfile")
    mkfile.arguments = ["--size=1000", "--filename={0}".format(ascii_file)]
    mkfile.download_output_data = [
        '{0} > iter{1}/{0}'.format(ascii_file, iteration)
    ]
    return [mkfile]
def simulation_step(self, iteration, instance):
    """Configure the `md.gromacs` kernel for a single instance.

    Arguments:
        iteration - iteration number; 1 selects the pre_loop start file,
                    anything else the start file from instance 1 of the
                    previous analysis iteration
        instance  - instance number (file indices use ``instance - 1``)

    Returns:
        the configured kernel
    """
    mdp_basename = os.path.basename(Kconfig.mdp_file)
    top_basename = os.path.basename(Kconfig.top_file)

    kernel = Kernel(name="md.gromacs")
    kernel.arguments = [
        "--grompp={0}".format(mdp_basename),
        "--topol={0}".format(top_basename),
    ]

    links = ['$PRE_LOOP/{0} > {0}'.format(name)
             for name in (mdp_basename, top_basename, 'run.py')]
    if iteration - 1 != 0:
        links.append(
            '$ANALYSIS_ITERATION_{0}_INSTANCE_1/temp/start{1}.gro > start.gro'
            .format(iteration - 1, instance - 1)
        )
    else:
        links.append(
            '$PRE_LOOP/temp/start{0}.gro > start.gro'.format(instance - 1)
        )
    kernel.link_input_data = links

    return kernel
def analysis_step(self, iteration, instance):
    """Return a `misc.levenshtein` kernel for one simulation output.

    The kernel is given reference.dat and the simulation file of the
    same iteration and instance, and its output file is downloaded.

    ..note:: ``$PRE_LOOP`` in ``link_input_data`` refers to the working
             directory of pre_loop.  The simulation file is addressed
             through the ``$SIMULATION_ITERATION_<i>_INSTANCE_<j>``
             placeholder.
    """
    sim_file = "simulation-{0}-{1}.dat".format(iteration, instance)
    result_file = "analysis-{0}-{1}.dat".format(iteration, instance)
    sim_dir = "$SIMULATION_ITERATION_{0}_INSTANCE_{1}".format(iteration, instance)

    levenshtein = Kernel(name="misc.levenshtein")
    levenshtein.link_input_data = [
        "$PRE_LOOP/reference.dat",
        "{0}/{1}".format(sim_dir, sim_file),
        "$PRE_LOOP/levenshtein.py",
    ]
    levenshtein.arguments = [
        "--inputfile1=reference.dat",
        "--inputfile2={0}".format(sim_file),
        "--outputfile={0}".format(result_file),
    ]
    levenshtein.download_output_data = result_file
    return levenshtein
def step_1(self, instance):
    """Return a `misc.chksum` kernel with an invalid download URL.

    The kernel is given UTF-8-demo.txt as input and downloads
    checksum<instance>.sha1.
    NOTE(review): the URL looks deliberately malformed, presumably to
    exercise error handling - confirm before changing it.
    """
    checksum_file = "checksum{0}.sha1".format(instance)
    chksum = Kernel(name="misc.chksum")
    chksum.arguments = [
        "--inputfile=UTF-8-demo.txt",
        "--outputfile={0}".format(checksum_file),
    ]
    chksum.download_input_data = "htpttpt://malformed.url"
    chksum.download_output_data = checksum_file
    return chksum
def pre_loop(self):
    """Return the `md.pre_grlsd_loop` kernel run before the main loop.

    The kernel uploads the MD input, LSDMap config, topology and .mdp
    files named in ``Kconfig``, plus the helper scripts found under
    ``Kconfig.misc_loc``.

    Kernel arguments:
        --inputfile = base name of the MD input file
        --numCUs    = ``Kconfig.num_CUs``

    Returns:
        the configured kernel
    """
    script_prefix = '{0}/'.format(Kconfig.misc_loc)
    script_names = [
        'spliter.py',
        'gro.py',
        'run.py',
        'pre_analyze.py',
        'post_analyze.py',
        'selection.py',
        'reweighting.py',
    ]
    config_files = [
        Kconfig.md_input_file,
        Kconfig.lsdm_config_file,
        Kconfig.top_file,
        Kconfig.mdp_file,
    ]

    setup = Kernel(name="md.pre_grlsd_loop")
    setup.upload_input_data = config_files + [
        script_prefix + name for name in script_names
    ]
    setup.arguments = [
        "--inputfile={0}".format(os.path.basename(Kconfig.md_input_file)),
        "--numCUs={0}".format(Kconfig.num_CUs),
    ]
    return setup
def analysis_stage(self, iteration, instance):
    """Configure the `misc.levenshtein` kernel for one analysis instance.

    Inputs linked into the kernel: reference.dat and levenshtein.py from
    ``$PRE_LOOP`` (the working directory of pre_loop), and the simulation
    file of the same iteration and instance.  The analysis output file is
    downloaded.

    Returns:
        the configured kernel
    """
    names = {
        "input": "simulation-{0}-{1}.dat".format(iteration, instance),
        "output": "analysis-{0}-{1}.dat".format(iteration, instance),
    }

    kernel = Kernel(name="misc.levenshtein")
    kernel.link_input_data = [
        "$PRE_LOOP/reference.dat",
        "$SIMULATION_ITERATION_{1}_INSTANCE_{2}/{0}".format(
            names["input"], iteration, instance),
        "$PRE_LOOP/levenshtein.py",
    ]
    kernel.arguments = [
        "--inputfile1=reference.dat",
        "--inputfile2={0}".format(names["input"]),
        "--outputfile={0}".format(names["output"]),
    ]
    kernel.download_output_data = names["output"]
    return kernel
def pre_loop(self):
    """Return a `misc.chksum` kernel with an invalid download URL.

    The kernel is given UTF-8-demo.txt as input and downloads
    checksum.sha1.
    NOTE(review): the URL looks deliberately malformed, presumably to
    exercise error handling - confirm before changing it.
    """
    checksum_file = "checksum.sha1"
    chksum = Kernel(name="misc.chksum")
    chksum.arguments = [
        "--inputfile=UTF-8-demo.txt",
        "--outputfile={0}".format(checksum_file),
    ]
    chksum.download_input_data = "htpttpt://malformed.url"
    chksum.download_output_data = checksum_file
    return chksum
def simulation_step(self, iteration, instance):
    """Return a one-element list with a `misc.mkfile` kernel.

    The kernel is given a size of 1000 and the file name
    asciifile-<instance>.dat, which is downloaded afterwards.
    """
    ascii_file = "asciifile-{0}.dat".format(instance)
    mkfile = Kernel(name="misc.mkfile")
    mkfile.arguments = ["--size=1000", "--filename={0}".format(ascii_file)]
    mkfile.download_output_data = [ascii_file]
    return [mkfile]
def simulation_step(self, iteration, instance):
    """Configure a `misc.mkfile` kernel that flags its output as remote.

    Returns:
        a one-element list holding the kernel; asciifile.dat is both the
        --filename argument and the single ``exists_remote`` entry
    """
    kernel = Kernel(name="misc.mkfile")
    kernel.arguments = [
        "--size=1000",
        "--filename=asciifile.dat",
    ]
    kernel.exists_remote = [
        "asciifile.dat",
    ]
    return [kernel]
def element_comparison(self, element1, element2):
    """Return a `misc.diff` kernel comparing the files of two elements.

    Each element corresponds to an asciifile-<element>.dat file.  The
    result goes to comparison-<element1>-<element2>.log, which is
    downloaded.  A progress message is printed.
    """
    left_file = "asciifile-{0}.dat".format(element1)
    right_file = "asciifile-{0}.dat".format(element2)
    log_file = "comparison-{0}-{1}.log".format(element1, element2)

    print("Comparing {0} with {1}. Saving result in {2}".format(
        left_file, right_file, log_file))

    # Compare the two files and write the result to its own log file.
    diff = Kernel(name="misc.diff")
    diff.arguments = [
        "--inputfile1={0}".format(left_file),
        "--inputfile2={0}".format(right_file),
        "--outputfile={0}".format(log_file),
    ]
    # Download the result file.
    diff.download_output_data = log_file
    return diff
def pre_loop(self):
    """Configure the `md.pre_grlsd_loop` kernel.

    Uploads:
        the MD input, LSDMap config, topology and .mdp files from
        ``Kconfig``, followed by seven helper scripts located in
        ``Kconfig.misc_loc``

    Kernel arguments:
        --inputfile = base name of the MD input file
        --numCUs    = ``Kconfig.num_CUs``

    Returns:
        the configured kernel
    """
    uploads = [
        Kconfig.md_input_file,
        Kconfig.lsdm_config_file,
        Kconfig.top_file,
        Kconfig.mdp_file,
    ]
    uploads.extend(
        '{0}/{1}'.format(Kconfig.misc_loc, script)
        for script in (
            'spliter.py',
            'gro.py',
            'run.py',
            'pre_analyze.py',
            'post_analyze.py',
            'selection.py',
            'reweighting.py',
        )
    )

    input_name = os.path.basename(Kconfig.md_input_file)

    kernel = Kernel(name="md.pre_grlsd_loop")
    kernel.upload_input_data = uploads
    kernel.arguments = [
        "--inputfile={0}".format(input_name),
        "--numCUs={0}".format(Kconfig.num_CUs),
    ]
    return kernel
def simulation_stage(self, iteration, instance):
    """Configure a `misc.mkfile` kernel with a per-iteration download.

    Returns:
        a one-element list holding the kernel; it is given a size of
        1000 and asciifile-<instance>.dat, which is downloaded to
        iter<iteration>/asciifile-<instance>.dat
    """
    produced = "asciifile-{0}.dat".format(instance)
    destination = "iter{0}/{1}".format(iteration, produced)

    kernel = Kernel(name="misc.mkfile")
    kernel.arguments = [
        "--size=1000",
        "--filename={0}".format(produced),
    ]
    kernel.download_output_data = ['{0} > {1}'.format(produced, destination)]
    return [kernel]
def stage_2(self, instance):
    """Return an `md.gromacs` kernel set up to call ``gmx mdrun``.

    in.tpr is linked from the stage 1 working directory; the kernel uses
    one core.  ``instance`` is not used.
    """
    mdrun = Kernel(name="md.gromacs")
    mdrun.link_input_data = ['$STAGE_1/in.tpr > in.tpr']
    # NOTE: placeholder executable path, to be replaced for real runs.
    mdrun.executable = ['path/to/gromacs/gmx']
    mdrun.arguments = [
        'mdrun',
        '-s', 'in.tpr',
        '-deffnm', 'out',
    ]
    mdrun.cores = 1
    return mdrun
def step_2(self, instances):
    """Return a `misc.cat` kernel joining a step 1 file and a local file.

    temp.txt is copied from the step 1 working directory as file1.txt,
    the local output_file_2.txt is uploaded as file2.txt, and file1.txt
    is downloaded as output_file.txt.  ``instances`` is not used.
    """
    first, second = "file1.txt", "file2.txt"

    cat = Kernel(name="misc.cat")
    cat.upload_input_data = ['./output_file_2.txt > {0}'.format(second)]
    cat.copy_input_data = ['$STEP_1/temp.txt > {0}'.format(first)]
    cat.arguments = [
        "--file1={0}".format(first),
        "--file2={0}".format(second),
    ]
    cat.download_output_data = ['./{0} > output_file.txt'.format(first)]
    return cat
def step_1(self, instance):
    """Return a `misc.hello` kernel with a per-instance download name.

    The local input_file.txt is uploaded as temp.txt, passed as --file,
    and downloaded again as output_file_<instance>.txt.
    """
    remote_name = "temp.txt"
    local_result = "output_file_{0}.txt".format(instance)

    hello = Kernel(name="misc.hello")
    hello.upload_input_data = ['./input_file.txt > {0}'.format(remote_name)]
    hello.arguments = ["--file={0}".format(remote_name)]
    hello.download_output_data = [
        './{0} > {1}'.format(remote_name, local_result)
    ]
    return hello
def pre_loop(self):
    """Return the kernel executed before the simulation-analysis loop.

    A `misc.mkfile` kernel is given a size of 1000 and the file name
    reference.dat; per the original notes this file is the reference
    for all analysis steps.
    """
    reference = Kernel(name="misc.mkfile")
    reference.arguments = [
        "--size=1000",
        "--filename=reference.dat",
    ]
    return reference
def step_1(self, instance):
    """Return a `misc.hello` kernel that downloads to a fixed path.

    The local input_file.txt is uploaded as temp.txt, passed as --file,
    and downloaded to a hard-coded absolute path under the Jenkins
    workspace.  ``instance`` is not used.
    """
    staged_name = "temp.txt"
    download_target = (
        '/var/lib/jenkins/workspace/EnsembleMDTesting/temp_results/remote_file.txt'
    )

    hello = Kernel(name="misc.hello")
    hello.upload_input_data = ['./input_file.txt > {0}'.format(staged_name)]
    hello.arguments = ["--file={0}".format(staged_name)]
    hello.download_output_data = [
        './{0} > {1}'.format(staged_name, download_target)
    ]
    return hello
def step_1(self, instance):
    """First pipeline step: a `misc.mkfile` kernel.

    The kernel is given a size of 1000000 and the file name
    asciifile-<instance>.dat.
    """
    ascii_file = "asciifile-{0}.dat".format(instance)
    mkfile = Kernel(name="misc.mkfile")
    mkfile.arguments = [
        "--size=1000000",
        "--filename={0}".format(ascii_file),
    ]
    return mkfile
def stage_1(self, instances):
    """Return a `spark` kernel configured to run leafletfinder.py.

    The script and traj_positions.npy are uploaded, and the kernel is
    given fixed memory, result-size and partition settings.
    ``instances`` is not used.
    """
    script = 'leafletfinder.py'
    trajectory = 'traj_positions.npy'

    spark = Kernel(name="spark")
    spark.upload_input_data = [script, trajectory]
    spark.arguments = [
        "--exec-mem=60g",
        "--driver-mem=30g",
        "--max-result-size=25g",
        "--spark-script={0}".format(script),
        "--input-file={0}".format(trajectory),
        "--partitions=378",
    ]
    return spark
def stage_1(self, instance):
    """Return an `md.gromacs` kernel set up to call ``gmx grompp``.

    in.gro, in.top, in.mdp and all *.itp files are uploaded; the
    arguments name in.tpr as the output.  The kernel uses one core and
    ``instance`` is not used.
    """
    grompp = Kernel(name="md.gromacs")
    grompp.upload_input_data = ['in.gro', 'in.top', '*.itp', 'in.mdp']
    # NOTE: placeholder executable path, to be replaced for real runs.
    grompp.executable = ['path/to/gromacs/gmx']
    grompp.arguments = [
        'grompp',
        '-f', 'in.mdp',
        '-c', 'in.gro',
        '-o', 'in.tpr',
        '-p', 'in.top',
    ]
    grompp.cores = 1
    return grompp
def simulation_step(self, iteration, instance):
    """Return the `misc.mkfile` kernel for one simulation instance.

    The kernel is given a size of 1000 and the file name
    simulation-<iteration>-<instance>.dat; per the original notes the
    file is compared against the reference file in the following
    analysis step.
    """
    output_name = "simulation-{0}-{1}.dat".format(iteration, instance)

    simulation = Kernel(name="misc.mkfile")
    simulation.arguments = [
        "--size=1000",
        "--filename={0}".format(output_name),
    ]
    return simulation
def stage_2(self, instances):
    """Configure a `misc.cat` kernel fed by stage 1 and a local file.

    Staging:
        upload   - ./output_file_2.txt as file2.txt
        copy     - temp.txt from the stage 1 working directory as
                   file1.txt
        download - ./file1.txt as output_file.txt

    ``instances`` is not used.
    """
    kernel = Kernel(name="misc.cat")
    kernel.upload_input_data = [
        './output_file_2.txt > file2.txt',
    ]
    kernel.copy_input_data = [
        '$STAGE_1/temp.txt > file1.txt',
    ]
    kernel.arguments = [
        "--file1=file1.txt",
        "--file2=file2.txt",
    ]
    kernel.download_output_data = [
        './file1.txt > output_file.txt',
    ]
    return kernel
def analysis_step(self, iteration, instance):
    """Return a one-element list with a `misc.randval` kernel.

    The kernel is given an upper limit of 16.  No output staging is
    configured; per the original notes the kernel prints only a number,
    which the pattern picks up automatically.
    """
    randval = Kernel(name="misc.randval")
    randval.arguments = [
        "--upperlimit=16",
    ]
    return [randval]
def step_1(self, instance):
    """Return a `misc.chksum` kernel for the downloaded UTF-8 demo file.

    The kernel downloads UTF-8-demo.txt from the given URL, is passed it
    as --inputfile, and its checksum<instance>.sha1 output is downloaded.

    Arguments:
        instance - instance number, used in the checksum file name

    Returns:
        the configured kernel
    """
    checksum_file = "checksum{0}.sha1".format(instance)

    k = Kernel(name="misc.chksum")
    k.arguments = [
        "--inputfile=UTF-8-demo.txt",
        "--outputfile={0}".format(checksum_file),
    ]
    k.download_input_data = "http://testing.saga-project.org/cybertools/UTF-8-demo.txt"
    k.download_output_data = checksum_file
    # The kernel was built but never returned, so callers received None.
    return k
def analysis_stage(self, iteration, instance):
    """Return a one-element list with a `misc.randval_2` kernel.

    The kernel is given an upper limit of 16.  No output staging is
    configured; per the original notes the kernel prints only a number,
    which the pattern picks up automatically.
    """
    randval = Kernel(name="misc.randval_2")
    randval.arguments = [
        "--upperlimit=16",
    ]
    return [randval]
def simulation_stage(self, iteration, instance):
    """Return the `misc.mkfile` kernel for one simulation instance.

    The kernel is given a size of 1000 and the file name
    simulation-<iteration>-<instance>.dat; per the original notes the
    file is compared against the reference file in the following
    analysis stage.
    """
    output_name = "simulation-{0}-{1}.dat".format(iteration, instance)

    simulation = Kernel(name="misc.mkfile")
    simulation.arguments = [
        "--size=1000",
        "--filename={0}".format(output_name),
    ]
    return simulation
def step_2(self, instance):
    """Second pipeline step: a `misc.ccount` kernel on the step 1 file.

    asciifile-<instance>.dat is linked from the step 1 working directory
    and the kernel is told to write cfreqs-<instance>.dat.
    """
    source_file = "asciifile-{0}.dat".format(instance)
    freq_file = "cfreqs-{0}.dat".format(instance)

    counter = Kernel(name="misc.ccount")
    counter.arguments = [
        "--inputfile={0}".format(source_file),
        "--outputfile={0}".format(freq_file),
    ]
    counter.link_input_data = "$STEP_1/{0}".format(source_file)
    return counter
def step_1(self, instance):
    """Return a `misc.chksum` kernel configured from instance attributes.

    Reads ``self._checksum_inputfile``, ``self._download_output`` and
    ``self._upload_directives``; ``instance`` is not used.
    """
    input_arg = "--inputfile={0}".format(self._checksum_inputfile)
    output_arg = "--outputfile={0}".format(self._download_output)

    chksum = Kernel(name="misc.chksum")
    chksum.arguments = [input_arg, output_arg]
    chksum.upload_input_data = self._upload_directives
    chksum.download_output_data = self._download_output
    return chksum
def simulation_step(self, iteration, instance):
    """Configure a `misc.mkfile` kernel whose output is downloaded.

    Returns:
        a one-element list holding the kernel; it is given a size of
        1000 and asciifile-<instance>.dat, which is also the single
        download entry
    """
    produced = "asciifile-{0}.dat".format(instance)

    kernel = Kernel(name="misc.mkfile")
    kernel.arguments = [
        "--size=1000",
        "--filename={0}".format(produced),
    ]
    kernel.download_output_data = [produced]
    return [kernel]
def stage_2(self, instance):
    """Return a `misc.ccount` kernel working on the stage 1 file.

    asciifile-<instance>.dat is copied from the stage 1 working
    directory, and cfreqs-<instance>.dat is downloaded.
    """
    source_file = "asciifile-{0}.dat".format(instance)
    freq_file = "cfreqs-{0}.dat".format(instance)

    counter = Kernel(name="misc.ccount")
    counter.arguments = [
        "--inputfile={0}".format(source_file),
        "--outputfile={0}".format(freq_file),
    ]
    counter.copy_input_data = "$STAGE_1/{0}".format(source_file)
    counter.download_output_data = freq_file
    return counter
def step_3(self, instance):
    """Third pipeline step: a `misc.chksum` kernel on the step 2 output.

    cfreqs-<instance>.dat is linked from the step 2 working directory
    and cfreqs-<instance>.sha1 is downloaded.
    """
    freq_file = "cfreqs-{0}.dat".format(instance)
    sha_file = "cfreqs-{0}.sha1".format(instance)

    chksum = Kernel(name="misc.chksum")
    chksum.arguments = [
        "--inputfile={0}".format(freq_file),
        "--outputfile={0}".format(sha_file),
    ]
    chksum.link_input_data = "$STEP_2/{0}".format(freq_file)
    chksum.download_output_data = sha_file
    return chksum
def test_ArgumentError(self):
    """Check that an unknown kernel argument raises ArgumentError.

    Assigning the argument list ["a"] to a `misc.hello` kernel is
    expected to raise ArgumentError with a message naming the unknown
    argument and listing the valid ones.  The test fails if no exception
    is raised or if the message does not contain the expected text.
    """
    from radical.ensemblemd import Kernel
    from radical.ensemblemd.exceptions import ArgumentError

    expected_message = "Invalid argument(s) for kernel 'misc.hello': Unknown / malformed argument 'a'. Valid arguments are {'--file=': {'_value': None, 'mandatory': True, 'description': 'The input file.', '_is_set': False}}."

    try:
        k = Kernel(name="misc.hello")
        k.arguments = ["a"]
    except ArgumentError as er:
        # Two spaces after the colon reproduce the output of the original
        # comma-separated print statement.
        print('ArgumentError: Passed:  {0}'.format(er))
        # Compare against the message text; membership tests on the
        # exception object itself only work through the Python 2
        # exception indexing protocol.
        assert expected_message in str(er)
    else:
        # Without this branch the test passed silently when the kernel
        # accepted the invalid argument.
        raise AssertionError(
            "ArgumentError was not raised for the invalid argument 'a'")