def stage_1(self, instance):
    ## Stage 1 simply processes the parameter files to get started.
    ## Runs only for the first iteration.
    k1 = Kernel(name="sleep")
    k1.arguments = ["--t=10"]
    k1.cores = 1
    return k1
def stage_3(self, instance):
    ## Stage 3 is the compute-intensive gromacs mdrun stage. It operates over the .tpr file generated by stage 2.
    ## It stages its output (*.xvg, *.log) to a shared location on the remote machine -- the 'staging_area' under
    ## the pilot folder. The same output is also downloaded to the local machine to keep a backup.
    k3 = Kernel(name="sleep")
    k3.arguments = ["--t=10"]
    k3.cores = 1
    return k3
def stage_2(self, instance):
    ## Stage 2 is the gromacs preprocessing stage. In the first iteration, it uses the output of stage 1. Otherwise
    ## it operates over the .gro file from stage 3 of the previous iteration and the .mdp file from stage 4 of the
    ## previous iteration.
    k2 = Kernel(name='sleep')
    k2.arguments = ["--t=10"]
    k2.cores = 1
    return k2
def stage_4(self, instance):
    ## Stage 4 executes the alchemical analysis script and prepares the .mdp file for the next iteration.
    ## It currently operates on all data (*.xvg, *.log) that is available at that moment in './data'.
    ## './data' maps to the 'staging_area' referred to in stage 3. It downloads the results, output, error, and
    ## the new .mdp file to the local machine to keep a backup.
    k4 = Kernel(name="sleep")
    k4.arguments = ['--t=10']
    k4.cores = 1
    return k4
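# Taken together, stage_1 through stage_4 describe one pipeline. A minimal driver sketch
# follows, assuming the EnsembleMD Toolkit's Pipeline pattern and SingleClusterEnvironment
# execution context; the class name 'GromacsPipeline' and the resource name, core count,
# and walltime values are placeholders, not taken from the original script.
from radical.ensemblemd import Pipeline, SingleClusterEnvironment

class GromacsPipeline(Pipeline):
    # Hypothetical pattern class: stage_1 .. stage_4 above would be
    # defined as methods of this class.
    def __init__(self, instances):
        Pipeline.__init__(self, instances)

if __name__ == '__main__':
    # Placeholder resource request: one core on localhost for 15 minutes.
    cluster = SingleClusterEnvironment(resource='localhost',
                                       cores=1,
                                       walltime=15)
    cluster.allocate()
    cluster.run(GromacsPipeline(instances=1))
    cluster.deallocate()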
def stage_1(self, instance):
    global ENSEMBLE_SIZE
    global INTERVAL
    global NS, ITER

    k1 = Kernel(name='openmm')
    k1.arguments = ['--ns=%s' % NS]
    if ITER == 1:
        # First iteration: start from the shared initial structure.
        k1.link_input_data = ['$SHARED/ala2.pdb', '$SHARED/simulate.py']
    else:
        # Later iterations: restart from the structure written by stage 2 of
        # the previous iteration.
        k1.link_input_data = [
            '$ITER_%s_STAGE_2_TASK_1/ala2-%s.pdb > ala2.pdb' % (ITER - 1, instance - 1),
            '$SHARED/simulate.py'
        ]
    return k1
def stage_1(self, instance):
    """This stage calculates the number of characters in a UTF file."""
    global N

    k = Kernel(name="ccount")
    k.arguments = [
        "--inputfile=UTF-8-demo.txt",
        "--outputfile=ccount-{0}.txt".format(instance)
    ]
    k.copy_input_data = ["$SHARED/UTF-8-demo.txt"]
    # Stage N additional renamed copies of the shared input file.
    for i in range(N):
        k.copy_input_data.append(
            '$SHARED/UTF-8-demo.txt > UTF-8-{0}.txt'.format(i))
    return k
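# The 'source > destination' form in the staging directives above copies a
# file under a new name. A standalone illustration of what the loop expands
# to; '_staging_example' is a hypothetical helper and n=3 is an assumed value:
def _staging_example(n=3):
    directives = ["$SHARED/UTF-8-demo.txt"]
    directives += ['$SHARED/UTF-8-demo.txt > UTF-8-{0}.txt'.format(i)
                   for i in range(n)]
    # directives[-1] == '$SHARED/UTF-8-demo.txt > UTF-8-2.txt'
    return directives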
def stage_2(self, instance):
    global ITER, NS, ENSEMBLE_SIZE, TOTAL_ITERS

    k1 = Kernel(name="msm")
    k1.arguments = [
        '--lag=2',
        '--stride=10',
        '--clusters=100',
        '--components=4',
        '--pdb=ala2.pdb'
    ]
    k1.link_input_data = ['$SHARED/ala2.pdb', '$SHARED/analyze.py']
    # Link every trajectory produced so far: all iterations, all ensemble members.
    for i in range(ITER):
        for j in range(ENSEMBLE_SIZE):
            k1.link_input_data += [
                '$ITER_%s_STAGE_1_TASK_%s/trajectory.dcd > trajectory-%s_%s.dcd'
                % (i + 1, j + 1, i, j)
            ]
    k1.cores = 1
    k1.download_output_data = [
        'microstate_info.txt > dur-%s-ensemble-%s-iters-%s/microstate_info-%s.txt'
        % (NS, ENSEMBLE_SIZE, TOTAL_ITERS, ITER),
        'macrostate_info.txt > dur-%s-ensemble-%s-iters-%s/macrostate_info-%s.txt'
        % (NS, ENSEMBLE_SIZE, TOTAL_ITERS, ITER)
    ]
    ITER += 1
    return k1
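# The '$ITER_i_STAGE_j_TASK_k/...' references above address output files of
# earlier tasks by iteration, stage, and task index. A small hypothetical
# helper (not part of the framework) that builds such directives, shown only
# to document the naming convention:
def task_ref(iteration, stage, task, filename, rename=None):
    ref = '$ITER_%s_STAGE_%s_TASK_%s/%s' % (iteration, stage, task, filename)
    return '%s > %s' % (ref, rename) if rename else ref

# e.g. task_ref(1, 1, 2, 'trajectory.dcd', rename='trajectory-0_1.dcd')
#      returns '$ITER_1_STAGE_1_TASK_2/trajectory.dcd > trajectory-0_1.dcd'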
def stage_1(self, instance):
    global INPUT_PAR_Q
    global ENSEMBLE_SIZE
    global ITER

    # "simulation" tasks
    if instance <= ENSEMBLE_SIZE:
        k1 = Kernel(name="sleep")
        k1.arguments = [
            "--file=output.txt",
            "--text=simulation",
            "--duration={0}".format(INPUT_PAR_Q[instance - 1])
        ]
        k1.cores = 1

        # File staging can be added using the following:
        #k1.upload_input_data = []
        #k1.copy_input_data = []
        #k1.link_input_data = []
        #k1.copy_output_data = []
        #k1.download_output_data = []

        return k1

    # "analysis" task
    else:
        # Emulate some additional analysis execution time
        # (requires 'from time import sleep' at module scope).
        sleep(10)

        # The analysis kernel produces a random integer (<20) to push into INPUT_PAR_Q.
        m1 = Kernel(name="randval")
        m1.arguments = ["--upperlimit=20"]

        # Copy the simulation output data.
        m1.copy_input_data = []
        for inst in range(1, ENSEMBLE_SIZE + 1):
            m1.copy_input_data += [
                '$ITER_{0}_STAGE_1_TASK_{1}/output.txt'.format(
                    ITER[instance - 1], inst)
            ]
        return m1
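# INPUT_PAR_Q, the queue of simulation durations consumed above, is managed
# outside this method. A minimal sketch of how a surrounding script might
# seed and refresh it; the initial durations and the parsing of the randval
# output are assumptions:
ENSEMBLE_SIZE = 4                    # assumed ensemble size
INPUT_PAR_Q = [10] * ENSEMBLE_SIZE   # assumed initial sleep durations (seconds)

def refresh_input_par(randval_output):
    # Push the analysis result back into the queue. How the framework exposes
    # the randval kernel's output is framework-specific; a raw string is
    # assumed here.
    global INPUT_PAR_Q
    value = int(randval_output.strip())  # randval emits one integer < 20
    INPUT_PAR_Q = INPUT_PAR_Q[1:] + [value]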
import time

def vary_tasks(f):
    num_pipelines = 1
    num_stages = 1
    num_tasks = [1, 10, 100, 1000, 10000, 100000]

    for tasks in num_tasks:
        start = time.time()

        # equivalent to one stage
        set_of_tasks = frozenset([Kernel() for _ in range(tasks)])
        # equivalent to one pipe
        list_stages = tuple([set_of_tasks for _ in range(num_stages)])
        # equivalent to one application
        set_of_pipelines = set([list_stages for _ in range(num_pipelines)])

        end = time.time()
        f.write('pipes: %s, stages: %s, tasks: %s, time: %s\n' %
                (num_pipelines, num_stages, tasks, end - start))
import time
import networkx as nx

def vary_tasks(f):
    num_pipelines = 1
    num_stages = 1
    num_tasks = [1, 10, 100, 1000, 10000, 100000]

    for tasks in num_tasks:
        print('starting')
        start = time.time()

        # Create an empty set of pipes, equivalent to the entire application
        set_of_pipes = set()
        for pipe in range(num_pipelines):
            # Create an empty graph for each pipe
            Gpipe = nx.Graph()
            for stage in range(num_stages):
                # Create a set of tasks to be added to each stage
                set_of_tasks = frozenset([Kernel() for _ in range(tasks)])
                cur_stage = set_of_tasks
                # Add the current stage to the current pipe
                Gpipe.add_node(cur_stage)
                print(Gpipe.number_of_nodes())
            # Add the current pipe to the set of pipes
            set_of_pipes.add(Gpipe)

        end = time.time()
        f.write('pipes: %s, stages: %s, tasks: %s, time: %s\n' %
                (num_pipelines, num_stages, tasks, end - start))
        print('pipes: %s, stages: %s, tasks: %s, time: %s' %
              (num_pipelines, num_stages, tasks, end - start))
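# Both vary_tasks variants expect an open file handle for the timing log and
# a Kernel class in scope. A usage sketch; the log filename 'timings.txt' is
# an assumption:
if __name__ == '__main__':
    with open('timings.txt', 'w') as f:
        vary_tasks(f)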