def __call__(self, inputs=None, outputs=None, arguments=None, includes=None,
             local=False, environment=None, collect=False):
    abstraction = CurrentAbstraction()
    nest = CurrentNest()

    # Engine Functions define inputs and outputs member attributes
    try:
        inputs = inputs or self.inputs
        outputs = outputs or self.outputs
    except AttributeError:
        pass

    inputs = parse_input_list(inputs)
    outputs = parse_output_list(outputs, inputs)
    includes = parse_input_list(includes) + parse_input_list(self.includes)
    command = self.command_format(inputs, outputs, arguments)

    options = Options(environment=dict(self.environment),
                      collect=inputs if collect else None)
    if local:
        options.local = True
    if environment:
        options.environment.update(environment)
    if nest.batch:
        options.batch = nest.batch

    nest.schedule(abstraction, self, command, list(inputs) + list(includes),
                  outputs, options, nest.symbol)
    return outputs

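# A minimal usage sketch (hypothetical names, and assuming the usual Weaver
# helpers are in scope): calling an engine Function schedules one task with
# the current Nest and returns the parsed outputs, so results can be fed
# straight into later calls.
#
#     convert = ParseFunction('convert {IN} > {OUT}')
#     pngs = convert(inputs=['a.svg', 'b.svg'], outputs=['a.png', 'b.png'])
#     thumbs = resize(inputs=pngs, outputs=['a_t.png', 'b_t.png'])
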
def _generate(self):
    with self:
        debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

        function = parse_function(self.function)
        inputs = parse_input_list(self.inputs)
        includes = parse_input_list(self.includes)
        output = self.outputs
        nest = CurrentNest()

        if not os.path.isabs(output):
            output = os.path.join(nest.work_dir, output)

        # Merge inputs in rounds of at most self.group files per task until
        # the survivors fit into a single final merge.
        while len(inputs) > self.group:
            next_inputs = []
            for group in groups(inputs, self.group):
                output_file = next(nest.stash)
                next_inputs.append(output_file)
                with Options(local=self.options.local,
                             collect=group if self.collect else None):
                    yield function(group, output_file, None, includes)
            inputs = next_inputs

        with Options(local=self.options.local,
                     collect=inputs if self.collect else None):
            yield function(inputs, output, None, includes)

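# The reduction loop above depends on a groups() helper that chunks a list
# into fixed-size pieces. A self-contained sketch of such a chunker (an
# assumption; the real helper's signature may differ):
def groups_sketch(items, size):
    """Yield successive chunks of at most `size` items from `items`."""
    for start in range(0, len(items), size):
        yield items[start:start + size]

# With 10 inputs and group=4, the first round merges chunks of 4, 4, and 2;
# the 3 intermediate outputs then fit into a single final merge.
assert [len(g) for g in groups_sketch(list(range(10)), 4)] == [4, 4, 2]
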
def _generate(self):
    with self:
        debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

        function = parse_function(self.function)
        inputs_a = parse_input_list(self.inputs_a)
        inputs_b = parse_input_list(self.inputs_b)
        includes = parse_input_list(self.includes)

        # If native is enabled, then use allpairs_master; otherwise,
        # generate tasks as part of the DAG.
        #
        # Note: parse_output_list flattens inputs, so we need to manually
        # translate pairs into a single string.
        if self.native:
            # Store inputs A and B lists as required by allpairs_master
            inputs_a_file = next(self.nest.stash)
            with open(inputs_a_file, 'w') as fs:
                for input_file in map(str, inputs_a):
                    fs.write(input_file + '\n')

            inputs_b_file = next(self.nest.stash)
            with open(inputs_b_file, 'w') as fs:
                for input_file in map(str, inputs_b):
                    fs.write(input_file + '\n')

            inputs = [inputs_a_file, inputs_b_file]
            outputs = parse_output_list(self.outputs,
                ['_'.join([os.path.basename(str(s)) for s in p])
                 for p in inputs])

            # Schedule allpairs_master; collect the generated list files
            # if requested.
            with Options(local=True,
                         collect=inputs if self.collect else None):
                allpairs_master = parse_function(
                    'allpairs_master -p {0} {{IN}} {{ARG}} > {{OUT}}'.format(
                        self.port))
                yield allpairs_master(inputs, outputs, function.path,
                                      includes + [function.path])
        else:
            inputs = list(itertools.product(inputs_a, inputs_b))
            outputs = parse_output_list(self.outputs,
                ['_'.join([os.path.basename(str(s)) for s in p])
                 for p in inputs])

            # We use a wrapper script to collect the output of each
            # comparison and put it in the {INPUT_A} {INPUT_B} {OUTPUT}
            # format used by allpairs_master.
            for i, o in zip(inputs, outputs):
                tmp_output = next(self.nest.stash)

                with Options(local=self.options.local,
                             collect=[i] if self.collect else None):
                    output = function(i, tmp_output, None, includes)

                # The wrapper script should run locally, and we should
                # always try to collect the temporary intermediate output.
                with Options(local=True, collect=[tmp_output]):
                    yield AllPairsCompareWrapper(output, o,
                        [os.path.basename(str(p)) for p in i], None)

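# The non-native branch above derives each output name by joining the
# basenames of an input pair with '_'. A standalone illustration of that
# naming scheme (sample paths are made up):
import itertools
import os

pairs = list(itertools.product(['set_a/x.dat', 'set_a/y.dat'],
                               ['set_b/z.dat']))
names = ['_'.join(os.path.basename(str(s)) for s in p) for p in pairs]
assert names == ['x.dat_z.dat', 'y.dat_z.dat']
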
def _generate(self):
    with self:
        debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

        function = parse_function(self.function)
        inputs = parse_input_list(self.inputs)
        outputs = parse_output_list(self.outputs, inputs)
        includes = parse_input_list(self.includes)

        for i, o in zip(inputs, outputs):
            with Options(local=self.options.local,
                         collect=[i] if self.collect else None):
                yield function(i, o, None, includes)

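# A minimal usage sketch of the Map abstraction (hypothetical names and
# output template; consult the Weaver docs for the exact motif syntax):
#
#     texts = Glob('*.txt')
#     stats = Map('stat {IN} > {OUT}', texts, '{BASE}.stat')
#
# Inputs are paired positionally with outputs, and one task is yielded
# per (input, output) pair.
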
def _generate(self):
    with self:
        debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

        mapper = parse_function(self.mapper, PythonMapper)
        inputs = parse_input_list(self.inputs)
        includes = parse_input_list(self.includes)
        output = self.outputs
        nest = CurrentNest()

        for map_input in groups(inputs, self.group):
            map_output = next(nest.stash)
            with Options(local=self.options.local,
                         collect=map_input if self.collect else None):
                yield mapper(map_input, map_output, includes)

def __init__(self, work_dir=None, dag_path=None, stash=None, barrier=None,
             wrapper=None, track_imports=True, track_exports=True):
    self.work_dir = work_dir or '.'
    self.tasks = []
    self.parent = CurrentNest()

    if self.parent:
        self.work_dir = os.path.join(self.parent.work_dir, self.work_dir)

    self.stash = stash or Stash(root=os.path.join(self.work_dir, '_Stash'))

    if not os.path.exists(self.work_dir):
        make_directory(self.work_dir)

    Makeflow.__init__(self, wrapper=wrapper,
                      track_imports=track_imports,
                      track_exports=track_exports)

    self.dag_path = dag_path or os.path.join(self.work_dir, 'Makeflow')
    self.dag_file = open(self.dag_path, 'w')
    self.includes.add(self.dag_path)
    # TODO: fix work_dir so it can be translated by makeflow_link

    if barrier:
        self.includes.update(parse_input_list(barrier))

    # Since Abstractions and SubNests are not compiled immediately, these
    # objects must register with their parent Nest, which will compile them
    # in the order that they are registered to ensure proper semantics.
    self.futures = []

    if self.parent:
        debug(D_NEST, 'Register child {0} with parent {1}'.format(
            self, self.parent))
        self.parent.futures.append((self, True))

    debug(D_NEST, 'Created {0}'.format(self))

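# A sketch of the nesting behavior (hypothetical usage): work_dir is joined
# onto the parent Nest's, so SubNests compile into subdirectories, each with
# its own Makeflow DAG and _Stash directory:
#
#     with Nest('experiment'):      # ./experiment/Makeflow
#         with Nest('trial-1'):     # ./experiment/trial-1/Makeflow
#             ...
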
def __init__(self, function, inputs=None, outputs=None, includes=None,
             native=False, group=None, collect=False, local=False):
    # Must set id before we call Dataset.__init__ due to debugging
    # statement in said function.
    self.id = next(self.Counter)
    self.function = function
    self.inputs = inputs
    self.outputs = outputs or '{stash}'
    self.includes = includes
    self.native = native
    self.group = group or 0
    self.local = local

    Dataset.__init__(self)

    if collect:
        self.collect = parse_input_list(self.inputs)
    else:
        self.collect = None

    self.options = Options(local=self.local, collect=self.collect)

    self.nest.futures.append((self, False))
    debug(D_ABSTRACTION, 'Registered Abstraction {0} with {1}'.format(
        self, self.nest))

def _generate(self):
    with self:
        debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

        function = parse_function(self.function)
        includes = parse_input_list(self.includes)

        # First format inputs and figure out the number of iterations to
        # perform.
        group_size = 0
        inputs = []
        if isinstance(self.inputs, list):
            # If inputs is a matrix
            if isinstance(self.inputs[0], list):
                for ingroup in self.inputs:
                    inputs.append(parse_input_list(ingroup))
                    if group_size == 0:
                        group_size = len(ingroup)
                    if len(ingroup) != group_size:
                        raise IOError(
                            'Iteration group sizes differ between inputs!')
            # If inputs is a simple list
            else:
                group_size = len(self.inputs)
                inputs = parse_input_list(self.inputs)
        # If inputs is a string
        else:
            group_size = 1
            inputs = parse_input_list(self.inputs)

        for iteration in range(group_size):
            iteration_inputs = []
            if isinstance(inputs[0], list):
                for input_group in inputs:
                    iteration_inputs.append(input_group[iteration])
            else:
                iteration_inputs.append(inputs[iteration])

            input_pattern = self._longestCommonSubstr(
                [os.path.basename(str(f)) for f in iteration_inputs])

            iteration_outputs = []
            if isinstance(self.outputs, list):
                # If outputs is a matrix
                if isinstance(self.outputs[0], list):
                    for outgroup in self.outputs:
                        iteration_outputs.append(outgroup[iteration])
                # If outputs is a list of motif patterns
                elif isinstance(self.outputs[0], str) and '{' in self.outputs[0]:
                    for motif in self.outputs:
                        iteration_outputs.extend(
                            parse_output_list(motif, input_pattern))
                # If outputs is a simple list of strings
                elif isinstance(self.outputs[0], str):
                    iteration_outputs = parse_output_list(
                        self.outputs[iteration], input_pattern)
            # If outputs is a string
            else:
                iteration_outputs = parse_output_list(
                    self.outputs, input_pattern)

            with Options(local=self.options.local):
                yield function(iteration_inputs, iteration_outputs,
                               self.arguments, includes)

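# _longestCommonSubstr above reduces the iteration's input basenames to a
# shared pattern that seeds the output names. A quadratic, self-contained
# sketch of such a helper (an assumption; the real method may differ):
def longest_common_substr_sketch(strings):
    """Return the longest substring common to every string in `strings`."""
    if not strings:
        return ''
    reference, best = strings[0], ''
    for i in range(len(reference)):
        for j in range(i + len(best) + 1, len(reference) + 1):
            candidate = reference[i:j]
            if all(candidate in s for s in strings):
                best = candidate
            else:
                break  # extending a missing substring cannot make it common
    return best

assert longest_common_substr_sketch(['run1.fastq', 'run2.fastq']) == '.fastq'
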