Exemple #1
0
    def __call__(self, inputs=None, outputs=None, arguments=None,
        includes=None, local=False, environment=None, collect=False):
        """Schedule one invocation of this function on the current nest.

        Falsy ``inputs``/``outputs`` are replaced by the ``inputs``/``outputs``
        attributes of this object when it defines them.  The call's
        ``includes`` are placed ahead of this object's own ``includes``, and
        both are scheduled as dependencies after the inputs.

        ``local`` marks the task's options as local, ``environment`` is merged
        over a copy of ``self.environment``, and ``collect`` requests that the
        parsed inputs be passed to the options as the collect list.

        Returns the parsed output list.
        """
        current_abstraction = CurrentAbstraction()
        current_nest = CurrentNest()

        # Engine Functions define inputs and outputs member attributes; for
        # objects without them the caller's values are kept untouched.
        try:
            if not inputs:
                inputs = self.inputs
            if not outputs:
                outputs = self.outputs
        except AttributeError:
            pass

        inputs = parse_input_list(inputs)
        outputs = parse_output_list(outputs, inputs)
        call_includes = parse_input_list(includes)
        own_includes = parse_input_list(self.includes)
        includes = call_includes + own_includes
        command = self.command_format(inputs, outputs, arguments)

        # Copy the environment so per-call updates never leak back to self.
        base_environment = dict(self.environment)
        to_collect = inputs if collect else None
        task_options = Options(environment=base_environment, collect=to_collect)

        if local:
            task_options.local = True
        if environment:
            task_options.environment.update(environment)
        if current_nest.batch:
            task_options.batch = current_nest.batch

        dependencies = list(inputs) + list(includes)
        current_nest.schedule(current_abstraction, self, command,
            dependencies, outputs, task_options, current_nest.symbol)

        return outputs
Exemple #2
0
    def _generate(self):
        """Yield function calls that fold the inputs down to one output.

        While there are more pending inputs than ``self.group``, the pending
        inputs are split with ``groups`` and each chunk is passed to the
        function, writing to a fresh stash file; the stash files become the
        pending inputs of the next round.  A final call writes the remaining
        inputs to ``self.outputs`` (joined to the nest work directory when it
        is not absolute).
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            pending = parse_input_list(self.inputs)
            includes = parse_input_list(self.includes)
            target = self.outputs
            nest = CurrentNest()

            if not os.path.isabs(target):
                target = os.path.join(nest.work_dir, target)

            # NOTE(review): if a round does not shrink the number of pending
            # inputs (e.g. a very small self.group) this loop would not
            # terminate -- confirm against how groups() chunks its input.
            while len(pending) > self.group:
                intermediates = []
                for chunk in groups(pending, self.group):
                    stashed = next(nest.stash)
                    intermediates.append(stashed)
                    run_local = self.options.local
                    to_collect = chunk if self.collect else None
                    with Options(local=run_local, collect=to_collect):
                        yield function(chunk, stashed, None, includes)
                pending = intermediates

            run_local = self.options.local
            to_collect = pending if self.collect else None
            with Options(local=run_local, collect=to_collect):
                yield function(pending, target, None, includes)
Exemple #3
0
    def __call__(self,
                 inputs=None,
                 outputs=None,
                 arguments=None,
                 includes=None,
                 local=False,
                 environment=None,
                 collect=False):
        """Schedule a single task for this function and return its outputs.

        Inputs and outputs that are falsy fall back to ``self.inputs`` and
        ``self.outputs`` when this object has those attributes.  The task
        depends on the parsed inputs followed by the call's includes and then
        this object's includes.  The returned value is the parsed output list.
        """
        owner = CurrentAbstraction()
        target_nest = CurrentNest()

        # Engine Functions define inputs and outputs member attributes; when
        # they are absent the AttributeError is deliberately ignored.
        try:
            if not inputs:
                inputs = self.inputs
            if not outputs:
                outputs = self.outputs
        except AttributeError:
            pass

        inputs = parse_input_list(inputs)
        outputs = parse_output_list(outputs, inputs)
        requested_includes = parse_input_list(includes)
        includes = requested_includes + parse_input_list(self.includes)
        command = self.command_format(inputs, outputs, arguments)

        # Work on a copy of the environment so updates stay local to this call.
        settings = Options(
            environment=dict(self.environment),
            collect=(inputs if collect else None),
        )
        if local:
            settings.local = True
        if environment:
            settings.environment.update(environment)

        task_inputs = list(inputs) + list(includes)
        target_nest.schedule(owner, self, command, task_inputs, outputs,
                             settings)

        return outputs
    def _generate(self):
        """Yield one function call per iteration over the inputs.

        ``self.inputs`` may be:

        * a list of lists ("matrix"): every inner list must have the same
          length; iteration ``k`` receives the ``k``-th element of each list;
        * a flat list: iteration ``k`` receives the ``k``-th parsed input;
        * anything else (e.g. a single string): exactly one iteration.

        An empty ``self.inputs`` list produces no iterations.

        ``self.outputs`` may be a list of lists (the ``k``-th element of each
        is used), a list of strings containing ``{`` (each is expanded with
        ``parse_output_list`` against the common substring of the iteration's
        input basenames), a plain list of strings (the ``k``-th one is
        expanded), or a single non-list value (expanded directly).

        Raises:
            IOError: if the inner lists of a matrix input differ in length.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            includes = parse_input_list(self.includes)

            # First format inputs and figure out the number of iterations to perform
            group_size = 0
            inputs = []
            if isinstance(self.inputs, list):
                if not self.inputs:
                    # Empty list: nothing to iterate over (avoids indexing [0])
                    pass
                # If inputs is a matrix
                elif isinstance(self.inputs[0], list):
                    # The first group fixes the expected size, so an empty
                    # first group can no longer hide a mismatch.
                    group_size = len(self.inputs[0])
                    for ingroup in self.inputs:
                        inputs.append(parse_input_list(ingroup))
                        if len(ingroup) != group_size:
                            raise IOError("Iteration group size are different between inputs!")
                # If inputs is a simple list
                else:
                    group_size = len(self.inputs)
                    inputs = parse_input_list(self.inputs)
            # If inputs is a string
            else:
                group_size = 1
                inputs = parse_input_list(self.inputs)

            for index in range(group_size):

                if isinstance(inputs[0], list):
                    iteration_inputs = [group[index] for group in inputs]
                else:
                    iteration_inputs = [inputs[index]]

                # Common part of the input basenames, used to name the outputs
                input_pattern = self._longestCommonSubstr(
                    [os.path.basename(str(item)) for item in iteration_inputs])

                iteration_outputs = []
                if isinstance(self.outputs, list):
                    # If outputs is a matrix
                    if isinstance(self.outputs[0], list):
                        iteration_outputs = [outgroup[index] for outgroup in self.outputs]
                    # If outputs is a simple list of motifs (templates)
                    elif isinstance(self.outputs[0], str) and '{' in self.outputs[0]:
                        for motif in self.outputs:
                            iteration_outputs.extend(parse_output_list(motif, input_pattern))
                    # If outputs is a simple list of strings
                    elif isinstance(self.outputs[0], str):
                        iteration_outputs = parse_output_list(self.outputs[index], input_pattern)
                # If outputs is a string
                else:
                    iteration_outputs = parse_output_list(self.outputs, input_pattern)

                with Options(local=self.options.local):
                    yield function(iteration_inputs, iteration_outputs, None, includes)
Exemple #5
0
    def _generate(self):
        """Yield the tasks that compare every input in A with every input in B.

        When ``self.native`` is set, the two input lists are written to stash
        files (one path per line) and a single ``allpairs_master`` task is
        scheduled locally, with the function's path passed as the argument
        and added to the includes.

        Otherwise one task is generated per pair from
        ``itertools.product(inputs_a, inputs_b)``; each writes to a temporary
        stash file which an ``AllPairsCompareWrapper`` task then turns into
        the final output for that pair.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            inputs_a = parse_input_list(self.inputs_a)
            inputs_b = parse_input_list(self.inputs_b)
            includes = parse_input_list(self.includes)

            # If native is enabled, then use allpairs_master, otherwise
            # generate tasks as part of the DAG.
            #
            # Note: parse_output_list flattens inputs, so we need to manually
            # translate pairs into a single string.
            if self.native:
                # Store inputs A and B lists as required by allpairs_master
                inputs_a_file = next(self.nest.stash)
                with open(inputs_a_file, 'w') as fs:
                    for input_file in map(str, inputs_a):
                        fs.write(input_file + '\n')

                inputs_b_file = next(self.nest.stash)
                with open(inputs_b_file, 'w') as fs:
                    for input_file in map(str, inputs_b):
                        fs.write(input_file + '\n')

                inputs  = [inputs_a_file, inputs_b_file]
                # NOTE(review): each element of `inputs` here is a single
                # path, not a pair, so the inner loop walks over that path's
                # items; behaviour is kept as-is -- confirm the intended
                # output naming for the native case.
                outputs = parse_output_list(self.outputs,
                            ['_'.join(
                                [os.path.basename(str(s)) for s in p]) for p in inputs])

                # Schedule allpairs_master.  The collect list is the task's
                # own inputs (the two list files); the previous code referred
                # to an undefined name here and failed whenever collect was
                # enabled.
                with Options(local=True, collect=inputs if self.collect else None):
                    allpairs_master = parse_function(
                        'allpairs_master -p {0} {{IN}} {{ARG}} > {{OUT}}'.format(self.port))
                    yield allpairs_master(inputs, outputs, function.path, includes + [function.path])
            else:
                inputs  = list(itertools.product(inputs_a, inputs_b))
                # Lists (not lazy map objects) so the helpers receive a real
                # sequence on every Python version.
                outputs = parse_output_list(self.outputs,
                            ['_'.join(
                                [os.path.basename(str(s)) for s in p]) for p in inputs])

                # We use a wrapper script to collect the output of the
                # comparison and put in {INPUT_A} {INPUT_B} {OUTPUT} format, as
                # used by allpairs_master.
                for i, o in zip(inputs, outputs):
                    tmp_output = next(self.nest.stash)

                    with Options(local=self.options.local, collect=[i] if self.collect else None):
                        output = function(i, tmp_output, None, includes)

                    # Wrapper script should run locally and we should always
                    # try to collect the temporary intermediate output file.
                    with Options(local=True, collect=[tmp_output]):
                        yield AllPairsCompareWrapper(output, o, [os.path.basename(str(p)) for p in i], None)
Exemple #6
0
    def _generate(self):
        """Yield the tasks for an all-pairs comparison of inputs A and B.

        Native mode (``self.native``): write each input list to a stash file,
        one path per line, and schedule a single local ``allpairs_master``
        task that receives both list files, with ``function.path`` as its
        argument and as an extra include.

        DAG mode: for every pair in ``itertools.product(inputs_a, inputs_b)``
        run the function into a temporary stash file, then yield an
        ``AllPairsCompareWrapper`` task that produces the pair's final output
        from it.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            inputs_a = parse_input_list(self.inputs_a)
            inputs_b = parse_input_list(self.inputs_b)
            includes = parse_input_list(self.includes)

            # If native is enabled, then use allpairs_master, otherwise
            # generate tasks as part of the DAG.
            #
            # Note: parse_output_list flattens inputs, so we need to manually
            # translate pairs into a single string.
            if self.native:
                # Store inputs A and B lists as required by allpairs_master
                inputs_a_file = next(self.nest.stash)
                with open(inputs_a_file, 'w') as fs:
                    for input_file in map(str, inputs_a):
                        fs.write(input_file + '\n')

                inputs_b_file = next(self.nest.stash)
                with open(inputs_b_file, 'w') as fs:
                    for input_file in map(str, inputs_b):
                        fs.write(input_file + '\n')

                inputs  = [inputs_a_file, inputs_b_file]
                # NOTE(review): the elements of `inputs` are single paths
                # rather than pairs in this branch; naming is left unchanged
                # -- confirm what the native output name should be.
                outputs = parse_output_list(self.outputs,
                            ['_'.join(
                                [os.path.basename(str(s)) for s in p]) for p in inputs])

                # Schedule allpairs_master.  `i` is not defined in this
                # branch, so the collect list now names the task's inputs
                # (the two list files) instead of raising NameError when
                # collection is enabled.
                with Options(local=True, collect=inputs if self.collect else None):
                    allpairs_master = parse_function(
                        'allpairs_master -p {0} {{IN}} {{ARG}} > {{OUT}}'.format(self.port))
                    yield allpairs_master(inputs, outputs, function.path, includes + [function.path])
            else:
                inputs  = list(itertools.product(inputs_a, inputs_b))
                outputs = parse_output_list(self.outputs,
                            ['_'.join(
                                [os.path.basename(str(s)) for s in p]) for p in inputs])

                # We use a wrapper script to collect the output of the
                # comparison and put in {INPUT_A} {INPUT_B} {OUTPUT} format, as
                # used by allpairs_master.
                for i, o in zip(inputs, outputs):
                    tmp_output = next(self.nest.stash)

                    with Options(local=self.options.local, collect=[i] if self.collect else None):
                        output = function(i, tmp_output, None, includes)

                    # Wrapper script should run locally and we should always
                    # try to collect the temporary intermediate output file.
                    with Options(local=True, collect=[tmp_output]):
                        yield AllPairsCompareWrapper(output, o, [os.path.basename(str(p)) for p in i], None)
Exemple #7
0
    def _generate(self):
        """Yield one function call for each (input, output) pair.

        Outputs are derived from the parsed inputs with ``parse_output_list``
        and paired with them positionally.  When ``self.collect`` is truthy,
        each call is made with that call's input as its collect list.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            sources = parse_input_list(self.inputs)
            targets = parse_output_list(self.outputs, sources)
            includes = parse_input_list(self.includes)

            for source, target in zip(sources, targets):
                run_local = self.options.local
                if self.collect:
                    to_collect = [source]
                else:
                    to_collect = None
                with Options(local=run_local, collect=to_collect):
                    yield function(source, target, None, includes)
Exemple #8
0
    def _generate(self):
        """Apply the function to every input, yielding each resulting call.

        The output list is built from ``self.outputs`` and the parsed inputs;
        input ``k`` is written to output ``k``.  The ``local`` option is taken
        from ``self.options`` and, if ``self.collect`` is truthy, the single
        input of each call is passed as the collect list.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            inputs = parse_input_list(self.inputs)
            outputs = parse_output_list(self.outputs, inputs)
            includes = parse_input_list(self.includes)

            for item, destination in zip(inputs, outputs):
                option_args = {
                    'local': self.options.local,
                    'collect': [item] if self.collect else None,
                }
                with Options(**option_args):
                    yield function(item, destination, None, includes)
Exemple #9
0
    def _generate(self):
        """Yield one mapper call per group of inputs.

        The parsed inputs are split with ``groups`` using ``self.group``; each
        group is handed to the mapper together with a fresh stash file as its
        output and the parsed includes.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            mapper = parse_function(self.mapper, PythonMapper)
            all_inputs = parse_input_list(self.inputs)
            includes = parse_input_list(self.includes)
            # Read here as in the original; not used by the code below.
            output = self.outputs
            nest = CurrentNest()

            for chunk in groups(all_inputs, self.group):
                stashed = next(nest.stash)
                run_local = self.options.local
                to_collect = chunk if self.collect else None
                with Options(local=run_local, collect=to_collect):
                    yield mapper(chunk, stashed, includes)
Exemple #10
0
    def _generate(self):
        """Run the mapper over the inputs, one group at a time.

        For every group produced by ``groups(inputs, self.group)`` a new
        stash file is taken from the current nest and the mapper is called
        with the group, that stash file and the parsed includes; each call's
        result is yielded.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            mapper = parse_function(self.mapper, PythonMapper)
            inputs = parse_input_list(self.inputs)
            includes = parse_input_list(self.includes)
            # Kept for parity with the original, which reads self.outputs
            # here without using the value afterwards.
            output = self.outputs
            nest = CurrentNest()

            for map_group in groups(inputs, self.group):
                intermediate = next(nest.stash)
                option_args = {
                    'local': self.options.local,
                    'collect': map_group if self.collect else None,
                }
                with Options(**option_args):
                    yield mapper(map_group, intermediate, includes)
Exemple #11
0
    def __init__(self, work_dir=None, dag_path=None, stash=None, barrier=None,
        wrapper=None, track_imports=True, track_exports=True):
        """Set up the nest's work directory, stash and DAG file.

        ``work_dir`` defaults to ``'.'`` and is joined to the parent nest's
        work directory when there is a current nest.  ``stash`` defaults to a
        ``Stash`` rooted at ``<work_dir>/_Stash`` and ``dag_path`` to
        ``<work_dir>/Makeflow``; the DAG file is opened for writing here.
        ``barrier``, when given, is parsed and added to the includes.  A nest
        created inside another nest registers itself with that parent.
        """
        self.work_dir = work_dir if work_dir else '.'
        self.tasks = []
        self.parent = CurrentNest()
        if self.parent:
            self.work_dir = os.path.join(self.parent.work_dir, self.work_dir)

        if stash:
            self.stash = stash
        else:
            self.stash = Stash(root=os.path.join(self.work_dir, '_Stash'))

        if not os.path.exists(self.work_dir):
            make_directory(self.work_dir)

        Makeflow.__init__(
            self,
            wrapper=wrapper,
            track_imports=track_imports,
            track_exports=track_exports,
        )

        if dag_path:
            self.dag_path = dag_path
        else:
            self.dag_path = os.path.join(self.work_dir, 'Makeflow')
        self.dag_file = open(self.dag_path, 'w')
        self.includes.add(self.dag_path)
        # TODO: fix work_dir so it can be translated by makeflow_link

        if barrier:
            self.includes.update(parse_input_list(barrier))

        # Since Abstractions and SubNests are not compiled immediately, these
        # objects must register with their parent Nest, who will compile them
        # in the order that they are registered to ensure proper semantics.
        self.futures = []

        if self.parent:
            message = 'Register child {0} with parent {1}'.format(self, self.parent)
            debug(D_NEST, message)
            self.parent.futures.append((self, True))

        debug(D_NEST, 'Created {0}'.format(self))
Exemple #12
0
    def __init__(self,
                 function,
                 inputs=None,
                 outputs=None,
                 includes=None,
                 native=False,
                 group=None,
                 collect=False,
                 local=False):
        """Record the abstraction's settings and register it with its nest.

        ``outputs`` defaults to ``'{stash}'`` and ``group`` to ``0`` when
        falsy.  When ``collect`` is truthy the parsed inputs are stored as
        the collect list, otherwise ``None``.  The abstraction is appended to
        ``self.nest.futures`` rather than being generated immediately.
        """
        # The id has to exist before Dataset.__init__ runs because of a
        # debugging statement in that function.
        self.id = next(self.Counter)
        self.function = function
        self.inputs = inputs
        self.outputs = outputs if outputs else '{stash}'
        self.includes = includes
        self.native = native
        self.group = group if group else 0
        self.local = local
        Dataset.__init__(self)

        self.collect = parse_input_list(self.inputs) if collect else None
        self.options = Options(local=self.local, collect=self.collect)

        self.nest.futures.append((self, False))
        message = 'Registered Abstraction {0} with {1}'.format(self, self.nest)
        debug(D_ABSTRACTION, message)
Exemple #13
0
    def _generate(self):
        """Yield the calls that combine all inputs into a single output.

        Inputs are processed in rounds: as long as more than ``self.group``
        inputs remain, they are chunked with ``groups`` and every chunk is
        combined into a new stash file, which feeds the next round.  The last
        call combines whatever is left into ``self.outputs``, resolved against
        the current nest's work directory if it is a relative path.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            remaining = parse_input_list(self.inputs)
            includes = parse_input_list(self.includes)
            final_output = self.outputs
            nest = CurrentNest()

            if not os.path.isabs(final_output):
                final_output = os.path.join(nest.work_dir, final_output)

            # NOTE(review): termination relies on each round producing fewer
            # files than it consumed -- verify for small values of self.group.
            while len(remaining) > self.group:
                round_outputs = []
                for batch in groups(remaining, self.group):
                    partial = next(nest.stash)
                    round_outputs.append(partial)
                    option_args = {
                        'local': self.options.local,
                        'collect': batch if self.collect else None,
                    }
                    with Options(**option_args):
                        yield function(batch, partial, None, includes)
                remaining = round_outputs

            option_args = {
                'local': self.options.local,
                'collect': remaining if self.collect else None,
            }
            with Options(**option_args):
                yield function(remaining, final_output, None, includes)
Exemple #14
0
    def __init__(self,
                 work_dir=None,
                 dag_path=None,
                 stash=None,
                 barrier=None,
                 wrapper=None,
                 track_imports=True,
                 track_exports=True):
        """Create a nest rooted at ``work_dir`` and open its DAG file.

        The work directory (default ``'.'``) is nested under the current
        nest's work directory when one exists, and is created if missing.
        Unless supplied, the stash lives in ``_Stash`` and the DAG file in
        ``Makeflow`` inside the work directory.  The DAG path, plus any parsed
        ``barrier`` entries, are added to the includes.  If there is a parent
        nest, this nest is appended to the parent's futures.
        """
        self.work_dir = work_dir if work_dir else '.'
        self.tasks = []
        self.parent = CurrentNest()
        if self.parent:
            self.work_dir = os.path.join(self.parent.work_dir, self.work_dir)

        default_stash_root = os.path.join(self.work_dir, '_Stash')
        self.stash = stash or Stash(root=default_stash_root)

        if not os.path.exists(self.work_dir):
            make_directory(self.work_dir)

        Makeflow.__init__(self, wrapper=wrapper, track_imports=track_imports,
                          track_exports=track_exports)

        if dag_path:
            self.dag_path = dag_path
        else:
            self.dag_path = os.path.join(self.work_dir, 'Makeflow')
        self.dag_file = open(self.dag_path, 'w')
        self.includes.add(self.dag_path)
        # TODO: fix work_dir so it can be translated by makeflow_link

        if barrier:
            barrier_files = parse_input_list(barrier)
            self.includes.update(barrier_files)

        # Abstractions and SubNests are compiled lazily: they register with
        # their parent Nest, which compiles them in registration order to
        # ensure proper semantics.
        self.futures = []

        if self.parent:
            debug(D_NEST, 'Register child {0} with parent {1}'.format(
                self, self.parent))
            self.parent.futures.append((self, True))

        debug(D_NEST, 'Created {0}'.format(self))
Exemple #15
0
    def __init__(self, function, inputs=None, outputs=None, includes=None,
        native=False, group=None, collect=False, local=False):
        """Store the abstraction parameters and queue it on its nest.

        Falsy ``outputs`` become ``'{stash}'`` and falsy ``group`` becomes
        ``0``.  ``self.collect`` holds the parsed inputs when ``collect`` is
        truthy and ``None`` otherwise; ``self.options`` is built from
        ``local`` and that collect list.  Generation is deferred: the object
        only appends itself to ``self.nest.futures`` here.
        """
        # Must set id before we call Dataset.__init__ due to a debugging
        # statement in said function.
        self.id = next(self.Counter)
        self.function = function
        self.inputs = inputs
        if outputs:
            self.outputs = outputs
        else:
            self.outputs = '{stash}'
        self.includes = includes
        self.native = native
        if group:
            self.group = group
        else:
            self.group = 0
        self.local = local
        Dataset.__init__(self)

        self.collect = parse_input_list(self.inputs) if collect else None
        self.options = Options(local=self.local, collect=self.collect)

        self.nest.futures.append((self, False))
        debug(D_ABSTRACTION,
            'Registered Abstraction {0} with {1}'.format(self, self.nest))
Exemple #16
0
    def _generate(self):
        """Yield one function call per iteration over the inputs.

        ``self.inputs`` may be a list of equally sized lists (iteration ``k``
        takes the ``k``-th element of each), a flat list (iteration ``k``
        takes the ``k``-th parsed input) or any other value such as a single
        string (one iteration).  An empty list yields nothing.

        Outputs for each iteration come from ``self.outputs``: the ``k``-th
        element of each inner list for a list of lists; every template
        expanded with ``parse_output_list`` for a list of strings containing
        ``{``; the ``k``-th string expanded for a plain list of strings; or
        the value itself expanded when it is not a list.  Expansion uses the
        longest common substring of the iteration's input basenames.

        Each call receives ``self.arguments`` and the parsed includes.

        Raises:
            IOError: if the inner lists of a matrix input differ in length.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            includes = parse_input_list(self.includes)

            # First format inputs and figure out the number of iterations to perform
            group_size = 0
            inputs = []
            if isinstance(self.inputs, list):
                if not self.inputs:
                    # Empty list: no iterations (and no indexing of [0])
                    pass
                # If inputs is a matrix
                elif isinstance(self.inputs[0], list):
                    # Expected size comes from the first group so that an
                    # empty first group cannot mask a size mismatch.
                    group_size = len(self.inputs[0])
                    for ingroup in self.inputs:
                        inputs.append(parse_input_list(ingroup))
                        if len(ingroup) != group_size:
                            raise IOError(
                                "Iteration group size are different between inputs!"
                            )
                # If inputs is a simple list
                else:
                    group_size = len(self.inputs)
                    inputs = parse_input_list(self.inputs)
            # If inputs is a string
            else:
                group_size = 1
                inputs = parse_input_list(self.inputs)

            for index in range(group_size):

                if isinstance(inputs[0], list):
                    iteration_inputs = [group[index] for group in inputs]
                else:
                    iteration_inputs = [inputs[index]]

                # Common part of the input basenames, used to name the outputs
                input_pattern = self._longestCommonSubstr(
                    [os.path.basename(str(item)) for item in iteration_inputs])

                iteration_outputs = []
                if isinstance(self.outputs, list):
                    # If outputs is a matrix
                    if isinstance(self.outputs[0], list):
                        iteration_outputs = [
                            outgroup[index] for outgroup in self.outputs
                        ]
                    # If outputs is a simple list of motifs (templates)
                    elif isinstance(self.outputs[0],
                                    str) and '{' in self.outputs[0]:
                        for motif in self.outputs:
                            iteration_outputs.extend(
                                parse_output_list(motif, input_pattern))
                    # If outputs is a simple list of strings
                    elif isinstance(self.outputs[0], str):
                        iteration_outputs = parse_output_list(
                            self.outputs[index], input_pattern)
                # If outputs is a string
                else:
                    iteration_outputs = parse_output_list(
                        self.outputs, input_pattern)

                with Options(local=self.options.local):
                    yield function(iteration_inputs, iteration_outputs,
                                   self.arguments, includes)