Exemple #1
0
    def __call__(self, inputs=None, outputs=None, arguments=None,
        includes=None, local=False, environment=None, collect=False):
        """ Schedule one invocation of this function on the current nest.

        :param inputs:      Inputs for the command; when falsy, ``self.inputs``
                            is used if that attribute exists.
        :param outputs:     Outputs for the command; when falsy,
                            ``self.outputs`` is used if that attribute exists.
        :param arguments:   Forwarded unchanged to ``self.command_format``.
        :param includes:    Extra inputs, combined with ``self.includes`` and
                            appended to the scheduled input list.
        :param local:       When truthy, ``options.local`` is set to ``True``.
        :param environment: Optional mapping merged over a copy of
                            ``self.environment``.
        :param collect:     When truthy, the parsed inputs are passed as the
                            ``collect`` option.
        :returns:           The value produced by ``parse_output_list``.
        """
        abstraction = CurrentAbstraction()
        nest        = CurrentNest()

        # Engine Functions define inputs and output member attributes.  Each
        # fallback is looked up on its own so that a missing ``inputs``
        # attribute cannot silently skip the ``outputs`` fallback.
        if not inputs:
            inputs = getattr(self, 'inputs', inputs)
        if not outputs:
            outputs = getattr(self, 'outputs', outputs)

        inputs   = parse_input_list(inputs)
        outputs  = parse_output_list(outputs, inputs)
        includes = parse_input_list(includes) + parse_input_list(self.includes)
        command  = self.command_format(inputs, outputs, arguments)
        # Copy the function-level environment so the per-call update below
        # does not mutate ``self.environment``.
        options  = Options(environment=dict(self.environment), collect=inputs if collect else None)

        if local:
            options.local = True

        if environment:
            options.environment.update(environment)

        if nest.batch:
            options.batch = nest.batch

        nest.schedule(abstraction, self, command,
            list(inputs) + list(includes), outputs, options, nest.symbol)

        return outputs
Exemple #2
0
    def _generate(self):
        """ Yield the calls that reduce ``self.inputs`` into ``self.outputs``.

        While more than ``self.group`` inputs remain, every group returned by
        ``groups`` is passed to the function together with a fresh stash file
        as its output, and those stash files become the inputs of the next
        round.  The remaining inputs are finally passed to the function with
        the real output path.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            pending = parse_input_list(self.inputs)
            includes = parse_input_list(self.includes)
            target = self.outputs
            nest = CurrentNest()

            # Relative output paths are anchored at the nest's work directory.
            if not os.path.isabs(target):
                target = os.path.join(nest.work_dir, target)

            # Intermediate rounds: each group collapses into one stash file.
            while len(pending) > self.group:
                stashed = []
                for chunk in groups(pending, self.group):
                    intermediate = next(nest.stash)
                    stashed.append(intermediate)
                    with Options(
                        local=self.options.local,
                        collect=chunk if self.collect else None,
                    ):
                        yield function(chunk, intermediate, None, includes)
                pending = stashed

            # Final round: whatever is left is written to the real output.
            with Options(
                local=self.options.local,
                collect=pending if self.collect else None,
            ):
                yield function(pending, target, None, includes)
Exemple #3
0
    def __call__(self,
                 inputs=None,
                 outputs=None,
                 arguments=None,
                 includes=None,
                 local=False,
                 environment=None,
                 collect=False):
        """ Schedule one invocation of this function on the current nest.

        :param inputs:      Inputs for the command; when falsy, ``self.inputs``
                            is used if that attribute exists.
        :param outputs:     Outputs for the command; when falsy,
                            ``self.outputs`` is used if that attribute exists.
        :param arguments:   Forwarded unchanged to ``self.command_format``.
        :param includes:    Extra inputs, combined with ``self.includes`` and
                            appended to the scheduled input list.
        :param local:       When truthy, ``options.local`` is set to ``True``.
        :param environment: Optional mapping merged over a copy of
                            ``self.environment``.
        :param collect:     When truthy, the parsed inputs are passed as the
                            ``collect`` option.
        :returns:           The value produced by ``parse_output_list``.
        """
        abstraction = CurrentAbstraction()
        nest = CurrentNest()

        # Engine Functions define inputs and output member attributes.  Each
        # fallback is looked up on its own so that a missing ``inputs``
        # attribute cannot silently skip the ``outputs`` fallback.
        if not inputs:
            inputs = getattr(self, 'inputs', inputs)
        if not outputs:
            outputs = getattr(self, 'outputs', outputs)

        inputs = parse_input_list(inputs)
        outputs = parse_output_list(outputs, inputs)
        includes = parse_input_list(includes) + parse_input_list(self.includes)
        command = self.command_format(inputs, outputs, arguments)
        # Copy the function-level environment so the per-call update below
        # does not mutate ``self.environment``.
        options = Options(environment=dict(self.environment),
                          collect=inputs if collect else None)

        if local:
            options.local = True

        if environment:
            options.environment.update(environment)

        nest.schedule(abstraction, self, command,
                      list(inputs) + list(includes), outputs, options)

        return outputs
Exemple #4
0
    def _generate(self):
        """ Yield the calls that compare every item of A with every item of B.

        When ``self.native`` is set, the two input lists are written to stash
        files and a single ``allpairs_master`` call is yielded.  Otherwise one
        function call plus one ``AllPairsCompareWrapper`` call is generated for
        each element of the cartesian product of the two input lists.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            inputs_a = parse_input_list(self.inputs_a)
            inputs_b = parse_input_list(self.inputs_b)
            includes = parse_input_list(self.includes)

            # If native is enabled, then use allpairs_master, otherwise
            # generate tasks as part of the DAG.
            #
            # Note: parse_output_list flattens inputs, so we need to manually
            # translate pairs into a single string.
            if self.native:
                # Store inputs A and B lists as required by allpairs_master
                inputs_a_file = next(self.nest.stash)
                with open(inputs_a_file, 'w') as fs:
                    fs.writelines(str(input_file) + '\n' for input_file in inputs_a)

                inputs_b_file = next(self.nest.stash)
                with open(inputs_b_file, 'w') as fs:
                    fs.writelines(str(input_file) + '\n' for input_file in inputs_b)

                inputs  = [inputs_a_file, inputs_b_file]
                # NOTE(review): here each `p` is a single stash file rather
                # than a pair, so the inner loop iterates over that object
                # itself -- confirm the resulting output names are intended.
                outputs = parse_output_list(self.outputs,
                            ['_'.join(
                                [os.path.basename(str(s)) for s in p]) for p in inputs])

                # Schedule allpairs_master.  The generated list files are the
                # inputs of this task, so they are what gets collected; the
                # previous `[i]` referenced a name that is never bound in this
                # branch and raised NameError whenever collect was enabled.
                with Options(local=True, collect=inputs if self.collect else None):
                    allpairs_master = parse_function(
                        'allpairs_master -p {0} {{IN}} {{ARG}} > {{OUT}}'.format(self.port))
                    yield allpairs_master(inputs, outputs, function.path, includes + [function.path])
            else:
                inputs  = list(itertools.product(inputs_a, inputs_b))
                outputs = parse_output_list(self.outputs,
                            ['_'.join(
                                [os.path.basename(str(s)) for s in p]) for p in inputs])

                # We use a wrapper script to collect the output of the
                # comparison and put in {INPUT_A} {INPUT_B} {OUTPUT} format, as
                # used by allpairs_master.
                for i, o in zip(inputs, outputs):
                    tmp_output = next(self.nest.stash)

                    with Options(local=self.options.local, collect=[i] if self.collect else None):
                        output = function(i, tmp_output, None, includes)

                    # Wrapper script should run locally and we should always
                    # try to collect the temporary intermediate output file.
                    with Options(local=True, collect=[tmp_output]):
                        yield AllPairsCompareWrapper(output, o, [os.path.basename(str(p)) for p in i], None)
Exemple #5
0
def CurrentOptions():
    """ Return current Weaver Options.

    Each of ``cpu``, ``memory``, ``disk``, ``batch`` and ``local`` is taken
    from the current script's options when that value is truthy, and from the
    top of ``WeaverOptions`` (or a default ``Options()``) otherwise.

    .. note::
        Script-level options will override local options.
    """
    from weaver.options import Options
    top = WeaverOptions.top() or Options()
    # Look the script options up once instead of once per field.
    script = CurrentScript().options
    return Options(cpu=script.cpu or top.cpu,
                   memory=script.memory or top.memory,
                   disk=script.disk or top.disk,
                   batch=script.batch or top.batch,
                   local=script.local or top.local)
Exemple #6
0
    def __init__(self, function=None, force=False, import_builtins=True, output_directory=None,
                 execute_dag=False, engine_wrapper=None, engine_arguments=None, args=None):
        """ Initialize a script from a function instead of a command line.

        :param function:         Stored as ``self.function``.
        :param force:            Ignore warnings.
        :param import_builtins:  Load built-ins.
        :param output_directory: Where to create artifacts; defaults to
                                 ``os.curdir`` when ``None``.
        :param execute_dag:      Stored as ``self.execute_dag``.
        :param engine_wrapper:   Stored as ``self.engine_wrapper``.
        :param engine_arguments: Stored as ``self.engine_arguments``.
        :param args:             Stored as ``self.arguments``; a fresh empty
                                 list is used when omitted.
        """
        self.function = function
        # A `None` default avoids sharing one list object between every
        # instance created without explicit arguments.
        self.arguments = [] if args is None else args
        self.force = force # Ignore warnings
        # Honour the caller's choice; this used to be hard-coded to True.
        self.import_builtins = import_builtins # Load built-ins
        if output_directory is None:
            self.output_directory = os.curdir # Where to create artifacts
        else:
            self.output_directory = output_directory
        self.start_time = time.time() # Record beginning of compiling
        self.options = Options()
        self.nested_abstractions = False
        self.inline_tasks = 1
        self.execute_dag         = execute_dag
        self.globals             = {}
        self.engine_wrapper      = engine_wrapper
        self.engine_arguments    = engine_arguments
        self.include_symbols     = False

        debug(D_SCRIPT, 'force               = {0}'.format(self.force))
        debug(D_SCRIPT, 'import_builtins     = {0}'.format(self.import_builtins))
        debug(D_SCRIPT, 'output_directory    = {0}'.format(self.output_directory))
        debug(D_SCRIPT, 'start_time          = {0}'.format(self.start_time))
        debug(D_SCRIPT, 'options             = {0}'.format(self.options))
        debug(D_SCRIPT, 'nested_abstractions = {0}'.format(self.nested_abstractions))
        debug(D_SCRIPT, 'inline_tasks        = {0}'.format(self.inline_tasks))
        debug(D_SCRIPT, 'execute_dag         = {0}'.format(self.execute_dag))
        debug(D_SCRIPT, 'engine_wrapper      = {0}'.format(self.engine_wrapper))
        debug(D_SCRIPT, 'engine_arguments    = {0}'.format(self.engine_arguments))
Exemple #7
0
    def __init__(self,
                 function,
                 inputs=None,
                 outputs=None,
                 includes=None,
                 native=False,
                 group=None,
                 collect=False,
                 local=False):
        """ Record the abstraction's configuration and register it.

        ``outputs`` falls back to ``'{stash}'`` and ``group`` to ``0`` when
        falsy.  When ``collect`` is truthy the parsed inputs are stored as
        ``self.collect``; otherwise ``self.collect`` is ``None``.  The new
        instance is appended to ``self.nest.futures`` paired with ``False``.
        """
        # The id has to exist before Dataset.__init__ runs because that
        # initializer contains a debugging statement.
        self.id = next(self.Counter)
        self.function = function
        self.inputs = inputs
        self.outputs = outputs if outputs else '{stash}'
        self.includes = includes
        self.native = native
        self.group = group if group else 0
        self.local = local
        Dataset.__init__(self)

        self.collect = parse_input_list(self.inputs) if collect else None
        self.options = Options(local=self.local, collect=self.collect)

        self.nest.futures.append((self, False))
        message = 'Registered Abstraction {0} with {1}'.format(self, self.nest)
        debug(D_ABSTRACTION, message)
Exemple #8
0
    def __init__(self, args):
        """ Build a script description from command line arguments.

        Arguments starting with ``-`` are dispatched through
        ``SCRIPT_OPTIONS_TABLE``.  The first other argument becomes
        ``self.path`` and everything after it becomes ``self.arguments``.
        ``show_usage`` is called when no path was given.
        """
        self.path = None
        self.force = False  # Ignore warnings
        self.import_builtins = True  # Load built-ins
        self.output_directory = os.curdir  # Where to create artifacts
        self.start_time = time.time()  # Record beginning of compiling
        self.options = Options()
        self.nested_abstractions = False
        self.inline_tasks = 1
        self.execute_dag = False
        self.globals = {}
        self.engine_wrapper = None
        self.engine_arguments = None
        self.include_symbols = False
        self.normalize_paths = True

        pending = collections.deque(args)
        while pending:
            token = pending.popleft()
            try:
                if not token.startswith('-'):
                    # First positional argument: the script itself.  What
                    # remains belongs to the script, so parsing stops here.
                    self.path = token
                    self.arguments = list(pending)
                    pending.clear()
                else:
                    # Handlers receive the deque so they can consume values.
                    self.SCRIPT_OPTIONS_TABLE[token](self, pending)
            except (IndexError, KeyError):
                fatal(D_SCRIPT, 'invalid command line option: {0}'.format(token))

        if self.normalize_paths:
            self.output_directory = os.path.abspath(self.output_directory)

        # Dump the final configuration, one debug line per setting.
        settings = (
            ('path                = {0}', self.path),
            ('force               = {0}', self.force),
            ('import_builtins     = {0}', self.import_builtins),
            ('output_directory    = {0}', self.output_directory),
            ('start_time          = {0}', self.start_time),
            ('options             = {0}', self.options),
            ('nested_abstractions = {0}', self.nested_abstractions),
            ('inline_tasks        = {0}', self.inline_tasks),
            ('execute_dag         = {0}', self.execute_dag),
            ('engine_wrapper      = {0}', self.engine_wrapper),
            ('engine_arguments    = {0}', self.engine_arguments),
            ('normalize_paths     = {0}', self.normalize_paths),
        )
        for template, value in settings:
            debug(D_SCRIPT, template.format(value))

        if self.path is None:
            self.show_usage()
Exemple #9
0
    def _generate(self):
        """ Yield one function call per (input, output) pair.

        Outputs are derived from ``self.outputs`` and the parsed inputs by
        ``parse_output_list``; inputs and outputs are then walked in lockstep.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            sources = parse_input_list(self.inputs)
            targets = parse_output_list(self.outputs, sources)
            includes = parse_input_list(self.includes)

            for source, target in zip(sources, targets):
                # Each call's own input is the only collection candidate.
                with Options(
                    local=self.options.local,
                    collect=[source] if self.collect else None,
                ):
                    yield function(source, target, None, includes)
Exemple #10
0
    def _generate(self):
        """ Yield one mapper call per group of inputs.

        Every group returned by ``groups(inputs, self.group)`` is handed to
        the mapper together with a fresh stash file as its output.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            mapper = parse_function(self.mapper, PythonMapper)
            sources = parse_input_list(self.inputs)
            includes = parse_input_list(self.includes)
            # Read here but not referenced again in this method as shown.
            output = self.outputs
            nest = CurrentNest()

            for chunk in groups(sources, self.group):
                stashed = next(nest.stash)
                with Options(
                    local=self.options.local,
                    collect=chunk if self.collect else None,
                ):
                    yield mapper(chunk, stashed, includes)
Exemple #11
0
    def _generate(self):
        """ Yield one function call per iteration over the inputs.

        ``self.inputs`` may be a list of lists (iteration ``n`` takes the
        ``n``-th element of every inner list), a flat list (one element per
        iteration) or any other value (a single iteration).  For each
        iteration the outputs come from ``self.outputs``, which may be a list
        of lists, a list of templates containing ``{``, a list of plain
        strings, or a single value passed to ``parse_output_list``.

        :raises IOError: if the inner input lists have different lengths.
        """
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            includes = parse_input_list(self.includes)

            # First format inputs and figure out the number of iterations to
            # perform.
            group_size = 0
            inputs = []
            if isinstance(self.inputs, list):
                # If inputs is a matrix.  The emptiness check keeps an empty
                # list from raising IndexError; it simply yields nothing.
                if self.inputs and isinstance(self.inputs[0], list):
                    for ingroup in self.inputs:
                        inputs.append(parse_input_list(ingroup))
                        if group_size == 0:
                            group_size = len(ingroup)
                        if len(ingroup) != group_size:
                            raise IOError(
                                "Iteration group size are different between inputs!"
                            )
                # If inputs is a simple list
                else:
                    group_size = len(self.inputs)
                    inputs = parse_input_list(self.inputs)
            # If inputs is not a list (e.g. a single string)
            else:
                group_size = 1
                inputs = parse_input_list(self.inputs)

            for index in range(group_size):

                # Pick this iteration's element from every input group, or
                # the single element when inputs is flat.
                if isinstance(inputs[0], list):
                    iteration_inputs = [ingroup[index] for ingroup in inputs]
                else:
                    iteration_inputs = [inputs[index]]

                # The common part of the input base names is what output
                # templates are expanded against.
                input_pattern = self._longestCommonSubstr(
                    [os.path.basename(str(item)) for item in iteration_inputs])

                iteration_outputs = []
                if isinstance(self.outputs, list):
                    # An empty outputs list leaves iteration_outputs empty
                    # instead of raising IndexError.
                    first_output = self.outputs[0] if self.outputs else None
                    # If outputs is a matrix
                    if isinstance(first_output, list):
                        iteration_outputs = [
                            outgroup[index] for outgroup in self.outputs
                        ]
                    elif isinstance(first_output, str):
                        # If outputs is a list of templates (motif table)
                        if '{' in first_output:
                            for motif in self.outputs:
                                iteration_outputs.extend(
                                    parse_output_list(motif, input_pattern))
                        # If outputs is a list of plain strings
                        else:
                            iteration_outputs = parse_output_list(
                                self.outputs[index], input_pattern)
                # If outputs is not a list (e.g. a single string)
                else:
                    iteration_outputs = parse_output_list(
                        self.outputs, input_pattern)

                with Options(local=self.options.local):
                    yield function(iteration_inputs, iteration_outputs,
                                   self.arguments, includes)