Example #1
    def _query(self, filters, **parameters):
        debug(D_DATASET, 'Querying Dataset: {0}'.format(self.cache_path))
        try:
            limit = parameters['limit']
        except KeyError:
            limit = None

        # For each item in the Dataset, apply each filter; if all filters
        # succeed, then yield item.
        count = 0
        for o in iter(self):
            do_yield = True

            for f in filters:
                if not f(o):
                    do_yield = False
                    break

            if do_yield:
                count += 1
                yield o

            # Break out if we reach limit.
            if limit is not None and count == limit:
                break
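The filter-and-limit pattern above is easy to exercise in isolation. A minimal standalone sketch, with plain predicates standing in for the Dataset filters (the names here are illustrative, not part of Weaver):

    def query(items, filters, limit=None):
        # Yield items that satisfy every filter, stopping once limit is reached.
        count = 0
        for item in items:
            if all(f(item) for f in filters):
                count += 1
                yield item
            if limit is not None and count == limit:
                break

    # Keep even numbers greater than 2; stop after two matches.
    matches = list(query(range(10), [lambda x: x % 2 == 0, lambda x: x > 2], limit=2))
    assert matches == [4, 6]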
Example #2
    def compile(self):
        """ Compile Abstractions to generate tasks and output file lists. """
        # Compile Abstractions and SubNests to ensure they have generated
        # tasks.
        debug(D_NEST, 'Compiling Abstractions and SubNests for {0}'.format(self))
        for future, is_nest in self.futures:
            if is_nest:
                with future:
                    future.compile()
                future()
            else:
                future.compile()

        # Perform optimizations.
        debug(D_NEST, 'Optimizing tasks for {0}'.format(self))
        self._optimize_nested_abstractions()
        self._optimize_inline_tasks()

        # Emit stored tasks to workflow DAG using engine.
        debug(D_NEST, 'Emitting tasks for {0}'.format(self))
        for task in self.tasks:
            self.emit_task(*task)

        # Emit variables and exports
        debug(D_NEST, 'Emitting variables for {0}'.format(self))
        self.emit_variables()

        debug(D_NEST, 'Emitting exports for {0}'.format(self))
        self.emit_exports()
Example #3
    def execute(self, arguments=None, exit_on_failure=False):
        """ Execute DAG using Makeflow. """
        if self.dag_file is None:
            raise WeaverError(D_ENGINE, 'Cannot execute an empty DAG')

        # Ensure that DAG is written to disk.
        self.dag_file.flush()

        # Execute emitted DAG from the current Nest path.
        try:
            command_list = [self.path, os.path.relpath(self.dag_path, self.work_dir)]
            if self.wrapper:
                command_list.insert(0, self.wrapper)
            if arguments:
                command_list.extend(arguments.split())
            debug(D_ENGINE, 'Executing DAG {0} using {1} in {2}'.format(
                self.dag_path, self.path, self.work_dir))
            subprocess.check_call(command_list, cwd=self.work_dir)
        except subprocess.CalledProcessError as e:
            if exit_on_failure:
                log_func = fatal
            else:
                log_func = warn

            log_func(D_ENGINE, 'Failed to execute DAG {0} using {1}:\n{2}'.format(
                self.dag_path, self.path, e))
Example #4
    def __init__(self,
                 function,
                 inputs=None,
                 outputs=None,
                 includes=None,
                 native=False,
                 group=None,
                 collect=False,
                 local=False):
        # Must set id before we call Dataset.__init__ due to debugging
        # statement in said function.
        self.id = next(self.Counter)
        self.function = function
        self.inputs = inputs
        self.outputs = outputs or '{stash}'
        self.includes = includes
        self.native = native
        self.group = group or 0
        self.local = local
        Dataset.__init__(self)

        if collect:
            self.collect = parse_input_list(self.inputs)
        else:
            self.collect = None
        self.options = Options(local=self.local, collect=self.collect)

        self.nest.futures.append((self, False))
        debug(D_ABSTRACTION,
              'Registered Abstraction {0} with {1}'.format(self, self.nest))
Example #5
    def _optimize_inline_tasks(self):
        """ Group tasks by abstraction and function and then break them into
        sub-groups and schedule the sub-groups as sub DAGs.
        """
        if CurrentScript().inline_tasks <= 1:
            return

        debug(D_NEST, 'Inlining tasks for {0}'.format(self))

        # Group tasks into bins based on Function.
        task_dict = collections.defaultdict(list)
        for task in self.tasks:
            abstraction = task[0]
            function    = task[1]
            task_dict[(abstraction, function)].append(task)

        # For each set of tasks, split the set into small sub-groups; for each
        # sub-group, create a new InlineNest and schedule the tasks there.
        self.tasks = []
        for (abstraction, function), tasks in list(task_dict.items()):
            inline_tasks = max(CurrentScript().inline_tasks, abstraction.group)
            if inline_tasks < len(tasks):
                for group in groups(tasks, inline_tasks):
                    with InlineNest() as inline_nest:
                        for task in group:
                            inline_nest.schedule(*task)
                        inline_nest.compile()
                    with abstraction.options:
                        inline_nest()
            else:
                for task in tasks:
                    self.tasks.append(task)
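The pass leans on a groups() helper that is not shown in this snippet. Assuming it yields successive chunks of at most n items, a plain-Python equivalent would be:

    def groups(seq, n):
        # Yield successive chunks of at most n items (assumed semantics).
        seq = list(seq)
        for i in range(0, len(seq), n):
            yield seq[i:i + n]

    assert list(groups(range(7), 3)) == [[0, 1, 2], [3, 4, 5], [6]]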
Example #6
    def compile(self):
        """ Compile script in the specified working directory. """
        # Determine the working directory for compilation.
        work_dir = self.output_directory

        # Add nest path and path to script to Python module path to allow
        # for importing modules outside of $PYTHONPATH
        sys.path.insert(0, os.path.abspath(os.path.dirname(work_dir)))

        # Load built-ins if specified on command line.  If built-ins are
        # not automatically loaded by the Script object, then the user must
        # load them manually in their Weaver scripts using the standard
        # Python import facilities.
        if self.import_builtins:
            self._import('abstraction', ABSTRACTIONS)
            self._import('dataset', DATASETS)
            self._import('function', FUNCTIONS)
            self._import('nest', NESTS)
            self._import('options', OPTIONS)
            self._import('stack', STACKS)

        # Execute nest
        with Nest(work_dir, wrapper=self.engine_wrapper) as nest:
            with self.options:
                try:
                    self.function(*self.arguments)
                    nest.compile()
                except Exception as e:
                    fatal(D_SCRIPT, 'Error compiling script: {0}'.format(e), print_traceback=True)

                if self.execute_dag:
                    debug(D_SCRIPT, 'Executing generated DAG {0} with {1}'.format(
                        nest.dag_path, nest.path))
                    nest.execute(self.engine_arguments, exit_on_failure=True)
Example #7
    def _optimize_inline_tasks(self):
        """ Group tasks by abstraction and function and then break them into
        sub-groups and schedule the sub-groups as sub DAGs.
        """
        if CurrentScript().inline_tasks <= 1:
            return

        debug(D_NEST, 'Inlining tasks for {0}'.format(self))

        # Group tasks into bins based on Function.
        task_dict = collections.defaultdict(list)
        for task in self.tasks:
            abstraction = task[0]
            function    = task[1]
            task_dict[(abstraction, function)].append(task)

        # For each set of tasks, split the set into small sub-groups; for each
        # sub-group, create a new InlineNest and schedule the tasks there.
        self.tasks = []
        for (abstraction, function), tasks in task_dict.items():
            inline_tasks = max(CurrentScript().inline_tasks, abstraction.group)
            if inline_tasks < len(tasks):
                for group in groups(tasks, inline_tasks):
                    with InlineNest() as inline_nest:
                        for task in group:
                            inline_nest.schedule(*task)
                        inline_nest.compile()
                    with abstraction.options:
                        inline_nest()
            else:
                for task in tasks:
                    self.tasks.append(task)
Example #8
    def compile(self):
        """ Compile Abstraction to produce scheduled tasks. """
        self.nest.symbol = self.symbol
        self.nest.batch = self.batch
        debug(D_ABSTRACTION, 'Compiling Abstraction {0}'.format(self))
        for _ in self:
            pass
Example #9
    def __init__(self, work_dir=None, dag_path=None, stash=None, barrier=None,
        wrapper=None, track_imports=True, track_exports=True):
        self.work_dir = work_dir or '.'
        self.tasks    = []
        self.parent   = CurrentNest()
        if self.parent:
            self.work_dir = os.path.join(self.parent.work_dir, self.work_dir)
        self.stash    = stash or Stash(root=os.path.join(self.work_dir, '_Stash'))

        if not os.path.exists(self.work_dir):
            make_directory(self.work_dir)

        Makeflow.__init__(self, wrapper=wrapper,
            track_imports=track_imports, track_exports=track_exports)

        self.dag_path = dag_path or os.path.join(self.work_dir, 'Makeflow')
        self.dag_file = open(self.dag_path, 'w')
        self.includes.add(self.dag_path)
        # TODO: fix work_dir so it can be translated by makeflow_link

        if barrier:
            self.includes.update(parse_input_list(barrier))

        # Since Abstractions and SubNests are not compiled immediately, these
        # objects must register with their parent Nest, which will compile them in
        # the order that they are registered to ensure proper semantics.
        self.futures  = []

        if self.parent:
            debug(D_NEST, 'Register child {0} with parent {1}'.format(
                self, self.parent))
            self.parent.futures.append((self, True))

        debug(D_NEST, 'Created {0}'.format(self))
Example #10
    def __init__(self, dataset, *filters, **parameters):
        Dataset.__init__(self, cursor=dataset.c)
        self._dataset    = dataset
        self._filters    = filters
        self._parameters = parameters

        debug(D_DATASET, 'Created Query: {0}'.format(self.cache_path))
Example #11
    def _generate(self):
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            inputs = parse_input_list(self.inputs)
            includes = parse_input_list(self.includes)
            output = self.outputs
            nest = CurrentNest()

            if not os.path.isabs(output):
                output = os.path.join(nest.work_dir, output)

            while len(inputs) > self.group:
                next_inputs = []
                for group in groups(inputs, self.group):
                    output_file = next(nest.stash)
                    next_inputs.append(output_file)
                    with Options(local=self.options.local,
                                 collect=group if self.collect else None):
                        yield function(group, output_file, None, includes)
                inputs = next_inputs

            with Options(local=self.options.local,
                         collect=inputs if self.collect else None):
                yield function(inputs, output, None, includes)
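This Merge-style abstraction repeatedly folds groups of at most `group` inputs into stash files until one final merge remains. A rough sketch of how many rounds of tasks the loop yields, using a hypothetical helper that is not part of Weaver and assuming groups() chunks by size:

    def reduction_rounds(n_inputs, group):
        # Count the rounds of grouped merges, plus the final merge.
        rounds = 0
        while n_inputs > group:
            n_inputs = -(-n_inputs // group)  # ceil division: one output per sub-group
            rounds += 1
        return rounds + 1

    assert reduction_rounds(100, 10) == 2  # one round of ten partial merges, then the final merge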
Example #12
    def execute(self, arguments=None, exit_on_failure=False):
        """ Execute DAG using Makeflow. """
        if self.dag_file is None:
            raise WeaverError(D_ENGINE, 'Cannot execute an empty DAG')

        # Ensure that DAG is written to disk.
        self.dag_file.flush()

        # Execute emitted DAG from the current Nest path.
        try:
            command_list = [
                self.path,
                os.path.relpath(self.dag_path, self.work_dir)
            ]
            if self.wrapper:
                command_list.insert(0, self.wrapper)
            if arguments:
                command_list.extend(arguments.split())
            debug(
                D_ENGINE, 'Executing DAG {0} using {1} in {2}'.format(
                    self.dag_path, self.path, self.work_dir))
            subprocess.check_call(command_list, cwd=self.work_dir)
        except subprocess.CalledProcessError as e:
            if exit_on_failure:
                log_func = fatal
            else:
                log_func = warn

            log_func(
                D_ENGINE, 'Failed to execute DAG {0} using {1}:\n{2}'.format(
                    self.dag_path, self.path, e))
Example #13
def run_concurrent(func_name, tasks, *func_args):
    debug(D_USER, 'Generating Concurrent Pattern with Function {0}'.format(func_name))

    tasks     = int(tasks)
    arguments = map(int, func_args)
    function  = make_function(func_name, *arguments)

    Iterate(function, tasks, '{NUMBER}.output')
Example #14
    def connect(self):
        debug(D_DATASET, 'Connecting to {0}'.format(self))

        self.db_conn = MySQLConnect(host=self.db_host,
                                    db=self.db_name,
                                    user=self.db_user,
                                    passwd=self.db_pass,
                                    cursorclass=MySQLSSDictCursor)
Example #15
def parse_output_list(output_list=None, input_list=None):
    """ Return an :func:`~weaver.util.iterable` object of output files.

    If `output_list` is ``None``, then return ``[]``.  If `output_list` is a
    string template, then use it to generate a list of :class:`File`
    objects.  If `output_list` is already an :func:`~weaver.util.iterable`,
    then map :class:`File` to `output_list` and return it.

    This means that `output_list` must be one of the following:

    1. ``None`` to leave it to the caller to generate an output file object.
    2. A string object to be used as a template.
    3. An :func:`~weaver.util.iterable` object (ex. list, iterator, etc.).

    If `output_list` is a string template, then it may have the following
    fields:

    - `{fullpath}`, `{FULL}`         -- Full input file path.
    - `{basename}`, `{BASE}`         -- Base input file name.
    - `{fullpath_woext}`, `{FULL_WOEXT}` -- Full input file path without extension.
    - `{basename_woext}`, `{BASE_WOEXT}` -- Base input file name without extension.

    """
    debug(D_DATA, 'Parsing output list')
    if output_list is None:
        return []

    if isinstance(output_list, str) or isinstance(output_list, File):
        # If input list is empty or output list is not a format string, then
        # return list of single output file.
        # TODO: support single {stash}
        if not input_list or '{' not in str(output_list):
            return [MakeFile(output_list)]

        nest = CurrentNest()
        return [
            MakeFile(
                str(output_list).format(
                    fullpath=input,
                    FULL=input,
                    i='{0:05X}'.format(i),
                    NUMBER='{0:05X}'.format(i),
                    stash=next(nest.stash) if '{stash}' in output_list else '',
                    fullpath_woext=os.path.splitext(input)[0],
                    FULL_WOEXT=os.path.splitext(input)[0],
                    basename=os.path.basename(input),
                    BASE=os.path.basename(input),
                    basename_woext=os.path.splitext(
                        os.path.basename(input))[0],
                    BASE_WOEXT=os.path.splitext(os.path.basename(input))[0]))
            for i, input in enumerate(parse_string_list(input_list))
        ]

    if iterable(output_list):
        return [MakeFile(o) for o in parse_object_list(output_list)]

    raise WeaverError(
        D_DATA, 'Could not parse output argument: {0}'.format(output_list))
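Since each output name is produced by a single str.format call, the template fields can be checked directly; a small sketch using only the `{BASE_WOEXT}` field, with made-up input paths:

    import os

    template = '{BASE_WOEXT}.output'
    inputs = ['/data/sample.txt', '/data/run.log']
    outputs = [
        template.format(BASE_WOEXT=os.path.splitext(os.path.basename(p))[0])
        for p in inputs
    ]
    assert outputs == ['sample.output', 'run.output']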
Example #16
    def __init__(self, executable, cmd_format=None, find_dirs=None,
        environment=None):
        self.cmd_format  = cmd_format or Function.CMD_FORMAT
        self.path        = find_executable(executable, find_dirs)
        self.environment = environment or dict()
        self.includes    = set([self.path])

        debug(D_FUNCTION, 'Created Function {0}({1}, {2})'.format(
            type_str(self), self.path, self.cmd_format))
Example #17
    def connect(self):
        debug(D_DATASET, 'Connecting to {0}'.format(self))

        self.db_conn = MySQLConnect(
            host        = self.db_host,
            db          = self.db_name,
            user        = self.db_user,
            passwd      = self.db_pass,
            cursorclass = MySQLSSDictCursor)
Example #18
    def _generate(self):
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            includes = parse_input_list(self.includes)
            
            # First format inputs and figure out the number of iterations to perform
            group_size = 0
            inputs = []
            if isinstance(self.inputs, list):
                # If inputs is a matrix
                if isinstance(self.inputs[0], list):
                    for i, ingroup in enumerate(self.inputs):
                        inputs.append(parse_input_list(ingroup))
                        if group_size == 0:
                            group_size = len(ingroup)
                        if len(ingroup) != group_size:
                            raise IOError("Iteration group sizes differ between inputs!")
                # If inputs is a simple list
                else:
                    group_size = len(self.inputs)
                    inputs = parse_input_list(self.inputs)
            # If inputs is a string
            else:
                group_size = 1
                inputs = parse_input_list(self.inputs)

            for index in range(group_size):
                iteration_inputs = []
                if isinstance(inputs[0], list):
                    for i, input in enumerate(inputs):
                        iteration_inputs.append(input[index])
                else:
                    iteration_inputs.append(inputs[index])

                input_pattern = self._longestCommonSubstr(
                    list(map(os.path.basename, list(map(str, iteration_inputs)))))

                iteration_outputs = []
                if isinstance(self.outputs, list):
                    # If outputs is a matrix
                    if isinstance(self.outputs[0], list):
                        for i, outgroup in enumerate(self.outputs):
                            iteration_outputs.append(outgroup[index])
                    # If outputs is a simple list of motif strings
                    elif isinstance(self.outputs[0], str) and '{' in self.outputs[0]:
                        for motif in self.outputs:
                            iteration_outputs.extend(parse_output_list(motif, input_pattern))
                    # If outputs is a simple list of strings
                    elif isinstance(self.outputs[0], str):
                        iteration_outputs = parse_output_list(self.outputs[index], input_pattern)
                # If outputs is a string
                else:
                    iteration_outputs = parse_output_list(self.outputs, input_pattern)

                with Options(local=self.options.local):
                    yield function(iteration_inputs, iteration_outputs, None, includes)
Example #19
def parse_output_list(output_list=None, input_list=None):
    """ Return an :func:`~weaver.util.iterable` object of output files.

    If `output_list` is ``None``, then return ``[]``.  If `output_list` is a
    string template, then use it to generate a list of :class:`File`
    objects.  If `output_list` is already an :func:`~weaver.util.iterable`,
    then map :class:`File` to `output_list` and return it.

    This means that `output_list` must be one of the following:

    1. ``None`` to leave it to the caller to generate an output file object.
    2. A string object to be used as a template.
    3. An :func:`~weaver.util.iterable` object (ex. list, iterator, etc.).

    If `output_list` is a string template, then it may have the following
    fields:

    - `{fullpath}`, `{FULL}`         -- Full input file path.
    - `{basename}`, `{BASE}`         -- Base input file name.
    - `{fullpath_woext}`, `{FULL_WOEXT}` -- Full input file path without extension.
    - `{basename_woext}`, `{BASE_WOEXT}` -- Base input file name without extension.

    """
    debug(D_DATA, 'Parsing output list')
    if output_list is None:
        return []

    if isinstance(output_list, str) or isinstance(output_list, File):
        # If input list is empty or output list is not a format string, then
        # return list of single output file.
        # TODO: support single {stash}
        if not input_list or '{' not in str(output_list):
            return [MakeFile(output_list)]

        nest = CurrentNest()
        return [MakeFile(str(output_list).format(
                    fullpath       = input,
                    FULL           = input,
                    i              = '{0:05X}'.format(i),
                    NUMBER         = '{0:05X}'.format(i),
                    stash          = next(nest.stash) if '{stash}' in output_list else '',
                    fullpath_woext = os.path.splitext(input)[0],
                    FULL_WOEXT     = os.path.splitext(input)[0],
                    basename       = os.path.basename(input),
                    BASE           = os.path.basename(input),
                    basename_woext = os.path.splitext(os.path.basename(input))[0] if os.path.splitext(os.path.basename(input))[1] != ".gz" else os.path.splitext(os.path.splitext(os.path.basename(input))[0])[0],
                    BASE_WOEXT     = os.path.splitext(os.path.basename(input))[0] if os.path.splitext(os.path.basename(input))[1] != ".gz" else os.path.splitext(os.path.splitext(os.path.basename(input))[0])[0]))
                for i, input in enumerate(parse_string_list(input_list))]

    if iterable(output_list):
        return [MakeFile(o) for o in parse_object_list(output_list)]

    raise WeaverError(D_DATA,
        'Could not parse output argument: {0}'.format(output_list))

# vim: set sts=4 sw=4 ts=8 expandtab ft=python:
Example #20
def run_fanout(func_name, tasks, bytes, *func_args):
    debug(D_USER, 'Generating FanOut Pattern with Function {0}'.format(func_name))

    tasks     = int(tasks)
    bytes     = int(bytes)
    input     = generate_input_file(bytes, 'fanout.input')
    arguments = map(int, func_args)
    function  = make_function(func_name, *arguments)

    Iterate(function, tasks, '{NUMBER}.output', includes=input)
Example #21
def run_chained(func_name, tasks, *func_args):
    debug(D_USER, 'Generating Chained Pattern with Function {0}'.format(func_name))

    tasks     = int(tasks)
    arguments = map(int, func_args)
    function  = make_function(func_name, *arguments)

    output = None
    for task in range(tasks):
        output = function(output, '{0:04d}.output'.format(task))
Example #22
    def _generate(self):
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            inputs_a = parse_input_list(self.inputs_a)
            inputs_b = parse_input_list(self.inputs_b)
            includes = parse_input_list(self.includes)

            # If native is enabled, then use allpairs_master, otherwise
            # generate tasks as part of the DAG.
            #
            # Note: parse_output_list flattens inputs, so we need to manually
            # translate pairs into a single string.
            if self.native:
                # Store inputs A and B lists as required by allpairs_master
                inputs_a_file = next(self.nest.stash)
                with open(inputs_a_file, 'w') as fs:
                    for input_file in map(str, inputs_a):
                        fs.write(input_file + '\n')

                inputs_b_file = next(self.nest.stash)
                with open(inputs_b_file, 'w') as fs:
                    for input_file in map(str, inputs_b):
                        fs.write(input_file + '\n')

                inputs  = [inputs_a_file, inputs_b_file]
                outputs = parse_output_list(self.outputs,
                            map(lambda p: '_'.join(
                                map(lambda s: os.path.basename(str(s)), p)),inputs))

                # Schedule allpairs_master
                with Options(local=True, collect=inputs if self.collect else None):
                    allpairs_master = parse_function(
                        'allpairs_master -p {0} {{IN}} {{ARG}} > {{OUT}}'.format(self.port))
                    yield allpairs_master(inputs, outputs, function.path, includes + [function.path])
            else:
                inputs  = list(itertools.product(inputs_a, inputs_b))
                outputs = parse_output_list(self.outputs,
                            map(lambda p: '_'.join(
                                map(lambda s: os.path.basename(str(s)), p)),inputs))

                # We use a wrapper script to collect the output of the
                # comparison and put in {INPUT_A} {INPUT_B} {OUTPUT} format, as
                # used by allpairs_master.
                for i, o in zip(inputs, outputs):
                    tmp_output = next(self.nest.stash)

                    with Options(local=self.options.local, collect=[i] if self.collect else None):
                        output = function(i, tmp_output, None, includes)

                    # Wrapper script should run locally and we should always
                    # try to collect the temporary intermediate output file.
                    with Options(local=True, collect=[tmp_output]):
                        yield AllPairsCompareWrapper(output, o, map(lambda p: os.path.basename(str(p)), i), None)
Example #23
    def _generate(self):
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            inputs_a = parse_input_list(self.inputs_a)
            inputs_b = parse_input_list(self.inputs_b)
            includes = parse_input_list(self.includes)

            # If native is enabled, then use allpairs_master, otherwise
            # generate tasks as part of the DAG.
            #
            # Note: parse_output_list flattens inputs, so we need to manually
            # translate pairs into a single string.
            if self.native:
                # Store inputs A and B lists as required by allpairs_master
                inputs_a_file = next(self.nest.stash)
                with open(inputs_a_file, 'w') as fs:
                    for input_file in map(str, inputs_a):
                        fs.write(input_file + '\n')

                inputs_b_file = next(self.nest.stash)
                with open(inputs_b_file, 'w') as fs:
                    for input_file in map(str, inputs_b):
                        fs.write(input_file + '\n')

                inputs  = [inputs_a_file, inputs_b_file]
                outputs = parse_output_list(self.outputs,
                            ['_'.join(
                                [os.path.basename(str(s)) for s in p]) for p in inputs])

                # Schedule allpairs_master
                with Options(local=True, collect=inputs if self.collect else None):
                    allpairs_master = parse_function(
                        'allpairs_master -p {0} {{IN}} {{ARG}} > {{OUT}}'.format(self.port))
                    yield allpairs_master(inputs, outputs, function.path, includes + [function.path])
            else:
                inputs  = list(itertools.product(inputs_a, inputs_b))
                outputs = parse_output_list(self.outputs,
                            ['_'.join(
                                [os.path.basename(str(s)) for s in p]) for p in inputs])

                # We use a wrapper script to collect the output of the
                # comparison and put in {INPUT_A} {INPUT_B} {OUTPUT} format, as
                # used by allpairs_master.
                for i, o in zip(inputs, outputs):
                    tmp_output = next(self.nest.stash)

                    with Options(local=self.options.local, collect=[i] if self.collect else None):
                        output = function(i, tmp_output, None, includes)

                    # Wrapper script should run locally and we should always
                    # try to collect the temporary intermediate output file.
                    with Options(local=True, collect=[tmp_output]):
                        yield AllPairsCompareWrapper(output, o, [os.path.basename(str(p)) for p in i], None)
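The non-native branch simply crosses the two input lists, and the pair-to-name translation mentioned in the comment is plain Python; a standalone check with made-up file names:

    import itertools
    import os

    inputs_a = ['a1.dat', 'a2.dat']
    inputs_b = ['b1.dat']
    pairs = list(itertools.product(inputs_a, inputs_b))
    names = ['_'.join(os.path.basename(str(s)) for s in p) for p in pairs]
    assert names == ['a1.dat_b1.dat', 'a2.dat_b1.dat']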
Example #24
    def _import(self, module, symbols):
        """ Import ``symbols`` from ``module`` into global namespace. """
        # Import module
        m = 'weaver.{0}'.format(module)
        m = __import__(m, self.globals, self.globals, symbols, -1)

        # Import symbols from module into global namespace, which we store as
        # an attribute for later use (i.e. during compile)
        for symbol in symbols:
            self.globals[symbol] = getattr(m, symbol)
            debug(D_SCRIPT, 'Imported {0} from {1}'.format(symbol, module))
Example #25
    def emit_task(self,
                  abstraction,
                  function,
                  command,
                  inputs,
                  outputs,
                  options,
                  symbol=None):
        """ Write task to DAG file. """
        # Track inputs and outputs.
        if self.track_imports:
            for i in inputs:
                self.inputs.add(i)

        if self.track_exports:
            for o in outputs:
                self.outputs.add(o)

        debug(
            D_ENGINE, 'Emitting {0}, [{1}], [{2}], {3}'.format(
                command, ', '.join(map(str, inputs)),
                ', '.join(map(str, outputs)), options))

        # Write task outputs and inputs
        self.dag_file.write('{0}: {1}\n'.format(' '.join(map(str, outputs)),
                                                ' '.join(map(str, inputs))))

        # Write debugging symbols if enabled
        if CurrentScript().include_symbols:
            if abstraction == SENTINEL:
                self.dag_file.write(
                    '\t'.join(['', '# SYMBOL', str(function)]) + '\n')
            else:
                self.dag_file.write('\t'.join(
                    ['', '# SYMBOL', str(abstraction)]) + '\n')

        # if a symbol is provided
        if symbol:
            self.dag_file.write('@SYMBOL="' + symbol + '"\n')

        # Write environmental variables
        if options.local:
            self.dag_file.write('@BATCH_LOCAL=1\n')
        if options.batch:
            self.dag_file.write('@BATCH_OPTIONS={0}\n'.format(options.batch))
        if options.collect:
            self.dag_file.write('@_MAKEFLOW_COLLECT_LIST+={0}\n'.format(
                ' '.join(map(str, options.collect))))
        for k, v in list(options.environment.items()):
            self.dag_file.write('@{0}={1}\n'.format(k, v))

        # Write task command
        self.dag_file.write('\t{0}\n'.format(command))
        self.dag_file.flush()
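Tracing those writes for one task makes the rule format concrete. A sketch of what a single local task would append to the DAG file, with a hypothetical command and file names (symbol and collect lines omitted):

    # Hypothetical one-input, one-output local task.
    emitted = (
        'out.txt: in.txt\n'               # outputs: inputs
        '@BATCH_LOCAL=1\n'                # from options.local
        '\t./process in.txt > out.txt\n'  # command line is tab-indented
    )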
Example #26
    def _generate(self):
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            inputs   = parse_input_list(self.inputs)
            outputs  = parse_output_list(self.outputs, inputs)
            includes = parse_input_list(self.includes)

            for i, o in zip(inputs, outputs):
                with Options(local=self.options.local, collect=[i] if self.collect else None):
                    yield function(i, o, None, includes)
Example #27
    def schedule(self, abstraction, function, command, inputs, outputs,
        options):
        """ Schedule task for execution. """
        debug(D_NEST,
            'Scheduling task({0}, {1}, {2}, [{3}], [{4}], {5}) for {6}'.format(
            abstraction, function, command,
            ', '.join(map(str, inputs)), ', '.join(map(str, outputs)),
            options, self))

        if abstraction is None:
            abstraction = SENTINEL
        self.tasks.append(
            [abstraction, function, command, inputs, outputs, options])
Example #28
def run_map(func_name, tasks, bytes, *func_args):
    debug(D_USER, 'Generating Map Pattern with Function {0}'.format(func_name))

    tasks     = int(tasks)
    bytes     = int(bytes)
    arguments = map(int, func_args)
    function  = make_function(func_name, *arguments)
    inputs    = []

    for input in range(tasks):
        inputs.append(generate_input_file(bytes))

    Map(function, inputs, '{BASE_WOEXT}.output')
Example #29
    def schedule(self, abstraction, function, command, inputs, outputs,
                 options, symbol=None):
        """ Schedule task for execution. """
        debug(D_NEST,
            'Scheduling task({0}, {1}, {2}, [{3}], [{4}], {5}) for {6}'.format(
            abstraction, function, command,
            ', '.join(map(str, inputs)), ', '.join(map(str, outputs)),
            options, self))

        if abstraction is None:
            abstraction = SENTINEL
        self.tasks.append(
            [abstraction, function, command, inputs, outputs, options, symbol])
Example #30
    def _generate(self):
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            mapper   = parse_function(self.mapper, PythonMapper)
            inputs   = parse_input_list(self.inputs)
            includes = parse_input_list(self.includes)
            output   = self.outputs
            nest     = CurrentNest()

            for map_input in groups(inputs, self.group):
                map_output = next(nest.stash)
                with Options(local=self.options.local, collect=map_input if self.collect else None):
                    yield mapper(map_input, map_output, includes)
Example #31
    def __init__(self,
                 executable,
                 cmd_format=None,
                 find_dirs=None,
                 environment=None):
        self.cmd_format = cmd_format or Function.CMD_FORMAT
        self.path = find_executable(executable, find_dirs)
        self.environment = environment or dict()
        self.includes = set([self.path])

        debug(
            D_FUNCTION,
            'Created Function {0}({1}, {2})'.format(type_str(self), self.path,
                                                    self.cmd_format))
Example #32
def parse_input_list(input_list=None):
    """ Return an :func:`~weaver.util.iterable` object of input files.

    This just uses :func:`~weaver.util.parse_string_list` to parse the input
    and casts all the objects to :class:`File`.

    This means that `input_list` must be one of the following:

    1. ``None`` or ``[]`` for an empty list.
    2. A string object.
    3. An :func:`~weaver.util.iterable` object (ex. list, iterator, etc.).

    Where each individual element must represent an :class:`File`.
    """
    debug(D_DATA, 'Parsing input list')
    return [MakeFile(i) for i in parse_object_list(input_list)]
Example #33
    def execute(self, arguments=None, exit_on_failure=False):
        """ Execute DAG using Makeflow. """
        if self.dag_file is None:
            raise WeaverError(D_ENGINE, 'Cannot execute an empty DAG')

        # Ensure that DAG is written to disk.
        self.dag_file.flush()

        # Execute emitted DAG from the current Nest path.
        try:
            command_list = [
                self.path,
                os.path.relpath(self.dag_path, self.work_dir)
            ]
            if self.wrapper:
                command_list.insert(0, self.wrapper)
            if arguments:
                # Check whether the -B option has been used
                arg_groups = re.search(
                    r"(-\S)?\s?(\S*)\s?(-B)\s[\"'](.*)[\"']\s?(-\S)?\s?(\S*)",
                    arguments)
                if arg_groups:
                    for arg_group in arg_groups.groups():
                        if arg_group:
                            command_list.append(arg_group)
                else:
                    command_list.extend(arguments.split())
            debug(
                D_ENGINE, 'Executing DAG {0} using {1} in {2}'.format(
                    self.dag_path, self.path, self.work_dir))
            subprocess.check_call(command_list, cwd=self.work_dir)
        except subprocess.CalledProcessError as e:
            """
            if exit_on_failure:
                log_func = fatal
            else:
                log_func = warn
            log_func(D_ENGINE, 'Failed to execute DAG {0} using {1}:\n{2}'.format(
                self.dag_path, self.path, e))
            """
            raise RuntimeError(
                'Failed to execute DAG {0} using {1}:\n{2}'.format(
                    self.dag_path, self.path, e))


# vim: set sts=4 sw=4 ts=8 expandtab ft=python:
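The -B handling hinges on a fairly brittle regular expression; a quick standalone check of what it captures, using a made-up argument string (note that the quotes around the -B value are stripped):

    import re

    arguments = '-T condor -B "--foo bar" -J 10'
    arg_groups = re.search(
        r"(-\S)?\s?(\S*)\s?(-B)\s[\"'](.*)[\"']\s?(-\S)?\s?(\S*)", arguments)
    assert [g for g in arg_groups.groups() if g] == \
        ['-T', 'condor', '-B', '--foo bar', '-J', '10']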
Example #34
    def emit_task(self, abstraction, function, command, inputs, outputs, options, symbol=None):
        """ Write task to DAG file. """
        # Track inputs and outputs.
        if self.track_imports:
            for i in inputs:
                self.inputs.add(i)

        if self.track_exports:
            for o in outputs:
                self.outputs.add(o)

        debug(D_ENGINE, 'Emitting {0}, [{1}], [{2}], {3}'.format(
            command, ', '.join(map(str, inputs)), ', '.join(map(str, outputs)),
            options))

        # Write task outputs and inputs
        self.dag_file.write('{0}: {1}\n'.format(
            ' '.join(map(str, outputs)), ' '.join(map(str, inputs))))

        # Write debugging symbols if enabled
        if CurrentScript().include_symbols:
            if abstraction == SENTINEL:
                self.dag_file.write('\t'.join(['', '# SYMBOL', str(function)]) + '\n')
            else:
                self.dag_file.write('\t'.join(['', '# SYMBOL', str(abstraction)]) + '\n')

        # if a symbol is provided
        if symbol:
            self.dag_file.write('@SYMBOL="' + symbol+'"\n')

        # Write environmental variables
        if options.local:
            self.dag_file.write('@BATCH_LOCAL=1\n')
        if options.batch:
            self.dag_file.write('@BATCH_OPTIONS={0}\n'.format(options.batch))
        if options.collect:
            self.dag_file.write('@_MAKEFLOW_COLLECT_LIST+={0}\n'.format(
                ' '.join(map(str, options.collect))))
        for k, v in list(options.environment.items()):
            self.dag_file.write('@{0}={1}\n'.format(k, v))

        # Write task command
        self.dag_file.write('\t{0}\n'.format(command))
        self.dag_file.flush()
Example #35
    def _optimize_nested_abstractions(self):
        """ Internally, we perform inline abstractions optimization as we build
        the DAG, so we should only execute the body of this method if we want
        to automatically nest abstractions after the fact.
        """
        if not CurrentScript().nested_abstractions:
            return

        debug(D_NEST, 'Inlining Abstractions for {0}'.format(self))

        # Group tasks into bins based on Abstractions.
        task_dict = collections.defaultdict(list)
        for task in self.tasks:
            abstraction = task[0]
            task_dict[abstraction].append(task)

        # For each Abstraction, create InlineNest and schedule tasks to be
        # executed there; only do this if we have more than one Abstraction.
        self.tasks = []
        if len(task_dict.keys()) > 1:
            for abstraction, tasks in task_dict.items():
                # For tasks scheduled directly by a Function (Abstraction is
                # None), then simply schedule for execution in current Nest.
                if abstraction is SENTINEL:
                    self.tasks.extend(tasks)
                    continue

                # Otherwise, create a new InlineNest and then schedule tasks to
                # run in this new Nest.
                with InlineNest() as inline_nest:
                    for task in tasks:
                        inline_nest.schedule(*task)
                    inline_nest.compile()

                # Engine is also a Function, so call it to schedule the task
                # responsible for InlineNest to run in the current Nest.
                with abstraction.options:
                    inline_nest()
        else:
            # Copy tasks from Abstractions to Nest task list.
            for abstraction, tasks in task_dict.items():
                for task in tasks:
                    self.tasks.append(task)
Example #36
    def _optimize_nested_abstractions(self):
        """ Internally, we perform inline abstractions optimization as we build
        the DAG, so we should only execute the body of this method if we want
        to automatically nest abstractions after the fact.
        """
        if not CurrentScript().nested_abstractions:
            return

        debug(D_NEST, 'Inlining Abstractions for {0}'.format(self))

        # Group tasks into bins based on Abstractions.
        task_dict = collections.defaultdict(list)
        for task in self.tasks:
            abstraction = task[0]
            task_dict[abstraction].append(task)

        # For each Abstraction, create InlineNest and schedule tasks to be
        # executed there; only do this if we have more than one Abstraction.
        self.tasks = []
        if len(list(task_dict.keys())) > 1:
            for abstraction, tasks in list(task_dict.items()):
                # For tasks scheduled directly by a Function (Abstraction is
                # None), then simply schedule for execution in current Nest.
                if abstraction is SENTINEL:
                    self.tasks.extend(tasks)
                    continue

                # Otherwise, create a new InlineNest and then schedule tasks to
                # run in this new Nest.
                with InlineNest() as inline_nest:
                    for task in tasks:
                        inline_nest.schedule(*task)
                    inline_nest.compile()

                # Engine is also a Function, so call it to schedule the task
                # responsible for InlineNest to run in the current Nest.
                with abstraction.options:
                    inline_nest()
        else:
            # Copy tasks from Abstractions to Nest task list.
            for abstraction, tasks in list(task_dict.items()):
                for task in tasks:
                    self.tasks.append(task)
Example #37
    def __init__(self,
                 work_dir=None,
                 dag_path=None,
                 stash=None,
                 barrier=None,
                 wrapper=None,
                 track_imports=True,
                 track_exports=True):
        self.work_dir = work_dir or '.'
        self.tasks = []
        self.parent = CurrentNest()
        if self.parent:
            self.work_dir = os.path.join(self.parent.work_dir, self.work_dir)
        self.stash = stash or Stash(root=os.path.join(self.work_dir, '_Stash'))

        if not os.path.exists(self.work_dir):
            make_directory(self.work_dir)

        Makeflow.__init__(self,
                          wrapper=wrapper,
                          track_imports=track_imports,
                          track_exports=track_exports)

        self.dag_path = dag_path or os.path.join(self.work_dir, 'Makeflow')
        self.dag_file = open(self.dag_path, 'w')
        self.includes.add(self.dag_path)
        # TODO: fix work_dir so it can be translated by makeflow_link

        if barrier:
            self.includes.update(parse_input_list(barrier))

        # Since Abstractions and SubNests are not compiled immediately, these
        # objects must register with their parent Nest, which will compile them in
        # the order that they are registered to ensure proper semantics.
        self.futures = []

        if self.parent:
            debug(
                D_NEST,
                'Register child {0} with parent {1}'.format(self, self.parent))
            self.parent.futures.append((self, True))

        debug(D_NEST, 'Created {0}'.format(self))
Example #38
    def _query(self, filters, **parameters):
        cursor = None
        try:
            if self.db_conn is None:
                self.connect()

            try:
                fields = parameters['fields']
            except KeyError:
                fields = self.db_fields
            try:
                limit = int(parameters['limit'])
            except KeyError:
                limit = None
            try:
                path = parameters['path']
            except KeyError:
                path = self.path

            cursor = self.db_conn.cursor()
            query = self.db_query_format.format(fields=','.join(fields),
                                                table=self.db_table,
                                                filters=' AND '.join(filters))

            if limit:
                query = '{0} LIMIT {1}'.format(query, limit)

            debug(D_DATASET, 'Executing SQL query: {0}'.format(query))
            cursor.execute(query)
            for row in cursor.fetchall():
                yield MakeFile(path(self, row), self.nest)
        except Exception as e:
            fatal(D_DATASET,
                  'Unable to perform SQL query: {0}'.format(e),
                  print_traceback=True)
        finally:
            if cursor:
                cursor.close()
            if not self.db_conn_keep_alive:
                self.disconnect()

        return  # PEP 479: raising StopIteration inside a generator is an error in Python 3.7+.
Example #39
    def _query(self, filters, **parameters):
        cursor = None
        try:
            if self.db_conn is None:
                self.connect()

            try:
                fields = parameters['fields']
            except KeyError:
                fields = self.db_fields
            try:
                limit = int(parameters['limit'])
            except KeyError:
                limit = None
            try:
                path = parameters['path']
            except KeyError:
                path = self.path

            cursor = self.db_conn.cursor()
            query  = self.db_query_format.format(
                fields  = ','.join(fields),
                table   = self.db_table,
                filters = ' AND '.join(filters))

            if limit:
                query = '{0} LIMIT {1}'.format(query, limit)

            debug(D_DATASET, 'Executing SQL query: {0}'.format(query))
            cursor.execute(query)
            for row in cursor.fetchall():
                yield MakeFile(path(self, row), self.nest)
        except Exception as e:
            fatal(D_DATASET, 'Unable to perform SQL query: {0}'.format(e), print_traceback=True)
        finally:
            if cursor:
                cursor.close()
            if not self.db_conn_keep_alive:
                self.disconnect()

        return  # PEP 479: raising StopIteration inside a generator is an error in Python 3.7+.
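The shape of db_query_format is not visible in this snippet; assuming a typical 'SELECT {fields} FROM {table} WHERE {filters}' template, the constructed query would come out as:

    # Assumed template; the real format string lives on the dataset class.
    db_query_format = 'SELECT {fields} FROM {table} WHERE {filters}'
    query = db_query_format.format(
        fields=','.join(['id', 'path']),
        table='files',
        filters=' AND '.join(['size > 0', "type = 'fits'"]))
    query = '{0} LIMIT {1}'.format(query, 10)
    assert query == "SELECT id,path FROM files WHERE size > 0 AND type = 'fits' LIMIT 10"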
Example #40
    def __init__(self, function, inputs=None, outputs=None, includes=None,
        native=False, group=None, collect=False, local=False):
        # Must set id before we call Dataset.__init__ due to debugging
        # statement in said function.
        self.id         = next(self.Counter)
        self.function   = function
        self.inputs     = inputs
        self.outputs    = outputs or '{stash}'
        self.includes   = includes
        self.native     = native
        self.group      = group or 0
        self.local      = local
        Dataset.__init__(self)

        if collect:
            self.collect = parse_input_list(self.inputs)
        else:
            self.collect = None
        self.options = Options(local=self.local, collect=self.collect)

        self.nest.futures.append((self, False))
        debug(D_ABSTRACTION, 'Registered Abstraction {0} with {1}'.format(self, self.nest))
Example #41
    def execute(self, arguments=None, exit_on_failure=False):
        """ Execute DAG using Makeflow. """
        if self.dag_file is None:
            raise WeaverError(D_ENGINE, 'Cannot execute an empty DAG')

        # Ensure that DAG is written to disk.
        self.dag_file.flush()

        # Execute emitted DAG from the current Nest path.
        try:
            command_list = [self.path, os.path.relpath(self.dag_path, self.work_dir)]
            if self.wrapper:
                command_list.insert(0, self.wrapper)
            if arguments:
                # Check whether the -B option has been used
                arg_groups = re.search(r"(-\S)?\s?(\S*)\s?(-B)\s[\"'](.*)[\"']\s?(-\S)?\s?(\S*)", arguments)
                if arg_groups:
                    for arg_group in arg_groups.groups():
                        if arg_group:
                            command_list.append(arg_group)
                else:
                    command_list.extend(arguments.split())
            debug(D_ENGINE, 'Executing DAG {0} using {1} in {2}'.format(
                self.dag_path, self.path, self.work_dir))
            subprocess.check_call(command_list, cwd=self.work_dir)
        except subprocess.CalledProcessError as e:
            """
            if exit_on_failure:
                log_func = fatal
            else:
                log_func = warn
            log_func(D_ENGINE, 'Failed to execute DAG {0} using {1}:\n{2}'.format(
                self.dag_path, self.path, e))
            """
            raise RuntimeError('Failed to execute DAG {0} using {1}:\n{2}'.format(self.dag_path, self.path, e))

# vim: set sts=4 sw=4 ts=8 expandtab ft=python:
Example #42
    def _generate(self):
        with self:
            debug(D_ABSTRACTION, 'Generating Abstraction {0}'.format(self))

            function = parse_function(self.function)
            inputs   = parse_input_list(self.inputs)
            includes = parse_input_list(self.includes)
            output   = self.outputs
            nest     = CurrentNest()

            if not os.path.isabs(output):
                output = os.path.join(nest.work_dir, output)

            while len(inputs) > self.group:
                next_inputs = []
                for group in groups(inputs, self.group):
                    output_file = next(nest.stash)
                    next_inputs.append(output_file)
                    with Options(local=self.options.local, collect=group if self.collect else None):
                        yield function(group, output_file, None, includes)
                inputs = next_inputs

            with Options(local=self.options.local, collect=inputs if self.collect else None):
                yield function(inputs, output, None, includes)
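
The loop above reduces its inputs in rounds: chunk into groups of `self.group`, emit one task per chunk, and feed the chunk outputs back in until a single group remains. A minimal sketch of the `groups` helper this assumes (the real helper lives elsewhere in Weaver's utilities):

    def groups(items, size):
        # Sketch only: yield successive chunks of at most `size` items,
        # matching the chunking behavior the reduction loop relies on.
        items = list(items)
        for i in range(0, len(items), size):
            yield items[i:i + size]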
Example #54
0
    def __iter__(self):
        # Generate the cache under any of the following conditions:
        #
        #   1. Cache file does not exist
        #   2. Cache file exists but is older than the compile start time and
        #      the force flag is set
        debug(D_DATASET, 'Iterating on Dataset {0}'.format(self))
        if os.path.exists(self.cache_path):
            # If cache file is made after we started compiling, then it is
            # valid, so don't bother generating.
            if CurrentScript().start_time <= os.stat(self.cache_path).st_ctime:
                debug(D_DATASET, 'Loading Dataset {0}'.format(self))
                return (MakeFile(f.strip(), self.nest)
                        for f in open(self.cache_path, 'r'))

            message = 'Cache file {0} already exists'.format(self.cache_path)
            if CurrentScript().force:
                warn(D_DATASET, message)
            else:
                fatal(D_DATASET, message)

        debug(D_DATASET, 'Generating Dataset {0}'.format(self))
        return self._generate()
Example #56
0
    def __init__(self, args):
        self.path                = None
        self.force               = True        # Ignore warnings
        self.import_builtins     = True        # Load built-ins
        self.output_directory    = os.curdir   # Where to create artifacts
        self.start_time          = time.time() # Record beginning of compiling
        self.options             = Options()
        self.nested_abstractions = False
        self.inline_tasks        = 1
        self.execute_dag         = False
        self.globals             = {}
        self.engine_wrapper      = None
        self.engine_arguments    = None
        self.include_symbols     = False
        self.normalize_paths     = True

        args = collections.deque(args)
        while args:
            arg = args.popleft()
            try:
                if arg.startswith('-'):
                    self.SCRIPT_OPTIONS_TABLE[arg](self, args)
                else:
                    self.path = arg
                    self.arguments = list(args)
                    args.clear()
            except (IndexError, KeyError):
                fatal(D_SCRIPT, 'invalid command line option: {0}'.format(arg))

        if self.normalize_paths:
            self.output_directory = os.path.abspath(self.output_directory)

        debug(D_SCRIPT, 'path                = {0}'.format(self.path))
        debug(D_SCRIPT, 'force               = {0}'.format(self.force))
        debug(D_SCRIPT, 'import_builtins     = {0}'.format(self.import_builtins))
        debug(D_SCRIPT, 'output_directory    = {0}'.format(self.output_directory))
        debug(D_SCRIPT, 'start_time          = {0}'.format(self.start_time))
        debug(D_SCRIPT, 'options             = {0}'.format(self.options))
        debug(D_SCRIPT, 'nested_abstractions = {0}'.format(self.nested_abstractions))
        debug(D_SCRIPT, 'inline_tasks        = {0}'.format(self.inline_tasks))
        debug(D_SCRIPT, 'execute_dag         = {0}'.format(self.execute_dag))
        debug(D_SCRIPT, 'engine_wrapper      = {0}'.format(self.engine_wrapper))
        debug(D_SCRIPT, 'engine_arguments    = {0}'.format(self.engine_arguments))
        debug(D_SCRIPT, 'normalize_paths     = {0}'.format(self.normalize_paths))

        if self.path is None:
            self.show_usage()
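
The argument loop above dispatches flags through a class-level table whose handlers may consume extra values from the deque, which is why both KeyError (unknown flag) and IndexError (missing value) are reported as invalid options. A hypothetical sketch of such a table (flag names invented for illustration; the real entries live in the actual Script class):

    # Hypothetical entries only -- each handler takes (self, args) and may
    # popleft() additional values from the argument deque.
    SCRIPT_OPTIONS_TABLE = {
        '-f': lambda self, args: setattr(self, 'force', True),
        '-o': lambda self, args: setattr(self, 'output_directory', args.popleft()),
    }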
Example #57
0
    def __init__(self, function=None, force=False, import_builtins=True, output_directory=None,
                 execute_dag=False, engine_wrapper=None, engine_arguments=None, args=None):
        self.function = function
        self.arguments = list(args) if args else []
        self.force = force                      # Ignore warnings
        self.import_builtins = import_builtins  # Load built-ins
        if output_directory is None:
            self.output_directory = os.curdir   # Where to create artifacts
        else:
            self.output_directory = output_directory
        self.start_time = time.time()           # Record beginning of compiling
        self.options = Options()
        self.nested_abstractions = False
        self.inline_tasks = 1
        self.execute_dag = execute_dag
        self.globals = {}
        self.engine_wrapper = engine_wrapper
        self.engine_arguments = engine_arguments
        self.include_symbols = False

        debug(D_SCRIPT, 'force               = {0}'.format(self.force))
        debug(D_SCRIPT, 'import_builtins     = {0}'.format(self.import_builtins))
        debug(D_SCRIPT, 'output_directory    = {0}'.format(self.output_directory))
        debug(D_SCRIPT, 'start_time          = {0}'.format(self.start_time))
        debug(D_SCRIPT, 'options             = {0}'.format(self.options))
        debug(D_SCRIPT, 'nested_abstractions = {0}'.format(self.nested_abstractions))
        debug(D_SCRIPT, 'inline_tasks        = {0}'.format(self.inline_tasks))
        debug(D_SCRIPT, 'execute_dag         = {0}'.format(self.execute_dag))
        debug(D_SCRIPT, 'engine_wrapper      = {0}'.format(self.engine_wrapper))
        debug(D_SCRIPT, 'engine_arguments    = {0}'.format(self.engine_arguments))
Example #58
0
    def __init__(self, args):
        self.path = None
        self.force = False  # Ignore warnings
        self.import_builtins = True  # Load built-ins
        self.output_directory = os.curdir  # Where to create artifacts
        self.start_time = time.time()  # Record beginning of compiling
        self.options = Options()
        self.nested_abstractions = False
        self.inline_tasks = 1
        self.execute_dag = False
        self.globals = {}
        self.engine_wrapper = None
        self.engine_arguments = None
        self.include_symbols = False
        self.normalize_paths = True

        args = collections.deque(args)
        while args:
            arg = args.popleft()
            try:
                if arg.startswith('-'):
                    self.SCRIPT_OPTIONS_TABLE[arg](self, args)
                else:
                    self.path = arg
                    self.arguments = list(args)
                    args.clear()
            except (IndexError, KeyError):
                fatal(D_SCRIPT, 'invalid command line option: {0}'.format(arg))

        if self.normalize_paths:
            self.output_directory = os.path.abspath(self.output_directory)

        debug(D_SCRIPT, 'path                = {0}'.format(self.path))
        debug(D_SCRIPT, 'force               = {0}'.format(self.force))
        debug(D_SCRIPT,
              'import_builtins     = {0}'.format(self.import_builtins))
        debug(D_SCRIPT,
              'output_directory    = {0}'.format(self.output_directory))
        debug(D_SCRIPT, 'start_time          = {0}'.format(self.start_time))
        debug(D_SCRIPT, 'options             = {0}'.format(self.options))
        debug(D_SCRIPT,
              'nested_abstractions = {0}'.format(self.nested_abstractions))
        debug(D_SCRIPT, 'inline_tasks        = {0}'.format(self.inline_tasks))
        debug(D_SCRIPT, 'execute_dag         = {0}'.format(self.execute_dag))
        debug(D_SCRIPT,
              'engine_wrapper      = {0}'.format(self.engine_wrapper))
        debug(D_SCRIPT,
              'engine_arguments    = {0}'.format(self.engine_arguments))
        debug(D_SCRIPT,
              'normalize_paths     = {0}'.format(self.normalize_paths))

        if self.path is None:
            self.show_usage()
Example #59
0
    def exit(self, type, value, traceback):
        stack.pop()
        debug(flag, 'Restored {0} {1}'.format(flag.title(), stack.top()))
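
This fragment is the exit half of a stack-based context manager: it pops the current value and logs whatever is exposed beneath it. A hedged sketch of the matching enter side it implies (`stack.push` is an assumption; the fragment itself only shows `pop` and `top`):

    def enter(self):
        # Assumed counterpart: push onto the same stack that exit() pops,
        # logging the change symmetrically.
        stack.push(self)
        debug(flag, 'Set {0} {1}'.format(flag.title(), self))
        return self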