Example #1
    def f():
        # Copy the storage of inputs0 into the corresponding entries of
        # every other input list.
        for inputs in input_lists[1:]:
            for input1, input2 in zip(inputs0, inputs):
                input2.storage[0] = copy(input1.storage[0])
        # Clear the containers that must be reset before each call.
        for x in to_reset:
            x[0] = None
        pre(self, [input.data for input in input_lists[0]], order,
            thunk_groups)
        # Run the wrapper on each group of thunks, annotating any
        # exception with the node that raised it.
        for i, (thunks, node) in enumerate(zip(thunk_groups, order)):
            try:
                wrapper(self.fgraph, i, node, *thunks)
            except Exception:
                raise_with_op(self.fgraph, node, *thunks)
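
All of these snippets rely on Theano/Aesara's convention of using single-element lists as shared, mutable storage cells: `input2.storage[0] = ...` writes a value into a cell and `x[0] = None` clears one. The sketch below only illustrates that convention; `make_add_thunk` and the cell names are invented for the example and are not part of the library.

    # Minimal sketch of the single-element "storage cell" convention assumed
    # above; make_add_thunk and the cell names are illustrative only.
    def make_add_thunk(in_a, in_b, out):
        def thunk():
            # Read the current values from the shared cells, write the result.
            out[0] = in_a[0] + in_b[0]
        return thunk

    in_a, in_b, out = [None], [None], [None]
    add = make_add_thunk(in_a, in_b, out)
    in_a[0], in_b[0] = 2, 3
    add()
    print(out[0])   # 5
    out[0] = None   # "clearing" a cell, as the reset loops above do
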
Example #2
    def __call__(self):
        if self.time_thunks:
            # Clear the pre-call containers, then run every thunk while
            # accumulating per-node call counts and wall-clock times.
            for cont in self.pre_call_clear:
                cont[0] = None
            try:
                for i, (thunk, node) in enumerate(zip(self.thunks, self.nodes)):
                    t0 = time.time()
                    thunk()
                    t1 = time.time()
                    self.call_counts[i] += 1
                    self.call_times[i] += t1 - t0
            except Exception:
                raise_with_op(self.fgraph, node, thunk)
        else:
            # Same loop without the timing overhead.
            for cont in self.pre_call_clear:
                cont[0] = None
            try:
                for thunk, node in zip(self.thunks, self.nodes):
                    thunk()
            except Exception:
                raise_with_op(self.fgraph, node, thunk)
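
The `except Exception: raise_with_op(...)` pattern in Example #2 re-raises a thunk's exception enriched with information about the Apply node that failed. The helper below is a rough, hypothetical stand-in for that idea (not Aesara's `raise_with_op` implementation); `run_annotated`, `ok`, and `boom` are invented for the illustration.

    # Rough sketch of the error-annotation pattern behind raise_with_op above.
    def run_annotated(thunks, nodes):
        for thunk, node in zip(thunks, nodes):
            try:
                thunk()
            except Exception as exc:
                # Attach the offending node to the exception before re-raising,
                # so the traceback says which node failed.
                exc.args = exc.args + (f"node that caused the error: {node}",)
                raise

    def ok():
        pass

    def boom():
        raise ValueError("bad input")

    try:
        run_annotated([ok, boom], ["node_0", "node_1"])
    except ValueError as err:
        print(err.args)   # ('bad input', 'node that caused the error: node_1')
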
Example #3
    def __call__(self):
        if self.time_thunks:
            for cont in self.pre_call_clear:
                cont[0] = None
            try:
                i = 0
                # zip_longest pads any shorter sequence with an empty tuple,
                # so iterations without storage to clear skip the inner loop.
                for thunk, node, old_storage in zip_longest(
                        self.thunks,
                        self.nodes,
                        self.post_thunk_clear,
                        fillvalue=()):
                    t0 = time.time()
                    thunk()
                    t1 = time.time()
                    self.call_counts[i] += 1
                    self.call_times[i] += t1 - t0
                    for old_s in old_storage:
                        old_s[0] = None
                    i += 1
            except Exception:
                raise_with_op(self.fgraph, node, thunk)
        else:
            for cont in self.pre_call_clear:
                cont[0] = None
            try:
                for thunk, node, old_storage in zip_longest(
                        self.thunks,
                        self.nodes,
                        self.post_thunk_clear,
                        fillvalue=()):
                    thunk()
                    for old_s in old_storage:
                        old_s[0] = None
            except Exception:
                raise_with_op(self.fgraph, node, thunk)

        return self.perform_updates()
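
Example #3 adds the `post_thunk_clear` bookkeeping: each thunk is paired, via `zip_longest(..., fillvalue=())`, with the storage cells that may be freed once that thunk has run. Below is a minimal, self-contained sketch of that pairing; the lists are made up for illustration.

    from itertools import zip_longest

    thunks = [lambda: None, lambda: None, lambda: None]
    post_thunk_clear = [[[1]], []]          # deliberately shorter than thunks

    for thunk, old_storage in zip_longest(thunks, post_thunk_clear,
                                          fillvalue=()):
        thunk()
        # Missing entries come back as (), so the inner loop is simply skipped.
        for old_s in old_storage:
            old_s[0] = None

    print(post_thunk_clear)                 # [[[None]], []]
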
Example #4
    def __call__(self, output_subset=None):
        storage_map = self.storage_map
        compute_map = self.compute_map
        thunks = self.thunks
        dependencies = self.dependencies
        self.node_executed_order = []
        self.node_cleared_order = []

        for cont in self.pre_call_clear:
            cont[0] = None

        for k in self.storage_map:
            compute_map[k][0] = k.owner is None
            if self.callback_input and compute_map[k][0]:
                self.callback_input(k, self.storage_map[k][0])

        # apply_stack contains nodes
        if output_subset is not None:
            first_updated = len(self.outputs) - self.n_updates
            output_subset = output_subset + list(
                range(first_updated, len(self.outputs)))
            apply_stack = [
                self.outputs[i].owner for i in output_subset
                if self.outputs[i].owner
            ]
        else:
            apply_stack = list(self.base_apply_stack)

        last_apply_stack_len = -1

        # Record the shape/strides/offset of all function inputs, shared
        # variables and constants
        for var, data in self.storage_map.items():
            if data[0] is None:
                continue
            if hasattr(var.type, "get_shape_info"):
                sh = var.type.get_shape_info(data[0])
            else:
                sh = "no shape"
            self.variable_shape[var] = sh
            st = getattr(data[0], "strides", "no strides")
            if getattr(data[0], "flags", False) and data[0].flags.c_contiguous:
                st = "c"
            elif hasattr(data[0],
                         "is_c_contiguous") and data[0].is_c_contiguous():
                st = "c"
            self.variable_strides[var] = st
            off = getattr(data[0], "offset", "")
            self.variable_offset[var] = off

        while apply_stack:
            # Make sure something happened last time round. This is just a
            # safety check that the op is written correctly: apply_stack
            # should either decrease in length by one (a thunk successfully
            # applied) or increase in length (dependencies added over and
            # above the original).
            # NB: this doesn't catch cycles (that would be too expensive/slow),
            #     just stalls.
            apply_stack_len = len(apply_stack)
            assert apply_stack_len != last_apply_stack_len
            last_apply_stack_len = apply_stack_len

            current_apply = apply_stack.pop()
            current_inputs = current_apply.inputs
            current_outputs = current_apply.outputs
            current_deps = current_inputs + current_apply.destroy_dependencies

            computed_ins = all(compute_map[v][0] for v in current_deps)
            computed_outs = all(compute_map[v][0] for v in current_outputs)

            if not thunks[self.node_idx[current_apply]].lazy:
                #
                # stack loop: Normal Non-Lazy Case
                # ================================
                #
                # Check whether all inputs are in place. If so, compute the
                # thunk and remove the node from the apply_stack. If not,
                # leave it in and add to the apply_stack the nodes that will
                # produce those inputs.

                if computed_ins and not computed_outs:
                    # -- Non-lazy case: have inputs, time to compute outputs
                    try:
                        _, dt = self.run_thunk_of_node(current_apply)
                        del _
                        if config.profile or config.print_global_stats:
                            current_idx = self.node_idx[current_apply]
                            self.call_counts[current_idx] += 1
                            self.call_times[current_idx] += dt
                            # Compute the memory footprint of the op.
                            # ?? What about inplace: if the op is inplace,
                            # you don't actually ask for more memory!
                            for (idx, o) in enumerate(thunks[
                                    self.node_idx[current_apply]].outputs):
                                var = self.nodes[current_idx].outputs[idx]
                                if hasattr(var.type, "get_shape_info"):
                                    sh = var.type.get_shape_info(o[0])
                                else:
                                    sh = "no shape"
                                self.variable_shape[var] = sh
                                st = getattr(o[0], "strides", "no strides")
                                if (getattr(o[0], "flags", False)
                                        and o[0].flags.c_contiguous):
                                    st = "c"
                                elif (hasattr(o[0], "is_c_contiguous")
                                      and o[0].is_c_contiguous()):
                                    st = "c"
                                self.variable_strides[var] = st
                                off = getattr(o[0], "offset", "")
                                self.variable_offset[var] = off
                    except Exception:
                        raise_with_op(
                            self.fgraph,
                            current_apply,
                            self.thunks[self.node_idx[current_apply]],
                            storage_map=storage_map,
                        )
                    for o in current_apply.outputs:
                        compute_map[o][0] = 1

                    # A list storing the indices of cleared input variables
                    input_index = []

                    if self.allow_gc:
                        for i in current_apply.inputs:
                            # Garbage Collection -> check if anybody else uses
                            # this input
                            if dependencies[
                                    i] and i.owner and i not in self.outputs:
                                if all(compute_map[v][0]
                                       for v in dependencies[i]):
                                    storage_map[i][0] = None
                                    input_index.append(
                                        current_apply.inputs.index(i))

                                    # DO NOT set compute_map to 0.

                                    # If the value were set back to False while
                                    # current_apply is still on the stack, the
                                    # node would be recomputed, which can yield
                                    # wrong values with some combinations of
                                    # inplace ops.
                                    compute_map[i][0] = 2
                                    if (config.warn__vm_gc_bug
                                            and current_apply in apply_stack
                                            and getattr(
                                                current_apply.op,
                                                "destroy_map", False)):
                                        warnings.warn(
                                            "There was a bug that existed in "
                                            "the default Aesara configuration,"
                                            " only in the development version "
                                            "between July 5th 2012 and "
                                            "July 30th 2012. This was not in "
                                            "a released version. The bug was "
                                            "affecting this script.",
                                            # The stack level is not good when
                                            # inside a Scan.
                                            stacklevel=3,
                                        )
                    self.node_cleared_order.append(input_index)

                elif not computed_ins:
                    # -- Non-lazy case, need inputs
                    apply_stack.append(current_apply)
                    apply_stack.extend(inp.owner for inp in current_deps
                                       if inp.owner)

            elif not computed_outs:
                #
                # stack loop: Lazy Evaluation Case
                # ================================
                #
                # The lazy evaluation protocol is to run the thunk with the
                # current storage_map and compute_map accessed via closure;
                # the thunk returns the indices of the inputs it still
                # requires.

                try:
                    requires, dt = self.run_thunk_of_node(current_apply)
                    current_idx = self.node_idx[current_apply]
                    self.call_counts[current_idx] += 1
                    self.call_times[current_idx] += dt

                except Exception:
                    raise_with_op(
                        self.fgraph,
                        current_apply,
                        self.thunks[self.node_idx[current_apply]],
                        storage_map=storage_map,
                    )

                if requires:
                    for r in requires:
                        # We are not done with this op, so we put it back on
                        # the stack along with the owners of the inputs it is
                        # still missing.
                        apply_stack.append(current_apply)
                        if current_apply.inputs[r].owner:
                            apply_stack.append(current_apply.inputs[r].owner)
                else:
                    if config.profile or config.print_global_stats:
                        for (idx, o) in enumerate(
                                thunks[self.node_idx[current_apply]].outputs):
                            var = self.nodes[
                                self.node_idx[current_apply]].outputs[idx]

                            if hasattr(var.type, "get_shape_info"):
                                sh = var.type.get_shape_info(o[0])
                            else:
                                sh = "no shape"
                            self.variable_shape[var] = sh
                            st = getattr(o[0], "strides", "no strides")
                            if (getattr(o[0], "flags", False)
                                    and o[0].flags.c_contiguous):
                                st = "c"
                            elif (hasattr(o[0], "is_c_contiguous")
                                  and o[0].is_c_contiguous()):
                                st = "c"
                            self.variable_strides[var] = st
                            off = getattr(o[0], "offset", "")
                            self.variable_offset[var] = off

                    input_index = []

                    if self.allow_gc:
                        for i in current_apply.inputs:
                            if dependencies[
                                    i] and i.owner and i not in self.outputs:
                                empty_storage_map = True
                                for x in dependencies[i]:
                                    if not compute_map[x][0]:
                                        empty_storage_map = False
                                        break
                                if empty_storage_map:
                                    storage_map[i][0] = None
                                    input_index.append(
                                        current_apply.inputs.index(i))
                                    # See the non-lazy gc code above for an
                                    # explanation of the compute_map change.
                                    compute_map[i][0] = 2

                    self.node_cleared_order.append(input_index)

        # Hacky coarse gc final pass
        # This is required until we have a proper gc algorithm for graphs with
        # lazy evaluation. See discussion on theano-dev June 19 2012.
        final_index = []

        if self.allow_gc:
            for v in storage_map:
                if v.owner and v not in self.outputs:
                    if compute_map[v][0] == 2:
                        continue
                    else:
                        storage_map[v][0] = None
                        final_index.append(v)
                        compute_map[v][0] = 2

        self.node_cleared_order.append(final_index)
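
The `while apply_stack:` loop in Example #4 is a depth-first traversal: a popped node either runs, because all of its dependencies are computed, or is pushed back together with the producers of its missing inputs. The toy evaluator below shows only that control flow; the plain dicts standing in for `compute_map`/`storage_map` and the `(input_ids, fn)` node encoding are hypothetical, not Aesara's data structures.

    # nodes: id -> (list of input node ids, function computing the value)
    nodes = {
        "a": ([], lambda: 2),
        "b": ([], lambda: 3),
        "c": (["a", "b"], lambda: storage["a"] + storage["b"]),
    }
    storage, computed = {}, {nid: False for nid in nodes}

    apply_stack = ["c"]                 # the outputs we want
    while apply_stack:
        nid = apply_stack.pop()
        deps, fn = nodes[nid]
        if all(computed[d] for d in deps):
            if not computed[nid]:
                storage[nid] = fn()
                computed[nid] = True
        else:
            # Not ready yet: push the node back, then its missing producers.
            apply_stack.append(nid)
            apply_stack.extend(d for d in deps if not computed[d])

    print(storage["c"])  # 5
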
Example #5
    def make_all(
        self,
        profiler=None,
        input_storage=None,
        output_storage=None,
        storage_map=None,
    ):
        fgraph = self.fgraph
        order = self.schedule(fgraph)

        input_storage, output_storage, storage_map = map_storage(
            fgraph, order, input_storage, output_storage, storage_map)
        compute_map = {}
        for k in storage_map:
            compute_map[k] = [k.owner is None]

        thunks = []

        t0 = time.time()
        linker_make_thunk_time = {}
        impl = None
        if self.c_thunks is False:
            impl = "py"
        for node in order:
            try:
                thunk_start = time.time()
                # No-recycling is done at each VM.__call__, so there is no
                # need to generate duplicate C code by passing no_recycling
                # here.
                thunks.append(
                    node.op.make_thunk(node,
                                       storage_map,
                                       compute_map, [],
                                       impl=impl))
                linker_make_thunk_time[node] = time.time() - thunk_start
                if not hasattr(thunks[-1], "lazy"):
                    # We don't want every Op implementer to have to think
                    # about lazy Ops, so if a thunk doesn't say whether it is
                    # lazy, assume it isn't. Later code relies on this
                    # attribute being present.
                    thunks[-1].lazy = False
            except Exception:
                raise_with_op(fgraph, node)

        t1 = time.time()

        if self.profile:
            self.profile.linker_node_make_thunks += t1 - t0
            self.profile.linker_make_thunk_time = linker_make_thunk_time

        for node, thunk in zip(order, thunks):
            thunk.inputs = [storage_map[v] for v in node.inputs]
            thunk.outputs = [storage_map[v] for v in node.outputs]

        lazy = self.lazy
        if lazy is None:
            lazy = config.vm__lazy
        if lazy is None:
            lazy = any(th.lazy for th in thunks)
        if not (lazy or ((config.profile or config.print_global_stats)
                         and config.profile_memory) or self.use_cloop
                or self.callback or self.callback_input):
            reallocated_vars = self.reduce_storage_allocations(
                storage_map, order)
        else:
            reallocated_vars = ()

        computed, last_user = gc_helper(order)
        if self.allow_gc:
            post_thunk_clear = []
            for node in order:
                clear_after_this_thunk = []
                for input in node.inputs:
                    if (input in computed and input not in fgraph.outputs
                            and node == last_user[input]
                            and input not in reallocated_vars):
                        clear_after_this_thunk.append(storage_map[input])
                post_thunk_clear.append(clear_after_this_thunk)
        else:
            post_thunk_clear = None

        vm = self.make_vm(
            order,
            thunks,
            input_storage,
            output_storage,
            storage_map,
            post_thunk_clear,
            computed,
            compute_map,
            self.updated_vars,
        )

        vm.storage_map = storage_map
        vm.compute_map = compute_map

        return (
            vm,
            [
                Container(input, storage)
                for input, storage in zip(fgraph.inputs, input_storage)
            ],
            [
                Container(output, storage, readonly=True)
                for output, storage in zip(fgraph.outputs, output_storage)
            ],
            thunks,
            order,
        )
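
The decisive wiring step in `make_all` is `thunk.inputs = [storage_map[v] for v in node.inputs]` together with the matching assignment for outputs: producer and consumer look up the same variable in `storage_map` and therefore share the very same one-element cell, so a value written by one thunk is immediately visible to the next. Below is a small sketch of that sharing, with invented variables and thunks.

    # Hypothetical three-cell storage map; "x" feeds mul2, whose output "y"
    # feeds add1, exactly through shared one-element cells.
    storage_map = {"x": [None], "y": [None], "z": [None]}

    def mul2():                       # "producer": y = 2 * x
        storage_map["y"][0] = 2 * storage_map["x"][0]

    def add1():                       # "consumer": z = y + 1
        storage_map["z"][0] = storage_map["y"][0] + 1

    storage_map["x"][0] = 10
    for thunk in (mul2, add1):        # run in dependency order
        thunk()
    print(storage_map["z"][0])        # 21
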