Example #1
    def make_all(
        self,
        profiler=None,
        input_storage=None,
        output_storage=None,
    ):
        expanded_inputs = self.expanded_inputs  # hacky argument-passing workaround
        env = self.env
        order = list(env.toposort())
        no_recycling = self.no_recycling

        input_storage, output_storage, storage_map = link.map_storage(
            env, order, input_storage, output_storage)
        compute_map = {}
        for k in storage_map:
            compute_map[k] = [k.owner is None]

        thunks = [
            node.op.make_thunk(node, storage_map, compute_map, no_recycling)
            for node in order
        ]

        computed, last_user = link.gc_helper(order)
        if self.allow_gc:
            post_thunk_clear = []
            for node in order:
                clear_after_this_thunk = []
                for input in node.inputs:
                    if ((input in computed) and (input not in env.outputs)
                            and (node == last_user[input])):
                        clear_after_this_thunk.append(storage_map[input])
                post_thunk_clear.append(clear_after_this_thunk)
        else:
            post_thunk_clear = None

        # calculate the update_storage map whose keys are shared var inputs
        # and whose values are the outputs that hold their updates

        updated_vars = {}
        if expanded_inputs:
            # Update the inputs that have an update function
            potential_values = list(env.outputs)
            assert len(expanded_inputs) == len(env.inputs)
            for e_input, ivar in reversed(zip(expanded_inputs, env.inputs)):
                if e_input.update is not None:
                    updated_vars[ivar] = potential_values.pop()

        vm = self.make_vm(order, thunks, input_storage, output_storage,
                          storage_map, post_thunk_clear, computed, compute_map,
                          updated_vars)

        return (vm, [
            link.Container(input, storage)
            for input, storage in zip(env.inputs, input_storage)
        ], [
            link.Container(output, storage, True)
            for output, storage in zip(env.outputs, output_storage)
        ], thunks, order)
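The block above that builds `updated_vars` relies on a positional convention: update expressions are appended to the end of `env.outputs` in the same order as the inputs that own them, so walking the inputs in reverse and popping from the end of the outputs pairs each updated input with its own update expression. Below is a minimal, pure-Python sketch of that pairing; all names and the tiny `In` stand-in class are illustrative, not taken from the code above.

class In(object):
    """Stand-in for an expanded input; only its `update` attribute matters here."""
    def __init__(self, update=None):
        self.update = update

# Imagine f(x, state) with an update rule state <- state + 1: the update
# expression is appended after the ordinary outputs.
env_inputs = ['x', 'state']
expanded_inputs = [In(), In(update='state + 1')]
env_outputs = ['y_out', 'state_update']

updated_vars = {}
potential_values = list(env_outputs)
for e_input, ivar in reversed(list(zip(expanded_inputs, env_inputs))):
    if e_input.update is not None:
        updated_vars[ivar] = potential_values.pop()

print(updated_vars)  # {'state': 'state_update'}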
Example #2
File: vm.py Project: ouais/Theano
    def make_all(self, profiler=None, input_storage=None,
                 output_storage=None,
                ):
        fgraph = self.fgraph
        order = self.schedule(fgraph)
        no_recycling = self.no_recycling

        input_storage, output_storage, storage_map = link.map_storage(
                fgraph, order, input_storage, output_storage)
        compute_map = {}
        for k in storage_map:
            compute_map[k] = [k.owner is None]

        thunks = [
            node.op.make_thunk(node, storage_map, compute_map, no_recycling)
            for node in order
        ]
        for node, thunk in zip(order, thunks):
            thunk.inputs = [storage_map[v] for v in node.inputs]
            thunk.outputs = [storage_map[v] for v in node.outputs]

        computed, last_user = link.gc_helper(order)
        if self.allow_gc:
            post_thunk_clear = []
            for node in order:
                clear_after_this_thunk = []
                for input in node.inputs:
                    if ((input in computed)
                            and (input not in fgraph.outputs)
                            and (node == last_user[input])):
                        clear_after_this_thunk.append(storage_map[input])
                post_thunk_clear.append(clear_after_this_thunk)
        else:
            post_thunk_clear = None

        vm = self.make_vm(order, thunks,
                input_storage, output_storage, storage_map,
                post_thunk_clear,
                computed,
                compute_map,
                self.updated_vars
                )

        return (vm,
                [link.Container(input, storage)
                 for input, storage in zip(fgraph.inputs, input_storage)],
                [link.Container(output, storage, True)
                 for output, storage in zip(fgraph.outputs, output_storage)],
                thunks,
                order)
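The `thunk.inputs`/`thunk.outputs` wiring above depends on `storage_map` giving every variable a one-element list that acts as a shared storage cell: because the producing and the consuming thunks hold the very same list object, a producer's write becomes visible to the consumer without any copying. A minimal sketch of that convention (the names are illustrative, not from the listing):

cell = [None]                # storage cell for one intermediate variable

producer_outputs = [cell]    # what thunk.outputs would hold for the node computing it
consumer_inputs = [cell]     # what thunk.inputs would hold for a later node using it

producer_outputs[0][0] = 42          # the producer thunk writes its result
assert consumer_inputs[0][0] == 42   # the consumer thunk sees it immediately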
Example #3
class VM_Linker(link.LocalLinker):
    """
    Class that satisfies the Linker interface by acting as a VM factory.
    """
    def __init__(self,
                 allow_gc=None,
                 use_cloop=False,
                 callback=None,
                 lazy=None,
                 schedule=None):
        """
        allow_gc - force the virtual machine to clean up unnecessary
            references, in order to allow garbage collection on
            intermediate values during computation of a function.
            If None, default to the value of the Theano flag allow_gc.

        use_cloop - use the C-based virtual machine if possible

        callback - a callable object to call after each call to a thunk within
            the virtual machine.  It will be called with four arguments called
            'node', 'thunk', 'storage_map', and 'compute_map'.

        lazy - Only used when use_cloop is False. When lazy is None, use the
            value of the Theano flag vm.lazy. If that is also None (the
            default), auto-detect whether lazy evaluation is needed and use
            the appropriate version. If lazy is True or False, force the
            choice between Loop/LoopGC and Stack.

        """
        # Note: if more parameters are added to __init__, make sure to forward
        # them in the "type(self)(...)" call in the "accept" method below.
        if allow_gc is None:
            allow_gc = config.allow_gc
        self.fgraph = None
        self.allow_gc = allow_gc
        self.use_cloop = use_cloop
        self.callback = callback
        self.lazy = lazy
        self.updated_vars = {}
        if schedule:
            self.schedule = schedule

    def accept(self, fgraph, no_recycling=None):
        """
        :param fgraph: a PerformLinker can have accepted one FunctionGraph
            instance at a time.

        :param no_recycling: WRITEME

        :returns: self if fgraph is the first FunctionGraph that has ever been
            associated to self, else, a new VM_Linker associated to fgraph.
        """
        if (config.profile and hasattr(theano, 'sandbox')
                and hasattr(theano.sandbox, 'cuda')
                and theano.sandbox.cuda.cuda_enabled):
            if os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
                raise Exception(
                    "You are running the Theano profiler with CUDA enabled."
                    " Theano GPU ops execution is asynchronous by default."
                    " So by default, the profile is useless."
                    " You must set the environment variable"
                    " CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA driver to"
                    " synchronize the execution to get a meaningful profile.")

        if no_recycling is None:
            no_recycling = []
        if self.fgraph is not None and self.fgraph is not fgraph:
            # Build a new VM_Linker, and call accept on that one.
            # Warning: make sure to forward the correct values of
            # all parameters to __init__ here.
            return type(self)(allow_gc=self.allow_gc,
                              use_cloop=self.use_cloop,
                              callback=self.callback,
                              lazy=self.lazy,
                              schedule=self.schedule).accept(
                                  fgraph, no_recycling)
        self.fgraph = fgraph
        self.no_recycling = no_recycling
        return self

    def accept_var_updates(self, updated_vars):
        self.updated_vars = updated_vars
        # This method simply records in the linker which variables have update
        # expressions.  It does not imply that the linker will actually
        # implement these updates (see need_update_inputs).  This mechanism is
        # admittedly confusing, and it could use some cleaning up. The base
        # Linker object should probably go away completely.

    def compute_gc_dependencies(self, variables):
        """
        Returns dict: variable K -> list of variables [v1, v2, v3, ...]
        for each K in variables.

        The variables v1, v2, ... are the full set of variables that depend
        directly on K. When we know that none of them will need to be
        computed, we know that:
        * K will not need to be computed
        * if K is already computed, it can be released for garbage collection

        Parameters
        ----------
        variables - iterable over the variables used in a graph computation.

        N.B. gc means garbage collection
        """
        dependencies = {}
        for k in variables:
            dependencies[k] = []
            # If k has no owner, it is an input / constant and its value
            # should not be removed from the storage_map because we have no
            # way of getting it back.
            #
            # XXX if k has no clients... what is it doing in the computation?
            if k.owner and k.clients:
                ls = []
                for cl in k.clients:
                    if cl[0] != 'output':
                        ls += cl[0].outputs
                dependencies[k] += ls
        return dependencies

    def make_vm(
        self,
        nodes,
        thunks,
        input_storage,
        output_storage,
        storage_map,
        post_thunk_clear,
        computed,
        compute_map,
        updated_vars,
    ):

        pre_call_clear = [storage_map[v] for v in self.no_recycling]

        if (self.callback is not None
                or (config.profile and config.profile_memory)):

            if self.use_cloop and self.callback is not None:
                logger.warn('CVM does not support callback, using Stack VM.')
            if self.use_cloop and config.profile_memory:
                warnings.warn(
                    'CVM does not support memory profile, using Stack VM.')
            # Needed for allow_gc=True, profiling and storage_map reuse
            deps = self.compute_gc_dependencies(storage_map)
            vm = Stack(nodes,
                       thunks,
                       pre_call_clear,
                       storage_map,
                       compute_map,
                       self.fgraph,
                       self.allow_gc,
                       dependencies=deps,
                       callback=self.callback)
        elif self.use_cloop:
            # create a map from nodes to ints and vars to ints
            nodes_idx = {}
            vars_idx = {}
            for i, node in enumerate(nodes):
                nodes_idx[node] = i
                for v in node.inputs + node.outputs:
                    vars_idx.setdefault(v, len(vars_idx))
            for v in self.fgraph.inputs + self.fgraph.outputs:
                vars_idx.setdefault(v, len(vars_idx))

            nodes_idx_inv = {}
            vars_idx_inv = {}
            for (node, i) in nodes_idx.items():
                nodes_idx_inv[i] = node
            for (var, i) in vars_idx.items():
                vars_idx_inv[i] = var

            # put storage_map and compute_map into an int-based scheme
            n_applies = len(nodes)
            storage_map_list = [
                storage_map[vars_idx_inv[i]] for i in xrange(len(vars_idx_inv))
            ]
            compute_map_list = [
                compute_map[vars_idx_inv[i]] for i in xrange(len(vars_idx_inv))
            ]
            if nodes:
                assert type(storage_map_list[0]) is list
                assert type(compute_map_list[0]) is list

            # Needed for allow_gc=True, profiling and storage_map reuse
            dependency_map = self.compute_gc_dependencies(storage_map)
            dependency_map_list = [[
                vars_idx[d] for d in dependency_map[vars_idx_inv[i]]
            ] for i in xrange(len(vars_idx_inv))]

            # build the pointers to node inputs and offsets
            base_input_output_list = []
            node_n_inputs = []
            node_n_outputs = []
            node_input_offset = []
            node_output_offset = []
            for node in nodes:
                inputs_idx = [vars_idx[v] for v in node.inputs]
                outputs_idx = [vars_idx[v] for v in node.outputs]
                node_n_inputs.append(len(inputs_idx))
                node_n_outputs.append(len(outputs_idx))
                node_input_offset.append(len(base_input_output_list))
                base_input_output_list.extend(inputs_idx)
                node_output_offset.append(len(base_input_output_list))
                base_input_output_list.extend(outputs_idx)

            # build the var owner array
            var_owner = [None] * len(vars_idx)
            for (var, i) in vars_idx.items():
                if var.owner:
                    var_owner[i] = nodes_idx[var.owner]

            is_lazy_list = [int(th.lazy) for th in thunks]
            output_vars = [vars_idx[v] for v in self.fgraph.outputs]

            # builds the list of prereqs induced by e.g. destroy_handler
            ords = self.fgraph.orderings()
            node_prereqs = []
            node_output_size = []
            for i, node in enumerate(nodes):
                node_output_size.append(0)
                prereq_var_idxs = []
                for prereq_node in ords.get(node, []):
                    prereq_var_idxs.extend(
                        [vars_idx[v] for v in prereq_node.outputs])
                prereq_var_idxs = list(set(prereq_var_idxs))
                prereq_var_idxs.sort()  # TODO: why sort?
                node_prereqs.append(prereq_var_idxs)

            # Builds the list of input storage to update (according to update
            # rules) when the outputs are computed.
            # They are in the same order as the second part of output_vars
            # (output_vars contains first the returned outputs, then the
            # values of the update expressions).
            update_storage = []
            update_in_from_out = {}
            for (ivar, ovar) in updated_vars.items():
                update_in_from_out[vars_idx[ovar]] = vars_idx[ivar]
            for oidx in output_vars:
                if oidx in update_in_from_out:
                    update_storage.append(update_in_from_out[oidx])

            c0 = sys.getrefcount(node_n_inputs)
            vm = CVM(
                nodes,
                thunks,
                pre_call_clear,
                allow_gc=self.allow_gc,
                call_counts=[0] * len(nodes),
                call_times=[0.0] * len(nodes),
                compute_map_list=compute_map_list,
                storage_map_list=storage_map_list,
                base_input_output_list=base_input_output_list,
                node_n_inputs=node_n_inputs,
                node_n_outputs=node_n_outputs,
                node_input_offset=node_input_offset,
                node_output_offset=node_output_offset,
                var_owner=var_owner,
                is_lazy_list=is_lazy_list,
                output_vars=output_vars,
                node_prereqs=node_prereqs,
                node_output_size=node_output_size,
                update_storage=update_storage,
                dependencies=dependency_map_list,
            )
            assert c0 == sys.getrefcount(node_n_inputs)
        else:
            lazy = self.lazy
            if lazy is None:
                lazy = config.vm.lazy
            if lazy is None:
                lazy = not all([(not th.lazy) for th in thunks])
            if not lazy:
                # there is no conditional in the graph
                if self.allow_gc:
                    vm = LoopGC(
                        nodes,
                        thunks,
                        pre_call_clear,
                        post_thunk_clear,
                    )
                else:
                    vm = Loop(
                        nodes,
                        thunks,
                        pre_call_clear,
                    )
            else:
                # Needed when allow_gc=True and profiling
                deps = self.compute_gc_dependencies(storage_map)
                vm = Stack(nodes,
                           thunks,
                           pre_call_clear,
                           storage_map,
                           compute_map,
                           self.fgraph,
                           self.allow_gc,
                           dependencies=deps)
        return vm

    def make_all(
        self,
        profiler=None,
        input_storage=None,
        output_storage=None,
    ):
        fgraph = self.fgraph
        order = self.schedule(fgraph)
        no_recycling = self.no_recycling

        input_storage, output_storage, storage_map = link.map_storage(
            fgraph, order, input_storage, output_storage)
        compute_map = {}
        for k in storage_map:
            compute_map[k] = [k.owner is None]

        thunks = []

        # Collect Reallocation Info
        compute_map_re = defaultdict(lambda: [0])
        for var in fgraph.inputs:
            compute_map_re[var][0] = 1

        if getattr(fgraph.profile, 'dependencies', None):
            dependencies = getattr(fgraph.profile, 'dependencies')
        else:
            dependencies = self.compute_gc_dependencies(storage_map)

        reallocated_info = calculate_reallocate_info(order, fgraph,
                                                     storage_map,
                                                     compute_map_re,
                                                     dependencies)

        for node in order:
            try:
                thunks.append(
                    node.op.make_thunk(node, storage_map, compute_map,
                                       no_recycling))
                if not hasattr(thunks[-1], 'lazy'):
                    # We don't want every op maker to have to think about
                    # lazy Ops, so if a thunk does not say whether it is
                    # lazy, assume it is not. If this attribute were missing,
                    # the VM would crash later.
                    thunks[-1].lazy = False
            except Exception as e:
                e.args = ("The following error happened while"
                          " compiling the node", node, "\n") + e.args
                raise
        for node, thunk in zip(order, thunks):
            thunk.inputs = [storage_map[v] for v in node.inputs]
            thunk.outputs = [storage_map[v] for v in node.outputs]

        lazy = self.lazy
        if lazy is None:
            lazy = config.vm.lazy
        if lazy is None:
            lazy = not all([(not th.lazy) for th in thunks])
        if not (lazy or (config.profile and config.profile_memory)
                or self.use_cloop or self.callback):
            for pair in reallocated_info.values():
                storage_map[pair[1]] = storage_map[pair[0]]

        computed, last_user = link.gc_helper(order)
        if self.allow_gc:
            post_thunk_clear = []
            for node in order:
                clear_after_this_thunk = []
                for input in node.inputs:
                    if ((input in computed) and (input not in fgraph.outputs)
                            and (node == last_user[input])
                            and input not in reallocated_info.keys()):
                        clear_after_this_thunk.append(storage_map[input])
                post_thunk_clear.append(clear_after_this_thunk)
        else:
            post_thunk_clear = None

        vm = self.make_vm(
            order,
            thunks,
            input_storage,
            output_storage,
            storage_map,
            post_thunk_clear,
            computed,
            compute_map,
            self.updated_vars,
        )

        vm.storage_map = storage_map

        return (vm, [
            link.Container(input, storage)
            for input, storage in zip(fgraph.inputs, input_storage)
        ], [
            link.Container(output, storage, True)
            for output, storage in zip(fgraph.outputs, output_storage)
        ], thunks, order)
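Taken together, the class above is used roughly as follows: build a FunctionGraph, hand it to the linker with `accept`, then call `make_all` to obtain the virtual machine and its I/O containers. The following is a hedged sketch, assuming a working legacy Theano installation; import paths and some details vary between Theano versions, and the explicit `fgraph.profile` line is only there because the make_all shown above reads that attribute.

import numpy as np
import theano.tensor as T
from theano.gof import FunctionGraph
from theano.gof.vm import VM_Linker

x = T.dscalar('x')
fgraph = FunctionGraph([x], [x * 2])        # a tiny, un-optimized graph
if not hasattr(fgraph, 'profile'):
    fgraph.profile = None                   # make_all above dereferences fgraph.profile

linker = VM_Linker(allow_gc=True, use_cloop=False)
linker = linker.accept(fgraph)              # binds fgraph (or returns a fresh linker)
vm, in_containers, out_containers, thunks, order = linker.make_all()

# Storage cells hold already-filtered values; no type conversion happens here.
in_containers[0].storage[0] = np.asarray(3.0)
vm()                                        # run every thunk once, in schedule order
print(out_containers[0].storage[0])         # should print something like array(6.0)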
Example #4
    def make_all(self, profiler=None, input_storage=None,
                 output_storage=None,
                 ):
        fgraph = self.fgraph
        order = self.schedule(fgraph)
        no_recycling = self.no_recycling

        input_storage, output_storage, storage_map = link.map_storage(
            fgraph, order, input_storage, output_storage)
        compute_map = {}
        for k in storage_map:
            compute_map[k] = [k.owner is None]

        thunks = []

        # Collect Reallocation Info
        compute_map_re = defaultdict(lambda: [0])
        for var in fgraph.inputs:
            compute_map_re[var][0] = 1

        if getattr(fgraph.profile, 'dependencies', None):
            dependencies = getattr(fgraph.profile, 'dependencies')
        else:
            dependencies = self.compute_gc_dependencies(storage_map)

        reallocated_info = calculate_reallocate_info(order, fgraph,
                                                     storage_map,
                                                     compute_map_re,
                                                     dependencies)

        for node in order:
            try:
                thunks.append(node.op.make_thunk(node,
                                                 storage_map,
                                                 compute_map,
                                                 no_recycling))
                if not hasattr(thunks[-1], 'lazy'):
                    # We don't want every op maker to have to think about
                    # lazy Ops, so if a thunk does not say whether it is
                    # lazy, assume it is not. If this attribute were missing,
                    # the VM would crash later.
                    thunks[-1].lazy = False
            except Exception as e:
                e.args = ("The following error happened while"
                          " compiling the node", node, "\n") + e.args
                raise
        for node, thunk in zip(order, thunks):
            thunk.inputs = [storage_map[v] for v in node.inputs]
            thunk.outputs = [storage_map[v] for v in node.outputs]

        lazy = self.lazy
        if lazy is None:
            lazy = config.vm.lazy
        if lazy is None:
            lazy = not all([(not th.lazy) for th in thunks])
        if not (lazy or (config.profile and config.profile_memory)
                or self.use_cloop or self.callback):
            for pair in reallocated_info.values():
                storage_map[pair[1]] = storage_map[pair[0]]

        computed, last_user = link.gc_helper(order)
        if self.allow_gc:
            post_thunk_clear = []
            for node in order:
                clear_after_this_thunk = []
                for input in node.inputs:
                    if ((input in computed)
                            and (input not in fgraph.outputs)
                            and (node == last_user[input])
                            and input not in reallocated_info.keys()):
                        clear_after_this_thunk.append(storage_map[input])
                post_thunk_clear.append(clear_after_this_thunk)
        else:
            post_thunk_clear = None

        vm = self.make_vm(order, thunks,
                          input_storage, output_storage, storage_map,
                          post_thunk_clear,
                          computed,
                          compute_map,
                          self.updated_vars,
                          )

        vm.storage_map = storage_map

        return (vm,
                [link.Container(input, storage)
                 for input, storage in zip(fgraph.inputs, input_storage)],
                [link.Container(output, storage, True)
                 for output, storage in zip(fgraph.outputs, output_storage)],
                thunks,
                order)
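The reallocation step above (`storage_map[pair[1]] = storage_map[pair[0]]`) makes two variables share one storage cell, so the later variable can reuse the earlier one's buffer instead of allocating a new one. Below is a small sketch of the effect; the `(original, reuser)` shape of the pairs is inferred from the loop, since `calculate_reallocate_info` itself is not shown, and the variable names are made up.

storage_map = {'a': [None], 'b': [None]}   # one cell per variable, initially distinct
reallocated_info = {'a': ('a', 'b')}       # inferred shape: b may reuse a's storage

for pair in reallocated_info.values():
    storage_map[pair[1]] = storage_map[pair[0]]

assert storage_map['a'] is storage_map['b']   # both names now point at one shared cell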
Example #5
    def make_all(self, profiler=None, input_storage=None,
                 output_storage=None,
                 ):
        expanded_inputs = self.expanded_inputs  # hacky argument-passing workaround
        env = self.env
        order = list(env.toposort())
        no_recycling = self.no_recycling

        input_storage, output_storage, storage_map = link.map_storage(
                env, order, input_storage, output_storage)
        compute_map = {}
        for k in storage_map:
            compute_map[k] = [k.owner is None]

        thunks = [
            node.op.make_thunk(node, storage_map, compute_map, no_recycling)
            for node in order
        ]

        computed, last_user = link.gc_helper(order)
        if self.allow_gc:
            post_thunk_clear = []
            for node in order:
                clear_after_this_thunk = []
                for input in node.inputs:
                    if ((input in computed)
                            and (input not in env.outputs)
                            and (node == last_user[input])):
                        clear_after_this_thunk.append(storage_map[input])
                post_thunk_clear.append(clear_after_this_thunk)
        else:
            post_thunk_clear = None

        # calculate the update_storage map whose keys are shared var inputs
        # and whose values are the outputs that hold their updates

        updated_vars = {}
        if expanded_inputs:
            # Update the inputs that have an update function
            potential_values = list(env.outputs)
            assert len(expanded_inputs) == len(env.inputs)
            for e_input, ivar in reversed(zip(expanded_inputs, env.inputs)):
                if e_input.update is not None:
                    updated_vars[ivar] = potential_values.pop()

        vm = self.make_vm(order, thunks,
                input_storage, output_storage, storage_map,
                post_thunk_clear,
                computed,
                compute_map,
                updated_vars
                )

        return (vm,
                [link.Container(input, storage)
                    for input, storage in zip(env.inputs, input_storage)],
                [link.Container(output, storage, True)
                    for output, storage in zip(env.outputs, output_storage)],
                thunks,
                order)
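Across all of these variants, `make_all` returns the same five-part tuple: the virtual machine, one `Container` per graph input, one per graph output, the list of thunks, and the execution order. The sketch below shows how a caller typically drives that tuple; `linker` and `prepared_input_values` are assumptions, and the values placed in the input cells must already match the variables' types, since no filtering happens at this level.

vm, in_containers, out_containers, thunks, order = linker.make_all()

# Each Container wraps a one-element storage list shared with the thunks.
for container, value in zip(in_containers, prepared_input_values):
    container.storage[0] = value

vm()   # execute every thunk once, following `order`

results = [container.storage[0] for container in out_containers]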