def make_vertices(self, cycles):
        """Partition the neurons onto multiple cores."""
        # Make reduced constraints to partition against; we don't partition
        # against SDRAM because we already know there is sufficient SDRAM
        # (and if there isn't, we can't possibly fit all the vertices on a
        # single chip).
        dtcm_constraint = partition.Constraint(64 * 2**10, 0.9)  # 90% of DTCM
        cpu_constraint = partition.Constraint(cycles, 0.8)  # 80% of compute

        # Get the number of neurons in this cluster
        n_neurons = self.neuron_slice.stop - self.neuron_slice.start

        # Form the constraints dictionary
        def _make_constraint(f, size_in, **kwargs):
            """Wrap a usage computation method to work with the partitioner."""
            def f_(neuron_slice, output_slice):
                # Calculate the number of neurons
                n_neurons = neuron_slice.stop - neuron_slice.start

                # Calculate the number of outgoing dimensions
                size_out = output_slice.stop - output_slice.start

                # Call the original method
                return f(size_in, size_out, n_neurons, **kwargs)
            return f_

        constraints = {
            dtcm_constraint: _make_constraint(_lif_dtcm_usage, self.size_in,
                                              n_neurons_in_cluster=n_neurons),
            cpu_constraint: _make_constraint(_lif_cpu_usage, self.size_in,
                                             n_neurons_in_cluster=n_neurons),
        }

        # Partition the slice of neurons that we have
        self.neuron_slices = list()
        output_slices = list()
        for neurons, outputs in partition.partition_multiple(
                (self.neuron_slice, slice(self.size_out)), constraints):
            self.neuron_slices.append(neurons)
            output_slices.append(outputs)

        n_slices = len(self.neuron_slices)
        assert n_slices <= 16, "Too many cores in the cluster"

        # Also partition the input space
        input_slices = partition.divide_slice(slice(0, self.size_in),
                                              n_slices)

        # Zip these together to create the vertices
        all_slices = zip(input_slices, output_slices)
        for i, (in_slice, out_slice) in enumerate(all_slices):
            # Create the vertex
            vertex = EnsembleSlice(i, self.neuron_slices, in_slice,
                                   out_slice, self.regions)

            # Add to the list of vertices
            self.vertices.append(vertex)

        # Return all the vertices
        return self.vertices
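The partitioner above is driven by partition.Constraint objects pairing a hard resource limit with a headroom target (90% of the 64 KiB of DTCM, 80% of the CPU cycles per timestep). Below is a minimal sketch of the acceptance test such a constraint implies; Constraint and fits here are simplified stand-ins for illustration, not the real partition API.

from collections import namedtuple

# Simplified stand-in for partition.Constraint: a hard limit plus the
# fraction of that limit we are willing to use.
Constraint = namedtuple("Constraint", ["limit", "target"])

def fits(constraint, usage):
    """Return True iff `usage` stays within the constraint's headroom."""
    return usage <= constraint.limit * constraint.target

dtcm = Constraint(64 * 2**10, 0.9)  # 90% of the 64 KiB of DTCM
assert fits(dtcm, 50 * 2**10)       # 50 KiB is inside the 57.6 KiB budget
assert not fits(dtcm, 60 * 2**10)   # 60 KiB is not; split further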
    def make_vertices(self, model, n_steps):  # TODO remove n_steps
        """Construct the data which can be loaded into the memory of a
        SpiNNaker machine.
        """
        # Extract all the filters from the incoming connections to build the
        # filter regions.
        signals_conns = model.get_signals_to_object(self)[InputPort.standard]
        filter_region, filter_routing_region = make_filter_regions(
            signals_conns, model.dt, True, model.keyspaces.filter_routing_tag)
        self._routing_region = filter_routing_region

        # Make sufficient vertices to ensure that each has a size_in of at
        # most max_width.
        n_vertices = (
            (self.size_in // self.max_width) +
            (1 if self.size_in % self.max_width else 0)
        )
        self.vertices = tuple(
            ValueSinkVertex(model.machine_timestep, n_steps, sl, filter_region,
                            filter_routing_region) for sl in
            divide_slice(slice(0, self.size_in), n_vertices)
        )

        # Return the spec
        return netlistspec(self.vertices, self.load_to_machine,
                           after_simulation_function=self.after_simulation)
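The n_vertices expression is the usual integer ceiling-division idiom: it rounds size_in / max_width up so that every slice fits within max_width. A quick self-contained check of the idiom against math.ceil (ceil_div is a name introduced here purely for illustration):

import math

def ceil_div(a, b):
    # Same result as the (a // b) + (1 if a % b else 0) idiom above.
    return (a // b) + (1 if a % b else 0)

for size_in in (1, 127, 128, 129, 512):
    assert ceil_div(size_in, 128) == math.ceil(size_in / 128)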
Example #3
    def make_vertices(self, output_signals, machine_timestep, filter_region,
                      filter_routing_region):
        """Partition the transform matrix into groups of rows and assign each
        group of rows to a core for computation.

        If the group needs to be split over multiple chips (i.e., it requires
        more than 17 cores) then partition the matrix such that any chips
        used are used in their entirety.
        """
        if OutputPort.standard not in output_signals:
            self.cores = list()
        else:
            # Get the output transform, keys and slices for this slice of the
            # filter.
            transform, keys, output_slices = \
                get_transforms_and_keys(output_signals[OutputPort.standard],
                                        self.column_slice)

            size_out = transform.shape[0]

            # Build as many vertices as required to keep the number of rows
            # handled by each core at or below max_rows.
            n_cores = (
                (size_out // self.max_rows) +
                (1 if size_out % self.max_rows else 0)
            )

            # Build the transform region for these cores
            transform_region = regions.MatrixRegion(
                np_to_fix(transform),
                sliced_dimension=regions.MatrixPartitioning.rows
            )

            # Build all the vertices
            self.cores = [
                FilterCore(self.column_slice, out_slice,
                           transform_region, keys, output_slices,
                           machine_timestep,
                           filter_region, filter_routing_region) for
                out_slice in divide_slice(slice(0, size_out), n_cores)
            ]

        return self.cores
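For concreteness, here is a worked example of the row-partitioning arithmetic with hypothetical numbers: 100 output rows with max_rows = 64 require two cores, and divide_slice is then expected to balance the rows across them rather than filling the first core to its maximum.

size_out, max_rows = 100, 64
n_cores = (size_out // max_rows) + (1 if size_out % max_rows else 0)
assert n_cores == 2  # one core of 64 rows cannot hold all 100 rows
# divide_slice(slice(0, 100), 2) should then yield balanced slices,
# e.g. slice(0, 50) and slice(50, 100), each within max_rows.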
Example #4
    def make_vertices(self, output_signals, machine_timestep, filter_region,
                      filter_routing_region):
        """Partition the transform matrix into groups of rows and assign each
        group of rows to a core for computation.

        If the group needs to be split over multiple chips (i.e., it requires
        more than 17 cores) then partition the matrix such that any chips
        used are used in their entirety.
        """
        if OutputPort.standard not in output_signals:
            self.cores = list()
        else:
            # Get the output transform, keys and slices for this slice of the
            # filter.
            transform, keys, output_slices = \
                get_transforms_and_keys(output_signals[OutputPort.standard],
                                        self.column_slice)

            size_out = transform.shape[0]

            # Build as many vertices as required to keep the number of rows
            # handled by each core at or below max_rows.
            n_cores = ((size_out // self.max_rows) +
                       (1 if size_out % self.max_rows else 0))

            # Build the transform region for these cores
            transform_region = regions.MatrixRegion(
                np_to_fix(transform),
                sliced_dimension=regions.MatrixPartitioning.rows)

            # Build all the vertices
            self.cores = [
                FilterCore(self._label, self.column_slice, out_slice,
                           transform_region, keys, output_slices,
                           machine_timestep, filter_region,
                           filter_routing_region)
                for out_slice in divide_slice(slice(0, size_out), n_cores)
            ]

        return self.cores
Example #5
    def __init__(self, size_in, max_cols=128, max_rows=64):
        """Create a new parallel Filter.

        Parameters
        ----------
        size_in : int
            Width of the filter (length of any incoming signals).
        max_cols : int
        max_rows : int
            Maximum number of columns and rows which may be handled by a single
            processing core. The defaults (128 and 64 respectively) result in
            the overall connection matrix being decomposed such that (a) blocks
            are sufficiently small to be stored in DTCM, and (b) network
            traffic is reduced.
        """
        # NB: max_rows and max_cols determined by experimentation by AM and
        # some modelling by SBF.
        # Create as many groups as necessary to keep the size_in of any group
        # at most max_cols.
        self.size_in = size_in
        n_groups = (size_in // max_cols) + (1 if size_in % max_cols else 0)
        self.groups = tuple(FilterGroup(sl, max_rows) for sl in
                            divide_slice(slice(0, size_in), n_groups))
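As a usage example under assumed numbers: a filter of width 300 with the default max_cols = 128 is decomposed into three column groups. The arithmetic below mirrors the constructor and can be checked without FilterGroup itself.

size_in, max_cols = 300, 128
n_groups = (size_in // max_cols) + (1 if size_in % max_cols else 0)
assert n_groups == 3  # 300 columns split as roughly 100 + 100 + 100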
Example #7
    def __init__(self, size_in, label, max_cols=128, max_rows=64):
        """Create a new parallel Filter.

        Parameters
        ----------
        size_in : int
            Width of the filter (length of any incoming signals).
        label : str
            Label used to identify this filter.
        max_cols : int
        max_rows : int
            Maximum number of columns and rows which may be handled by a single
            processing core. The defaults (128 and 64 respectively) result in
            the overall connection matrix being decomposed such that (a) blocks
            are sufficiently small to be stored in DTCM, and (b) network
            traffic is reduced.
        """
        # NB: max_rows and max_cols determined by experimentation by AM and
        # some modelling by SBF.
        # Create as many groups as necessary to keep the size_in of any group
        # at most max_cols.
        self.size_in = size_in
        n_groups = (size_in // max_cols) + (1 if size_in % max_cols else 0)
        self.groups = tuple(
            FilterGroup(sl, max_rows, "interposer")
            for sl in divide_slice(slice(0, size_in), n_groups))
def test_divide_slice(start, stop, n_items):
    slices = list(pac.divide_slice(slice(start, stop), n_items))
    assert slices[0].start == start
    assert slices[-1].stop == stop
    assert len(slices) == n_items
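The test pins down the contract of divide_slice: the sub-slices must tile the original slice, and exactly n_items of them must be produced. A minimal reference implementation satisfying those assertions is sketched below; the real pac.divide_slice may distribute the remainder differently.

def divide_slice(full_slice, n_slices):
    """Yield n_slices contiguous sub-slices covering full_slice."""
    start, stop = full_slice.start or 0, full_slice.stop
    chunk, rem = divmod(stop - start, n_slices)
    pos = start
    for i in range(n_slices):
        width = chunk + (1 if i < rem else 0)  # spread the remainder evenly
        yield slice(pos, pos + width)
        pos += width

assert list(divide_slice(slice(0, 10), 3)) == \
    [slice(0, 4), slice(4, 7), slice(7, 10)]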
    def make_vertices(self, model, n_steps):
        """Make vertices for the filter."""
        # Get the outgoing transforms and keys
        sigs = model.get_signals_from_object(self)
        if OutputPort.standard in sigs:
            outgoing = sigs[OutputPort.standard]
            transform, output_keys, sigs_pars_slices = \
                get_transforms_and_keys(outgoing)
        else:
            transform = np.array([[]])
            output_keys = list()
            sigs_pars_slices = list()

        size_out = len(output_keys)

        # Calculate how many cores and chips to use.
        if self.n_cores_per_chip is None or self.n_chips is None:
            # The number of cores is largely a function of the input size; we
            # try to ensure that each core receives at most 32 packets per
            # timestep.
            n_cores_per_chip = int(min(16, np.ceil(self.size_in / 32.0)))

            # The number of chips is now determined by the size in (columns in
            # the transform matrix), the size out (rows in the transform
            # matrix) and the number of cores per chip.
            n_chips = self.n_chips or 1
            n_cores = n_cores_per_chip  # cores per chip; grown in the loop

            while True:
                rows_per_core = int(
                    np.ceil(float(size_out) / (n_cores * n_chips)))
                load_per_core = rows_per_core * self.size_in

                # The limit of 8,000 caps the per-core load (rows handled
                # times columns per row); this is a heuristic.
                if load_per_core <= 8000 or n_chips > 9:
                    # The load per core is acceptable or we're using way too
                    # many chips
                    break

                if n_cores < 16:
                    # Increase the number of cores per chip if we can
                    n_cores += 1
                else:
                    # Otherwise increase the number of chips
                    n_chips += 1

            # Store the result
            self.n_cores_per_chip = n_cores
            self.n_chips = n_chips

        # Slice the input space into the given number of subspaces; this
        # division is repeated on each chip.
        input_slices = list(
            divide_slice(slice(0, self.size_in), self.n_cores_per_chip))

        # Slice the output space into the given number of subspaces; these
        # slices are spread across all of the chips.
        output_slices = divide_slice(slice(0, size_out),
                                     self.n_cores_per_chip * self.n_chips)

        # Construct the output keys and transform regions; the output keys are
        # partitioned by atom, and the transform is sliced by rows.
        self.output_keys_region = regions.KeyspacesRegion(
            output_keys,
            fields=[regions.KeyField({'cluster': 'cluster'})],
            partitioned_by_atom=True)
        self.transform_region = regions.MatrixRegion(
            np_to_fix(transform),
            sliced_dimension=regions.MatrixPartitioning.rows)

        # Construct the system region
        self.system_region = SystemRegion(self.size_in, model.machine_timestep)

        # Get the incoming filters
        incoming = model.get_signals_to_object(self)
        self.filters_region, self.routing_region = make_filter_regions(
            incoming[InputPort.standard],
            model.dt,
            True,
            model.keyspaces.filter_routing_tag,
            width=self.size_in)

        # Make the vertices and constraints
        iter_output_slices = iter(output_slices)
        cons = list()  # List of constraints

        # For each chip that we'll be using
        for _ in range(self.n_chips):
            chip_vertices = list()

            # Each core is given an input slice and an output slice.  The same
            # set of input slices is used per chip, but we iterate through the
            # whole list of output slices.
            for in_slice, out_slice in zip(input_slices, iter_output_slices):
                # Determine the amount of SDRAM required (the 24 additional
                # bytes are for the application pointer table).  We also
                # include this core's contribution to a shared SDRAM vector.
                sdram = (24 + 4 * (in_slice.stop - in_slice.start) +
                         self.system_region.sizeof() +
                         self.filters_region.sizeof_padded() +
                         self.routing_region.sizeof_padded() +
                         self.output_keys_region.sizeof_padded(out_slice) +
                         self.transform_region.sizeof_padded(out_slice))

                # Create the vertex and include in the list of vertices
                v = ParallelFilterSlice(in_slice, out_slice, {
                    Cores: 1,
                    SDRAM: sdram
                }, sigs_pars_slices)
                chip_vertices.append(v)
                self.vertices.append(v)

            # Create a constraint which will force all of the vertices onto
            # the same chip.
            cons.append(SameChipConstraint(chip_vertices))

        # Return the spec
        return netlistspec(self.vertices,
                           self.load_to_machine,
                           constraints=cons)
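The core/chip sizing loop above is a small search: grow the per-chip core count towards 16 first, then add chips, until the per-core load (rows times columns) falls under the 8,000-entry heuristic or too many chips are in use. Below is a standalone sketch of that search using the same constants; the function name and return convention are assumptions made here for illustration.

import math

def size_filter(size_in, size_out, max_load=8000, max_cores=16, max_chips=9):
    """Sketch of the sizing search: return (n_cores_per_chip, n_chips)."""
    # Start with enough cores that each receives at most 32 packets.
    n_cores = int(min(max_cores, math.ceil(size_in / 32.0)))
    n_chips = 1
    while True:
        rows_per_core = math.ceil(size_out / (n_cores * n_chips))
        if rows_per_core * size_in <= max_load or n_chips > max_chips:
            return n_cores, n_chips
        if n_cores < max_cores:
            n_cores += 1  # prefer more cores on the same chip
        else:
            n_chips += 1  # only then spill onto additional chips

# 64 input columns start at 2 cores; the load only fits once a third
# core is added: ceil(256 / 3) * 64 = 5504 <= 8000.
assert size_filter(size_in=64, size_out=256) == (3, 1)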