Beispiel #1
0
    def make_vertices(self, model, n_steps):
        """Construct the vertices which will be simulated on the machine.

        The keys of every outgoing standard-port signal are collected, the
        system, key and output regions are built, and the key space is then
        partitioned so that one vertex is created per partition.

        Parameters
        ----------
        model :
            Object providing `machine_timestep` and
            `get_signals_from_object`.
        n_steps :
            Number of steps; forwarded to the system region and used as the
            number of rows of the (zero-initialised) output matrix.

        Returns
        -------
        netlistspec
            A specification with no vertices when there are no outgoing
            signals; otherwise the vertices together with the
            `load_to_machine` and `before_simulation` callbacks.
        """
        # The system region is always created, even if nothing is transmitted.
        self.system_region = SystemRegion(
            model.machine_timestep, self.period is not None, n_steps
        )

        # Without outgoing signals there is nothing to place on the machine.
        outgoing = model.get_signals_from_object(self)
        if len(outgoing) == 0:
            return netlistspec([])

        # Gather the keys of each outgoing connection and remember which
        # transform belongs to which set of transmission parameters.
        all_keys = []
        self.transmission_parameters = tx_params = []
        for signal, pars in outgoing[OutputPort.standard]:
            transform, signal_keys = get_transform_keys(signal, pars)
            all_keys.extend(signal_keys)
            tx_params.append((pars, transform))
        n_keys = len(all_keys)

        # Region containing the keys, partitioned per atom.
        self.keys_region = regions.KeyspacesRegion(
            all_keys,
            [regions.KeyField({"cluster": "cluster"})],
            partitioned_by_atom=True,
        )

        # Region containing one column per key and one row per step.
        self.output_region = regions.MatrixRegion(
            np.zeros((n_steps, n_keys)),
            sliced_dimension=regions.MatrixPartitioning.columns,
        )

        self.regions = [
            self.system_region,
            self.keys_region,
            self.output_region,
        ]

        def slice_width(sl):
            # Number of atoms (keys) contained in the slice.
            return sl.stop - sl.start

        def slice_sdram(sl):
            # Memory needed by all of the regions for the slice.
            return regions.utils.sizeof_regions(self.regions, sl)

        # Each partition is limited in the number of atoms it may contain
        # and in the amount of SDRAM it may use (at most 8MiB).
        limits = {
            partition.Constraint(10): slice_width,
            partition.Constraint(8*2**20): slice_sdram,
        }

        # One vertex, using a single core, is created per partition.
        for atom_slice in partition.partition(slice(0, n_keys), limits):
            requirements = {Cores: 1, SDRAM: slice_sdram(atom_slice)}
            self.vertices.append(
                VertexSlice(atom_slice, self._label,
                            get_application("value_source"), requirements)
            )

        # Hand back the vertices along with the callback methods.
        return netlistspec(self.vertices, self.load_to_machine,
                           self.before_simulation)
    def make_vertices(self, model, n_steps):
        """Make vertices for the filter.

        The transform matrix is divided between a number of processing cores
        spread over one or more chips.  On every chip the input space
        (columns) is divided between the cores of that chip, while the output
        space (rows) is divided between all cores on all chips.

        If either `self.n_cores_per_chip` or `self.n_chips` is `None` both
        values are chosen heuristically and stored back on the object.

        Parameters
        ----------
        model :
            Object providing `get_signals_from_object`,
            `get_signals_to_object`, `machine_timestep`, `dt` and
            `keyspaces.filter_routing_tag`.
        n_steps :
            Not used by this method.

        Returns
        -------
        netlistspec
            The vertices, the `load_to_machine` callback and one same-chip
            constraint for each chip that is used.
        """
        # Get the outgoing transforms and keys
        sigs = model.get_signals_from_object(self)
        if OutputPort.standard in sigs:
            outgoing = sigs[OutputPort.standard]
            transform, output_keys, sigs_pars_slices = \
                get_transforms_and_keys(outgoing)
        else:
            transform = np.array([[]])
            output_keys = list()
            sigs_pars_slices = list()

        size_out = len(output_keys)

        # Calculate how many cores and chips to use.
        if self.n_cores_per_chip is None or self.n_chips is None:
            # The number of cores is largely a function of the input size, we
            # try to ensure that each core is receiving a max of 32 packets per
            # timestep.
            n_cores_per_chip = int(min(16, np.ceil(self.size_in / 32.0)))

            # The number of chips is now determined by the size in (columns in
            # the transform matrix), the size out (rows in the transform
            # matrix) and the number of cores per chip.
            #
            # NOTE: `n_cores_per_chip` is tracked as a *per chip* quantity
            # throughout; it must not be pre-multiplied by the number of
            # chips, otherwise a preset `n_chips` greater than one would be
            # counted twice in the load estimate and could push the stored
            # per-chip core count above the limit of 16.
            n_chips = self.n_chips or 1

            while True:
                # Rows are shared between every core on every chip
                rows_per_core = int(
                    np.ceil(float(size_out) / (n_cores_per_chip * n_chips)))
                load_per_core = rows_per_core * self.size_in

                # The 8,000 limits the number of columns in each row that we
                # need to process. This is a heuristic.
                if load_per_core <= 8000 or n_chips > 9:
                    # The load per core is acceptable or we're using way too
                    # many chips
                    break

                if n_cores_per_chip < 16:
                    # Increase the number of cores per chip if we can
                    n_cores_per_chip += 1
                else:
                    # Otherwise increase the number of chips
                    n_chips += 1

            # Store the result
            self.n_cores_per_chip = n_cores_per_chip
            self.n_chips = n_chips

        # Slice the input space into the given number of subspaces, this is
        # repeated on each chip.
        input_slices = list(
            divide_slice(slice(0, self.size_in), self.n_cores_per_chip))

        # Slice the output space into the given number of subspaces, this is
        # sliced across all of the chips.
        output_slices = divide_slice(slice(0, size_out),
                                     self.n_cores_per_chip * self.n_chips)

        # Construct the output keys and transform regions; the output keys are
        # sliced by atom, and the transform is sliced by rows.
        self.output_keys_region = regions.KeyspacesRegion(
            output_keys,
            fields=[regions.KeyField({'cluster': 'cluster'})],
            partitioned_by_atom=True)
        self.transform_region = regions.MatrixRegion(
            np_to_fix(transform),
            sliced_dimension=regions.MatrixPartitioning.rows)

        # Construct the system region
        self.system_region = SystemRegion(self.size_in, model.machine_timestep)

        # Get the incoming filters
        incoming = model.get_signals_to_object(self)
        self.filters_region, self.routing_region = make_filter_regions(
            incoming[InputPort.standard],
            model.dt,
            True,
            model.keyspaces.filter_routing_tag,
            width=self.size_in)

        # Make the vertices and constraints.  A single iterator over the
        # output slices is shared between all chips so that each chip carries
        # on from where the previous one stopped.
        iter_output_slices = iter(output_slices)
        cons = list()  # List of constraints

        # For each chip that we'll be using
        for _ in range(self.n_chips):
            chip_vertices = list()

            # Each core is given an input slice and an output slice.  The same
            # set of input slices is used per chip, but we iterate through the
            # whole list of output slices.  The input slices must remain the
            # first argument to `zip`: it is then the input slices which are
            # found to be exhausted, so no output slice is consumed and
            # discarded at the end of a chip.
            for in_slice, out_slice in zip(input_slices, iter_output_slices):
                # Determine the amount of SDRAM required (the 24 additional
                # bytes are for the application pointer table).  We also
                # include this core's contribution to a shared SDRAM vector.
                sdram = (24 + 4 * (in_slice.stop - in_slice.start) +
                         self.system_region.sizeof() +
                         self.filters_region.sizeof_padded() +
                         self.routing_region.sizeof_padded() +
                         self.output_keys_region.sizeof_padded(out_slice) +
                         self.transform_region.sizeof_padded(out_slice))

                # Create the vertex and include in the list of vertices
                v = ParallelFilterSlice(in_slice, out_slice, {
                    Cores: 1,
                    SDRAM: sdram
                }, sigs_pars_slices)
                chip_vertices.append(v)
                self.vertices.append(v)

            # Create a constraint which will force all of the vertices to exist
            # on the same chip.
            cons.append(SameChipConstraint(chip_vertices))

        # Return the spec
        return netlistspec(self.vertices,
                           self.load_to_machine,
                           constraints=cons)
Beispiel #3
0
    def __init__(self, label, column_slice, output_slice, transform_region,
                 output_keys, output_slices, machine_timestep, filter_region,
                 filter_routing_region):
        """Allocate a portion of the overall matrix to a single processing
        core.

        Parameters
        ----------
        label :
            Label for this vertex; forwarded to the base-class initialiser.
        column_slice : :py:class:`slice`
            Columns of the transform matrix managed by the group of vertices of
            which we are a member.
        output_slice : :py:class:`slice`
            Slice of the rows of the transform matrix that will be applied by
            this processing core.  Must have explicit `start` and `stop` and
            a step of `None` or 1.
        transform_region : MatrixRegion
            Region stored as the transform region of this core.
        output_keys : [BitField, ...]
            Keys transmitted by filter.
        output_slices : [(TransmissionParameters, set), ...]
            Pairs of transmission parameters and sets containing the row
            indices of the transform matrix corresponding to the transmission
            parameters.
        machine_timestep :
            Forwarded to the system region.
        filter_region :
            Region stored as the input filters region of this core.
        filter_routing_region :
            Region stored as the input routing region of this core.
        """
        # Check that the output slice is safe
        assert (output_slice.start is not None
                and output_slice.stop is not None
                and (output_slice.step is None or output_slice.step == 1))

        # Store information about the slices of the matrix for which we're
        # responsible.
        self.output_slice = output_slice
        self.column_slice = column_slice

        # Store which signal parameter slices we contain
        self.transmission_params = set()
        out_set = set(range(output_slice.start, output_slice.stop))
        for transmission_params, outs in output_slices:
            # If there is an intersection between the outs and the set of outs
            # we're responsible for then store transmission parameters.
            if out_set & outs:
                self.transmission_params.add(transmission_params)

        # Construct the regions
        self.regions = {
            Regions.system:
            SystemRegion(column_slice, output_slice, machine_timestep),
            Regions.transform:
            transform_region,
            Regions.keys:
            regions.KeyspacesRegion(
                output_keys,
                fields=[regions.KeyField(dict(cluster="cluster"))],
                partitioned_by_atom=True),
            Regions.input_filters:
            filter_region,
            Regions.input_routing:
            filter_routing_region,
        }

        # Construct the region arguments
        filter_width = self.column_slice.stop - self.column_slice.start
        self.region_arguments = {
            Regions.transform: Args(vertex_slice=self.output_slice),
            Regions.keys: Args(vertex_slice=self.output_slice),
            Regions.system: Args(),  # No arguments
            # The filters are as wide as the slice of columns we manage
            Regions.input_filters: Args(filter_width=filter_width),
            Regions.input_routing: Args(),  # No arguments
        }

        # Determine the resource requirements and find the correct application
        sdram_usage = regions.utils.sizeof_regions_named(
            self.regions, self.region_arguments)

        # Pass on the `label` argument itself: `self._label` is not assigned
        # anywhere in this initialiser, so reading it here would ignore (or
        # fail to find) the label supplied by the caller.
        super(FilterCore, self).__init__(label=label,
                                         application=get_application("filter"),
                                         resources={
                                             Cores: 1,
                                             SDRAM: sdram_usage
                                         })