def _launch_job(self, n_boards, spalloc_kw_args):
    """Create a spalloc job and block until it reports ready.

    :param n_boards: passed to ``Job`` as its first positional argument
    :param spalloc_kw_args: keyword arguments forwarded to ``Job``
    :return: tuple of the ready job and the hostname read from it
    :raises Exception: re-raised from the wait or the hostname lookup, after
        the job has been destroyed
    """
    job = Job(n_boards, **spalloc_kw_args)
    try:
        job.wait_until_ready()
        # Fetch the hostname here, before anything is started, so that
        # hanging doesn't occur later on.
        hostname = job.hostname
    except Exception:
        # Do not leave the job behind when setup fails
        job.destroy()
        raise
    return job, hostname
예제 #2
0
def run_job(job_args, job_kwargs, ip_file_filename):
    """Create a job, wait for it, then run the command or print its details.

    The job is always released on the way out: it is closed when
    ``args.no_destroy`` is set and destroyed (with the recorded reason)
    otherwise.

    :param job_args: positional arguments forwarded to ``Job``
    :param job_kwargs: keyword arguments forwarded to ``Job``
    :param ip_file_filename: file that the job's connections are written to
    :return: 6 if the job could not be created, the non-zero code from
        ``wait_for_job_ready`` if waiting failed, 0 after printing the job
        details, otherwise the return value of ``run_command``
    """
    # Handed to job.destroy(); only replaced by wait_for_job_ready()
    reason = None

    # Creating the job is what first talks to the server
    try:
        job = Job(*job_args, **job_kwargs)
    except (OSError, IOError, ProtocolError, ProtocolTimeoutError) as exc:
        message = "Could not connect to server: {}".format(exc)
        info(t.red(message))
        return 6

    try:
        # Block until the job is ready (the helper keeps the user informed)
        code, reason = wait_for_job_ready(job)
        if code != 0:
            return code

        # The machine is ours: record how to reach it
        write_ips_to_csv(job.connections, ip_file_filename)

        # Optional boot step, only possible when MachineController exists
        if MachineController is not None and args.boot:
            update("Job {}: Booting...", t.yellow, job.id)
            MachineController(job.hostname).boot(job.width, job.height)

        update("Job {}: Ready!", t.green, job.id)

        # With a command, run it; without one, just show the job details
        if args.command:
            return run_command(args.command, job.id, job.machine_name,
                               job.connections, job.width, job.height,
                               ip_file_filename)
        print_info(job.machine_name, job.connections,
                   job.width, job.height, ip_file_filename)
        return 0
    finally:
        # Disconnect the client, destroying the job unless asked not to
        if args.no_destroy:
            job.close()
        else:
            job.destroy(reason)
예제 #3
0
 def _launch_job(self, n_boards, spalloc_kw_args):
     """Create a spalloc job and block until it reports ready.

     :param n_boards: passed to ``Job`` as its first positional argument
     :param spalloc_kw_args: keyword arguments forwarded to ``Job``
     :return: tuple of the ready job and its hostname
     :raises BaseException: anything raised while waiting or reading the
         hostname is re-raised after the job has been destroyed
     """
     job = Job(n_boards, **spalloc_kw_args)
     try:
         job.wait_until_ready()
         # get param from jobs before starting, so that hanging doesn't
         # occur
         return job, job.hostname
     except BaseException:
         # Deliberately broad (replaces a bare ``except:``): the job must be
         # released even on KeyboardInterrupt/SystemExit, and the error is
         # always re-raised.
         job.destroy()
         raise
 def _launch_job(self, n_boards, spalloc_kw_args):
     """
     Create a spalloc job and wait for it to become ready; the job is
     destroyed again if waiting or reading its hostname fails.

     :param int n_boards:
     :param dict(str, str or int) spalloc_kw_args:
     :rtype: tuple(~.Job, str)
     """
     job = Job(n_boards, **spalloc_kw_args)
     try:
         job.wait_until_ready()
         # Fetch the hostname here, before anything is started, so that
         # hanging doesn't occur later on.
         hostname = job.hostname
     except Exception:
         # Do not leave the job behind when setup fails
         job.destroy()
         raise
     return job, hostname
예제 #5
0
    def __init__(self,
                 network,
                 dt=0.001,
                 period=10.0,
                 timescale=1.0,
                 hostname=None,
                 use_spalloc=None,
                 allocation_fudge_factor=0.6):
        """Create a new Simulator with the given network.

        Builds the model, obtains a machine (either the configured host or a
        ``spalloc`` allocation), boots it, places and routes the netlist and
        loads the application.

        Parameters
        ----------
        network :
            Network to build and simulate. Its ``config`` is consulted for
            the ``node_io``, ``node_io_kwargs``, ``placer`` and
            ``placer_kwargs`` settings.
        dt : float
            Length of one simulation step in seconds.
        period : float or None
            Duration of one period of the simulator. This determines how much
            memory will be allocated to store precomputed and probed data.
        timescale : float
            Scaling factor to apply to the simulation, e.g., a value of `0.5`
            will cause the simulation to run at half real-time.
        hostname : string or None
            Hostname of the SpiNNaker machine to use; if None then the machine
            specified in the config file will be used.
        use_spalloc : bool or None
            Allocate a SpiNNaker machine for the simulator using ``spalloc``.
            If None then the setting specified in the config file will be used.

        Other Parameters
        ----------------
        allocation_fudge_factor:
           Fudge factor to allocate more cores than really necessary when using
           `spalloc` to ensure that (a) there are sufficient "live" cores in
           the allocated machine, (b) there is sufficient room for a good place
           and route solution. This should generally be more than 0.1 (10% more
           cores than necessary) to account for the usual rate of missing
           chips.

        Raises
        ------
        NotImplementedError
            If `period` is None; indefinite-run mode is not implemented.
        Exception
            If any core is in an exit, dead, watchdog or runtime-exception
            state once the application has been loaded.
        """
        # Add this simulator to the set of open simulators
        Simulator._add_simulator(self)

        # Create the IO controller
        io_cls = getconfig(network.config, Simulator, "node_io", Ethernet)
        io_kwargs = getconfig(network.config, Simulator, "node_io_kwargs",
                              dict())
        self.io_controller = io_cls(**io_kwargs)

        # Calculate the machine timestep, this is measured in microseconds
        # (hence the 1e6 scaling factor).
        self.timescale = timescale
        machine_timestep = int((dt / timescale) * 1e6)

        # Determine the maximum run-time
        self.max_steps = None if period is None else int(period / dt)

        self.steps = 0  # Steps simulated

        # "Run indefinite" mode (i.e., max_steps=None) would need builders
        # that ignore function of time Nodes and probes; that is not
        # implemented, so the mode is rejected here.
        builder_kwargs = self.io_controller.builder_kwargs
        if self.max_steps is None:
            raise NotImplementedError

        # Create a model from the network, using the IO controller
        logger.debug("Building model")
        start_build = time.time()
        self.model = Model(dt=dt,
                           machine_timestep=machine_timestep,
                           decoder_cache=get_default_decoder_cache())
        self.model.build(network, **builder_kwargs)

        logger.info("Build took {:.3f} seconds".format(time.time() -
                                                       start_build))

        self.model.decoder_cache.shrink()
        self.dt = self.model.dt
        self._closed = False  # Whether the simulator has been closed or not

        self.host_sim = self._create_host_sim()

        # Holder for probe data
        self.data = {}

        # Holder for profiling data
        self.profiler_data = {}

        # Convert the model into a netlist
        logger.info("Building netlist")
        start = time.time()
        self.netlist = self.model.make_netlist(self.max_steps or 0)

        # Determine whether to use a spalloc machine or not
        if use_spalloc is None:
            # Default is to not use spalloc; this is indicated by either the
            # absence of the option in the config file OR the option being set
            # to false.
            use_spalloc = (rc.has_option("spinnaker_machine", "use_spalloc")
                           and rc.getboolean("spinnaker_machine",
                                             "use_spalloc"))

        # Create a controller for the machine and boot if necessary
        self.job = None
        if not use_spalloc or hostname is not None:
            # Use the specified machine rather than trying to get one
            # allocated.
            if hostname is None:
                hostname = rc.get("spinnaker_machine", "hostname")
        else:
            # Attempt to get a machine allocated to us
            from spalloc import Job

            # Determine how many boards to ask for (assuming 16 usable cores
            # per chip and 48 chips per board).
            n_cores = self.netlist.n_cores * (1.0 + allocation_fudge_factor)
            n_boards = int(np.ceil((n_cores / 16.) / 48.))

            # Request the job
            self.job = Job(n_boards)
            logger.info("Allocated job ID %d...", self.job.id)

            # Wait until we're given the machine
            logger.info("Waiting for machine allocation...")
            self.job.wait_until_ready()

            # spalloc recommends a slight delay before attempting to boot the
            # machine, later versions of spalloc server may relax this
            # requirement.
            time.sleep(5.0)

            # Store the hostname
            hostname = self.job.hostname
            logger.info("Using %d board(s) of \"%s\" (%s)",
                        len(self.job.boards), self.job.machine_name, hostname)

        self.controller = MachineController(hostname)
        self.controller.boot()

        # Get a system-info object to place & route against
        logger.info("Getting SpiNNaker machine specification")
        system_info = self.controller.get_system_info()

        # Place & Route
        logger.info("Placing and routing")
        self.netlist.place_and_route(
            system_info,
            place=getconfig(network.config, Simulator, 'placer',
                            rig.place_and_route.place),
            place_kwargs=getconfig(network.config, Simulator, 'placer_kwargs',
                                   {}),
        )

        logger.info("{} cores in use".format(len(self.netlist.placements)))
        chips = set(six.itervalues(self.netlist.placements))
        logger.info("Using {}".format(chips))

        # Prepare the simulator against the placed, allocated and routed
        # netlist.
        self.io_controller.prepare(self.model, self.controller, self.netlist)

        # Load the application
        logger.info("Loading application")
        self.netlist.load_application(self.controller, system_info)

        # Check if any cores are in bad states; if so dump the status and
        # IOBUF of every placed core that is not in sync0 before failing.
        if self.controller.count_cores_in_state(
            ["exit", "dead", "watchdog", "runtime_exception"]):
            for vertex, (x, y) in six.iteritems(self.netlist.placements):
                p = self.netlist.allocations[vertex][Cores].start
                status = self.controller.get_processor_status(p, x, y)
                if status.cpu_state is not AppState.sync0:
                    print("Core ({}, {}, {}) in state {!s}".format(
                        x, y, p, status))
                    print(self.controller.get_iobuf(p, x, y))
            raise Exception("Unexpected core failures.")

        # NOTE: "{:.3f}" (three decimals), matching the build-time message;
        # the previous "{:3f}" only set a minimum field width.
        logger.info("Preparing and loading machine took {:.3f} seconds".format(
            time.time() - start))

        logger.info("Setting router timeout to 16 cycles")
        for x, y in system_info.chips():
            with self.controller(x=x, y=y):
                # Rewrite only the last byte of the 4-byte word at 0xf1000000
                data = self.controller.read(0xf1000000, 4)
                self.controller.write(0xf1000000, data[:-1] + b'\x10')
예제 #6
0
class Simulator(object):
    """SpiNNaker simulator for Nengo models.

    The simulator period determines how much data will be stored on SpiNNaker
    and is the maximum length of simulation allowed before data is transferred
    between the machine and the host PC. If the period is set to `None`
    function of time Nodes will not be optimised and probes will be disabled.
    For any other value simulation lengths of less than or equal to the period
    will be in real-time, longer simulations will be possible but will include
    short gaps when data is transferred between SpiNNaker and the host.

    :py:meth:`~.close` should be called when the simulator will no longer be
    used. This will close all sockets used to communicate with the SpiNNaker
    machine and will leave the machine in a clean state. Failure to call
    `close` may result in later failures. Alternatively `with` may be used::

        sim = nengo_spinnaker.Simulator(network)
        with sim:
            sim.run(10.0)
    """
    _open_simulators = set()

    @classmethod
    def _add_simulator(cls, simulator):
        cls._open_simulators.add(simulator)

    @classmethod
    def _remove_simulator(cls, simulator):
        cls._open_simulators.remove(simulator)

    def __init__(self,
                 network,
                 dt=0.001,
                 period=10.0,
                 timescale=1.0,
                 hostname=None,
                 use_spalloc=None,
                 allocation_fudge_factor=0.6):
        """Create a new Simulator with the given network.

        Builds the model, obtains a machine (either the configured host or a
        ``spalloc`` allocation), boots it, places and routes the netlist and
        loads the application.

        Parameters
        ----------
        network :
            Network to build and simulate. Its ``config`` is consulted for
            the ``node_io``, ``node_io_kwargs``, ``placer`` and
            ``placer_kwargs`` settings.
        dt : float
            Length of one simulation step in seconds.
        period : float or None
            Duration of one period of the simulator. This determines how much
            memory will be allocated to store precomputed and probed data.
        timescale : float
            Scaling factor to apply to the simulation, e.g., a value of `0.5`
            will cause the simulation to run at half real-time.
        hostname : string or None
            Hostname of the SpiNNaker machine to use; if None then the machine
            specified in the config file will be used.
        use_spalloc : bool or None
            Allocate a SpiNNaker machine for the simulator using ``spalloc``.
            If None then the setting specified in the config file will be used.

        Other Parameters
        ----------------
        allocation_fudge_factor:
           Fudge factor to allocate more cores than really necessary when using
           `spalloc` to ensure that (a) there are sufficient "live" cores in
           the allocated machine, (b) there is sufficient room for a good place
           and route solution. This should generally be more than 0.1 (10% more
           cores than necessary) to account for the usual rate of missing
           chips.

        Raises
        ------
        NotImplementedError
            If `period` is None; indefinite-run mode is not implemented.
        Exception
            If any core is in an exit, dead, watchdog or runtime-exception
            state once the application has been loaded.
        """
        # Add this simulator to the set of open simulators
        Simulator._add_simulator(self)

        # Create the IO controller
        io_cls = getconfig(network.config, Simulator, "node_io", Ethernet)
        io_kwargs = getconfig(network.config, Simulator, "node_io_kwargs",
                              dict())
        self.io_controller = io_cls(**io_kwargs)

        # Calculate the machine timestep, this is measured in microseconds
        # (hence the 1e6 scaling factor).
        self.timescale = timescale
        machine_timestep = int((dt / timescale) * 1e6)

        # Determine the maximum run-time
        self.max_steps = None if period is None else int(period / dt)

        self.steps = 0  # Steps simulated

        # "Run indefinite" mode (i.e., max_steps=None) would need builders
        # that ignore function of time Nodes and probes; that is not
        # implemented, so the mode is rejected here.
        builder_kwargs = self.io_controller.builder_kwargs
        if self.max_steps is None:
            raise NotImplementedError

        # Create a model from the network, using the IO controller
        logger.debug("Building model")
        start_build = time.time()
        self.model = Model(dt=dt,
                           machine_timestep=machine_timestep,
                           decoder_cache=get_default_decoder_cache())
        self.model.build(network, **builder_kwargs)

        logger.info("Build took {:.3f} seconds".format(time.time() -
                                                       start_build))

        self.model.decoder_cache.shrink()
        self.dt = self.model.dt
        self._closed = False  # Whether the simulator has been closed or not

        self.host_sim = self._create_host_sim()

        # Holder for probe data
        self.data = {}

        # Holder for profiling data
        self.profiler_data = {}

        # Convert the model into a netlist
        logger.info("Building netlist")
        start = time.time()
        self.netlist = self.model.make_netlist(self.max_steps or 0)

        # Determine whether to use a spalloc machine or not
        if use_spalloc is None:
            # Default is to not use spalloc; this is indicated by either the
            # absence of the option in the config file OR the option being set
            # to false.
            use_spalloc = (rc.has_option("spinnaker_machine", "use_spalloc")
                           and rc.getboolean("spinnaker_machine",
                                             "use_spalloc"))

        # Create a controller for the machine and boot if necessary
        self.job = None
        if not use_spalloc or hostname is not None:
            # Use the specified machine rather than trying to get one
            # allocated.
            if hostname is None:
                hostname = rc.get("spinnaker_machine", "hostname")
        else:
            # Attempt to get a machine allocated to us
            from spalloc import Job

            # Determine how many boards to ask for (assuming 16 usable cores
            # per chip and 48 chips per board).
            n_cores = self.netlist.n_cores * (1.0 + allocation_fudge_factor)
            n_boards = int(np.ceil((n_cores / 16.) / 48.))

            # Request the job
            self.job = Job(n_boards)
            logger.info("Allocated job ID %d...", self.job.id)

            # Wait until we're given the machine
            logger.info("Waiting for machine allocation...")
            self.job.wait_until_ready()

            # spalloc recommends a slight delay before attempting to boot the
            # machine, later versions of spalloc server may relax this
            # requirement.
            time.sleep(5.0)

            # Store the hostname
            hostname = self.job.hostname
            logger.info("Using %d board(s) of \"%s\" (%s)",
                        len(self.job.boards), self.job.machine_name, hostname)

        self.controller = MachineController(hostname)
        self.controller.boot()

        # Get a system-info object to place & route against
        logger.info("Getting SpiNNaker machine specification")
        system_info = self.controller.get_system_info()

        # Place & Route
        logger.info("Placing and routing")
        self.netlist.place_and_route(
            system_info,
            place=getconfig(network.config, Simulator, 'placer',
                            rig.place_and_route.place),
            place_kwargs=getconfig(network.config, Simulator, 'placer_kwargs',
                                   {}),
        )

        logger.info("{} cores in use".format(len(self.netlist.placements)))
        chips = set(six.itervalues(self.netlist.placements))
        logger.info("Using {}".format(chips))

        # Prepare the simulator against the placed, allocated and routed
        # netlist.
        self.io_controller.prepare(self.model, self.controller, self.netlist)

        # Load the application
        logger.info("Loading application")
        self.netlist.load_application(self.controller, system_info)

        # Check if any cores are in bad states; if so dump the status and
        # IOBUF of every placed core that is not in sync0 before failing.
        if self.controller.count_cores_in_state(
            ["exit", "dead", "watchdog", "runtime_exception"]):
            for vertex, (x, y) in six.iteritems(self.netlist.placements):
                p = self.netlist.allocations[vertex][Cores].start
                status = self.controller.get_processor_status(p, x, y)
                if status.cpu_state is not AppState.sync0:
                    print("Core ({}, {}, {}) in state {!s}".format(
                        x, y, p, status))
                    print(self.controller.get_iobuf(p, x, y))
            raise Exception("Unexpected core failures.")

        # NOTE: "{:.3f}" (three decimals), matching the build-time message;
        # the previous "{:3f}" only set a minimum field width.
        logger.info("Preparing and loading machine took {:.3f} seconds".format(
            time.time() - start))

        logger.info("Setting router timeout to 16 cycles")
        for x, y in system_info.chips():
            with self.controller(x=x, y=y):
                # Rewrite only the last byte of the 4-byte word at 0xf1000000
                data = self.controller.read(0xf1000000, 4)
                self.controller.write(0xf1000000, data[:-1] + b'\x10')

    def __enter__(self):
        """Enter a context which will close the simulator when exited."""
        # Return self to allow usage like:
        #
        #     with nengo_spinnaker.Simulator(model) as sim:
        #         sim.run(1.0)
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        """Exit a context and close the simulator."""
        self.close()

    def run(self, time_in_seconds):
        """Simulate for the given length of time."""
        # Determine how many steps to simulate for
        steps = int(np.round(float(time_in_seconds) / self.dt))
        self.run_steps(steps)

    def run_steps(self, steps):
        """Simulate a give number of steps."""
        while steps > 0:
            n_steps = min((steps, self.max_steps))
            self._run_steps(n_steps)
            steps -= n_steps

    def _run_steps(self, steps):
        """Simulate for the given number of steps.

        Prepares the netlist, synchronises the cores through sync0 and sync1,
        steps the host simulator while the expected run time elapses, waits
        for the cores to return to sync0 and finally retrieves the simulation
        data. ``self.steps`` is increased by `steps` on success.

        Raises
        ------
        Exception
            If the simulator has been closed, or if `steps` is None although
            a period (``max_steps``) was specified.
        AssertionError
            If `steps` exceeds ``max_steps``.
        """
        if self._closed:
            raise Exception("Simulator has been closed and can't be used to "
                            "run further simulations.")

        if steps is None:
            if self.max_steps is not None:
                raise Exception(
                    "Cannot run indefinitely if a simulator period was "
                    "specified. Create a new simulator with Simulator(model, "
                    "period=None) to perform indefinite time simulations.")
        else:
            assert steps <= self.max_steps

        # Prepare the simulation
        self.netlist.before_simulation(self, steps)

        # Wait for all cores to hit SYNC0 (either by remaining it or entering
        # it from init)
        self._wait_for_transition(AppState.init, AppState.sync0,
                                  self.netlist.n_cores)
        self.controller.send_signal("sync0")

        # Get a new thread for the IO
        io_thread = self.io_controller.spawn()

        # Run the simulation
        try:
            # Prep: wall-clock duration the run is expected to take
            exp_time = steps * self.dt / self.timescale
            io_thread.start()

            # Wait for all cores to hit SYNC1
            self._wait_for_transition(AppState.sync0, AppState.sync1,
                                      self.netlist.n_cores)
            logger.info("Running simulation...")
            self.controller.send_signal("sync1")

            # Execute the local model
            host_steps = 0
            start_time = time.time()
            run_time = 0.0
            local_timestep = self.dt / self.timescale
            while run_time < exp_time:
                # Run a step
                self.host_sim.step()
                run_time = time.time() - start_time

                # If that step took less than timestep then spin
                time.sleep(0.0001)
                # NOTE(review): host_steps is never incremented, so the
                # bound below is always 0 and this loop body never runs.
                # Pacing the host to one step per local_timestep would need
                # a `host_steps += 1`; left as-is because that would change
                # timing on real hardware - confirm the intent.
                while run_time < host_steps * local_timestep:
                    time.sleep(0.0001)
                    run_time = time.time() - start_time
        finally:
            # Stop the IO thread whatever occurs
            io_thread.stop()

        # Wait for cores to re-enter sync0
        self._wait_for_transition(AppState.run, AppState.sync0,
                                  self.netlist.n_cores)

        # Retrieve simulation data
        start = time.time()
        logger.info("Retrieving simulation data")
        self.netlist.after_simulation(self, steps)
        # "{:.3f}" gives three decimals; the previous "{:3f}" only set a
        # minimum field width.
        logger.info("Retrieving data took {:.3f} seconds".format(time.time() -
                                                                 start))

        # Increase the steps count
        self.steps += steps

    def _wait_for_transition(self, from_state, desired_to_state, num_verts):
        while True:
            # If no cores are still in from_state, stop
            if self.controller.count_cores_in_state(from_state) == 0:
                break

            # Wait a bit
            time.sleep(1.0)

        # Check if any cores haven't exited cleanly
        num_ready = self.controller.wait_for_cores_to_reach_state(
            desired_to_state, num_verts, timeout=5.0)

        if num_ready != num_verts:
            # Loop through all placed vertices
            for vertex, (x, y) in six.iteritems(self.netlist.placements):
                p = self.netlist.allocations[vertex][Cores].start
                status = self.controller.get_processor_status(p, x, y)
                if status.cpu_state is not desired_to_state:
                    print("Core ({}, {}, {}) in state {!s}".format(
                        x, y, p, status.cpu_state))
                    print(self.controller.get_iobuf(p, x, y))

            raise Exception("Unexpected core failures before reaching %s "
                            "state." % desired_to_state)

    def _create_host_sim(self):
        """Build the host-side simulator with wall-clock driven Node outputs.

        Every callable Node output in the IO controller's host network is
        temporarily replaced by a wrapper that ignores the simulator-supplied
        time and instead passes the wall-clock time elapsed since the first
        wrapper call (scaled by ``timescale``). The original outputs are put
        back on the Nodes once the host simulator has been constructed.
        """
        # TODO: improve the reference simulator so that this is not needed
        #       by adding a realtime option
        originals = {}
        clock = dict(start=None)  # shared by all wrappers

        def scaled_elapsed():
            # The first call from any wrapper fixes the time origin
            now = time.time()
            if clock['start'] is None:
                clock['start'] = now
            return (now - clock['start']) * self.timescale

        for node in self.io_controller.host_network.all_nodes:
            if not callable(node.output):
                continue

            old_func = node.output
            if node.size_in == 0:
                # Output takes the time only
                def func(t, f=old_func):
                    return f(scaled_elapsed())
            else:
                # Output takes the time and the Node's input
                def func(t, x, f=old_func):
                    return f(scaled_elapsed(), x)

            node.output = func
            originals[node] = old_func

        # Build the host simulator while the wrappers are in place
        host_sim = nengo.Simulator(self.io_controller.host_network, dt=self.dt)

        # Restore the Nodes' own output functions
        for node, original in originals.items():
            node.output = original

        return host_sim

    def close(self):
        """Clean the SpiNNaker board and prevent further simulation."""
        if not self._closed:
            # Stop the application
            self._closed = True
            self.io_controller.close()
            self.controller.send_signal("stop")

            # Destroy the job if we allocated one
            if self.job is not None:
                self.job.destroy()

            # Remove this simulator from the list of open simulators
            Simulator._remove_simulator(self)

    def trange(self, dt=None):
        return np.arange(1, self.steps + 1) * (self.dt or dt)
예제 #7
0
    def __init__(self, network, dt=0.001, period=10.0, timescale=1.0,
                 hostname=None, use_spalloc=None,
                 allocation_fudge_factor=0.6):
        """Create a new Simulator with the given network.

        Builds the model, obtains a machine (either the configured host or a
        ``spalloc`` allocation), boots it, places and routes the netlist and
        loads the application.

        Parameters
        ----------
        network :
            Network to build and simulate. Its ``config`` is consulted for
            the ``node_io``, ``node_io_kwargs``, ``placer`` and
            ``placer_kwargs`` settings.
        dt : float
            Length of one simulation step in seconds.
        period : float or None
            Duration of one period of the simulator. This determines how much
            memory will be allocated to store precomputed and probed data.
        timescale : float
            Scaling factor to apply to the simulation, e.g., a value of `0.5`
            will cause the simulation to run at half real-time.
        hostname : string or None
            Hostname of the SpiNNaker machine to use; if None then the machine
            specified in the config file will be used.
        use_spalloc : bool or None
            Allocate a SpiNNaker machine for the simulator using ``spalloc``.
            If None then the setting specified in the config file will be used.

        Other Parameters
        ----------------
        allocation_fudge_factor:
           Fudge factor to allocate more cores than really necessary when using
           `spalloc` to ensure that (a) there are sufficient "live" cores in
           the allocated machine, (b) there is sufficient room for a good place
           and route solution. This should generally be more than 0.1 (10% more
           cores than necessary) to account for the usual rate of missing
           chips.

        Raises
        ------
        NotImplementedError
            If `period` is None; indefinite-run mode is not implemented.
        Exception
            If any core is in an exit, dead, watchdog or runtime-exception
            state once the application has been loaded.
        """
        # Add this simulator to the set of open simulators
        Simulator._add_simulator(self)

        # Create the IO controller
        io_cls = getconfig(network.config, Simulator, "node_io", Ethernet)
        io_kwargs = getconfig(network.config, Simulator, "node_io_kwargs",
                              dict())
        self.io_controller = io_cls(**io_kwargs)

        # Calculate the machine timestep, this is measured in microseconds
        # (hence the 1e6 scaling factor).
        self.timescale = timescale
        machine_timestep = int((dt / timescale) * 1e6)

        # Determine the maximum run-time
        self.max_steps = None if period is None else int(period / dt)

        self.steps = 0  # Steps simulated

        # "Run indefinite" mode (i.e., max_steps=None) would need builders
        # that ignore function of time Nodes and probes; that is not
        # implemented, so the mode is rejected here.
        builder_kwargs = self.io_controller.builder_kwargs
        if self.max_steps is None:
            raise NotImplementedError

        # Create a model from the network, using the IO controller
        logger.debug("Building model")
        start_build = time.time()
        self.model = Model(dt=dt, machine_timestep=machine_timestep,
                           decoder_cache=get_default_decoder_cache())
        self.model.build(network, **builder_kwargs)

        logger.info("Build took {:.3f} seconds".format(time.time() -
                                                       start_build))

        self.model.decoder_cache.shrink()
        self.dt = self.model.dt
        self._closed = False  # Whether the simulator has been closed or not

        self.host_sim = self._create_host_sim()

        # Holder for probe data
        self.data = {}

        # Holder for profiling data
        self.profiler_data = {}

        # Convert the model into a netlist
        logger.info("Building netlist")
        start = time.time()
        self.netlist = self.model.make_netlist(self.max_steps or 0)

        # Determine whether to use a spalloc machine or not
        if use_spalloc is None:
            # Default is to not use spalloc; this is indicated by either the
            # absence of the option in the config file OR the option being set
            # to false.
            use_spalloc = (
                rc.has_option("spinnaker_machine", "use_spalloc") and
                rc.getboolean("spinnaker_machine", "use_spalloc"))

        # Create a controller for the machine and boot if necessary
        self.job = None
        if not use_spalloc or hostname is not None:
            # Use the specified machine rather than trying to get one
            # allocated.
            if hostname is None:
                hostname = rc.get("spinnaker_machine", "hostname")
        else:
            # Attempt to get a machine allocated to us
            from spalloc import Job

            # Determine how many boards to ask for (assuming 16 usable cores
            # per chip and 48 chips per board).
            n_cores = self.netlist.n_cores * (1.0 + allocation_fudge_factor)
            n_boards = int(np.ceil((n_cores / 16.) / 48.))

            # Request the job
            self.job = Job(n_boards)
            logger.info("Allocated job ID %d...", self.job.id)

            # Wait until we're given the machine
            logger.info("Waiting for machine allocation...")
            self.job.wait_until_ready()

            # spalloc recommends a slight delay before attempting to boot the
            # machine, later versions of spalloc server may relax this
            # requirement.
            time.sleep(5.0)

            # Store the hostname
            hostname = self.job.hostname
            logger.info("Using %d board(s) of \"%s\" (%s)",
                        len(self.job.boards), self.job.machine_name, hostname)

        self.controller = MachineController(hostname)
        self.controller.boot()

        # Get a system-info object to place & route against
        logger.info("Getting SpiNNaker machine specification")
        system_info = self.controller.get_system_info()

        # Place & Route
        logger.info("Placing and routing")
        self.netlist.place_and_route(
            system_info,
            place=getconfig(network.config, Simulator,
                            'placer', rig.place_and_route.place),
            place_kwargs=getconfig(network.config, Simulator,
                                   'placer_kwargs', {}),
        )

        logger.info("{} cores in use".format(len(self.netlist.placements)))
        chips = set(six.itervalues(self.netlist.placements))
        logger.info("Using {}".format(chips))

        # Prepare the simulator against the placed, allocated and routed
        # netlist.
        self.io_controller.prepare(self.model, self.controller, self.netlist)

        # Load the application
        logger.info("Loading application")
        self.netlist.load_application(self.controller, system_info)

        # Check if any cores are in bad states; if so dump the status and
        # IOBUF of every placed core that is not in sync0 before failing.
        if self.controller.count_cores_in_state(["exit", "dead", "watchdog",
                                                 "runtime_exception"]):
            for vertex, (x, y) in six.iteritems(self.netlist.placements):
                p = self.netlist.allocations[vertex][Cores].start
                status = self.controller.get_processor_status(p, x, y)
                if status.cpu_state is not AppState.sync0:
                    print("Core ({}, {}, {}) in state {!s}".format(
                        x, y, p, status))
                    print(self.controller.get_iobuf(p, x, y))
            raise Exception("Unexpected core failures.")

        # NOTE: "{:.3f}" (three decimals), matching the build-time message;
        # the previous "{:3f}" only set a minimum field width.
        logger.info("Preparing and loading machine took {:.3f} seconds".format(
            time.time() - start
        ))

        logger.info("Setting router timeout to 16 cycles")
        for x, y in system_info.chips():
            with self.controller(x=x, y=y):
                # Rewrite only the last byte of the 4-byte word at 0xf1000000
                data = self.controller.read(0xf1000000, 4)
                self.controller.write(0xf1000000, data[:-1] + b'\x10')
예제 #8
0
class Simulator(object):
    """SpiNNaker simulator for Nengo models.

    The simulator period determines how much data will be stored on SpiNNaker
    and is the maximum length of simulation allowed before data is transferred
    between the machine and the host PC. If the period is set to `None`
    function of time Nodes will not be optimised and probes will be disabled.
    For any other value simulation lengths of less than or equal to the period
    will be in real-time, longer simulations will be possible but will include
    short gaps when data is transferred between SpiNNaker and the host.

    :py:meth:`~.close` should be called when the simulator will no longer be
    used. This will close all sockets used to communicate with the SpiNNaker
    machine and will leave the machine in a clean state. Failure to call
    `close` may result in later failures. Alternatively `with` may be used::

        sim = nengo_spinnaker.Simulator(network)
        with sim:
            sim.run(10.0)
    """
    _open_simulators = set()

    @classmethod
    def _add_simulator(cls, simulator):
        cls._open_simulators.add(simulator)

    @classmethod
    def _remove_simulator(cls, simulator):
        cls._open_simulators.remove(simulator)

    def __init__(self, network, dt=0.001, period=10.0, timescale=1.0,
                 hostname=None, use_spalloc=None,
                 allocation_fudge_factor=0.6):
        """Create a new Simulator with the given network.

        The network is built into a model and a netlist, a machine is
        selected (or allocated through ``spalloc``), booted, and the
        application is placed, routed and loaded onto it.

        Parameters
        ----------
        network :
            Network to build and simulate. Its ``config`` is consulted for
            the Simulator settings ``node_io``, ``node_io_kwargs``,
            ``placer`` and ``placer_kwargs``.
        dt : float
            Length of one simulation step in seconds.
        period : float or None
            Duration of one period of the simulator. This determines how much
            memory will be allocated to store precomputed and probed data.
        timescale : float
            Scaling factor to apply to the simulation, e.g., a value of `0.5`
            will cause the simulation to run at half real-time.
        hostname : string or None
            Hostname of the SpiNNaker machine to use; if None then the machine
            specified in the config file will be used.
        use_spalloc : bool or None
            Allocate a SpiNNaker machine for the simulator using ``spalloc``.
            If None then the setting specified in the config file will be used.

        Other Parameters
        ----------------
        allocation_fudge_factor:
           Fudge factor to allocate more cores than really necessary when using
           `spalloc` to ensure that (a) there are sufficient "live" cores in
           the allocated machine, (b) there is sufficient room for a good place
           and route solution. This should generally be more than 0.1 (10% more
           cores than necessary) to account for the usual rate of missing
           chips.

        Raises
        ------
        NotImplementedError
            If `period` is None (running indefinitely is not supported).
        Exception
            If any core is found in a failure state after loading.
        """
        # Add this simulator to the set of open simulators
        Simulator._add_simulator(self)

        # Create the IO controller
        io_cls = getconfig(network.config, Simulator, "node_io", Ethernet)
        io_kwargs = getconfig(network.config, Simulator, "node_io_kwargs",
                              dict())
        self.io_controller = io_cls(**io_kwargs)

        # Calculate the machine timestep, this is measured in microseconds
        # (hence the 1e6 scaling factor).
        self.timescale = timescale
        machine_timestep = int((dt / timescale) * 1e6)

        # Determine the maximum run-time
        self.max_steps = None if period is None else int(period / dt)

        self.steps = 0  # Steps simulated

        # "Run indefinitely" mode (i.e., max_steps=None) would require the
        # builders to ignore function of time Nodes and probes; this is not
        # implemented.
        builder_kwargs = self.io_controller.builder_kwargs
        if self.max_steps is None:
            raise NotImplementedError

        # Create a model from the network, using the IO controller
        logger.debug("Building model")
        start_build = time.time()
        self.model = Model(dt=dt, machine_timestep=machine_timestep,
                           decoder_cache=get_default_decoder_cache())
        self.model.build(network, **builder_kwargs)

        logger.info("Build took {:.3f} seconds".format(time.time() -
                                                       start_build))

        self.model.decoder_cache.shrink()
        self.dt = self.model.dt
        self._closed = False  # Whether the simulator has been closed or not

        self.host_sim = self._create_host_sim()

        # Holder for probe data
        self.data = {}

        # Holder for profiling data
        self.profiler_data = {}

        # Convert the model into a netlist
        logger.info("Building netlist")
        start = time.time()
        self.netlist = self.model.make_netlist(self.max_steps or 0)

        # Determine whether to use a spalloc machine or not
        if use_spalloc is None:
            # Default is to not use spalloc; this is indicated by either the
            # absence of the option in the config file OR the option being set
            # to false.
            use_spalloc = (
                rc.has_option("spinnaker_machine", "use_spalloc") and
                rc.getboolean("spinnaker_machine", "use_spalloc"))

        # Create a controller for the machine and boot if necessary
        self.job = None
        try:
            if not use_spalloc or hostname is not None:
                # Use the specified machine rather than trying to get one
                # allocated.
                if hostname is None:
                    hostname = rc.get("spinnaker_machine", "hostname")
            else:
                # Attempt to get a machine allocated to us
                hostname = self._allocate_machine(allocation_fudge_factor)

            system_info = self._load_machine(network, hostname)

            logger.info(
                "Preparing and loading machine took {:.3f} seconds".format(
                    time.time() - start
                ))

            # Overwrite the final byte of the word read from 0xf1000000 on
            # every chip with 0x10 (16).
            logger.info("Setting router timeout to 16 cycles")
            for x, y in system_info.chips():
                with self.controller(x=x, y=y):
                    data = self.controller.read(0xf1000000, 4)
                    self.controller.write(0xf1000000, data[:-1] + b'\x10')
        except BaseException:
            # Construction failed (or was interrupted): the caller never
            # receives this object and so cannot call `close`; release any
            # boards we were allocated rather than leaking the job.
            if self.job is not None:
                self.job.destroy()
                self.job = None
            raise

    def _allocate_machine(self, allocation_fudge_factor):
        """Allocate boards through spalloc and return the machine hostname.

        The job is stored in ``self.job`` as soon as it is created so that the
        caller can destroy it should any later step fail.
        """
        from spalloc import Job

        # Determine how many boards to ask for (assuming 16 usable cores
        # per chip and 48 chips per board).
        n_cores = self.netlist.n_cores * (1.0 + allocation_fudge_factor)
        n_boards = int(np.ceil((n_cores / 16.) / 48.))

        # Request the job
        self.job = Job(n_boards)
        logger.info("Allocated job ID %d...", self.job.id)

        # Wait until we're given the machine
        logger.info("Waiting for machine allocation...")
        self.job.wait_until_ready()

        # spalloc recommends a slight delay before attempting to boot the
        # machine, later versions of spalloc server may relax this
        # requirement.
        time.sleep(5.0)

        # Report and return the hostname
        hostname = self.job.hostname
        logger.info("Using %d board(s) of \"%s\" (%s)",
                    len(self.job.boards), self.job.machine_name, hostname)
        return hostname

    def _load_machine(self, network, hostname):
        """Boot `hostname`, place and route the netlist and load it.

        Returns the system-info object the netlist was placed against.
        """
        self.controller = MachineController(hostname)
        self.controller.boot()

        # Get a system-info object to place & route against
        logger.info("Getting SpiNNaker machine specification")
        system_info = self.controller.get_system_info()

        # Place & Route
        logger.info("Placing and routing")
        self.netlist.place_and_route(
            system_info,
            place=getconfig(network.config, Simulator,
                            'placer', rig.place_and_route.place),
            place_kwargs=getconfig(network.config, Simulator,
                                   'placer_kwargs', {}),
        )

        logger.info("{} cores in use".format(len(self.netlist.placements)))
        chips = set(six.itervalues(self.netlist.placements))
        logger.info("Using {}".format(chips))

        # Prepare the simulator against the placed, allocated and routed
        # netlist.
        self.io_controller.prepare(self.model, self.controller, self.netlist)

        # Load the application
        logger.info("Loading application")
        self.netlist.load_application(self.controller, system_info)

        # Check if any cores are in bad states
        if self.controller.count_cores_in_state(["exit", "dead", "watchdog",
                                                 "runtime_exception"]):
            # Dump the status and IO buffer of every core that is not
            # waiting at sync0 before giving up.
            for vertex, (x, y) in six.iteritems(self.netlist.placements):
                p = self.netlist.allocations[vertex][Cores].start
                status = self.controller.get_processor_status(p, x, y)
                if status.cpu_state is not AppState.sync0:
                    print("Core ({}, {}, {}) in state {!s}".format(
                        x, y, p, status))
                    print(self.controller.get_iobuf(p, x, y))
            raise Exception("Unexpected core failures.")

        return system_info

    def __enter__(self):
        """Enter a context which will close the simulator when exited."""
        # Return self to allow usage like:
        #
        #     with nengo_spinnaker.Simulator(model) as sim:
        #         sim.run(1.0)
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        """Exit a context and close the simulator."""
        self.close()

    def run(self, time_in_seconds):
        """Simulate for the given length of time."""
        # Determine how many steps to simulate for
        steps = int(np.round(float(time_in_seconds) / self.dt))
        self.run_steps(steps)

    def run_steps(self, steps):
        """Simulate a give number of steps."""
        while steps > 0:
            n_steps = min((steps, self.max_steps))
            self._run_steps(n_steps)
            steps -= n_steps

    def _run_steps(self, steps):
        """Simulate for the given number of steps."""
        if self._closed:
            raise Exception("Simulator has been closed and can't be used to "
                            "run further simulations.")

        if steps is None:
            if self.max_steps is not None:
                raise Exception(
                    "Cannot run indefinitely if a simulator period was "
                    "specified. Create a new simulator with Simulator(model, "
                    "period=None) to perform indefinite time simulations."
                )
        else:
            assert steps <= self.max_steps

        # Prepare the simulation
        self.netlist.before_simulation(self, steps)

        # Wait for all cores to hit SYNC0 (either by remaining it or entering
        # it from init)
        self._wait_for_transition(AppState.init, AppState.sync0,
                                  self.netlist.n_cores)
        self.controller.send_signal("sync0")

        # Get a new thread for the IO
        io_thread = self.io_controller.spawn()

        # Run the simulation
        try:
            # Prep
            exp_time = steps * self.dt / self.timescale
            io_thread.start()

            # Wait for all cores to hit SYNC1
            self._wait_for_transition(AppState.sync0, AppState.sync1,
                                      self.netlist.n_cores)
            logger.info("Running simulation...")
            self.controller.send_signal("sync1")

            # Execute the local model
            host_steps = 0
            start_time = time.time()
            run_time = 0.0
            local_timestep = self.dt / self.timescale
            while run_time < exp_time:
                # Run a step
                self.host_sim.step()
                run_time = time.time() - start_time

                # If that step took less than timestep then spin
                time.sleep(0.0001)
                while run_time < host_steps * local_timestep:
                    time.sleep(0.0001)
                    run_time = time.time() - start_time
        finally:
            # Stop the IO thread whatever occurs
            io_thread.stop()

        # Wait for cores to re-enter sync0
        self._wait_for_transition(AppState.run, AppState.sync0,
                                  self.netlist.n_cores)

        # Retrieve simulation data
        start = time.time()
        logger.info("Retrieving simulation data")
        self.netlist.after_simulation(self, steps)
        logger.info("Retrieving data took {:3f} seconds".format(
            time.time() - start
        ))

        # Increase the steps count
        self.steps += steps

    def _wait_for_transition(self, from_state, desired_to_state, num_verts):
        while True:
            # If no cores are still in from_state, stop
            if self.controller.count_cores_in_state(from_state) == 0:
                break

            # Wait a bit
            time.sleep(1.0)

        # Check if any cores haven't exited cleanly
        num_ready = self.controller.wait_for_cores_to_reach_state(
            desired_to_state, num_verts, timeout=5.0)

        if num_ready != num_verts:
            # Loop through all placed vertices
            for vertex, (x, y) in six.iteritems(self.netlist.placements):
                p = self.netlist.allocations[vertex][Cores].start
                status = self.controller.get_processor_status(p, x, y)
                if status.cpu_state is not desired_to_state:
                    print("Core ({}, {}, {}) in state {!s}".format(
                        x, y, p, status.cpu_state))
                    print(self.controller.get_iobuf(p, x, y))

            raise Exception("Unexpected core failures before reaching %s "
                            "state." % desired_to_state)

    def _create_host_sim(self):
        """Build the simulator for the part of the network run on the host.

        While the host simulator is being built, every callable Node output
        in the IO controller's host network is temporarily replaced by a
        wrapper which ignores the time it is given and instead passes the
        wall-clock time elapsed since the first wrapper call, scaled by
        ``self.timescale``. The original outputs are put back on the nodes
        afterwards, even if building the simulator fails.

        Returns
        -------
        The `nengo.Simulator` built from the host network with step
        ``self.dt``.
        """
        # change node_functions to reflect time
        # TODO: improve the reference simulator so that this is not needed
        #       by adding a realtime option
        node_functions = {}
        node_info = dict(start=None)  # shared start time for all wrappers
        try:
            for node in self.io_controller.host_network.all_nodes:
                if callable(node.output):
                    old_func = node.output
                    # `f=old_func` binds the current function at definition
                    # time, avoiding the late-binding closure pitfall.
                    if node.size_in == 0:
                        def func(t, f=old_func):
                            now = time.time()
                            if node_info['start'] is None:
                                node_info['start'] = now

                            t = (now - node_info['start']) * self.timescale
                            return f(t)
                    else:
                        def func(t, x, f=old_func):
                            now = time.time()
                            if node_info['start'] is None:
                                node_info['start'] = now

                            t = (now - node_info['start']) * self.timescale
                            return f(t, x)
                    node.output = func
                    node_functions[node] = old_func

            # Build the host simulator
            host_sim = nengo.Simulator(self.io_controller.host_network,
                                       dt=self.dt)
        finally:
            # reset node functions, whether or not the build succeeded, so
            # that the caller's network is never left with wrapped outputs
            for node, func in node_functions.items():
                node.output = func

        return host_sim

    def close(self):
        """Clean the SpiNNaker board and prevent further simulation."""
        if not self._closed:
            # Stop the application
            self._closed = True
            self.io_controller.close()
            self.controller.send_signal("stop")

            # Destroy the job if we allocated one
            if self.job is not None:
                self.job.destroy()

            # Remove this simulator from the list of open simulators
            Simulator._remove_simulator(self)

    def trange(self, dt=None):
        return np.arange(1, self.steps + 1) * (self.dt or dt)
    def __call__(self,
                 spalloc_server,
                 spalloc_user,
                 n_chips,
                 spalloc_port=None):
        """Allocate a machine from a spalloc server and describe it.

        :param spalloc_server: The server from which the machine should be
                    requested
        :param spalloc_port: The port of the SPALLOC server; when None no
                    port is passed to the job
        :param spalloc_user: The user to allocate the machine to
        :param n_chips: The number of chips required
        :return: a dict describing the allocated machine (name, version,
                    width and height) together with the
                    "machine_allocation_controller" that was started for
                    the job; the remaining entries are None or False
        :raises: whatever waiting for the job (or reading its details)
                    raises; the job is destroyed before the exception
                    propagates
        """

        # Work out how many boards are needed
        n_boards = float(n_chips) / self._N_CHIPS_PER_BOARD

        # If rounding up to a whole number of boards would leave less than
        # a tenth of a board spare, add another board just in case
        if math.ceil(n_boards) - n_boards < 0.1:
            n_boards += 1
        n_boards = int(math.ceil(n_boards))

        # Only pass a port when one was given
        job_kwargs = {"hostname": spalloc_server, "owner": spalloc_user}
        if spalloc_port is not None:
            job_kwargs["port"] = spalloc_port
        job = Job(n_boards, **job_kwargs)

        try:
            job.wait_until_ready()

            # get param from jobs before starting, so that hanging doesn't
            # occur
            width = job.width
            height = job.height
            hostname = job.hostname
        except BaseException:
            # Release the boards on any failure (including an interrupt),
            # then re-raise with the original traceback. A bare ``raise`` is
            # valid on both Python 2 and Python 3.
            job.destroy()
            raise

        machine_allocation_controller = _SpallocJobController(job)
        machine_allocation_controller.start()

        return {
            "machine_name": hostname,
            "machine_version": self._MACHINE_VERSION,
            "machine_width": width,
            "machine_height": height,
            "machine_n_boards": None,
            "machine_down_chips": None,
            "machine_down_cores": None,
            "machine_bmp_details": None,
            "reset_machine_on_start_up": False,
            "auto_detect_bmp": False,
            "scamp_connection_data": None,
            "boot_port_number": None,
            "max_sdram_size": None,
            "machine_allocation_controller": machine_allocation_controller
        }
예제 #10
0
def main(argv=None):
    """Command-line entry point: request (and allocate) a SpiNNaker machine.

    Parses `argv`, creates a job on the spalloc server (or resumes an
    existing one), reports progress on stderr while waiting for it to become
    ready, optionally boots the machine, and then either runs the wrapped
    command or prints the machine's details. On exit the job is destroyed
    (or just disconnected from when ``--no-destroy`` is given) and the
    temporary CSV file of board IPs is removed.

    :param argv: argument list handed to the argument parser; None makes the
        parser fall back to its default.
    :return: exit code. 0 when the machine details were printed, the return
        value of ``run_command`` when a command was wrapped, 1 if the job was
        destroyed, 2 if the server did not recognise the job, 3 if the job
        entered an unrecognised state, 4 on keyboard interrupt while
        waiting, and 6 if the server could not be reached.
    """
    t = Terminal(stream=sys.stderr)

    cfg = config.read_config()

    parser = argparse.ArgumentParser(
        description="Request (and allocate) a SpiNNaker machine.")

    parser.add_argument("--version",
                        "-V",
                        action="version",
                        version=__version__)

    parser.add_argument("--quiet",
                        "-q",
                        action="store_true",
                        default=False,
                        help="suppress informational messages")
    parser.add_argument("--debug",
                        action="store_true",
                        default=False,
                        help="enable additional diagnostic information")
    parser.add_argument("--no-destroy",
                        "-D",
                        action="store_true",
                        default=False,
                        help="do not destroy the job on exit")

    # Booting is only offered when a MachineController is available
    if MachineController is not None:
        parser.add_argument("--boot",
                            "-B",
                            action="store_true",
                            default=False,
                            help="boot the machine once powered on")

    allocation_args = parser.add_argument_group(
        "allocation requirement arguments")
    allocation_args.add_argument("what",
                                 nargs="*",
                                 default=[],
                                 type=int,
                                 metavar="WHAT",
                                 help="what to allocate: nothing or 1 "
                                 "requests 1 SpiNN-5 board, NUM requests "
                                 "at least NUM SpiNN-5 boards, WIDTH "
                                 "HEIGHT means WIDTHxHEIGHT triads of "
                                 "SpiNN-5 boards and X Y Z requests a "
                                 "board the specified logical board "
                                 "coordinate.")
    allocation_args.add_argument("--resume",
                                 "-r",
                                 type=int,
                                 help="if given, resume keeping the "
                                 "specified job alive rather than "
                                 "creating a new job (all allocation "
                                 "requirements will be ignored)")
    allocation_args.add_argument("--machine",
                                 "-m",
                                 nargs="?",
                                 default=cfg["machine"],
                                 help="only allocate boards which are part "
                                 "of a specific machine, or any machine "
                                 "if no machine is given "
                                 "(default: %(default)s)")
    allocation_args.add_argument("--tags",
                                 "-t",
                                 nargs="*",
                                 metavar="TAG",
                                 default=cfg["tags"] or ["default"],
                                 help="only allocate boards which have (at "
                                 "least) the specified flags "
                                 "(default: {})".format(" ".join(cfg["tags"]
                                                                 or [])))
    allocation_args.add_argument("--min-ratio",
                                 type=float,
                                 metavar="RATIO",
                                 default=cfg["min_ratio"],
                                 help="when allocating by number of boards, "
                                 "require that the allocation be at "
                                 "least as square as this ratio "
                                 "(default: %(default)s)")
    allocation_args.add_argument("--max-dead-boards",
                                 type=int,
                                 metavar="NUM",
                                 default=(-1 if cfg["max_dead_boards"] is None
                                          else cfg["max_dead_boards"]),
                                 help="boards allowed to be "
                                 "dead in the allocation, or -1 to allow "
                                 "any number of dead boards "
                                 "(default: %(default)s)")
    allocation_args.add_argument("--max-dead-links",
                                 type=int,
                                 metavar="NUM",
                                 default=(-1 if cfg["max_dead_links"] is None
                                          else cfg["max_dead_links"]),
                                 help="inter-board links allowed to be "
                                 "dead in the allocation, or -1 to allow "
                                 "any number of dead links "
                                 "(default: %(default)s)")
    allocation_args.add_argument(
        "--require-torus",
        "-w",
        action="store_true",
        default=cfg["require_torus"],
        help="require that the allocation contain "
        "torus (a.k.a. wrap-around) "
        "links {}".format("(default)" if cfg["require_torus"] else ""))
    allocation_args.add_argument(
        "--no-require-torus",
        "-W",
        action="store_false",
        dest="require_torus",
        help="do not require that the allocation "
        "contain torus (a.k.a. wrap-around) "
        "links {}".format("" if cfg["require_torus"] else "(default)"))

    command_args = parser.add_argument_group("command wrapping arguments")
    command_args.add_argument("--command",
                              "-c",
                              nargs=argparse.REMAINDER,
                              help="execute the specified command once boards "
                              "have been allocated and deallocate the "
                              "boards when the application exits ({} and "
                              "{hostname} are substituted for the chip "
                              "chip at (0, 0)'s hostname, {w} and "
                              "{h} give the dimensions of the SpiNNaker "
                              "machine in chips, {ethernet_ips} is a "
                              "temporary file containing a CSV with "
                              "three columns: x, y and hostname giving "
                              "the hostname of each Ethernet connected "
                              "SpiNNaker chip)")

    server_args = parser.add_argument_group("spalloc server arguments")

    server_args.add_argument("--owner",
                             default=cfg["owner"],
                             help="by convention, the email address of the "
                             "owner of the job (default: %(default)s)")
    server_args.add_argument("--hostname",
                             "-H",
                             default=cfg["hostname"],
                             help="hostname or IP of the spalloc server "
                             "(default: %(default)s)")
    server_args.add_argument("--port",
                             "-P",
                             default=cfg["port"],
                             type=int,
                             help="port number of the spalloc server "
                             "(default: %(default)s)")
    server_args.add_argument(
        "--keepalive",
        type=int,
        metavar="SECONDS",
        default=(-1 if cfg["keepalive"] is None else cfg["keepalive"]),
        help="the interval at which to require "
        "keepalive messages to be sent to "
        "prevent the server cancelling the "
        "job, or -1 to not require keepalive "
        "messages (default: %(default)s)")
    server_args.add_argument("--reconnect-delay",
                             default=cfg["reconnect_delay"],
                             type=float,
                             metavar="SECONDS",
                             help="seconds to wait before "
                             "reconnecting to the server if the "
                             "connection is lost (default: %(default)s)")
    server_args.add_argument("--timeout",
                             default=cfg["timeout"],
                             type=float,
                             metavar="SECONDS",
                             help="seconds to wait for a response "
                             "from the server (default: %(default)s)")

    args = parser.parse_args(argv)

    # Fail if no owner is defined (unless resuming)
    if not args.owner and args.resume is None:
        parser.error(
            "--owner must be specified (typically your email address)")

    # Fail if server not specified
    if args.hostname is None:
        parser.error("--hostname of spalloc server must be specified")

    # Set universal job arguments (negative values mean "not set")
    job_kwargs = {
        "hostname":
        args.hostname,
        "port":
        args.port,
        "reconnect_delay":
        args.reconnect_delay if args.reconnect_delay >= 0.0 else None,
        "timeout":
        args.timeout if args.timeout >= 0.0 else None,
    }

    # Compare against None (as the owner check above does) so that a job ID
    # of 0 is still treated as a request to resume.
    if args.resume is not None:
        job_args = []
        job_kwargs.update({
            "resume_job_id": args.resume,
        })
    else:
        # Make sure 'what' takes the right form
        if len(args.what) not in (0, 1, 2, 3):
            parser.error("expected either no arguments, one argument, NUM, "
                         "two arguments, WIDTH HEIGHT, or three arguments "
                         "X Y Z")

        # Unpack arguments for the job and server
        job_args = args.what
        job_kwargs.update({
            "owner":
            args.owner,
            "keepalive":
            args.keepalive if args.keepalive >= 0.0 else None,
            "machine":
            args.machine,
            "tags":
            args.tags if args.machine is None else None,
            "min_ratio":
            args.min_ratio,
            "max_dead_boards":
            args.max_dead_boards if args.max_dead_boards >= 0.0 else None,
            "max_dead_links":
            args.max_dead_links if args.max_dead_links >= 0.0 else None,
            "require_torus":
            args.require_torus,
        })

    # Set debug level
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)

    # Create temporary file in which to write CSV of all board IPs. mkstemp
    # hands back an open file descriptor as well as the name; only the name
    # is used here, so close the descriptor straight away rather than
    # leaking it.
    ip_file_fd, ip_file_filename = tempfile.mkstemp(".csv", "spinnaker_ips_")
    os.close(ip_file_fd)

    def info(msg):
        """Write `msg` to the terminal stream unless --quiet was given."""
        if not args.quiet:
            t.stream.write("{}\n".format(msg))

    # Reason for destroying the job
    reason = None

    try:
        # Create the job
        try:
            job = Job(*job_args, **job_kwargs)
        except (OSError, IOError) as e:
            info(t.red("Could not connect to server: {}".format(e)))
            return 6
        try:
            # Wait for it to become ready, keeping the user informed along the
            # way
            old_state = None
            cur_state = job.state
            while True:
                # Show debug info on state-change
                if old_state != cur_state:
                    if cur_state == JobState.queued:
                        info(
                            t.update(
                                t.yellow("Job {}: Waiting in queue...".format(
                                    job.id))))
                    elif cur_state == JobState.power:
                        info(
                            t.update(
                                t.yellow(
                                    "Job {}: Waiting for power on...".format(
                                        job.id))))
                    elif cur_state == JobState.ready:
                        # Here we go!
                        break
                    elif cur_state == JobState.destroyed:
                        # Exit with error state
                        try:
                            reason = job.reason
                        except (IOError, OSError):
                            reason = None

                        if reason is not None:
                            info(
                                t.update(
                                    t.red("Job {}: Destroyed: {}".format(
                                        job.id, reason))))
                        else:
                            info(t.red("Job {}: Destroyed.".format(job.id)))
                        return 1
                    elif cur_state == JobState.unknown:
                        info(
                            t.update(
                                t.red("Job {}: Job not recognised by server.".
                                      format(job.id))))
                        return 2
                    else:
                        info(
                            t.update(
                                t.red(
                                    "Job {}: Entered an unrecognised state {}."
                                    .format(job.id, cur_state))))
                        return 3

                try:
                    old_state = cur_state
                    cur_state = job.wait_for_state_change(cur_state)
                except KeyboardInterrupt:
                    # Gracefully terminate from keyboard interrupt
                    info(
                        t.update(
                            t.red("Job {}: Keyboard interrupt.".format(
                                job.id))))
                    reason = "Keyboard interrupt."
                    return 4

            # Machine is now ready
            write_ips_to_csv(job.connections, ip_file_filename)

            # Boot the machine if required
            if MachineController is not None and args.boot:
                info(t.update(t.yellow("Job {}: Booting...".format(job.id))))
                mc = MachineController(job.hostname)
                mc.boot(job.width, job.height)

            info(t.update(t.green("Job {}: Ready!".format(job.id))))

            # Either run the user's application or just print the details.
            if args.command:
                return run_command(args.command, job.id, job.machine_name,
                                   job.connections, job.width, job.height,
                                   ip_file_filename)

            else:
                print_info(job.machine_name, job.connections, job.width,
                           job.height, ip_file_filename)
                return 0
        finally:
            # Destroy job and disconnect client
            if args.no_destroy:
                job.close()
            else:
                job.destroy(reason)
    finally:
        # Delete IP address list file
        os.remove(ip_file_filename)