def start_all_cores(self, executable_targets, app_id, txrx,
                    sync_state_changes):
        """ Send the start signal to the application and verify that its\
            cores have begun running.

        The signal is SYNC0 when ``sync_state_changes`` is even and SYNC1
        when it is odd.  If fewer cores than expected are RUNNING after the
        signal, cores that already sit in the following sync state are
        counted as having finished; only when the two counts together still
        fall short is an error raised.

        :param executable_targets: the mapping between cores and binaries
        :param app_id: the app id that being used by the simulation
        :param txrx: the python interface to the spinnaker machine
        :param sync_state_changes: the number of runs been done between\
                setup and end
        :return: None
        :raises ExecutableFailedToStartException: if the running and\
                finished cores together are fewer than the expected total
        """

        expected_count = executable_targets.total_processors
        core_subsets = executable_targets.all_core_subsets

        # the parity of the change counter selects which signal to send
        start_signal = (
            SCPSignal.SYNC0 if sync_state_changes % 2 == 0
            else SCPSignal.SYNC1)

        logger.info("Starting application")
        txrx.send_signal(app_id, start_signal)
        sync_state_changes += 1

        logger.info("Checking that the application has started")
        running_count = txrx.get_core_state_count(app_id, CPUState.RUNNING)
        if running_count >= expected_count:
            return

        # a core that is no longer running may already be waiting in the
        # sync state that matches the incremented counter
        next_sync_state = (
            CPUState.SYNC0 if sync_state_changes % 2 == 0
            else CPUState.SYNC1)
        finished_count = txrx.get_core_state_count(app_id, next_sync_state)

        if running_count + finished_count >= expected_count:
            logger.warn("some processors finished between signal "
                        "transmissions. Could be a sign of an error")
            return

        failed_cores = helpful_functions.get_cores_not_in_state(
            core_subsets, CPUState.RUNNING, txrx)
        status_text = helpful_functions.get_core_status_string(failed_cores)
        raise exceptions.ExecutableFailedToStartException(
            "Only {} of {} processors started:{}"
            .format(running_count, expected_count, status_text))
    def wait_for_cores_to_be_ready(
            self, executable_targets, app_id, txrx, no_sync_state_changes):
        """ Block until no core is left in C_MAIN, then verify that every\
            core has reached the expected sync state.

        The expected state is SYNC0 when ``no_sync_state_changes`` is even
        and SYNC1 when it is odd.

        :param executable_targets: the mapping between cores and binaries
        :param app_id: the app id that being used by the simulation
        :param txrx: the python interface to the spinnaker machine
        :param no_sync_state_changes:  the number of runs been done between\
                setup and end
        :return: None
        :raises ExecutableFailedToStartException: if some cores are not in\
                the expected sync state
        """

        expected_count = executable_targets.total_processors
        core_subsets = executable_targets.all_core_subsets

        # poll every 0.1s until nothing is reported in C_MAIN any more
        while txrx.get_core_state_count(app_id, CPUState.C_MAIN) != 0:
            time.sleep(0.1)

        # the parity of the change counter selects the state to wait for
        target_state = (
            CPUState.SYNC0 if no_sync_state_changes % 2 == 0
            else CPUState.SYNC1)

        ready_count = txrx.get_core_state_count(app_id, target_state)
        if ready_count == expected_count:
            return

        # the count did not match, so look at the cores individually; an
        # empty result means there is nothing to complain about after all
        failed_cores = helpful_functions.get_cores_not_in_state(
            core_subsets, target_state, txrx)
        if len(failed_cores) == 0:
            return

        status_text = helpful_functions.get_core_status_string(failed_cores)
        raise exceptions.ExecutableFailedToStartException(
            "Only {} processors out of {} have successfully reached "
            "{}:{}".format(
                ready_count, expected_count, target_state.name,
                status_text))
Example #3
0
    def start_all_cores(executable_targets, app_id, txrx, sync_state_changes):
        """ Send the start signal to the application and verify that its\
            cores are either running or already paused.

        The signal is SYNC0 when ``sync_state_changes`` is even and SYNC1
        when it is odd.

        :param executable_targets: the mapping between cores and binaries
        :param app_id: the app id that being used by the simulation
        :param txrx: the python interface to the spinnaker machine
        :param sync_state_changes: the number of runs been done between\
                setup and end
        :return: None
        :raises ExecutableFailedToStartException: if some cores are neither\
                RUNNING nor PAUSED after the signal was sent
        """

        expected_count = executable_targets.total_processors
        core_subsets = executable_targets.all_core_subsets

        # the parity of the change counter selects which signal to send
        start_signal = (
            SCPSignal.SYNC0 if sync_state_changes % 2 == 0
            else SCPSignal.SYNC1)

        logger.info("Starting application ({})".format(start_signal))
        txrx.send_signal(app_id, start_signal)
        sync_state_changes += 1

        logger.info("Checking that the application has started")
        running_count = txrx.get_core_state_count(app_id, CPUState.RUNNING)
        if running_count >= expected_count:
            return

        # cores that are already PAUSED are counted as having finished
        paused_count = txrx.get_core_state_count(app_id, CPUState.PAUSED)
        if running_count + paused_count >= expected_count:
            logger.warn("some processors finished between signal "
                        "transmissions. Could be a sign of an error")
            return

        failed_cores = helpful_functions.get_cores_not_in_state(
            core_subsets, {CPUState.RUNNING, CPUState.PAUSED}, txrx)

        # the per-core query is the final word: only fail if it still
        # reports cores outside of the accepted states
        if len(failed_cores) > 0:
            status_text = helpful_functions.get_core_status_string(
                failed_cores)
            raise exceptions.ExecutableFailedToStartException(
                "Only {} of {} processors started:{}".format(
                    running_count, expected_count, status_text),
                helpful_functions.get_core_subsets(failed_cores))
Example #4
0
    def wait_for_cores_to_be_ready(executable_targets, app_id, txrx,
                                   no_sync_state_changes):
        """ Block until no core is left in C_MAIN, then verify that every\
            core has reached the expected sync state.

        The expected state is SYNC0 when ``no_sync_state_changes`` is even
        and SYNC1 when it is odd.

        :param executable_targets: the mapping between cores and binaries
        :param app_id: the app id that being used by the simulation
        :param txrx: the python interface to the spinnaker machine
        :param no_sync_state_changes:  the number of runs been done between\
                setup and end
        :return: None
        :raises ExecutableFailedToStartException: if some cores are not in\
                the expected sync state
        """

        expected_count = executable_targets.total_processors
        core_subsets = executable_targets.all_core_subsets

        # poll every 0.1s until nothing is reported in C_MAIN any more
        while txrx.get_core_state_count(app_id, CPUState.C_MAIN) != 0:
            time.sleep(0.1)

        # the parity of the change counter selects the state to wait for
        target_state = (
            CPUState.SYNC0 if no_sync_state_changes % 2 == 0
            else CPUState.SYNC1)

        ready_count = txrx.get_core_state_count(app_id, target_state)
        if ready_count == expected_count:
            return

        # the count did not match, so look at the cores individually; an
        # empty result means there is nothing to complain about after all
        failed_cores = helpful_functions.get_cores_not_in_state(
            core_subsets, target_state, txrx)
        if len(failed_cores) == 0:
            return

        status_text = helpful_functions.get_core_status_string(failed_cores)
        raise exceptions.ExecutableFailedToStartException(
            "Only {} processors out of {} have successfully reached "
            "{}:{}".format(ready_count, expected_count,
                           target_state.name, status_text),
            helpful_functions.get_core_subsets(failed_cores))
Example #5
0
    def __call__(self, txrx, app_id, all_core_subsets):
        """ Repeatedly tell every core that is not yet FINISHED to update\
            its provenance region and exit, until all cores are FINISHED.

        :param txrx: the python interface to the spinnaker machine
        :param app_id: the app id whose core states are counted
        :param all_core_subsets: the cores to check; its length is taken\
                as the total number of processors
        :return: None
        """

        # work out how many cores still have to reach the FINISHED state
        processors_completed = txrx.get_core_state_count(
            app_id, CPUState.FINISHED)
        total_processors = len(all_core_subsets)
        left_to_do_cores = total_processors - processors_completed

        progress_bar = ProgressBar(
            left_to_do_cores,
            "Forcing error cores to generate provenance data")

        # the command payload is the same for every core, so build it once
        data = struct.pack(
            "<I", constants.SDP_RUNNING_MESSAGE_CODES.
            SDP_UPDATE_PROVENCE_REGION_AND_EXIT.value)

        # keep prodding until every core reports the FINISHED state
        # NOTE(review): there is no delay or retry limit in this loop, so
        # it spins for as long as any core fails to finish
        while processors_completed != total_processors:
            unsuccessful_cores = helpful_functions.get_cores_not_in_state(
                all_core_subsets, CPUState.FINISHED, txrx)

            for (x, y, p) in unsuccessful_cores:
                txrx.send_sdp_message(
                    SDPMessage(SDPHeader(
                        flags=SDPFlag.REPLY_NOT_EXPECTED,
                        destination_cpu=p,
                        destination_chip_x=x,
                        destination_port=(constants.SDP_PORTS.
                                          RUNNING_COMMAND_SDP_PORT.value),
                        destination_chip_y=y),
                               data=data))

            processors_completed = txrx.get_core_state_count(
                app_id, CPUState.FINISHED)

            # only report the cores that finished since the last report;
            # the baseline has to move with each report, otherwise the
            # same cores are counted again on every pass and the bar is
            # pushed past its total
            left_over_now = total_processors - processors_completed
            to_update = left_to_do_cores - left_over_now
            if to_update > 0:
                progress_bar.update(to_update)
                left_to_do_cores = left_over_now
        progress_bar.end()
    def __call__(
            self, placements, txrx, no_sync_changes, app_id,
            executable_targets, graph_mapper):
        """ Push the new runtime to every core, then switch the cores back\
            into the sync state expected for the current change count.

        Both phases resend their command to the cores that have not yet
        reached the wanted state until the state count matches the total
        number of processors.

        :param placements: used to find the subvertex placed on a core
        :param txrx: the python interface to the spinnaker machine
        :param no_sync_changes: selects the sync state to restore; SYNC0\
                when even, SYNC1 when odd
        :param app_id: the app id whose core states are counted
        :param executable_targets: provides the total processor count and\
                the core subsets to check
        :param graph_mapper: used to find the vertex of a subvertex
        :return: dict holding the unchanged ``no_sync_changes`` value
        """

        def send_command(chip_x, chip_y, core, payload):
            # one SDP message to the running-command port of a single
            # core, flagged so that no reply is expected
            header = SDPHeader(
                flags=SDPFlag.REPLY_NOT_EXPECTED,
                destination_cpu=core,
                destination_chip_x=chip_x,
                destination_port=(
                    constants.SDP_PORTS.RUNNING_COMMAND_SDP_PORT.value),
                destination_chip_y=chip_y)
            txrx.send_sdp_message(SDPMessage(header, data=payload))

        # phase 1: CPU_STATE_12 marks a core that has taken the new runtime
        acknowledged = txrx.get_core_state_count(
            app_id, CPUState.CPU_STATE_12)
        total_processors = executable_targets.total_processors
        all_core_subsets = executable_targets.all_core_subsets

        while acknowledged != total_processors:
            lagging_cores = helpful_functions.get_cores_not_in_state(
                all_core_subsets, CPUState.CPU_STATE_12, txrx)

            for (x, y, p) in lagging_cores:
                subvertex = placements.get_subvertex_on_processor(x, y, p)
                vertex = graph_mapper.get_vertex_from_subvertex(subvertex)

                # no step count means run forever: send zero steps and
                # raise the infinite-run flag instead
                steps = vertex.no_machine_time_steps
                if steps is None:
                    infinite_run, steps = 1, 0
                else:
                    infinite_run = 0

                send_command(x, y, p, struct.pack(
                    "<III",
                    constants.SDP_RUNNING_MESSAGE_CODES.SDP_NEW_RUNTIME_ID_CODE
                    .value, steps, infinite_run))

            acknowledged = txrx.get_core_state_count(
                app_id, CPUState.CPU_STATE_12)

        # phase 2: put the cores back into the sync state that the
        # application runner code expects to find them in
        sync_state = (
            CPUState.SYNC0 if no_sync_changes % 2 == 0 else CPUState.SYNC1)
        in_sync = txrx.get_core_state_count(app_id, sync_state)

        while in_sync != total_processors:
            lagging_cores = helpful_functions.get_cores_not_in_state(
                all_core_subsets, sync_state, txrx)

            for (x, y, p) in lagging_cores:
                send_command(x, y, p, struct.pack(
                    "<II",
                    constants.SDP_RUNNING_MESSAGE_CODES.SDP_SWITCH_STATE.value,
                    sync_state.value))

            in_sync = txrx.get_core_state_count(app_id, sync_state)

        return {'no_sync_changes': no_sync_changes}
    def wait_for_execution_to_complete(
            self, executable_targets, app_id, runtime, time_scaling,
            txrx, buffer_manager, no_sync_state_changes):
        """ Sleep for the expected duration of the run, then poll until no\
            core is RUNNING and check that all cores exited cleanly.

        :param executable_targets: provides the total processor count and\
                the core subsets to check
        :param app_id: the app id whose core states are counted
        :param runtime: multiplied by time_scaling and divided by 1000 to\
                give the number of seconds to sleep (plus one second)
        :param time_scaling: scale factor applied to runtime
        :param txrx: the python interface to the spinnaker machine
        :param buffer_manager: not used by this method
        :param no_sync_state_changes: the number of runs been done between\
                setup and end
        :return: None
        :raises ExecutableFailedToStopException: if any core enters the\
                RUN_TIME_EXCEPTION state, or if fewer cores than expected\
                end up in the final sync state
        """

        expected_count = executable_targets.total_processors
        core_subsets = executable_targets.all_core_subsets

        wait_seconds = ((runtime * time_scaling) / 1000.0) + 1.0
        logger.info("Application started - waiting {} seconds for it to"
                    " stop".format(wait_seconds))
        time.sleep(wait_seconds)

        # poll until nothing is running, bailing out on the first sign of
        # a run time exception
        still_running = expected_count
        while still_running != 0:
            rte_count = txrx.get_core_state_count(
                app_id, CPUState.RUN_TIME_EXCEPTION)
            if rte_count > 0:
                rte_cores = helpful_functions.get_cores_in_state(
                    core_subsets, CPUState.RUN_TIME_EXCEPTION, txrx)
                status_text = helpful_functions.get_core_status_string(
                    rte_cores)
                raise exceptions.ExecutableFailedToStopException(
                    "{} cores have gone into a run time error state:"
                    "{}".format(rte_count, status_text))

            still_running = txrx.get_core_state_count(
                app_id, CPUState.RUNNING)
            if still_running > 0:
                logger.info("Simulation still not finished or failed - "
                            "waiting a bit longer...")
                time.sleep(0.5)

        # an odd change count means the cores should have ended in SYNC0,
        # anything else means SYNC1
        exit_state = (
            CPUState.SYNC0 if no_sync_state_changes % 2 == 1
            else CPUState.SYNC1)
        exited_count = txrx.get_core_state_count(app_id, exit_state)

        if exited_count < expected_count:
            failed_cores = helpful_functions.get_cores_not_in_state(
                core_subsets, exit_state, txrx)
            status_text = helpful_functions.get_core_status_string(
                failed_cores)
            raise exceptions.ExecutableFailedToStopException(
                "{} of {} processors failed to exit successfully:"
                "{}".format(
                    expected_count - exited_count, expected_count,
                    status_text))

        logger.info("Application has run to completion")
Example #8
0
    def wait_for_execution_to_complete(self, executable_targets, app_id,
                                       runtime, time_scaling, txrx,
                                       time_threshold):
        """ Sleep for the expected duration of the run, then poll until no\
            core is RUNNING (or the time limit is hit) and check that all\
            cores ended up PAUSED.

        :param executable_targets: provides the total processor count and\
                the core subsets to check
        :param app_id: the app id whose core states are counted
        :param runtime: multiplied by time_scaling and divided by 1000 to\
                give the number of seconds to sleep (plus 0.1 seconds)
        :param time_scaling: scale factor applied to runtime
        :param txrx: the python interface to the spinnaker machine
        :param time_threshold: handed to ``self._has_overrun`` together\
                with the time at which polling started; polling stops\
                once that method reports an overrun
        :return: None
        :raises ExecutableFailedToStopException: if any core enters the\
                RUN_TIME_EXCEPTION or WATCHDOG state, if cores are still\
                running when polling stops, or if some cores did not end\
                up in the PAUSED state
        """

        total_processors = executable_targets.total_processors
        all_core_subsets = executable_targets.all_core_subsets

        time_to_wait = ((runtime * time_scaling) / 1000.0) + 0.1
        logger.info(
            "Application started - waiting {} seconds for it to stop".format(
                time_to_wait))
        time.sleep(time_to_wait)
        processors_not_finished = total_processors
        start_time = time.time()

        # failures to read the machine state are tolerated up to 10 times
        # over the whole wait; the counter is never reset
        retries = 0
        while (processors_not_finished != 0
               and not self._has_overrun(start_time, time_threshold)):
            try:
                processors_rte = txrx.get_core_state_count(
                    app_id, CPUState.RUN_TIME_EXCEPTION)
                processors_wdog = txrx.get_core_state_count(
                    app_id, CPUState.WATCHDOG)
                if processors_rte > 0 or processors_wdog > 0:
                    error_cores = helpful_functions.get_cores_in_state(
                        all_core_subsets,
                        {CPUState.RUN_TIME_EXCEPTION, CPUState.WATCHDOG}, txrx)
                    break_down = helpful_functions.get_core_status_string(
                        error_cores)

                    # report both kinds of failed core; counting only the
                    # run time exceptions would claim "0 cores" when the
                    # failure is a watchdog one
                    raise exceptions.ExecutableFailedToStopException(
                        "{} cores have gone into an error state:"
                        "{}".format(
                            processors_rte + processors_wdog, break_down),
                        helpful_functions.get_core_subsets(error_cores), True)

                processors_not_finished = txrx.get_core_state_count(
                    app_id, CPUState.RUNNING)
                if processors_not_finished > 0:
                    logger.info("Simulation still not finished or failed - "
                                "waiting a bit longer...")
                    time.sleep(0.5)
            except exceptions.ExecutableFailedToStopException:
                # cores in an error state are a real failure, not a
                # problem reading the state, so do not retry them
                raise
            except Exception:
                retries += 1
                if retries >= 10:
                    logger.error("Error getting state")
                    raise
                logger.info("Error getting state - retrying...")
                time.sleep(0.5)

        # the loop may have stopped because of the time limit rather than
        # because everything finished; confirm with a per-core query
        if processors_not_finished != 0:
            running_cores = helpful_functions.get_cores_in_state(
                all_core_subsets, CPUState.RUNNING, txrx)
            if len(running_cores) > 0:
                raise exceptions.ExecutableFailedToStopException(
                    "Simulation did not finish within the time allocated. "
                    "Please try increasing the machine time step and / "
                    "or time scale factor in your simulation.",
                    helpful_functions.get_core_subsets(running_cores), False)

        processors_exited = txrx.get_core_state_count(app_id, CPUState.PAUSED)

        if processors_exited < total_processors:
            unsuccessful_cores = helpful_functions.get_cores_not_in_state(
                all_core_subsets, CPUState.PAUSED, txrx)

            # Last chance to get out of the error state
            if len(unsuccessful_cores) > 0:
                break_down = helpful_functions.get_core_status_string(
                    unsuccessful_cores)
                raise exceptions.ExecutableFailedToStopException(
                    "{} of {} processors failed to exit successfully:"
                    "{}".format(total_processors - processors_exited,
                                total_processors, break_down),
                    helpful_functions.get_core_subsets(unsuccessful_cores),
                    True)
        logger.info("Application has run to completion")