def show_test_results(self):
    """ show stage test results

        the recorded data corresponds to
        (C struct) test_results in mlp_types.h:

        typedef struct test_results {
          uint epochs_trained;
          uint examples_tested;
          uint ticks_tested;
          uint examples_correct;
        } test_results_t;

        pack: standard sizes, little-endian byte order,
        explicit padding

        The four counters are printed together with the stage id and,
        when a results file is set, appended to it as a single
        comma-separated line. Nothing is shown for an aborted stage or
        when the recording is shorter than one complete record.
    """
    # nothing to show unless test results were recorded
    if not self.rec_test_results:
        print("\n--------------------------------------------------")
        print("warning: test results not recorded")
        print("--------------------------------------------------\n")
        return

    # an aborted stage has no results to retrieve
    if self._aborted:
        return

    # one record is four little-endian unsigned ints
    record = struct.Struct("<4I")

    # results are read from the last subgroup of the last output group
    last_grp = self.out_grps[-1]
    last_vertex = last_grp.t_vertex[last_grp.subgroups - 1]

    try:
        raw_results = last_vertex.read(
            gfe.placements().get_placement_of_vertex(last_vertex),
            gfe.buffer_manager(),
            MLPConstSizeRecordings.TEST_RESULTS.value
        )
    except Exception as err:
        print("\n--------------------------------------------------")
        print(f"error: test results aborted - {err}")
        print("--------------------------------------------------\n")
        return

    # an incomplete record is silently ignored
    if len(raw_results) < record.size:
        return

    # (epochs_trained, examples_tested, ticks_tested, examples_correct)
    counters = record.unpack_from(raw_results, 0)

    print("\n--------------------------------------------------")
    print("stage {} Test results: {}, {}, {}, {}".format(
        self._stage_id, *counters
    ))
    print("--------------------------------------------------\n")

    if self._results_file is not None:
        with open(self._results_file, 'a') as results:
            results.write("{},{},{},{}\n".format(*counters))
logger = logging.getLogger(__name__)

# the model binaries are looked up in the folder holding this script
front_end.setup(
    n_chips_required=None, model_binary_folder=os.path.dirname(__file__))

# calculate total number of 'free' cores for the given board
# (i.e. does not include those busy with SARK or reinjection)
total_number_of_cores = (
    front_end.get_number_of_available_cores_on_machine())

# load_data_onto_vertices arguments, in order:
#   1. data
#   2. number of chips used
#   3. what columns to use
#   4. how many string columns exist
#   5. function id
load_data_onto_vertices(raw_data, 1, [0], 1, 2)

front_end.run(10000)

placements = front_end.placements()
buffer_manager = front_end.buffer_manager()

# alternative outputs, currently disabled:
# write_unique_ids_to_csv(getData,1,len(raw_data))
# display_linked_list_size()
# display_results_function_one()
# display_results_function_two()
display_results_function_three()

front_end.stop()
def write_Lens_output_file(self, output_file):
    """ writes a Lens-style output file

        Lens online manual @ CMU:
            https://ni.cmu.edu/~plaut/Lens/Manual/

        File format:

        for each example:
          <I total-updates> <I example-number>
          <I ticks-on-example> <I num-groups>
          for each tick on the example:
            <I tick-number> <I event-number>
            for each WRITE_OUTPUTS group:
              <I num-units> <B targets?>
              for each unit:
                <R output-value> <R target-value>

        collects recorded tick data corresponding to
        (C struct) tick_record in mlp_types.h:

        typedef struct tick_record {
          uint epoch;    // current epoch
          uint example;  // current example
          uint event;    // current event
          uint tick;     // current tick
        } tick_record_t;

        collects recorded output data corresponding to
        (C type) short_activ_t in mlp_types.h:

        typedef short short_activ_t;

        pack: standard sizes, little-endian byte order,
        explicit padding

        All recordings are retrieved before the output file is opened,
        so a failed retrieval neither creates nor truncates the file.
        A failed retrieval prints an error and leaves the recorded-data
        flag untouched; otherwise the flag is cleared on exit.

        :param output_file: path of the file to write (opened in 'w' mode)
        :return: None
    """
    if not self._rec_data_rdy:
        print("\n--------------------------------------------------")
        print("warning: file write aborted - outputs not available")
        print("--------------------------------------------------\n")
        return

    # nothing is written for an aborted stage, but the recorded data
    # is still marked as no longer available
    if self._aborted:
        self._rec_data_rdy = False
        return

    # retrieve recorded tick_data from first output subgroup
    first_vertex = self.out_grps[0].t_vertex[0]
    try:
        rec_tick_data = first_vertex.read(
            gfe.placements().get_placement_of_vertex(first_vertex),
            gfe.buffer_manager(),
            MLPExtraRecordings.TICK_DATA.value
        )
    except Exception as err:
        print("\n--------------------------------------------------")
        print(f"error: write output file aborted - {err}")
        print("--------------------------------------------------\n")
        return

    # retrieve recorded outputs from every output group,
    # stored at the position given by the group's write_blk
    rec_outputs = [None] * len(self.out_grps)
    for g in self.out_grps:
        rec_outputs[g.write_blk] = []

        # append all subgroups together
        for s in range(g.subgroups):
            gtv = g.t_vertex[s]
            try:
                rec_outputs[g.write_blk].append(gtv.read(
                    gfe.placements().get_placement_of_vertex(gtv),
                    gfe.buffer_manager(),
                    MLPVarSizeRecordings.OUTPUTS.value
                ))
            except Exception as err:
                print("\n--------------------------------------------------")
                print(f"error: write output file aborted - {err}")
                print("--------------------------------------------------\n")
                return

    # compute total ticks in first example
    #TODO: need to get actual value from simulation, not max value
    ticks_per_example = 0
    for ev in self._ex_set.examples[0].events:
        # use event max_time if available or default to set max_time,
        #NOTE: check for absent or NaN (NaN is the only value != itself)
        if (ev.max_time is None) or (ev.max_time != ev.max_time):
            max_time = int(self._ex_set.max_time)
        else:
            max_time = int(ev.max_time)

        # compute number of ticks for max time,
        ticks_per_example += (max_time + 1) * self._ticks_per_interval

        # and limit to the global maximum if required
        if ticks_per_example > self.global_max_ticks:
            ticks_per_example = self.global_max_ticks

    # prepare to unpack recorded data: one tick record is four
    # little-endian unsigned ints
    tick_record = struct.Struct("<4I")
    total_ticks = len(rec_tick_data) // tick_record.size

    # outputs are s16.15 fixed-point numbers
    # NOTE(review): they are unpacked as unsigned 16-bit ('H') although
    # short_activ_t is declared as (signed) short - confirm intended
    activ_scale = float(1 << 15)

    # everything needed is available: only now create/overwrite the file
    with open(output_file, 'w') as f:
        # print recorded data in correct order
        current_epoch = -1
        current_example = -1
        evt_inx = 0
        for tk in range(total_ticks):
            (epoch, example, event, tick) = tick_record.unpack_from(
                rec_tick_data, tk * tick_record.size
            )

            # check if starting new epoch
            if epoch != current_epoch:
                current_epoch = epoch
                current_example = -1

            # check if starting new example
            if example != current_example:
                # print example header
                f.write(f"{epoch} {example}\n")
                f.write(f"{ticks_per_example} {len(self.out_grps)}\n")

                # include initial outputs if recording all ticks
                if not self.rec_example_last_tick_only:
                    # print first (implicit) tick data
                    f.write("0 -1\n")
                    for g in self.output_chain:
                        f.write(f"{g.units} 1\n")
                        for _ in range(g.units):
                            f.write("{:8.6f} {}\n".format(0, 0))

                # compute event index: number of events in all
                # examples that precede this one
                evt_inx = sum(
                    len(self._ex_set.examples[ex].events)
                    for ex in range(example)
                )

                # and prepare for next
                current_example = example

            # compute index into target array
            tgt_inx = evt_inx + event

            # print current tick data
            f.write(f"{tick} {event}\n")

            for g in self.output_chain:
                # get tick outputs for each subgroup
                outputs = []
                for sg, rec_outs in enumerate(rec_outputs[g.write_blk]):
                    fmt = f"<{g.subunits[sg]}H"
                    outputs += struct.unpack_from(
                        fmt, rec_outs, tk * struct.calcsize(fmt)
                    )

                # print outputs
                f.write(f"{g.units} 1\n")
                tinx = tgt_inx * g.units
                for u in range(g.units):
                    out = outputs[u] / activ_scale
                    t = g.targets[tinx + u]

                    #NOTE: check for absent or NaN
                    if (t is None) or (t != t):
                        tgt = "-"
                    else:
                        tgt = int(t)

                    f.write("{:8.6f} {}\n".format(out, tgt))

    # recorded data no longer available
    self._rec_data_rdy = False
def stage_run(self):
    """ run a stage on application graph

        The preconditions are checked in order and the first one that
        fails aborts the run: a message is printed, the aborted flag is
        set and nothing is executed. Checking may itself trigger work:
        reloading the weights file, compiling the example set and
        generating the machine graph.

        After a successful run the recorded-data flag is updated, test
        results are shown when recorded outside training, and the stage
        id is incremented.
    """
    self._aborted = False

    # find the first reason, if any, that prevents the run
    abort_msg = None
    if self._weights_file is None:
        # cannot run unless weights file exists
        abort_msg = "run aborted: weights file not given"
    elif (not self._weights_loaded
            and not self.read_Lens_weights_file(self._weights_file)):
        # may need to reload initial weights file if
        # application graph was modified after load
        abort_msg = "run aborted: error reading weights file"
    elif self._ex_set is None:
        # cannot run unless example set exists
        abort_msg = "run aborted: no example set"
    elif not self._ex_set.examples_loaded:
        # cannot run unless examples have been loaded
        abort_msg = "run aborted: examples not loaded"
    elif (not self._ex_set.examples_compiled
            and self._ex_set.compile(self) == 0):
        # generate summary set, example and event data
        abort_msg = "run aborted: error compiling example set"
    elif not self._graph_rdy and not self.generate_machine_graph():
        # generate machine graph - if needed
        abort_msg = "run aborted: error generating machine graph"

    if abort_msg is not None:
        print(abort_msg)
        self._aborted = True
        return

    # initialise recorded data flag
    self._rec_data_rdy = False

    # initialise recording buffers for new stage run
    # (not done before the very first stage)
    if self._stage_id != 0:
        gfe.buffer_manager().reset()

    # run stage
    gfe.run_until_complete(self._stage_id)

    if self.rec_outputs:
        self._rec_data_rdy = True

    # show TEST RESULTS if available
    if self.rec_test_results and not self.training:
        self.show_test_results()

    # prepare for next stage
    self._stage_id += 1
def run_mcmc(model, data, n_samples, burn_in=2000, thinning=5,
             degrees_of_freedom=3.0, seed=None, n_chips=None, n_boards=None):
    """ Executes an MCMC model, returning the received samples

    :param model: The MCMCModel to be used; its ``root_finder`` and
        ``cholesky`` attributes select whether root finder and Cholesky
        vertices are added alongside each worker vertex
    :param data: The data to sample
    :param n_samples: The number of samples to generate
    :param burn_in:
        no of MCMC transitions to reach apparent equilibrium before
        generating inference samples
    :param thinning:
        sampling rate i.e. 5 = 1 sample for 5 generated steps
    :param degrees_of_freedom:
        The number of degrees of freedom to jump around with
    :param seed: The random seed to use
    :param n_chips: The number of chips to run the model on
    :param n_boards: The number of boards to request for the run
    :return: The samples read from each coordinator, keyed by the
        (x, y) coordinates of the Ethernet chip the coordinator is on
    :rtype: dict
    :raises Exception: if any core reports a run-time exception or a
        watchdog state while waiting for the application to finish
    """
    # Reference point for the "overhead" and "overall" timings printed
    # at the end
    start_time = time.time()

    # Set up the simulation
    g.setup(n_boards_required=n_boards, n_chips_required=n_chips,
            model_binary_module=model_binaries)

    machine = g.machine()

    # Create a coordinator for each board
    coordinators = dict()
    boards = dict()
    for chip in machine.ethernet_connected_chips:

        # Create a coordinator
        coordinator = MCMCCoordinatorVertex(
            model, data, n_samples, burn_in, thinning,
            degrees_of_freedom, seed)
        g.add_machine_vertex_instance(coordinator)

        # Put the coordinator on the Ethernet chip
        coordinator.add_constraint(ChipAndCoreConstraint(chip.x, chip.y))
        coordinators[chip.x, chip.y] = coordinator
        boards[chip.x, chip.y] = chip.ip_address

    # Go through all the chips and add the workhorses
    n_chips_on_machine = machine.n_chips
    n_workers = 0
    # Always initialised so the counters exist whichever options are set
    n_root_finders = 0
    n_cholesky = 0

    # Cores consumed per worker: the worker itself, plus a root finder,
    # plus a Cholesky core (the latter only budgeted with a root finder)
    if model.root_finder:
        cores_per_worker = 3 if model.cholesky else 2
    else:
        cores_per_worker = 1

    for chip in machine.chips:

        # Count the non-monitor cores in the processor
        n_cores = sum(1 for p in chip.processors if not p.is_monitor)
        if (chip.x, chip.y) in coordinators:
            n_cores -= 3  # coordinator and extra_monitor_support (2)
        else:
            n_cores -= 1  # just extra_monitor_support
        n_cores //= cores_per_worker

        # Find the coordinator for the board (or 0, 0 if it is missing)
        eth_x = chip.nearest_ethernet_x
        eth_y = chip.nearest_ethernet_y
        coordinator = coordinators.get((eth_x, eth_y))
        if coordinator is None:
            print("Warning - couldn't find {}, {} for chip {}, {}".format(
                eth_x, eth_y, chip.x, chip.y))
            coordinator = coordinators[0, 0]
            print("Using coordinator ", coordinator)

        # Add a vertex for each core (none when n_cores is not positive)
        for _ in range(n_cores):

            # Create the vertex and add it to the graph
            vertex = MCMCVertex(coordinator, model)
            n_workers += 1
            g.add_machine_vertex_instance(vertex)

            # Put the vertex on the same board as the coordinator
            vertex.add_constraint(ChipAndCoreConstraint(chip.x, chip.y))

            # Add an edge from the coordinator to the vertex, to send the data
            g.add_machine_edge_instance(
                MachineEdge(coordinator, vertex),
                coordinator.data_partition_name)

            # Add an edge from the vertex to the coordinator,
            # to send acknowledgement
            g.add_machine_edge_instance(
                MachineEdge(vertex, coordinator),
                coordinator.acknowledge_partition_name)

            if model.root_finder:
                # Create a root finder vertex
                rf_vertex = MCMCRootFinderVertex(vertex, model)
                n_root_finders += 1
                g.add_machine_vertex_instance(rf_vertex)

                # put it on the same chip as the standard mcmc vertex?
                # no - put it on a "nearby" chip, however that works
                rf_vertex.add_constraint(
                    ChipAndCoreConstraint(chip.x, chip.y))

                # Add an edge from mcmc vertex to root finder vertex,
                # to "send" the data - need to work this out
                g.add_machine_edge_instance(
                    MachineEdge(vertex, rf_vertex),
                    vertex.parameter_partition_name)

                # Add edge from root finder vertex back to mcmc vertex
                # to send acknowledgement / result - need to work this out
                g.add_machine_edge_instance(
                    MachineEdge(rf_vertex, vertex),
                    vertex.result_partition_name)

            # NOTE(review): treated as independent of the root finder
            # option, although the core budget above only reserves a
            # Cholesky core when root_finder is also set - confirm
            if model.cholesky:
                # Create a Cholesky vertex
                cholesky_vertex = MCMCCholeskyVertex(vertex, model)
                n_cholesky += 1
                g.add_machine_vertex_instance(cholesky_vertex)

                # put it on the same chip as the standard mcmc vertex?
                # no - put it on a "nearby" chip, however that works
                cholesky_vertex.add_constraint(
                    ChipAndCoreConstraint(chip.x, chip.y))

                # Add an edge from mcmc vertex to Cholesky vertex,
                # to "send" the data - need to work this out
                g.add_machine_edge_instance(
                    MachineEdge(vertex, cholesky_vertex),
                    vertex.cholesky_partition_name)

                # Add edge from Cholesky vertex back to mcmc vertex
                # to send acknowledgement / result - need to work this out
                g.add_machine_edge_instance(
                    MachineEdge(cholesky_vertex, vertex),
                    vertex.cholesky_result_partition_name)

    start_computing_time = time.time()

    logger.info("n_chips_on_machine %s", n_chips_on_machine)
    logger.info("Running %s worker cores", n_workers)
    if model.root_finder:
        logger.info("Running %s root finder cores", n_root_finders)
    if model.cholesky:
        logger.info("Running %s Cholesky cores", n_cholesky)

    # Run the simulation
    g.run_until_complete()

    mid_computing_time = time.time()

    # Wait for the application to finish
    txrx = g.transceiver()
    app_id = globals_variables.get_simulator()._app_id
    # The core counts are logged a second time once the run call returns
    logger.info("Running %s worker cores", n_workers)
    if model.root_finder:
        logger.info("Running %s root finder cores", n_root_finders)
    if model.cholesky:
        logger.info("Running %s Cholesky cores", n_cholesky)
    logger.info("Waiting for application to finish...")
    running = txrx.get_core_state_count(app_id, CPUState.RUNNING)
    # there are now cores doing extra_monitor etc.: one per chip plus
    # two per board are expected to stay in the RUNNING state
    non_worker_cores = n_chips_on_machine + (2 * len(boards))
    while running > non_worker_cores:
        time.sleep(0.5)
        error = txrx.get_core_state_count(
            app_id, CPUState.RUN_TIME_EXCEPTION)
        watchdog = txrx.get_core_state_count(app_id, CPUState.WATCHDOG)
        if error > 0 or watchdog > 0:
            error_msg = "Some cores have failed ({} RTE, {} WDOG)".format(
                error, watchdog)
            raise Exception(error_msg)
        running = txrx.get_core_state_count(app_id, CPUState.RUNNING)
        print('running: ', running)

    finish_computing_time = time.time()

    # Get the data back
    samples = dict()
    for coord, coordinator in coordinators.items():
        samples[coord[0], coord[1]] = coordinator.read_samples(
            g.buffer_manager())

    # Close the machine
    g.stop()

    finish_time = time.time()

    # Note: this timing appears to be incorrect now; needs looking at
    print("Overhead time is %s seconds" % (start_computing_time - start_time))
    print("Computing time is %s seconds"
          % (finish_computing_time - start_computing_time))
    print("run_until_complete takes %s seconds"
          % (mid_computing_time - start_computing_time))
    print("Data collecting time is %s seconds"
          % (finish_time - finish_computing_time))
    print("Overall running time is %s seconds" % (finish_time - start_time))

    return samples
def run_model(data, n_chips=None, n_ihcan=0, fs=44100, resample_factor=1):
    """ Runs IHCAN vertices on the machine and returns what they recorded

    ``n_ihcan`` vertices are created on each chip used, the j-th one
    being given ``data[j]``; the run is started and polled until no core
    of the application is left in the RUNNING state.

    :param data: indexable input; ``data[j]`` is handed to the j-th
        vertex of every chip and ``len(data[0][0])`` is used for the
        expected output size
    :param n_chips: The number of chips to request and populate;
        None requests no specific number and populates every chip
    :param n_ihcan: The number of IHCAN vertices to place on each chip
    :param fs: passed to every IHCANVertex
        (presumably the sampling frequency - confirm)
    :param resample_factor: passed to every IHCANVertex
    :return: the samples of all vertices, in vertex creation order,
        joined with numpy.hstack
    :raises Exception: if any core reports a run-time exception or a
        watchdog state while waiting for the application to finish
    """
    # Set up the simulation
    g.setup(n_chips_required=n_chips, model_binary_module=model_binaries)

    machine = g.machine()

    # a list ensures data is read back in the same order
    # that vertices are instantiated
    ihcans = list()

    for count, chip in enumerate(machine.chips):
        # stop once the requested number of chips has been populated;
        # with n_chips=None there is no limit
        if n_chips is not None and count >= n_chips:
            break

        for j in range(n_ihcan):
            ihcan = IHCANVertex(data[j][:], fs, resample_factor)
            g.add_machine_vertex_instance(ihcan)
            # constrain placement to local chip
            ihcan.add_constraint(ChipAndCoreConstraint(chip.x, chip.y))
            ihcans.append(ihcan)

    # Run the simulation
    g.run(None)

    # Wait for the application to finish
    txrx = g.transceiver()
    app_id = globals_variables.get_simulator()._app_id
    logger.info("Waiting for application to finish...")
    running = txrx.get_core_state_count(app_id, CPUState.RUNNING)
    while running > 0:
        time.sleep(0.5)
        error = txrx.get_core_state_count(
            app_id, CPUState.RUN_TIME_EXCEPTION)
        watchdog = txrx.get_core_state_count(app_id, CPUState.WATCHDOG)
        if error > 0 or watchdog > 0:
            error_msg = "Some cores have failed ({} RTE, {} WDOG)".format(
                error, watchdog)
            raise Exception(error_msg)
        running = txrx.get_core_state_count(app_id, CPUState.RUNNING)

    # Get the data back
    samples = list()
    progress = ProgressBar(len(ihcans), "Reading results")
    for ihcan in ihcans:
        samples.append(ihcan.read_samples(g.buffer_manager()))
        progress.update()
    progress.end()
    samples = numpy.hstack(samples)

    # Close the machine
    g.stop()

    # single-argument print calls behave the same as statement and
    # as function; the text matches the former print statements
    print("channels running:  {}".format(len(ihcans) / 5.0))
    print("output data: {} fibres with length {}".format(
        len(ihcans) * 2, len(samples)))

    # two fibres per vertex, input length rounded down to a multiple of 96
    expected_size = (
        len(ihcans) * 2 * numpy.floor(len(data[0][0]) // 96) * 96)
    if len(samples) != expected_size:
        print("samples length {} isn't expected size {}".format(
            len(samples), expected_size))

    return samples