def nonParametricResample(self):
    """
    Draw a single bootstrap resample of every histogram.

    Each histogram is resampled with replacement from its own data and
    rebuilt on its original raw bins.

    Returns:
        PartialMaxLike: analysis instance built from the resampled
        histograms, for a single bootstrap iteration.
    """
    resampled = []
    for idx in range(self.num_hists):
        h = hg.Hist()
        h.makeFromHist(self.hists[idx].resample(),
                       self.hists[idx].rawhistbins)
        resampled.append(h)

    boot = PartialMaxLike(resampled, self.input_state, self.unitaries,
                          self.P_j, train_frac=0, bin_flag=False,
                          measure=self.measure, targets=self.targets,
                          autosave=False)
    # Share the matlab engine, if one exists, for the expectation SDP.
    boot.eng = self.startMatlabEng() if self.eng is not None else None
    return boot
def trainingSample(self, train_frac=0.1):
    """
    Split off a training sample from the reference histograms.

    A fraction ``train_frac`` of each reference histogram's trials is
    drawn without replacement and moved into training histograms; each
    reference histogram is rebuilt in place from the remaining trials.
    The training data are packaged as an AnalysisParameters instance in
    ``self.TrainParameters``.

    Args:
        train_frac: fraction of reference data to reserve for training
    """
    self.train_frac = train_frac
    if not (0 < self.train_frac < 1):
        return None

    train_hists = []
    for j in range(self.ref_num):
        train = hg.Hist()
        train_hists.append(train)
        ref = self.hists[self.ref_ind[j]]
        n_train = int(np.floor(self.train_frac * ref.trials))
        counts = hg.makeCounts(ref.raw_hist, ref.raw_hist_bins)
        # Randomly choose which trials become training data.
        pick = np.random.choice(ref.trials, n_train, replace=False)
        # The remaining trials stay in the reference histogram.
        keep = np.setdiff1d(np.arange(ref.trials), pick)
        # Rebuild both histograms (reference is remade with fewer counts),
        # then restore the original binning.
        saved_hist = ref.hist
        saved_bins = ref.bin_bounds
        train.makeFromCounts(counts[pick])
        ref.makeFromCounts(counts[keep])
        train.rebin(saved_bins)
        ref.rebin(saved_bins)
        # Sanity check: the split must conserve counts bin-by-bin.
        assert np.all(saved_hist == ref.hist + train.hist)

    # Return to binning for the remaining reference histograms.
    self.rebinParams()

    # Package the training sample as an AnalysisParameters object.
    input_state = [np.array(self.input_state[self.ref_ind[k]])
                   for k in range(self.ref_num)]
    self.TrainParameters = AnalysisParameters(train_hists, input_state,
                                              self.unitaries[self.ref_ind],
                                              self.P_j)
def parametricResample(self):
    """
    Resample every histogram from the fitted model (parametric bootstrap).

    Counts are drawn from a multinomial whose cell probabilities come
    from the tomography population estimates and the estimated transition
    matrix.  Note: this does not preserve the number of distinguishable
    ions, nor can these histograms be rebinned to more bins.

    Returns:
        PartialMaxLike: analysis instance built from the resampled
        histograms, for a single bootstrap iteration.
    """
    resampled = []
    for idx in range(self.num_hists):
        # Probability of observing each "count", from the populations
        # estimated by tomography and the estimated transition matrix.
        prob = self.tom.pops[idx].dot(self.est_Q)
        h = hg.Hist()
        drawn = np.random.multinomial(self.hists[idx].trials, prob)
        h.makeFromHist(drawn, self.hists[idx].bin_bounds)
        h.max_counts = self.hists[idx].max_counts
        h.min_counts = self.hists[idx].min_counts
        resampled.append(h)

    boot = PartialMaxLike(resampled, self.input_state, self.unitaries,
                          self.P_j, train_frac=0, bin_flag=False,
                          measure=self.measure, targets=self.targets,
                          autosave=False)
    # Share the matlab engine, if one exists, for the expectation SDP.
    boot.eng = self.startMatlabEng() if self.eng is not None else None
    # Carry the parent binning over to the resampled analysis.
    boot.bin_bounds = self.bin_bounds
    boot.mbins = self.mbins
    boot.bins = self.bins
    return boot
def scale_and_resample_set_histogram(self, unscaled_set_reuse_histogram,
                                     scale_factor, prescaled=False):
    """
    Build a new histogram from (value, count) pairs, scaling the values.

    Each value is multiplied by ``scale_factor`` and floored to an int,
    except the ``sys.maxint`` sentinel (infinite reuse distance), which
    is copied through unchanged.  When ``prescaled`` is true all values
    are copied unchanged.
    """
    result = histogram.Hist()
    # Warn once per instance about quantization error from the floor.
    if not self.already_printed_quantitization_warning:
        self.debug(
            "WARNING: quantitization when resampling the reuse histogram may be a big problem.",
            0)
        self.already_printed_quantitization_warning = True

    for value, count in unscaled_set_reuse_histogram:
        if prescaled or value == sys.maxint:
            # Already scaled, or the "infinite distance" sentinel.
            result.add(value, count)
        else:
            result.add(int(math.floor(value * scale_factor)), count)
    return result
######################### BUILD HISTOGRAMS FROM DATA ########################## # Ion trappers, make a list of counts using countsFromExperimentClass() # in general a list of counts or an existing histogram (array) will do hists = [] # list of ALL histograms input_state = [] # list of input states (labels for unknown states) unitaries = [] # analysis unitaries ## Reference data (doesn't need any known unitary since individual addressing ## allows preperation of states with overlap of each underlying POVM element) # Dark, Dark Reference dark = np.zeros((dim, dim))+0j dark[0, 0] = 1.0 c = np.loadtxt('hist0',delimiter=',') h = hg.Hist(c) hists.append(h) input_state.append(dark) unitaries.append(np.eye(dim)) # Dark, Bright Reference db = np.zeros((dim, dim))+0j db[1,1] = 1.0 c = np.loadtxt('hist1',delimiter=',') h = hg.Hist(c) hists.append(h) input_state.append(db) unitaries.append(np.eye(dim)) # Bright, Dark Reference bd = np.zeros((dim, dim))+0j
def process(self):
    """
    Walk the sampled trace to build per-burst reuse-distance and
    set-access histograms, using pre-scanned full-trace data to attach an
    exact per-set reuse distance to every sample.

    Populates:
        self.sampled_reuse_distance_histograms: per burst, one Hist per
            set of raw (unscaled) sampled reuse distances
        self.sampled_set_access_histograms: per burst, Hist of sampled
            set accesses
        self.full_reuse_distance_histograms: per burst, one Hist per set
            of exact per-set reuse distances from the full trace
        self.full_set_access_histograms: per burst, Hist of all set
            accesses inside the burst window (filled at the end)

    Requires precompute_set_reuse_distances() to have run first.
    """
    # Guard: precompute must have run.  Accessing a missing attribute
    # raises AttributeError, not NameError — the original `except
    # NameError` could never fire, so the guard was dead code.
    try:
        self.full_sample_set_counts
    except AttributeError:
        self.print_and_exit(
            "The set reuse distance has not been computed before calling process."
        )

    if (not self.count_dangling):
        self.debug("Note: NOT counting Dangling references.", 0)

    # Per burst data:
    # The reuse distance histograms from the sampled data (unscaled)
    self.sampled_reuse_distance_histograms = []
    # The set access histograms from the sampled data
    self.sampled_set_access_histograms = []
    # The real reuse distance histograms from the raw data (exact per-set distances)
    self.full_reuse_distance_histograms = []
    # The real set access histograms from the raw data (all accesses in the burst included)
    self.full_set_access_histograms = []

    current_burst = -1
    current_burst_sample = 0
    total_samples = 0
    total_skipped_dangling = 0

    # Track the burst start/end times so we know how much of the full
    # trace to walk to build the actual set access histograms.
    burst_start_times = []
    burst_end_times = []

    # Go through the sampled trace
    t0 = time.time()
    for event in self.sampled_trace_usf:
        # For each burst in the sampled trace we add new structures to
        # the per-burst data.
        if isinstance(event, pyusf.Burst):
            # Summary line for the burst that just ended.
            # NOTE(review): `> 0` means burst 0's summary is never
            # printed when there are multiple bursts; `>= 0` looks
            # intended — confirm (debug output only).
            if (current_burst > 0):
                self.debug("Burst " + str(current_burst) + " start: " +
                           str(burst_start_times[current_burst]) +
                           " end: " + str(burst_end_times[current_burst]) +
                           " with " + str(current_burst_sample) +
                           " samples.", 3)
            current_burst += 1
            current_burst_sample = 0
            burst_start_times.append(event.begin_time)
            # End time starts at the begin time; it is pushed forward as
            # later samples are seen in this burst.
            burst_end_times.append(event.begin_time)
            # Each burst gets one reuse-distance histogram per set.
            burst_sampled_reuse_distance_histograms_per_set = []
            burst_full_reuse_distance_histograms_per_set = []
            for x in range(0, self.number_of_sets):
                burst_sampled_reuse_distance_histograms_per_set.append(
                    histogram.Hist())
                burst_full_reuse_distance_histograms_per_set.append(
                    histogram.Hist())
            self.sampled_reuse_distance_histograms.append(
                burst_sampled_reuse_distance_histograms_per_set)
            self.full_reuse_distance_histograms.append(
                burst_full_reuse_distance_histograms_per_set)
            # The access histograms are one per burst.
            burst_sampled_set_access_histogram = histogram.Hist()
            self.sampled_set_access_histograms.append(
                burst_sampled_set_access_histogram)
            burst_full_set_access_histogram = histogram.Hist()
            self.full_set_access_histograms.append(
                burst_full_set_access_histogram)
            # Nothing else to do for the burst event itself.
            continue

        total_samples += 1

        # Update the set access histogram with the address accessed.
        # Note: do I need to do something special with the length?
        sample_address = event.begin.addr
        sample_set = self.set_for_address(sample_address)
        burst_sampled_set_access_histogram.add(sample_set)

        # Get the (raw, sampled) reuse distance.
        if isinstance(event, pyusf.Sample):
            sample_reuse_distance = event.end.time - event.begin.time - 1
        elif isinstance(event, pyusf.Dangling):
            if (not self.count_dangling):
                total_skipped_dangling += 1
                continue
            # Dangling samples never saw a reuse: record "infinite".
            # NOTE(review): why is this done for Dangling? Doesn't this
            # bias the histograms terribly?
            sample_reuse_distance = sys.maxint
        else:
            self.print_and_exit("Unexpected event type: " +
                                str(type(event)))

        # Record the reuse distance.
        burst_sampled_reuse_distance_histograms_per_set[sample_set].add(
            sample_reuse_distance)

        # Check if this is the latest start time seen in the burst.
        if (event.begin.time > burst_end_times[current_burst]):
            burst_end_times[current_burst] = event.begin.time

        # Find the actual reuse distance from the pre-scanned data.
        # Begin data is the same for both Samples and Dangling.
        (begin_address,
         begin_set_count) = self.full_sample_set_counts[event.begin.time]
        if (begin_address != event.begin.addr):
            self.print_and_exit(
                "Event address does not match address in full sample set counts"
            )
        # End data differs: Samples have their end recorded for a
        # particular time/address; Dangling get an "infinite" distance.
        if isinstance(event, pyusf.Sample):
            (end_address,
             end_set_count) = self.full_sample_set_counts[event.end.time]
            if (end_address != event.end.addr):
                self.print_and_exit(
                    "Event address does not match address in full sample set counts"
                )
            actual_set_accesses = end_set_count - begin_set_count - 1
        elif isinstance(event, pyusf.Dangling):
            actual_set_accesses = sys.maxint

        # Calculate and record the actual reuse distance.
        burst_full_reuse_distance_histograms_per_set[sample_set].add(
            actual_set_accesses)

        self.debug("> Sample " + str(current_burst_sample) + ":\t addr=" +
                   str(sample_address) + ", set=" + str(sample_set) +
                   ", raw dist=" + str(sample_reuse_distance) +
                   ", real dist=" + str(actual_set_accesses) +
                   ", time=" + str(event.begin.time), 4)
        current_burst_sample += 1

    # Summary line for the final burst.
    self.debug("Burst " + str(current_burst) + " start: " +
               str(burst_start_times[current_burst]) + " end: " +
               str(burst_end_times[current_burst]) +
               " with " + str(current_burst_sample) + " samples.", 3)

    # Now that burst_start_times/burst_end_times are known for all
    # bursts, walk the full trace to fill full_set_access_histograms.
    for burst in range(0, len(burst_start_times)):
        self.generate_full_trace_set_access_histogram(
            burst_start_times[burst], burst_end_times[burst],
            self.full_set_access_histograms[burst])

    self.debug(
        "\tDone processing " + str(total_samples) + " total samples (" +
        str(current_burst + 1) + " bursts) in " +
        str(round(time.time() - t0, 2)) + "s. (skipped " +
        str(total_skipped_dangling) + " dangling samples)", 0)
    return
def precompute_set_reuse_distances(self):
    """
    Scan the full trace once to record, for every watched sample
    time/address from the sampled trace, the cumulative access count of
    its cache set at that moment.

    Fills self.full_sample_set_counts (time -> (address, set-count)) so
    process() can compute exact per-set reuse distances, and
    self.full_set_access_counts with total per-set access counts.
    Reloads the trace files when done so they can be iterated again.
    """
    tstart = time.time()
    self.full_set_access_counts = histogram.Hist()
    trigger_times = {}  # time -> address
    self.full_sample_set_counts = {}  # time -> (address, set-count)

    # Find all the sample time/addresses we need to watch.
    self.debug(
        "Scanning through sampled trace to determine watched addresses...",
        2)
    t0 = time.time()
    for event in self.sampled_trace_usf:
        # Skip burst events.
        if isinstance(event, pyusf.Burst):
            continue
        # Otherwise we need to watch the begin of the sample.
        trigger_times[event.begin.time] = event.begin.addr
        # If it is a Sample (i.e., not Dangling) watch the end as well.
        if isinstance(event, pyusf.Sample):
            trigger_times[event.end.time] = event.end.addr
    self.debug(
        "\tFound " + str(len(trigger_times)) + " watched addresses in " +
        str(round(time.time() - t0, 2)) + "s.", 2)

    # Go through the full trace and do two things:
    # 1) keep track of the accesses to each set
    # 2) for each watched time/address, record that set's access count
    #    at the moment it happens
    self.debug(
        "Scanning through full trace to determine set accesses at each sampled address...",
        2)
    t0 = time.time()
    for event in self.full_trace_usf:
        if isinstance(event, pyusf.Trace):
            # Count this access against its set.  ('set_index' avoids
            # shadowing the builtin 'set'.)
            set_index = self.set_for_address(event.access.addr)
            self.full_set_access_counts.add(set_index)
            # If we are watching this time and the address matches,
            # record its set count and stop watching it.  ('in' replaces
            # dict.has_key(), which was removed in Python 3.)
            if event.access.time in trigger_times:
                # Make sure the time matches the expected address.
                # (Fixed: the original 'event.acccess.addr' typo would
                # itself raise AttributeError on this error path, and the
                # expected address needed str() before concatenation.)
                if (trigger_times[event.access.time] != event.access.addr):
                    self.print_and_exit(
                        "Access at time " + str(event.access.time) +
                        " does not match expected address: " +
                        str(event.access.addr) + " != " +
                        str(trigger_times[event.access.time]))
                # Make sure we don't already have this time recorded
                # (direct dict membership, not an O(n) scan of .keys()).
                if (event.access.time in self.full_sample_set_counts):
                    self.print_and_exit(
                        "Access time " + str(event.access.time) +
                        " is already in the full sample set counts as " +
                        str(self.full_sample_set_counts[event.access.time]))
                # Record the data for this access time.
                self.full_sample_set_counts[event.access.time] = (
                    event.access.addr,
                    self.full_set_access_counts[set_index])
                # Remove this time from the list of events to check.
                del trigger_times[event.access.time]
    self.debug(
        "\tDone scanning for set accesses for all memory samples in " +
        str(round(time.time() - t0, 2)) + "s.", 2)

    # Close and re-open the traces.
    self.reload_usf_files()
    self.debug(
        "\tPre-computed baseline set reuse distances in " +
        str(round(time.time() - tstart, 2)) + "s.", 1)
    return