Example #1
    def nonParametricResample(self):
        """
        Resamples all histograms based on data from all experiments.

        Returns: 
            ResampledSuper: PartialMaxLike instance for single bootstrap
                            resample
        
        """
        new_hists = []
        for i in range(self.num_hists):
            new_hists.append(hg.Hist())
            new_hists[-1].makeFromHist(self.hists[i].resample(),
                                       self.hists[i].rawhistbins)
            
        ResampledSuper = PartialMaxLike(new_hists, self.input_state,
                                        self.unitaries, self.P_j,
                                        train_frac=0, bin_flag=False,
                                        measure=self.measure,
                                        targets=self.targets, 
                                        autosave=False)
        # Share the MATLAB engine if it exists for the expectation SDP
        if self.eng is not None:
            ResampledSuper.eng = self.startMatlabEng()
        else:
            ResampledSuper.eng = None
            
        return ResampledSuper
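
A minimal usage sketch of the non-parametric bootstrap loop this method supports. Here `pml` is an assumed, already-constructed PartialMaxLike instance and `analyze()` is a hypothetical method returning a scalar figure of merit; only nonParametricResample() comes from the example above.

import numpy as np

boot_estimates = []
for _ in range(200):                             # number of bootstrap resamples
    resampled = pml.nonParametricResample()      # fresh resampled analysis object
    boot_estimates.append(resampled.analyze())   # hypothetical analysis call

# 68% bootstrap confidence interval from the empirical percentiles
low, high = np.percentile(np.array(boot_estimates), [16, 84])
print("bootstrap interval: [%g, %g]" % (low, high))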
Example #2
    def trainingSample(self, train_frac=0.1):
        """
        Samples from references without replacement to generate training data.
        
        Randomly selects train_frac of the reference histogram data and uses
        it to generate an instance of AnalysisParameters called
        TrainParameters.
        
        Args:
            train_frac:     fraction of reference data to use for training
            
        """
        self.train_frac = train_frac  

        if (self.train_frac <= 0 or self.train_frac >= 1):
            return None

        TrainHists = []
        for i in range(self.ref_num):
            TrainHists.append(hg.Hist())
            ref_ind = self.ref_ind[i]
            train_trial = int(np.floor(self.train_frac*self.hists[ref_ind].trials))
            counts = hg.makeCounts(self.hists[ref_ind].raw_hist,
                                   self.hists[ref_ind].raw_hist_bins)
            # Randomly choose trial indices to hold out as training data
            train_ind = np.random.choice(self.hists[ref_ind].trials,
                                         train_trial, replace=False)
            # Indices of the trials that remain in the reference histogram
            hist_ind = np.setdiff1d(np.arange(
                                    self.hists[ref_ind].trials), train_ind)
            # Make both histograms (remake original with less counts)
            orig_hist = self.hists[ref_ind].hist       
            orig_bins = self.hists[ref_ind].bin_bounds
            TrainHists[i].makeFromCounts(counts[train_ind])
            self.hists[ref_ind].makeFromCounts(counts[hist_ind])
            TrainHists[i].rebin(orig_bins)
            self.hists[ref_ind].rebin(orig_bins)
            assert np.all(orig_hist ==
                          self.hists[ref_ind].hist + TrainHists[i].hist)

        # Return to binning for remaining reference histograms
        self.rebinParams()

        # Make training samples an AnalysisParameter object
        input_state = [np.array(self.input_state[self.ref_ind[k]]) 
                       for k in range(self.ref_num)]
        self.TrainParameters = AnalysisParameters(TrainHists, input_state,
                                                  self.unitaries[self.ref_ind], 
                                                  self.P_j)
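
The core trick above is splitting each reference histogram's raw trials into disjoint training and analysis subsets, sampled without replacement so no trial is counted twice. A self-contained sketch of that split on toy data (all names are local to the sketch):

import numpy as np

counts = np.random.poisson(5, size=1000)   # toy per-trial photon counts
train_frac = 0.1

# Choose a training subset without replacement; the rest stays in the histogram
n_train = int(np.floor(train_frac * counts.size))
train_ind = np.random.choice(counts.size, n_train, replace=False)
hist_ind = np.setdiff1d(np.arange(counts.size), train_ind)

# The two subsets partition the data: binned together they reproduce
# the original histogram exactly (same check as the assert above)
bins = np.arange(counts.max() + 2)
orig = np.histogram(counts, bins)[0]
split = np.histogram(counts[train_ind], bins)[0] + np.histogram(counts[hist_ind], bins)[0]
assert np.all(orig == split)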
Example #3
    def parametricResample(self):
        """
        Resamples all histograms using estimated states and transition matrix.

        Note: this does not preserve the number of distinguishable ions nor
        can these histograms be rebinned to more bins.

        Returns: 
            ResampledSuper: PartialMaxLike instance for single bootstrap
                            resample
        
        """
        new_hists = []
        for i in range(self.num_hists):
            # use populations estimated from tomography to calculate the
            # probability of observing each "count"
            prob = self.tom.pops[i].dot(self.est_Q)
            new_hists.append(hg.Hist())
            hist = np.random.multinomial(self.hists[i].trials, prob)
            new_hists[-1].makeFromHist(hist, self.hists[i].bin_bounds)
            new_hists[-1].max_counts = self.hists[i].max_counts
            new_hists[-1].min_counts = self.hists[i].min_counts

        ResampledSuper = PartialMaxLike(new_hists, self.input_state,
                                        self.unitaries, self.P_j,
                                        train_frac=0, bin_flag=False,
                                        measure=self.measure,
                                        targets=self.targets, 
                                        autosave=False)
        # Share the MATLAB engine if it exists for the expectation SDP
        if self.eng is not None:
            ResampledSuper.eng = self.startMatlabEng()
        else:
            ResampledSuper.eng = None

        ResampledSuper.bin_bounds = self.bin_bounds
        ResampledSuper.mbins = self.mbins
        ResampledSuper.bins = self.bins
        
        return ResampledSuper
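
This bootstrap is parametric: rather than resampling the observed counts, each histogram is redrawn from a multinomial over the model's estimated outcome probabilities. A self-contained sketch of that redraw with toy numbers (pops and Q stand in for the fitted populations and transition matrix):

import numpy as np

# Toy fitted model: populations over 3 states, and a row-stochastic
# matrix Q mapping each state to 4 observable count bins
pops = np.array([0.5, 0.3, 0.2])
Q = np.array([[0.80, 0.10, 0.05, 0.05],
              [0.10, 0.70, 0.10, 0.10],
              [0.05, 0.10, 0.15, 0.70]])

prob = pops.dot(Q)                 # probability of each observable bin
assert np.isclose(prob.sum(), 1.0)

# One parametric bootstrap draw of a whole 1000-trial histogram
resampled_hist = np.random.multinomial(1000, prob)
print(resampled_hist)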
Example #4
    def scale_and_resample_set_histogram(self,
                                         unscaled_set_reuse_histogram,
                                         scale_factor,
                                         prescaled=False):
        new_hist = histogram.Hist()
        if not self.already_printed_quantitization_warning:
            self.debug(
                "WARNING: quantization when resampling the reuse histogram may be a big problem.",
                0)
            self.already_printed_quantitization_warning = True
        # If it is pre-scaled, copy the histogram unchanged
        if prescaled:
            for v, c in unscaled_set_reuse_histogram:
                new_hist.add(v, c)
            return new_hist
        # Otherwise scale each value and insert it into the histogram;
        # sys.maxint marks "infinite" (dangling) reuse distances and is left unscaled
        for v, c in unscaled_set_reuse_histogram:
            if v == sys.maxint:
                scaled_v = v
            else:
                scaled_v = int(math.floor(v * scale_factor))
            new_hist.add(scaled_v, c)
        return new_hist
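
The quantization warning exists because of the floor: after scaling, distinct reuse distances can collapse into the same bucket, so the scaled histogram loses resolution. A tiny demonstration of the effect:

import math

scale_factor = 0.1
for v in (10, 11, 19, 20):
    print(v, "->", int(math.floor(v * scale_factor)))
# 10 -> 1, 11 -> 1, 19 -> 1, 20 -> 2: three distinct distances share a bucket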
Example #5
######################### BUILD HISTOGRAMS FROM DATA ##########################
# Ion trappers, make a list of counts using countsFromExperimentClass()
# in general a list of counts or an existing histogram (array) will do
hists = []         # list of ALL histograms
input_state = []   # list of input states (labels for unknown states)
unitaries = []     # analysis unitaries

## Reference data (doesn't need any known unitary since individual addressing
## allows preparation of states with overlap of each underlying POVM element)
# Dark, Dark Reference
dark = np.zeros((dim, dim))+0j
dark[0, 0] = 1.0
c = np.loadtxt('hist0', delimiter=',')
h = hg.Hist(c)
hists.append(h)
input_state.append(dark)
unitaries.append(np.eye(dim))

# Dark, Bright Reference
db = np.zeros((dim, dim))+0j
db[1, 1] = 1.0
c = np.loadtxt('hist1', delimiter=',')
h = hg.Hist(c)
hists.append(h)
input_state.append(db)
unitaries.append(np.eye(dim))

# Bright, Dark Reference
bd = np.zeros((dim, dim))+0j
Example #6
    def process(self):
        try:
            self.full_sample_set_counts
        except AttributeError:
            self.print_and_exit(
                "The set reuse distance has not been computed before calling process."
            )

        if (not self.count_dangling):
            self.debug("Note: NOT counting Dangling references.", 0)
        # Per burst data:
        #  The reuse distance histograms from the sampled data (unscaled)
        self.sampled_reuse_distance_histograms = []
        #  The set access histograms from the sampled data
        self.sampled_set_access_histograms = []
        #  The real reuse distance histograms from the raw data (exact per-set distances)
        self.full_reuse_distance_histograms = []
        #  The real set access histograms from the raw data (all accesses in the burst included)
        self.full_set_access_histograms = []

        current_burst = -1
        current_burst_sample = 0
        total_samples = 0
        total_skipped_dangling = 0
        # We need to track the burst start/end time so we know how much of the full trace
        # to walk through to get the actual set access histogram
        burst_start_times = []
        burst_end_times = []
        # Go through the sampled trace
        t0 = time.time()
        for event in self.sampled_trace_usf:
            # For each burst in the sampled trace we add new structures to the per-burst data
            if isinstance(event, pyusf.Burst):
                if (current_burst >= 0):
                    self.debug("Burst " + str(current_burst) + " start: " + str(burst_start_times[current_burst]) + " end: " + str(burst_end_times[current_burst]) + \
                               " with " + str(current_burst_sample) + " samples.", 3)
                current_burst += 1
                current_burst_sample = 0
                burst_start_times.append(event.begin_time)
                # End time starts at the begin time and is extended as samples are seen
                burst_end_times.append(event.begin_time)

                # Each gets one histogram per set
                burst_sampled_reuse_distance_histograms_per_set = []
                burst_full_reuse_distance_histograms_per_set = []
                for x in range(0, self.number_of_sets):
                    burst_sampled_reuse_distance_histograms_per_set.append(
                        histogram.Hist())
                    burst_full_reuse_distance_histograms_per_set.append(
                        histogram.Hist())
                self.sampled_reuse_distance_histograms.append(
                    burst_sampled_reuse_distance_histograms_per_set)
                self.full_reuse_distance_histograms.append(
                    burst_full_reuse_distance_histograms_per_set)
                # The access histograms are one per burst
                burst_sampled_set_access_histogram = histogram.Hist()
                self.sampled_set_access_histograms.append(
                    burst_sampled_set_access_histogram)
                burst_full_set_access_histogram = histogram.Hist()
                self.full_set_access_histograms.append(
                    burst_full_set_access_histogram)

                # We don't do anything for the burst per se, so continue
                continue

            total_samples += 1
            # Update the set access histogram with the address accessed
            # Get the address and set
            # Note: do I need to do something special with the length?
            sample_address = event.begin.addr
            # Get the set and update the set access histogram
            sample_set = self.set_for_address(sample_address)
            burst_sampled_set_access_histogram.add(sample_set)

            # Get the reuse distance
            if isinstance(event, pyusf.Sample):
                sample_reuse_distance = event.end.time - event.begin.time - 1
            elif isinstance(event, pyusf.Dangling):
                if (not self.count_dangling):
                    total_skipped_dangling += 1
                    continue
                # Why is this done for Dangling? Doesn't this bias the histograms terribly?
                sample_reuse_distance = sys.maxint
            else:
                self.print_and_exit("Unexpected event type: " +
                                    str(type(event)))
            # Record the reuse distance
            burst_sampled_reuse_distance_histograms_per_set[sample_set].add(
                sample_reuse_distance)

            # Check if this is the latest start address in the burst
            if (event.begin.time > burst_end_times[current_burst]):
                burst_end_times[current_burst] = event.begin.time

            # Find the actual reuse distance from the pre-scanned data
            # Begin data is the same for both Samples and Dangling
            (begin_address,
             begin_set_count) = self.full_sample_set_counts[event.begin.time]
            if (begin_address != event.begin.addr):
                self.print_and_exit(
                    "Event address does not match address in full sample set counts"
                )
            # End data is different for Samples and Dangling
            # Samples have their end data recorded for a particular time/address
            # Dangling have an "infinite" reuse distance
            if isinstance(event, pyusf.Sample):
                (end_address,
                 end_set_count) = self.full_sample_set_counts[event.end.time]
                if (end_address != event.end.addr):
                    self.print_and_exit(
                        "Event address does not match address in full sample set counts"
                    )
                actual_set_accesses = end_set_count - begin_set_count - 1
            elif isinstance(event, pyusf.Dangling):
                actual_set_accesses = sys.maxint
            # Calculate and record the actual reuse distance
            burst_full_reuse_distance_histograms_per_set[sample_set].add(
                actual_set_accesses)

            self.debug("> Sample " + str(current_burst_sample) + ":\t addr=" + str(sample_address) + \
                       ", set=" + str(sample_set) + ", raw dist=" + str(sample_reuse_distance) + \
                       ", real dist=" + str(actual_set_accesses) + ", time=" + str(event.begin.time), 4)
            current_burst_sample += 1

        self.debug("Burst " + str(current_burst) + " start: " + str(burst_start_times[current_burst]) + " end: " + str(burst_end_times[current_burst]) + \
                               " with " + str(current_burst_sample) + " samples.", 3)
        # Now we have the burst_start_times and burst_end_times for all bursts
        # so we can go through the full trace and determine the full_set_access_histograms
        for burst in range(0, len(burst_start_times)):
            self.generate_full_trace_set_access_histogram(burst_start_times[burst], burst_end_times[burst], \
                                                            self.full_set_access_histograms[burst])

        self.debug(
            "\tDone processing " + str(total_samples) + " total samples (" +
            str(current_burst + 1) + " bursts) in " +
            str(round(time.time() - t0, 2)) + "s. (skipped " +
            str(total_skipped_dangling) + " dangling samples)", 0)
        return
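
Each sample records two distances: the raw reuse distance in global accesses (end.time - begin.time - 1) and the actual per-set distance recovered from the pre-scanned set counts (end_set_count - begin_set_count - 1). A worked toy example of the arithmetic (numbers invented for illustration):

# Times are global access indices; set counts come from the pre-scan
begin_time, end_time = 100, 110
raw_reuse_distance = end_time - begin_time - 1             # 9 intervening accesses

# Suppose only 3 of those 9 accesses touched the same cache set
begin_set_count, end_set_count = 40, 44
actual_set_accesses = end_set_count - begin_set_count - 1  # 3
print(raw_reuse_distance, actual_set_accesses)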
Example #7
    def precompute_set_reuse_distances(self):
        tstart = time.time()
        self.full_set_access_counts = histogram.Hist()
        trigger_times = {}  # time -> address
        self.full_sample_set_counts = {}  # time -> (address, set-count)

        # Find all the sample time/addresses we need to watch
        self.debug(
            "Scanning through sampled trace to determine watched addresses...",
            2)
        t0 = time.time()
        for event in self.sampled_trace_usf:
            # Skip burst events
            if isinstance(event, pyusf.Burst):
                continue
            # Otherwise we need to watch the begin of the sample
            trigger_times[event.begin.time] = event.begin.addr
            # If it is a sample (i.e., not dangling) we need to watch the end as well
            if isinstance(event, pyusf.Sample):
                trigger_times[event.end.time] = event.end.addr

        self.debug(
            "\tFound " + str(len(trigger_times)) + " watched addresses in " +
            str(round(time.time() - t0, 2)) + "s.", 2)

        # Go through the full trace and do two things:
        # 1) keep track of the accesses to each set
        # 2) for each time/address we need to watch, record the accesses to that set when it happens
        self.debug(
            "Scanning through full trace to determine set accesses at each sampled address...",
            2)
        t0 = time.time()
        for event in self.full_trace_usf:
            if isinstance(event, pyusf.Trace):
                # Get the set for this access
                set_index = self.set_for_address(event.access.addr)
                # Count it
                self.full_set_access_counts.add(set_index)
                # If we are watching this time and the address matches then
                # record its set count and stop watching it
                if event.access.time in trigger_times:
                    # Make sure the time matches the expected address
                    if (trigger_times[event.access.time] != event.access.addr):
                        self.print_and_exit("Access at time " + str(event.access.time) + " does not match expected address: " \
                                            + str(event.access.addr) + " != " + str(trigger_times[event.access.time]))
                    # Make sure we don't already have this time recorded
                    if (event.access.time in self.full_sample_set_counts):
                        self.print_and_exit("Access time " + str(event.access.time) + " is already in the" \
                                            + " full sample set counts as " + str(self.full_sample_set_counts[event.access.time]))
                    # Record the data for this access time
                    self.full_sample_set_counts[event.access.time] = (
                        event.access.addr, self.full_set_access_counts[set_index])
                    # Remove this time from the list of events to check
                    del trigger_times[event.access.time]

        self.debug(
            "\tDone scanning for set accesses for all memory samples in " +
            str(round(time.time() - t0, 2)) + "s.", 2)

        # Close and re-open the traces
        self.reload_usf_files()
        self.debug(
            "\tPre-computed baseline set reuse distances in " +
            str(round(time.time() - tstart, 2)) + "s.", 1)

        return
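
The pattern here is a generic two-pass scan: pass one collects the (time, address) pairs to watch, pass two walks the full trace once, keeping running per-set counts and snapshotting the count whenever a watched time comes by. A stripped-down sketch of the same idea over a plain list (all names are local to the sketch):

from collections import defaultdict

# Toy trace: (time, address) in access order, with a toy set mapping
trace = [(0, 0x10), (1, 0x24), (2, 0x10), (3, 0x30), (4, 0x24)]
set_for = lambda addr: (addr >> 4) % 2

watched = {1: 0x24, 4: 0x24}     # time -> expected address (pass one)
set_counts = defaultdict(int)    # running per-set access counts
snapshots = {}                   # time -> (address, set count at that time)

for t, addr in trace:            # pass two: single walk of the full trace
    s = set_for(addr)
    set_counts[s] += 1
    if t in watched:
        assert watched[t] == addr
        snapshots[t] = (addr, set_counts[s])
        del watched[t]

# Per-set reuse distance between the two watched accesses to 0x24
(_, c0), (_, c1) = snapshots[1], snapshots[4]
print(c1 - c0 - 1)               # same-set accesses in between: 0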