def _get_means_stdevs(cls, x, y):
    x_y_counter_lin = cls._convert_x_y_to_counter(x, y)
    x_y_counter = cls._convert_x_y_to_counter(x, [ln(y_i) for y_i in y])

    # floor the stdev at 1e-10 so ln() is defined when a group has zero spread
    st_dev = {x: ln(stdev(y) if stdev(y) > 0 else 1e-10) for x, y in x_y_counter_lin.items()}
    mean_ = {x: mean(y) for x, y in x_y_counter.items()}
    return cls._get_mean_stdev_from_counter(x_y_counter, st_dev, mean_)
def cohens_d(df, value='score_combined', group_col='patient_diagnosis_super_class'):
    groups = df[group_col].value_counts()
    print('warning: comparing the 2 groups with the most data:\n',
          groups.index[0], groups.index[1])
    rvs1 = df.loc[df[group_col] == groups.index[0], value]
    rvs2 = df.loc[df[group_col] == groups.index[1], value]
    # Cohen's d: mean difference over the pooled standard deviation
    res = (np.mean(rvs1) - np.mean(rvs2)) / np.sqrt((np.std(rvs1) ** 2 + np.std(rvs2) ** 2) / 2)
    return res
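
# Hedged usage sketch for cohens_d: the column names are the function's
# defaults, but the toy values are invented for illustration.
import numpy as np
import pandas as pd

toy = pd.DataFrame({
    'score_combined': [1.0, 1.2, 0.9, 2.1, 2.3, 2.0],
    'patient_diagnosis_super_class': ['A', 'A', 'A', 'B', 'B', 'B'],
})
print(cohens_d(toy))  # effect size between the two best-populated groups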
def add_pixel_gain(self, gain):
    pass  # body not included in this snippet

def add_nonlinear_gain(self, gain):
    pass  # body not included in this snippet

def add_background(self, bg):
    pass  # body not included in this snippet

@property
def ensemble_statistics(self):
    stats = {}
    stats['raw intensity'] = (np.mean(self.raw_intensities), np.std(self.raw_intensities))
    if self.modified_intensities:
        stats['modified intensity'] = (np.mean(self.modified_intensities), np.std(self.modified_intensities))
    if self.raw_correlations:
        stats['raw correlation'] = (np.mean(self.raw_correlations), np.std(self.raw_correlations))
    if self.modified_correlations:
        stats['modified correlation'] = (np.mean(self.modified_correlations), np.std(self.modified_correlations))
    return stats
Example #4
def plot_peptide_avg_model_fits(dataset, model_pfs, num_models=100, outfile=None, show_plot=False):
    # Given a dataset and a set of models, plot the fit to the experimental data

    fig = plt.figure()
    ax = plt.gca()

    # First, calculate the average and SD of deuterium incorporation at each
    # peptide and timepoint

    model_deuts = {}

    for pep in dataset.get_peptides():
        pep_deuts = {}
        for tp in pep.get_timepoints():
            tp_deuts = []
            for pfs in model_pfs:
                tp_deuts.append(get_timepoint_deuteration(pep, tp.time, pfs))
            pep_deuts[tp.time] = tp_deuts

        model_deuts[pep.sequence] = pep_deuts

    for pep in dataset.get_peptides():

        pep_deut = model_deuts[pep.sequence]

        x = []
        yavg = []
        yerror = []
        for tp in pep.get_timepoints():
            x.append(tp.time)
            xt = [int(tp.time)] * len(tp.get_replicates())
            yt = [float(r.deut) for r in tp.get_replicates()]
            plt.scatter(xt, yt)

            yavg.append(numpy.average(pep_deut[tp.time]))
            yerror.append(numpy.std(pep_deut[tp.time]))
        plt.errorbar(x, yavg, yerr=yerror)
        ax.set_xscale('log')
        ax.set_ylabel("%D Incorporation")
        ax.set_xlabel("Time (seconds)")
        plt.axis((1, 3600, 0, 100))
        fig.suptitle(dataset.name + "_" + pep.sequence)
        if outfile is None:
            plt.show()
        elif not show_plot:
            plt.savefig(outfile, bbox_inches=0)
        else:
            plt.show()
            plt.savefig(outfile, bbox_inches=0)
def get_stocks(stocks):
    stock_list = []
    price_list = []
    mean_list = []
    std_list = []
    for stock in stocks:
        growth = np.array(stock.growth)
        returns = np.array(stock.returns)
        multiple = np.array(stock.multiple)
        value = np.array(stock.value)
        matrix_of_stock = np.column_stack((growth, returns, multiple))
        mean_list.append(np.mean(growth))
        mean_list.append(np.mean(returns))
        mean_list.append(np.mean(multiple))
        mean_list.append(np.mean(value))
        std_list.append(np.std(growth))
        std_list.append(np.std(returns))
        std_list.append(np.std(multiple))
        std_list.append(np.std(value))
        stock_list.append(matrix_of_stock)
        price_list.append(value)
    return stock_list, price_list, mean_list, std_list
Example #6
 def printme(self, verbose=False):
     try:
         print "\nHa", np.mean(self.Ha)
         if verbose: print self.Ha
     except (IndexError, TypeError): pass
     try:
         print "\nHb", np.mean(self.Hb)
         if verbose: print self.Hb
     except (IndexError, TypeError): pass
     try:
         print  "\nO2",np.mean(self.O23727)
         if verbose: print self.O23727
     except (IndexError, TypeError): pass
     try:
         print  "\nO3",np.mean(self.O35007)
         if verbose: print self.O35007
     except (IndexError, TypeError): pass
     try:
         print  "\nO34959",np.mean(self.O34959)
         if verbose: print self.O34959
     except (IndexError, TypeError): pass
     try:
         print  "\nZ94",np.mean(self.mds['Z94'])
         if verbose: print self.mds['Z94']
     except (IndexError, TypeError): pass
     try:
         print  "\nR23",np.mean(self.R23)
         if verbose: print self.R23
     except (IndexError, TypeError): pass
     try:
         print "\nlog(R23)", np.mean(self.logR23)
         if verbose: print self.logR23
     except (TypeError, IndexError): pass
     try:
         print  "\nlog([NII][OII])",stats.nanmean(self.logN2O2)
         if verbose: print self.logN2O2
     except (TypeError, IndexError): pass
     try:
         print  "\nlog([OIII][OII])",stats.nanmean(self.logO3O2)
         if verbose:
             print self.logO3O2
     except (TypeError, IndexError): pass
     for k in self.mds.iterkeys():
         print "\n",k,
         try: print stats.nanmean(self.mds[k]), np.std(self.mds[k])
         except (IndexError,TypeError):
             if verbose: print self.mds[k]
Example #8
 def get_noise(self, data):
     """Calculates noise over last dim in data (time), using .noisemethod"""
     #print('calculating noise')
     #ncores = mp.cpu_count()
     #pool = threadpool.Pool(ncores)
     if self.noisemethod == 'median':
         #noise = pool.map(self.get_median, data) # multithreads over rows in data
         #noise = np.median(np.abs(data), axis=-1) / 0.6745 # see Quiroga2004
         # np.abs does a copy, so modifying the result in-place is safe:
         noise = util.median_inplace_2Dshort(np.abs(data)) / 0.6745 # see Quiroga2004
         #noise = np.mean(np.abs(data), axis=-1) / 0.6745 / 1.2
         #noise = util.mean_2Dshort(np.abs(data)) / 0.6745 # see Quiroga2004
     elif self.noisemethod == 'stdev':
         #noise = pool.map(self.get_stdev, data) # multithreads over rows in data
         noise = np.std(data, axis=-1)
     else:
         raise ValueError('unknown noisemethod: %r' % self.noisemethod)
     #pool.terminate() # pool.close() doesn't allow Python to exit when spyke is closed
     #pool.join() # unnecessary, hangs
     #return np.asarray(noise)
     return noise
    def HasEnoughRuns(self, graph_config, confidence_level):
        """Checks if the mean of the results for a given trace config is within
        0.1% of the true value with the specified confidence level.

        This assumes Gaussian distribution of the noise and is based on
        https://en.wikipedia.org/wiki/68%E2%80%9395%E2%80%9399.7_rule.

        Args:
          graph_config: An instance of GraphConfig.
          confidence_level: Number of standard deviations from the mean that all
              values must lie within. Typical values are 1, 2 and 3 and correspond
              to 68%, 95% and 99.7% probability that the measured value is within
              0.1% of the true value.

        Returns:
          True if the specified confidence level has been achieved.
        """
        if not isinstance(graph_config, TraceConfig):
            return all(
                self.HasEnoughRuns(child, confidence_level)
                for child in graph_config.children)

        trace = self.traces.get(graph_config.name, {})
        results = trace.get('results', [])
        logging.debug('HasEnoughRuns for %s', graph_config.name)

        if len(results) < MIN_RUNS_FOR_CONFIDENCE:
            logging.debug('  Ran %d times, need at least %d', len(results),
                          MIN_RUNS_FOR_CONFIDENCE)
            return False

        logging.debug('  Results: %d entries', len(results))
        avg = mean(results)
        avg_stderr = stdev(results) / sqrt(len(results))
        logging.debug('  Mean: %.2f, mean_stderr: %.2f', avg, avg_stderr)
        logging.info('>>> Confidence level is %.2f',
                     avg / (1000.0 * avg_stderr))
        return confidence_level * avg_stderr < avg / 1000.0
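
# A standalone worked example of the confidence rule in HasEnoughRuns (a
# sketch: the sample numbers are invented; the 0.1% threshold and the
# standard-error formula mirror the method above).
from math import sqrt
from statistics import mean, stdev

results = [100.2, 99.8, 100.1, 100.0, 99.9, 100.0, 100.1, 99.9, 100.0, 100.0]
confidence_level = 2  # ~95% under the 68-95-99.7 rule
avg = mean(results)
avg_stderr = stdev(results) / sqrt(len(results))
# Enough runs once confidence_level standard errors fit within 0.1% of the mean:
print(confidence_level * avg_stderr < avg / 1000.0)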
Example #10
    def exposureStatistics(self, statisticsDictionary):
        """Calculate exposure level statistics based on the existing
        per-amplifier and per-detector measurements.

        Parameters
        ----------
        statisticsDictionary : `dict` [`str`, `dict` [`str`, scalar]]
            Dictionary of measured statistics.  The top level
            dictionary is keyed on the detector names, and contains
            the measured statistics from the per-detector
            measurements.

        Returns
        -------
        outputStatistics : `dict` [`str`, scalar]
            A dictionary of the statistics measured and their values.
        """
        detectorMeans = []
        for detName, stats in statisticsDictionary.items():
            # Get detector stats:
            detectorMeans.append(stats['DET']['MEAN'])

        return {'SCATTER': np.std(detectorMeans)}
Example #11
    def tTest(self, a, b, alpha, hNull, hAlt):
        # remove NAs pairwise
        temp = pd.DataFrame({'a': a, 'b': b}).dropna()
        a, b = temp['a'], temp['b']
        n = len(a)

        # pooled standard deviation (equal sample sizes assumed)
        varA, varB = np.var(a, ddof=1), np.var(b, ddof=1)
        stdev = np.sqrt((varA + varB) / 2)

        # get test stat
        tStat = (np.mean(a) - np.mean(b)) / (stdev * np.sqrt(2 / n))

        # calculate degrees of freedom and get two-sided p-value
        dof = 2 * n - 2
        pVal = 2 * (1 - stats.t.cdf(abs(tStat), df=dof))

        # cross-check against scipy's implementation
        t2, p2 = stats.ttest_ind(a, b)
        if np.isclose(pVal, p2):
            return (tStat, pVal)
        else:
            print("incorrect p value")
            return (t2, p2)
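
# Standalone sanity check of the pooled two-sample formula above (a sketch
# with invented toy data; equal sample sizes assumed, as in the method):
import numpy as np
from scipy import stats

a = np.array([2.1, 2.5, 2.3, 2.7])
b = np.array([1.9, 2.0, 2.2, 1.8])
n = len(a)
sp = np.sqrt((np.var(a, ddof=1) + np.var(b, ddof=1)) / 2)  # pooled stdev
tStat = (np.mean(a) - np.mean(b)) / (sp * np.sqrt(2 / n))
pVal = 2 * (1 - stats.t.cdf(abs(tStat), df=2 * n - 2))
print(np.isclose((tStat, pVal), stats.ttest_ind(a, b)))  # expect [True, True]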
data = wave.read(song_file)
sample_rate = data[0]
data = numpy.asarray(data[1])
num_samples = len(data)
example_length = frame_length * seq_length
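
# `rescale` is used below but not defined in this snippet; a plausible
# stand-in (an assumption, not the author's implementation) that maps the
# mean-subtracted data linearly onto [lo, hi]:
def rescale(x, lo, hi, xmin, xmax):
    # once x is mean-subtracted, only the span xmax - xmin matters
    span = float(xmax - xmin)
    return lo + (hi - lo) * (x - x.min()) / span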

# Normalize?
if normalize == True:
    print "\nNormalizing -1 to 1..."
    data_mean = numpy.mean(data)
    data_min = numpy.min(data)
    data_max = numpy.max(data)
    data = rescale(data - data_mean, -1, 1, data_min, data_max)
    print "subtracted training mean and normalized data to [-1,1]"
elif normalize == "stdev":
    data_stdev = numpy.std(data)
    data = data / data_stdev
    print "divided by standard deviation"
else:
    print "using unnormalized data"

# Make sure data will be the right length for reshaping
print "\nGetting examples..."
shift = 0
num_examples = 0
data_to_use = []
while len(data[shift:]) >= example_length + frame_length:
    num_ex_this_pass = len(data[shift:]) // example_length
    data_to_use.extend(data[shift : shift + (num_ex_this_pass * example_length)])
    num_examples += num_ex_this_pass
    shift += example_shift
    def scan(self):

        initial_setpoint = self._get_control_f()
        scan_min = initial_setpoint + self._scan_min
        scan_max = initial_setpoint + self._scan_max

        #print 'initial_setpoint {:.2f},scan_min {:.2f},scan_max {:.2f}, steps {}'.format(initial_setpoint,scan_min,scan_max, steps)

        # First scan negative. See if is going up or down (or unclear)
        # If going down, switch to positive
        # If going up, keep going until goes down then break
        # If unclear, go to edge of scan range and return
        # Next do same for positive

        finished = 0

        ##########################################
        # Start with negative

        print "Scanning negative"

        steps = int((initial_setpoint - scan_min) / self._control_step_size)
        udrange_temp = np.linspace(initial_setpoint, scan_min, steps)

        values_temp = np.zeros(len(udrange_temp))
        true_udrange_temp = np.zeros(len(udrange_temp))

        smoothed_values = np.zeros(
            len(udrange_temp) - self.get_smoothing_N() + 1)
        max_value = 0
        max_frac_change = 0

        for i, sp in enumerate(udrange_temp):
            print i

            if (msvcrt.kbhit() and (msvcrt.getch() == 'q')):
                self._set_control_f(initial_setpoint)
                break
            #print 'sp',sp
            self._set_control_f(sp)
            if self.get_dwell_after_set():
                qt.msleep(self._dwell_time)
            true_udrange_temp[i] = self._get_control_f()
            values_temp[i] = self.get_value()

            # First of all, if above the threshold, that's us happy.
            if values_temp[i] > self.get_good_value():
                print "Found good value!"
                finished = 1

                break

            if i >= (
                    self.get_smoothing_N() - 1
            ):  # Need to build up some values at start before considering whether decreasing or not

                smoothed_index = i - (self.get_smoothing_N() - 1)

                smoothed_values[smoothed_index] = np.mean(
                    values_temp[smoothed_index:(
                        i + 1)])  # very simple smoothing filter

                fractional_change = smoothed_values[
                    smoothed_index] / smoothed_values[
                        0]  # fractional change from first smoothed value

                if values_temp[
                        i] > max_value:  # Track the max value reached so far
                    max_value = values_temp[i]
                    max_frac_change = fractional_change

                # Try and measure the 'noise' in the signal
                if smoothed_index > 0:
                    diff_smoothed = (
                        smoothed_values[0:(smoothed_index - 1)] -
                        smoothed_values[1:smoothed_index]) / smoothed_values[0]
                else:
                    diff_smoothed = 0

                # If nothing is changing, noise in diff might be high
                not_too_noisy_to_tell = np.std(diff_smoothed) < (
                    1 - self.get_threshold_for_decreasing())

                if fractional_change < self.get_threshold_for_decreasing(
                ) and not_too_noisy_to_tell:  # Therefore must be decreasing from start
                    print "Decreasing in this direction!"

                    break

                if (max_value > self.get_min_peak_value()) and (
                        fractional_change <
                        max_frac_change * self.get_threshold_for_past_peak()
                ):  # Must have gone past peak
                    print "Gone past peak!"
                    finished = 1

                    break

        if finished == 0:  # If not finished yet

            print "Scanning positive"

            # Store measured values so far
            true_udrange = true_udrange_temp[0:i]
            values = values_temp[0:i]

            # Start by going back to start
            self._set_control_f(initial_setpoint)
            if self.get_dwell_after_set():
                for x in range(i + 4):
                    qt.msleep(
                        self._dwell_time
                    )  # Wait for a length of time determined by how far we got
                    if self._get_control_f(
                    ) > initial_setpoint + 0.05 * self._scan_min:
                        break
                    print x

            ##########################################
            # Do positive

            steps = int(
                (scan_max - initial_setpoint) / self._control_step_size)
            udrange_temp = np.linspace(initial_setpoint, scan_max, steps)

            values_temp = np.zeros(len(udrange_temp))
            true_udrange_temp = np.zeros(len(udrange_temp))

            smoothed_values = np.zeros(
                len(udrange_temp) - self.get_smoothing_N() + 1)
            max_value = 0
            max_frac_change = 0

            finished = 0

            for i, sp in enumerate(udrange_temp):
                if (msvcrt.kbhit() and (msvcrt.getch() == 'q')):
                    self._set_control_f(initial_setpoint)
                    break
                #print 'sp',sp
                self._set_control_f(sp)
                if self.get_dwell_after_set():
                    qt.msleep(self._dwell_time)
                true_udrange_temp[i] = self._get_control_f()
                values_temp[i] = self.get_value()

                # First of all, if above the threshold, that's us happy.
                if values_temp[i] > self.get_good_value():
                    print "Found good value!"
                    finished = 1

                    break

                if i >= (
                        self.get_smoothing_N() - 1
                ):  # Need to build up some values at start before considering whether decreasing or not

                    smoothed_index = i - (self.get_smoothing_N() - 1)

                    smoothed_values[smoothed_index] = np.mean(
                        values_temp[smoothed_index:(
                            i + 1)])  # very simple smoothing filter

                    fractional_change = smoothed_values[
                        smoothed_index] / smoothed_values[
                            0]  # fractional change from first smoothed value

                    if values_temp[
                            i] > max_value:  # Track the max value reached so far
                        max_value = values_temp[i]
                        max_frac_change = fractional_change

                    # Try and measure the 'noise' in the signal
                    if smoothed_index > 0:
                        diff_smoothed = (
                            smoothed_values[0:(smoothed_index - 1)] -
                            smoothed_values[1:smoothed_index]
                        ) / smoothed_values[0]
                    else:
                        diff_smoothed = 0

                    # If nothing is changing, noise in diff might be high
                    not_too_noisy_to_tell = np.std(diff_smoothed) < (
                        1 - self.get_threshold_for_decreasing())

                    #basically_dead = smoothed_values[smoothed_index] < self.get_dead_value()

                    if fractional_change < self.get_threshold_for_decreasing(
                    ) and not_too_noisy_to_tell:  # Therefore must be decreasing from start
                        print "Decreasing in this direction!"

                        break

                    if (max_value > self.get_min_peak_value()) and (
                            fractional_change < max_frac_change *
                            self.get_threshold_for_past_peak()
                    ):  # Must have gone past peak
                        print "Gone past peak!"
                        finished = 1

                        break

            if finished == 0:
                # If necessary, end by going back to start
                self._set_control_f(initial_setpoint)
                if self.get_dwell_after_set():
                    for x in range(i + 4):
                        qt.msleep(
                            self._dwell_time
                        )  # Wait for a length of time determined by how far we got
                        if self._get_control_f(
                        ) > initial_setpoint + 0.05 * self._scan_min:
                            break
                        print x

            # Add on extra values (np.append returns a new array, so assign it)
            true_udrange = np.append(true_udrange, true_udrange_temp[0:i])
            values = np.append(values, values_temp[0:i])

        else:
            true_udrange = true_udrange_temp[0:i]
            values = values_temp[0:i]

        valid_i = np.where(values > self._min_value)
        print true_udrange
        print values
        true_udrange = true_udrange[valid_i]
        values = values[valid_i]
        print true_udrange
        print values

        if self.get_do_plot():
            p = plt.plot(name=self._plot_name)
            p.clear()
            plt.plot(true_udrange, values, 'O', name=self._plot_name)

        return (true_udrange, values)
Example #14
def main(nV,sparseness,noise):
    number_of_vectors=nV
    dimension_of_cla_vectors=1024
    dimension_of_mcr_vectors=1024
    r=[0,15]
    mcrV=list()

    claV=generateCLAVector(number_of_vectors,dimension_of_cla_vectors,sparseness)
    print( distance(claV[0],claV[1],[0,1]))

    convSet=generateConversionSet(dimension_of_cla_vectors,dimension_of_mcr_vectors,r)
    idxSet=generateConversionSet(dimension_of_cla_vectors,dimension_of_cla_vectors,r)
    print (distance(convertCLAtoMCR(claV[0],convSet,r,idxSet),convertCLAtoMCR(claV[1],convSet,r,idxSet),r))

    for i in range(0,number_of_vectors):
        mcrV.append(convertCLAtoMCR(claV[i],convSet,r,idxSet))
    
    CLA_dist=list()
    MCR_dist=list()
    random_MCR_dist=list()
    CLA_noisy_dist=list()
    MCR_noisy_dist=list()

    avg_CLA_dist=0
    avg_MCR_dist=0
    avg_random_MCR_dist=0
    avg_CLA_noisy_dist=0
    avg_MCR_noisy_dist=0
    
    for i in range(0,number_of_vectors):
        for j in range(i+1,number_of_vectors):
            CLA_dist.append(distance(claV[i],claV[j],[0,1]))
            MCR_dist.append(distance(mcrV[i],mcrV[j],r))
            random_MCR_dist.append(distance(convSet[i],convSet[j],r))

    for i in range(0,number_of_vectors):
        noisyV=addNoise(claV[i],noise)
        CLA_noisy_dist.append(distance(claV[i],noisyV,[0,1]))
        MCR_noisy_dist.append(distance(mcrV[i],convertCLAtoMCR(noisyV,convSet,r,idxSet),r))


    f = open('dist_datapoints','w')
    csvwriter=csv.writer(f)
    csvwriter.writerow(CLA_dist)
    csvwriter.writerow(MCR_dist)
    csvwriter.writerow(CLA_noisy_dist)
    csvwriter.writerow(MCR_noisy_dist)
    f.close()

    avg_CLA_dist=statistics.mean(CLA_dist)
    avg_MCR_dist=statistics.mean(MCR_dist)
    avg_MCR_noisy_dist=statistics.mean(MCR_noisy_dist)
    avg_CLA_noisy_dist=statistics.mean(CLA_noisy_dist)
    avg_random_MCR_dist=statistics.mean(random_MCR_dist)

    print ("Vectors used="+str(nV))
    print ("Average random MCR distance="+str(avg_random_MCR_dist) ) #average distance between any random MCR vectors
    print ("Average CLA Distance="+str(avg_CLA_dist) )#average distance of combination of all points(CLA vectors) in CLA space
    print ("Average MCR Distance="+str(avg_MCR_dist) )#average distance of combination of all points(MCR vectors) in MCR space
    print ("Average CLA Distance in "+str(noise)+" noisy CLA="+str(avg_CLA_noisy_dist) )#average distance between MCR projection of CLA vector and its noisy version
    print ("Average MCR Distance from "+str(noise)+" noisy CLA="+str(avg_MCR_noisy_dist) )#average distance between MCR projection of CLA vector and its noisy version

    sdv_CLA_dist=statistics.stdev(CLA_dist)
    sdv_MCR_dist=statistics.stdev(MCR_dist)
    sdv_MCR_noisy_dist=statistics.stdev(MCR_noisy_dist)
    sdv_CLA_noisy_dist=statistics.stdev(CLA_noisy_dist)
    sdv_random_MCR_dist=statistics.stdev(random_MCR_dist)

    print ("Standard Deviation random MCR Distance="+str(sdv_random_MCR_dist) )#std dev distance of random MCR vectors
    print ("Standard Deviation CLA Distance="+str(sdv_CLA_dist) )#std dev distance of combijation of all points(CLA vectors) in CLA space
    print ("Standard Deviation MCR Distance="+str(sdv_MCR_dist) )#std dev distance of combination of all points(MCR vectors) in MCR space
    print ("Standard Deviation CLA Distance in "+str(noise)+" noisy CLA="+str(sdv_CLA_noisy_dist) )#std dev distance between MCR projection of CLA vector and its noisy version
    print ("Standard Deviation MCR Distance from "+str(noise)+" noisy CLA="+str(sdv_MCR_noisy_dist)) #std dev distance between MCR projection of CLA vector and its noisy version


    return
Example #15
def dumpStatistics(arglist):
    print("mean({:.4E}) median({:.4E}) stdev({:.4E}) min({:.4E}) max({:.4E})".
          format(np.mean(arglist), np.median(arglist), np.std(arglist),
                 min(arglist), max(arglist)))
Example #16
def dumpFullStatistics(arglist):
    print(
        "mean({:.4E}) median({:.4E}) stdev({:.4E}) min({:.4E}) max({:.4E}) skew({:.4E}) fat_tail({:.4E}) kurtosis({:.4E})"
        .format(np.mean(arglist), np.median(arglist), np.std(arglist),
                min(arglist), max(arglist), skew(arglist), fatTail(arglist),
                kurtosis(arglist)))
Example #17
def error(g):

    # standard error of the mean for correlated data: use the effective
    # number of independent samples N / tau rather than the raw N
    nEFF = len(g) / ACtime(g)
    ERROR = numpy.std(g) / math.sqrt(nEFF)
    return ERROR
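
# ACtime is not defined in the snippet above; error(g) applies the usual rule
# that the standard error of correlated samples is std/sqrt(n_eff) with
# n_eff = N / tau. A common estimator for the integrated autocorrelation
# time tau (an assumption, not necessarily the original ACtime):
import numpy

def ACtime(g):
    g = numpy.asarray(g, dtype=float)
    g = g - g.mean()
    acf = numpy.correlate(g, g, mode='full')[len(g) - 1:]  # zero lag first
    acf = acf / acf[0]
    tau = 1.0
    for rho in acf[1:]:
        if rho <= 0:  # truncate at the first non-positive autocorrelation
            break
        tau += 2.0 * rho
    return tau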
 def _get_means_stdevs(cls, x, y):
     x_y_counter = cls._convert_x_y_to_counter(x, y)
     st_dev = {x: stdev(y) for x, y in x_y_counter.items()}
     mean_ = {x: mean(y) for x, y in x_y_counter.items()}
     return cls._get_mean_stdev_from_counter(x_y_counter, st_dev, mean_)
def get_mean_stdev(x: list, y: list) -> tuple:
    x_y_counter = LinearDistributionFunction._convert_x_y_to_counter(x, y)
    st_dev = {x: stdev(y) for x, y in x_y_counter.items()}
    mean_ = {x: mean(y) for x, y in x_y_counter.items()}
    return mean_, st_dev
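
# Hedged usage note for get_mean_stdev: _convert_x_y_to_counter evidently
# groups the y values by their x value (each entry maps an x to the list of
# y's seen at that x), so each x needs at least two y's for stdev, e.g.:
#   get_mean_stdev([1, 1, 2, 2], [10.0, 12.0, 20.0, 24.0])
#   -> ({1: 11.0, 2: 22.0}, {1: 1.414..., 2: 2.828...})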
Example #20
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

# Prep dataset
print('Prepare data')
data = pd.read_csv('data/btcusd.csv')
data['returns'] = data['close'] - data['close'].shift(1)
data['ma200'] = data['close'].rolling(
    window=200).apply(lambda values: np.average(values))

# Discrete values
data['std30'] = data['close'].rolling(
    window=30).apply(lambda values: np.round(np.std(values)))
data['std200'] = data['close'].rolling(
    window=200).apply(lambda values: np.round(np.std(values)))
data['mom10'] = data['close'] - data['close'].shift(10) > 0
data['moving_average'] = data['close'] - data['ma200'] > 0
data = data.dropna()

# Setup pipeline
print('Create pipeline')
pipe_steps = [('scaler', StandardScaler()),
              ('descT', DecisionTreeClassifier())]
check_params = {
    # pipeline step parameters take the 'step__param' double-underscore form
    'descT__criterion': ['gini', 'entropy'],
    'descT__max_depth': np.arange(3, 15)
}
pipeline = Pipeline(pipe_steps)
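
# Hedged continuation: GridSearchCV is imported above but the snippet stops
# before running it; the feature/target choices below are assumptions for
# illustration, not the original script's.
features = data[['std30', 'std200', 'mom10', 'moving_average']]
target = data['returns'] > 0  # classify positive vs. non-positive returns
search = GridSearchCV(pipeline, check_params, cv=5)
search.fit(features, target)
print(search.best_params_)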
Example #21
def CreateImageProduct(vid,product):

    ''' 
    Create an image product from a (web)camera.
    
    Inputs:
        vid (str): The video file from which to create the image product.
        product (int): The code for the image product to create. Currently only a code of 1, for timex, is supported. 
        
    Returns:
        product (array): The image product data
    
    '''    
    def addFrame(r,g,b,r_list,g_list,b_list):
        r = r+r_list[0]
        g = g+g_list[0]
        b = b+b_list[0]
        
        r_list = r_list[1:len(r_list)]
        g_list = g_list[1:len(g_list)]
        b_list = b_list[1:len(b_list)]
        
        return r,g,b,r_list,g_list,b_list
            
    
    # Read a frame to get the size (property 1 = CAP_PROP_POS_FRAMES) #
    cap = cv2.VideoCapture(vid)
    cap.set(1, 1)
    test, im = cap.read()
    
    # Initialize zeroed arrays for each color channel; accumulating into
    # np.empty would start from uninitialized values #
    r = np.zeros((im.shape[0], im.shape[1]))
    g = np.zeros((im.shape[0], im.shape[1]))
    b = np.zeros((im.shape[0], im.shape[1]))
    
    # Loop through each frame to keep and add its color channel values to the growing array #
    r_list = list()
    g_list = list()
    b_list = list()
    numFrames = int(cap.get(7))  # property 7 = CAP_PROP_FRAME_COUNT
    for frame in range(0,numFrames):

        cap.set(1,frame) 
        test,image = cap.read()
        
        r_list.append(image[:,:,2])
        g_list.append(image[:,:,1])
        b_list.append(image[:,:,0])
        
    # Create the desired product #
    if product == 1: # Timex #
        
        while len(r_list)>0:
            r,g,b,r_list,g_list,b_list = addFrame(r,g,b,r_list,g_list,b_list)
        
        del r_list,g_list,b_list
        
        r = r/numFrames
        g = g/numFrames
        b = b/numFrames
            
        product = np.stack([r/255,g/255,b/255],axis=2)
        
        return product

    elif product == 2: # Variance #
        r = np.std(r)
def get_all_Qs_centre(elat,
                      elon,
                      slat,
                      slon,
                      evdp,
                      half_bin_size=3.0,
                      npoints=8):
    """
    OPTION 2:
    function draws a default 6 degree polygon around the point at the centre 
    of a ray and randomly samples points within the polygon.  
    Sized based on the spacing in the Q model.  Also returns a standard deviation
    Q value calculated in log space.  

    This option chosen over the ray as there is less variation in the
    values, and the code is much quicker to run (smaller nested loop).  
    """
    centre = get_centre_ray(elat, elon, slat, slon, evdp)
    lon, lat = centre[1], centre[0]
    #print(lon, lat)
    # create polygon around central point
    # arbitrarily use 3 degrees... ?
    minlon = lon - half_bin_size
    maxlon = lon + half_bin_size
    minlat = lat - half_bin_size
    maxlat = lat + half_bin_size

    # define polygon
    #poly = Polygon([left_lower, left_upper, right_lower, right_upper])
    #points = random_points_within(poly, npoints)
    #print(points)

    freqs = extract_Q_freq.get_freq_list()
    Q_list = []
    stdevQ_list = []
    #print('looping freqs')
    for freq in freqs:
        print(freq)
        qdat = extract_Q_freq.get_Q_data(freq)

        # get q vals within region
        idx = np.where((qdat[:,0] >= minlon) & (qdat[:,0] <= maxlon) &
                       (qdat[:,1] >= minlat) & (qdat[:,1] <= maxlat))[0]

        Q_vals = qdat[:, 2][idx]

        # get mean & stdev
        Q_list.append(np.mean(Q_vals))
        stdevQ_list.append(np.std(Q_vals))
        '''
        Q_cumulative = 0
        Q_each_point_list = []
        #print('looping points')
        for i,point in enumerate(points):
            point = points[i]
            lonp, latp = point.coords.xy[0][0], point.coords.xy[1][0]
            Q_dict = extract_Q_freq.get_Q_value(float(freq), lonp, latp)
            #print(Q_dict)
            Q_cumulative += Q_dict['Q']
            Q_each_point_list.append(Q_dict['Q'])
        average_Q = Q_cumulative / npoints
        # calculate stdev in log10 space
        diff_cumulative = 0
        for Q in Q_each_point_list:
            diff_sqrd = (np.log10(Q) - np.log10(average_Q))**2
            diff_cumulative += diff_sqrd
        #standard error calculation for sample
        stdev_Q = (np.sqrt(diff_cumulative / (npoints-1))) / np.sqrt(npoints)

        Q_list.append(average_Q)
        stdevQ_list.append(stdev_Q) # stdev in log10 space
        '''

    return Q_list, stdevQ_list
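
# The docstring promises a standard deviation "calculated in log space", but
# the active branch takes a plain np.std of Q_vals; a minimal log10-space
# equivalent, mirroring the commented-out calculation (a sketch, not the
# author's code):
import numpy as np

def log10_stdev(q_vals):
    q = np.log10(np.asarray(q_vals, dtype=float))
    return np.std(q, ddof=1)  # sample stdev of log10(Q)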
def dgc_monthly(station,
                variable,
                flags,
                start,
                end,
                plots=False,
                diagnostics=False,
                idl=False):
    '''
    Original Distributional Gap Check

    :param obj station: station object
    :param str variable: variable to act on
    :param array flags: flags array
    :param datetime start: data start
    :param datetime end: data end
    :param bool plots: run plots
    :param bool diagnostics: run diagnostics
    :param bool idl: run IDL equivalent routines for median
    :returns: 
       flags - updated flag array
    '''

    if plots:
        import matplotlib.pyplot as plt

    st_var = getattr(station, variable)

    month_ranges = utils.month_starts_in_pairs(start, end)

    # get monthly averages
    month_average = np.empty(month_ranges.shape[0])
    month_average.fill(st_var.mdi)
    month_average_filtered = np.empty(month_ranges.shape[0])
    month_average_filtered.fill(st_var.mdi)

    all_filtered = utils.apply_filter_flags(st_var)
    for m, month in enumerate(month_ranges):

        data = st_var.data[month[0]:month[1]]

        filtered = all_filtered[month[0]:month[1]]

        month_average[m] = dgc_get_monthly_averages(data, OBS_LIMIT,
                                                    st_var.mdi, MEAN)
        month_average_filtered[m] = dgc_get_monthly_averages(
            filtered, OBS_LIMIT, st_var.mdi, MEAN)

    # get overall monthly climatologies - use filtered data

    month_average = month_average.reshape(-1, 12)
    month_average_filtered = month_average_filtered.reshape(-1, 12)

    standardised_months = np.empty(month_average.shape)
    standardised_months.fill(st_var.mdi)

    for m in range(12):

        valid_filtered = np.where(month_average_filtered[:, m] != st_var.mdi)

        if len(valid_filtered[0]) >= VALID_MONTHS:

            valid_data = month_average_filtered[valid_filtered, m][0]

            if MEAN:
                clim = np.mean(valid_data)
                spread = np.std(valid_data)

            else:
                if idl:
                    clim = utils.idl_median(
                        valid_data.compressed().reshape(-1))
                else:
                    clim = np.median(valid_data)
                spread = utils.IQR(valid_data)
                if spread <= SPREAD_LIMIT:
                    spread = SPREAD_LIMIT

            standardised_months[valid_filtered,
                                m] = (month_average[valid_filtered, m] -
                                      clim) / spread

    standardised_months = standardised_months.reshape(month_ranges.shape[0])

    good_months = np.where(standardised_months != st_var.mdi)

    # must be able to do this with masked arrays
    if plots:
        bins, bincenters = utils.create_bins(standardised_months[good_months],
                                             BIN_SIZE)
        dummy, plot_bincenters = utils.create_bins(
            standardised_months[good_months], BIN_SIZE / 10.)

        hist, binEdges = np.histogram(standardised_months[good_months],
                                      bins=bins)

        fit = utils.fit_gaussian(bincenters,
                                 hist,
                                 max(hist),
                                 mu=np.mean(standardised_months[good_months]),
                                 sig=np.std(standardised_months[good_months]))
        plot_gaussian = utils.gaussian(plot_bincenters, fit)

        dgc_set_up_plot(plot_gaussian,
                        standardised_months[good_months],
                        variable,
                        sub_par="Months")

    # remove all months with a large standardised offset

    if len(good_months[0]) >= MONTH_LIMIT:

        standardised_months = np.ma.masked_values(standardised_months,
                                                  st_var.mdi)
        large_offsets = np.where(standardised_months >= LARGE_LIMIT)

        if len(large_offsets[0]) > 0:

            for lo in large_offsets[0]:
                flags[month_ranges[lo, 0]:month_ranges[lo, 1]] = 1

            if plots:

                hist, binEdges = np.histogram(
                    standardised_months[large_offsets], bins=bins)
                plot_hist = np.array([0.01 if h == 0 else h for h in hist])
                plt.step(bincenters,
                         plot_hist,
                         'g-',
                         label='> %i' % LARGE_LIMIT,
                         where='mid',
                         zorder=5)

                plt.axvline(5, c='g')
                plt.axvline(-5, c='g')

        # walk distribution from centre and see if any asymmetry
        sort_order = standardised_months[good_months].argsort()

        mid_point = len(good_months[0]) // 2  # integer: used as an index below

        good = True
        iter = 1
        while good:

            if standardised_months[good_months][sort_order][
                    mid_point -
                    iter] != standardised_months[good_months][sort_order][
                        mid_point + iter]:
                # using IDL notation
                tempvals = [
                    np.abs(
                        standardised_months[good_months][sort_order][mid_point
                                                                     - iter]),
                    np.abs(
                        standardised_months[good_months][sort_order][mid_point
                                                                     + iter])
                ]

                if min(tempvals) != 0:
                    if max(tempvals) / min(tempvals) >= 2. and min(
                            tempvals) >= 1.5:
                        # substantial asymmetry in distribution - at least 1.5 from centre and difference of 2.

                        if tempvals[0] == max(tempvals):
                            # LHS
                            bad = good_months[0][sort_order][:mid_point - iter]
                            if plots:
                                badplot = standardised_months[good_months][
                                    sort_order][:mid_point - iter]
                        elif tempvals[1] == max(tempvals):
                            #RHS
                            bad = good_months[0][sort_order][mid_point + iter:]
                            if plots:
                                badplot = standardised_months[good_months][
                                    sort_order][mid_point + iter:]

                        for b in bad:
                            flags[month_ranges[b, 0]:month_ranges[b, 1]] = 1

                        if plots:

                            hist, binEdges = np.histogram(badplot, bins=bins)
                            plot_hist = np.array(
                                [0.01 if h == 0 else h for h in hist])
                            plt.step(bincenters,
                                     plot_hist,
                                     'r-',
                                     label='Gap',
                                     where='mid',
                                     zorder=4)

                        good = False

            iter += 1
            if iter == mid_point: break

        if plots:
            plt.legend(loc='lower center',
                       ncol=4,
                       bbox_to_anchor=(0.5, -0.2),
                       frameon=False,
                       prop={'size': 13})
            plt.show()
            #plt.savefig(IMAGELOCATION+'/'+station.id+'_DistributionalGap.png')

    return flags  # dgc_monthly
Example #24
    times = defaultdict(dict)
    case_str = ','.join(['{}'] * 7).format(*case)
    string = "[{}] (Case {} of {}):".format(case_str, idx + 1, N)
    print_to_output(string, tee=True)

    for m1 in xrange(M1):
        instance = InstanceStructure(*case)
        instance.create_node_data()
        instance.create_scenario_data()
        string = 'INSTANCE ID: {}'.format(instance.ID)
        print_to_output(string)
        for m2 in xrange(M2):
            for mthd, app in (('RLT', 'PH'), ('BigM', 'PH')):
                obj, t = funcs[mthd](app)
                objs[mthd, app][m1, m2] = obj
                times[mthd, app][m1, m2] = t

                print '\tRUN TIME [{0:>4}/{1}]: {2} ({3})'.format(mthd, app, curr(obj), ptime(t))
            print

    with open(output, 'ab') as f:
        for (app, mthd), d in times.iteritems():
            thymes = d.values()
            for t in thymes:
                f.write('\tRUN TIME [{}/{}]: {}\n'.format(mthd, app, t))
            else:
                f.write('\n\tMEAN:  {} seconds\n'.format(mean(thymes)))
                f.write('\tSTDEV: {} seconds\n'.format(stdev(thymes)))
                f.write('\tMIN:   {} seconds\n'.format(min(thymes)))
                f.write('\tMAX:   {} seconds\n\n'.format(max(thymes)))
        else:
            coord2_src1.append(row['y1'])
            coord2_src2.append(row['y2'])

        diff = coord1_src2 - coord1_src1
        diff_list.append(diff)
        if horiz:
            dx = row['x2'] - row['x1']
        else:
            dx = row['y2'] - row['y1']
        dx_list.append(dx)
        angle = np.arctan2(diff, dx) * 180. / np.pi
        angle_list.append(angle)

    median_angle = np.median(angle_list)
    stdev_angle = np.std(angle_list)
    print(
        f'Median and stdev of trace angles relative to rows/cols: {median_angle} +/- {stdev_angle} degrees.'
    )

    if horiz:
        results = Table([
            coord2_src1, src1_list, coord2_src2, src2_list, diff_list, dx_list,
            angle_list
        ],
                        names=('src1_x', 'src1_y', 'src2_x', 'src2_y', 'diff',
                               'trace_length', 'angle'))
    else:
        results = Table([
            src1_list, coord2_src1, src2_list, coord2_src2, diff_list, dx_list,
            angle_list
Example #26
array = matrix[2,:]			# Get all columns of the third row and put them in an array
column_as_array = matrix[:,3]		# Gets column 4 and turns it into a 1d array
column_as_column = matrix[:, 3:4]	# Gets column four and preserves it as a column
new_matrix_with_2_cols = numpy.hstack((matrix[:, 3:4], matrix[:, 4:5]))	# Stacks two columns horizontally (hstack takes a tuple)
new_matrix_without_col4 = numpy.delete(matrix, 3, 1)		# Delete one column starting at column 4

# Array/matrix arithmetic
doublematrix = matrix * 2		# Every number in matrix gets multiplied by 2
doublematrix + matrix			# Every number in matrix is added to the corresponding number in doublematrix

# Summary statistics of matrices
import numpy
numpy.mean(matrix)			# Computes the mean of all values in the matrix
numpy.max(matrix)
numpy.min(matrix)
numpy.std(matrix)

numpy.mean(matrix, axis=0)		# Computes the average of each column, i.e. for each column, computes the average of all rows for that column
numpy.mean(matrix, axis=1)		# Computes the average of each row, i.e. for each row, computes the average of all columns in that row.

# Get the dimensions of a matrix (or a 1d-array) (rows, cols)
matrix.shape

# Sorting a matrix



### PLOTS ###

# Make plots
import matplotlib.pyplot
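
# The notes break off after this import; a minimal plot in the same style
# (an illustrative continuation, not recovered content):
xs = numpy.linspace(0, 10, 100)			# 100 evenly spaced x values
matplotlib.pyplot.plot(xs, numpy.sin(xs))	# Line plot of sin(x)
matplotlib.pyplot.xlabel('x')
matplotlib.pyplot.ylabel('sin(x)')
matplotlib.pyplot.show()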