def frecuencia_relativa_con_ancho(conjuntoDeDatos, ancho=0.1, titulo='Histograma de frecuencia relativa'):
    """Plot a relative-frequency histogram with bins roughly ``ancho`` wide.

    The number of bins is the integer-truncated data range divided by
    ``ancho``.  The histogram is computed with ``scipy.stats.relfreq`` and
    handed to ``graficar_frecuencia_relativa`` for drawing.

    :param conjuntoDeDatos: sequence of numeric observations.
    :param ancho: requested bin width.
    :param titulo: title forwarded to the plotting helper.
    """
    inicio = int(min(conjuntoDeDatos))
    fin = int(max(conjuntoDeDatos))
    n = int((fin - inicio) / ancho)
    if n >= 2:
        res = stats.relfreq(conjuntoDeDatos, numbins=n)
    else:
        # relfreq pads its automatic limits by (max - min) / (2 * (numbins - 1)),
        # which is undefined for a single bin, and it rejects numbins < 1.
        # Fall back to one bin that spans exactly the observed data range.
        res = stats.relfreq(
            conjuntoDeDatos,
            numbins=1,
            defaultreallimits=(min(conjuntoDeDatos), max(conjuntoDeDatos)),
        )
    x = res.lowerlimit + np.linspace(0, res.binsize * res.frequency.size, res.frequency.size)
    graficar_frecuencia_relativa(x, res, titulo)
def findCounts(arr: np.ndarray) -> np.ndarray:
    """
    Replaces every column of a 2D array by its relative-frequency histogram.

    Each column is binned with ``scipy.stats.relfreq`` into as many bins as
    the array has rows, so the output has the same shape as the input.

    :param arr: array of the dataset as numpy ndarray (must be 2D).
    :return: float ndarray of the same shape holding the per-column relative
        frequencies; the input itself when it is empty; ``False`` (after
        printing a message) when the array is not two dimensional.
    """
    # Trivial case: nothing to bin, hand the empty input back unchanged.
    if len(arr) == 0:
        return arr

    # Only 2D input is supported; keep the historical "print and return
    # False" contract instead of failing with an IndexError on 1D input.
    if arr.ndim != 2:
        print("Dimension of ndarray must be exactly two.")
        return False

    n_rows, n_cols = arr.shape
    frequencies = np.empty(shape=arr.shape, dtype=float)
    for col in range(n_cols):
        column = arr[:, col]
        # With a single bin relfreq cannot derive its own limits (it divides
        # by numbins - 1), so pass the data range explicitly in that case.
        limits = (column.min(), column.max()) if n_rows == 1 else None
        result = stats.relfreq(column, numbins=n_rows, defaultreallimits=limits)
        # Store the histogram in place; np.append returns a copy and would
        # leave the output buffer uninitialised.
        frequencies[:, col] = result.frequency
    return frequencies
def cdf_vals_from_data(data, numbins=None, maxbins=None):
    """Bin ``data`` and return values for plotting its empirical CDF and PDF.

    :param data: sequence of numeric observations (converted to an ndarray).
    :param numbins: number of bins; defaults to the number of distinct values.
    :param maxbins: optional upper bound applied to ``numbins``.
    :return: tuple ``(cum_bin_counts, cdf_x_vals, rel_bin_counts, x_vals)``.
        ``cum_bin_counts`` and ``cdf_x_vals`` carry one extra leading point so
        the CDF starts at y = 0; ``rel_bin_counts`` and ``x_vals`` have one
        entry per bin.
    :raises ValueError: if ``data`` is empty.
    """
    # make sure data is a numpy array
    data = numpy.array(data)
    if data.size == 0:
        raise ValueError("cdf_vals_from_data requires at least one data point")

    # by default, use numbins equal to number of distinct values
    # TODO: shouldn't this be one per possible x val?
    if numbins is None:
        numbins = numpy.unique(data).size
    if maxbins is not None and numbins > maxbins:
        numbins = maxbins

    limits = (data.min(), data.max())

    # bin the data and count fraction of points in each bin (for PDF)
    rel_bin_counts, min_bin_x, bin_size, _ = stats.relfreq(data, numbins, limits)

    # bin the data and count each bin (cumulatively) (for CDF)
    cum_bin_counts, min_bin_x, bin_size, _ = stats.cumfreq(data, numbins, limits)

    # normalize bin counts so rightmost count is 1
    cum_bin_counts /= cum_bin_counts.max()

    # make array of x-vals: numbins points spread evenly over the binned range
    # NOTE(review): these run from the lowest to the highest bin edge, so they
    # are not exactly the lower end of each bin - confirm which is intended.
    x_vals = numpy.linspace(min_bin_x, min_bin_x + bin_size * numbins, numbins)

    # CDF always starts at y=0
    cum_bin_counts = numpy.insert(cum_bin_counts, 0, 0)  # y = 0
    cdf_x_vals = numpy.insert(x_vals, 0, x_vals[0])  # x = min x

    return cum_bin_counts, cdf_x_vals, rel_bin_counts, x_vals
def fit_stage3_(self, kappa, gamma):
    """Fit the stage-3 k-NN model and record the training-set likelihood.

    Stores ``gamma`` as the ``confidence`` parameter, fits a
    ``KNeighborsClassifier`` with ``k = int(num_classes * kappa)`` neighbours
    on the encoded training data, then, for a random subsample of the
    training points, histograms the labels of each point's neighbours.
    ``self._s3_likelihood`` is the mean, over the subsample, of the largest
    label frequency.
    """
    self.set_params(confidence=gamma)

    train_x = self.encode_train_np
    train_y = self.y_train_np
    k = int(self.num_classes * kappa)
    sample_ratio = self._params['sample_ratio']
    logger.debug('k for Stage 3: %d', k)

    classifier = knn.KNeighborsClassifier(n_neighbors=k, n_jobs=-1)
    self._s3_model = classifier
    self._s3_model.fit(train_x, train_y)

    # compute the likelihood on a random subsample of the training points
    sample_size = int(np.floor(len(train_x) * sample_ratio))
    logger.debug('[AD Stage 3]: Size of train set: %d', sample_size)
    shuffled = np.random.permutation(train_x)
    x_sub = shuffled[:sample_size]

    neigh_indices = self._s3_model.kneighbors(
        x_sub, n_neighbors=k, return_distance=False)
    label_rows = [self.y_train_np[row] for row in neigh_indices]
    neigh_labels = np.array(label_rows, dtype=np.int16)

    # One relative-frequency histogram over the class labels per sample.
    bins = np.zeros((sample_size, self.num_classes), dtype=np.float32)
    for sample in range(sample_size):
        histogram = stats.relfreq(
            neigh_labels[sample],
            numbins=self.num_classes,
            defaultreallimits=(0, self.num_classes - 1))
        bins[sample] = histogram[0]

    top_frequency = np.amax(bins, axis=1)
    self._s3_likelihood = np.mean(top_frequency)
    logger.debug('Train set likelihood: %f', self._s3_likelihood)
def meq_relfreq(df_dict, colname, l_limit, h_limit, step, numbins=10):
    """Draw a filled 3D contour of per-range relative-frequency histograms.

    For every value in ``np.arange(l_limit, h_limit, step)`` the matching row
    of the frame returned by ``create_range`` is binned with
    ``scipy.stats.relfreq`` between that row's minimum and maximum.

    :param df_dict: forwarded to ``create_range``.
    :param colname: forwarded to ``create_range``.
    :param l_limit: first range value (inclusive).
    :param h_limit: end of the range (exclusive).
    :param step: spacing between range values.
    :param numbins: number of histogram bins per range value.
    :return: tuple ``(X, Y, Z)`` of 2D arrays with one row per range value:
        the range value repeated across the bins, the lower edge of each bin,
        and the relative frequency of each bin.
    """
    range_df = create_range(df_dict, colname, l_limit, h_limit, step)
    rangeX = np.arange(l_limit, h_limit, step)

    # Each row of X repeats its range value once per bin.
    X = np.zeros((len(rangeX), numbins))
    X[:] = rangeX[:, np.newaxis]

    Y = []
    Z = []
    for x in rangeX:
        samples = range_df.loc[x]
        freqs, startpoint, binsize, _ = stats.relfreq(
            samples.values,
            numbins=numbins,
            defaultreallimits=(min(samples), max(samples)))
        # Lower edge of every bin for this row.
        Y.append(startpoint + binsize * np.arange(len(freqs)))
        Z.append(freqs)
    Y = np.array(Y)
    Z = np.array(Z)

    fig = plt.figure()
    # Figure.gca(projection=...) was removed from Matplotlib; add_subplot is
    # the supported way to obtain 3D axes.
    ax = fig.add_subplot(111, projection='3d')
    ax.contourf(X, Y, Z, alpha=0.5)
    ax.set_zlim3d(0, 1)
    plt.show()
    return (X, Y, Z)
def entropy_bin(self, points):
    """Return the base-2 entropy of ``points`` binned into 100 equal bins.

    The bins span exactly the minimum to the maximum of ``points``; the
    entropy is taken over the resulting relative frequencies.
    """
    lowest = np.amin(points)
    highest = np.amax(points)
    histogram = stats.relfreq(
        points, numbins=100, defaultreallimits=(lowest, highest))
    return stats.entropy(histogram.frequency, base=2)
def entropy(data, lower, upper):
    """Return the natural-log entropy of ``data`` binned between two limits.

    ``int(sqrt(len(data)))`` equal-width bins cover ``[lower, upper]``.
    Inside the logarithm each bin frequency is floored at ``1 / len(data)``
    so that empty bins contribute zero instead of producing ``log(0)``.
    """
    sample_count = len(data)
    bin_count = int(np.sqrt(sample_count))
    frequencies = stats.relfreq(data, bin_count, (lower, upper)).frequency
    return sum(
        -frequencies[b] * np.log(max(frequencies[b], 1 / sample_count))
        for b in range(bin_count)
    )
def get_rel_freq(lst_len, title):
    """Save a 10-bin relative-frequency bar chart of ``lst_len`` as a JPEG.

    The image is written to ``<target_dir_path>/<title>.jpg``, where
    ``target_dir_path`` is a module-level name.

    :param lst_len: sequence of numeric values to bin.
    :param title: chart title, also used as the file name stem.
    """
    res = stats.relfreq(lst_len, numbins=10)
    x = res.lowerlimit + np.linspace(0, res.binsize * res.frequency.size, res.frequency.size)
    fig = plt.figure(figsize=(5, 4))
    try:
        ax = fig.add_subplot(1, 1, 1)
        ax.bar(x, res.frequency, width=res.binsize)
        ax.set_title(title)
        ax.set_xlim([x.min(), x.max()])
        fig.savefig(target_dir_path + '/' + title + '.jpg')
    finally:
        # Release the figure so repeated calls do not accumulate open
        # figures (pyplot keeps every figure alive until it is closed).
        plt.close(fig)
def get_hist(tuple_list):
    """Show a 75-bin relative-frequency bar chart of each item's second field.

    :param tuple_list: iterable of indexable items; ``item[1]`` is binned.
    """
    values = [entry[1] for entry in tuple_list]
    histogram = stats.relfreq(values, numbins=75)
    bin_count = histogram.frequency.size
    x = histogram.lowerlimit + np.linspace(
        0, histogram.binsize * bin_count, bin_count)

    figure = plt.figure(figsize=(6, 4))
    axes = figure.add_subplot(1, 1, 1)
    axes.bar(x, histogram.frequency, width=histogram.binsize)
    plt.show()
def main(argv):
    """Run nested cross-validation as configured by a JSON hyper-parameter file.

    ``argv[0]`` is the path of the JSON file.  The function builds the
    dataset, the classifier and a class-weighted cross-entropy loss (weights
    proportional to the inverse relative label frequency, normalised to sum
    to one), then runs the nested cross-validator while logging through a
    ``SummaryWriter``.
    """
    with open(argv[0], 'r') as config_file:
        hyperparameters = json.load(config_file)

    experiment_name = hyperparameters['experiment_name']
    dataset_meta_json_path = hyperparameters['dataset_meta_json_path']

    writer = SummaryWriter(log_dir='../runs/' + experiment_name,
                           flush_secs=10)

    standardization = RapidECalibrationStandardize(dataset_meta_json_path)
    dataset = RapidECalibrationDataset(dataset_meta_json_path,
                                       transform=standardization)

    model = RapidEClassifier(
        number_of_classes=hyperparameters['num_of_classes'],
        dropout_rate=hyperparameters['dropout_rate'],
        name=hyperparameters['experiment_name'])

    # SET GPU: use CUDA only when requested and actually present.
    gpu_requested = hyperparameters['gpu']
    if gpu_requested and torch.cuda.is_available():
        device = torch.device("cuda")
        model = torch.nn.DataParallel(model)
    else:
        device = torch.device("cpu")
        if gpu_requested:
            logging.warning('CUDA is not availible on this machine')
    model.to(device)

    # OBJECTIVE LOSS: weight each class by its inverse relative frequency.
    labels = dataset.gettargets()
    distinct_labels = set(labels)
    freq = stats.relfreq(np.array(labels),
                         numbins=len(distinct_labels)).frequency
    inverse_freq = 1 / freq
    weights = torch.tensor(inverse_freq / np.sum(inverse_freq),
                           dtype=torch.float32)
    weights = weights.to(device)
    loss = nn.CrossEntropyLoss(weight=weights, reduction='sum')

    splitter = StratifiedSplitterForRapidECalibDataset(
        hyperparameters['num_of_folds'], dataset)

    # NESTED CROSS-VALIDATION
    nestedcrossvalidator = NestedCrossValidator(model=model,
                                                device=device,
                                                objectiveloss=loss,
                                                splitter=splitter,
                                                hyperparams=hyperparameters,
                                                tbwriter=writer)
    nestedcrossvalidator()
    writer.close()
def EstDesc(datos, vals):
    """Print descriptive statistics of ``datos`` and return its relative frequencies.

    :param datos: sequence of numeric observations.
    :param vals: ignored; the value passed in is never read.
    :return: array of relative frequencies over 100 equal-width bins.
    """
    summary = stats.describe(datos)
    histogram = stats.relfreq(datos, 100)

    print("Estadistica descriptiva: \n")
    print(summary)
    print("\nFrecuencias relativas: \n")
    print(histogram)

    # The first field of the relfreq result is the frequency array.
    return histogram[0]
def probwin(data, window, nn, overlap, l_low, l_high):
    """
    Creates an array displaying the histogram of a variable over time

    Input:
        data - a 1D array
        window - int corresponding to size of window
        nn - number of bins in histogram, determines histogram's resolution
        overlap - number of overlapping frames between each window
            (accepted for interface compatibility; it does not influence
            any returned value)
        l_low, l_high - lower / upper limit of the histogram range; the range
            is widened on both sides by half of ``(l_high - l_low) / N``

    Output (tuple):
        skew_vec - per window, sum of cubed deviations from the window mean
            divided by ``window`` (NaNs ignored)
        max_vec - per window, lower edge of the most populated bin
        vec_var - lower edge of each of the ``nn`` bins
        time_var - start index of each window
        pdf_array - relative-frequency histogram of each window

    The last of the ``int(N / window)`` windows is not processed; its
    entries in the per-window outputs stay zero.
    """
    tt = time.time()
    N = data.shape[0]
    n_windows = int(N / window)

    pdf_array = np.zeros((n_windows, nn))
    time_var = np.array([i * window for i in range(n_windows)])
    max_vec = np.zeros(n_windows)
    skew_vec = np.zeros(n_windows)

    s = 1 / 2 * (l_high - l_low) / int(N)
    limits = (l_low - s, l_high + s)

    # The bin geometry depends only on the limits and nn, never on the data.
    # These values are the fallback when no window is processed; otherwise
    # they are replaced by what relfreq reports.
    low_lim = limits[0]
    bin_s = (limits[1] - limits[0]) / nn

    for i in range(n_windows - 1):
        chunk = data[i * window:(i + 1) * window]
        # A single histogram per window is enough: a second one over the
        # shifted window would only reproduce the same low_lim / bin_s.
        pdf, low_lim, bin_s, _ = stats.relfreq(chunk, nn, limits)
        pdf_array[i, :] = pdf
        skew_vec[i] = np.nansum((chunk - np.nanmean(chunk)) ** 3) / window
        max_vec[i] = bin_s * np.argmax(pdf) + low_lim

    vec_var = np.array([k * bin_s + low_lim for k in range(nn)])
    print(time.time() - tt)
    return skew_vec, max_vec, vec_var, time_var, pdf_array
def init(F):
    """Load the third CSV column of ``F`` and return its binned cumulative frequencies.

    The file is read as comma-separated text; the third field of every row
    is converted to float and its maximum is printed.  The values are then
    binned into 1000 bins over ``[0, 2.5]``.

    :param F: path of the UTF-8 encoded CSV file.
    :return: tuple ``(x, y)`` - 1000 x positions spanning the binned range
        and the cumulative sum of the relative frequencies.
    """
    with open(F, encoding='utf-8') as handle:
        rows = np.loadtxt(handle, str, delimiter=",")
    A = np.array([float(row[2]) for row in rows])
    print(max(A))

    histogram = stats.relfreq(A, numbins=1000, defaultreallimits=(0, 2.5))
    bin_count = histogram.frequency.size
    x = histogram.lowerlimit + np.linspace(
        0, histogram.binsize * bin_count, bin_count)
    y = np.cumsum(histogram.frequency)
    return x, y
def rel_pdf(self, df_dict, numbins=10):
    """Collect a relative-frequency histogram for every entry of ``df_dict``.

    :param df_dict: mapping (anything with ``.items()``) from a key to the
        values to bin.
    :param numbins: number of histogram bins per entry.
    :return: flat 1D object array alternating each key with the
        ``scipy.stats.relfreq`` result for its values, i.e.
        ``[key0, result0, key1, result1, ...]``; an empty float array when
        ``df_dict`` is empty.
    """
    entries = []
    # .items() works on Python 3 dicts; iteritems() only exists on Python 2.
    for key, values in df_dict.items():
        entries.append(key)
        entries.append(stats.relfreq(values, numbins=numbins))

    if not entries:
        return np.array([])

    # Fill an object array element by element so NumPy stores each key and
    # each result tuple as-is instead of trying to broadcast ragged contents.
    to_return = np.empty(len(entries), dtype=object)
    for position, entry in enumerate(entries):
        to_return[position] = entry
    return to_return
def draw_hist_rel_frec(data, title):
    """Show a 150-bin relative-frequency bar chart of ``data``.

    :param data: sequence of numeric values to bin.
    :param title: title placed above the chart.
    """
    histogram = stats.relfreq(
        data, numbins=150, defaultreallimits=None, weights=None)
    bin_count = histogram.frequency.size
    offsets = np.linspace(0, histogram.binsize * bin_count, bin_count)
    x = histogram.lowerlimit + offsets

    figure = plt.figure(figsize=(30, 20))
    axes = figure.add_subplot(1, 1, 1)
    axes.bar(x, histogram.frequency, width=histogram.binsize)
    axes.set_title(title)
    # Leave five units of empty space to the right of the last bar.
    axes.set_xlim([x.min(), x.max() + 5])
    plt.show()
def histMaker(self, data, nmax, limits, debug=0, caller=None):
    '''
    return a histogram of input data with nmax bins between limits.
    Overflows are put in the uppermost bin

    data    - sequence of numeric values
    nmax    - number of bins
    limits  - (lower, upper) range of the histogram
    debug   - when > 0, print the binning parameters
    caller  - optional label used as prefix of printed messages

    Returns the array of per-bin counts (relative frequencies scaled by
    len(data)).  Values below the lower limit are not re-binned; they are
    reported through the "extrapoints" error message.
    '''
    upperlimit = limits[1]
    # Clamp overflows to the upper limit so they land in the last bin.
    trunc = numpy.minimum(data, upperlimit)
    hist, lowerlimit, binsize, extrapoints = relfreq(trunc, nmax, limits)
    hist *= len(data)

    words = 'pmtcal.histMaker' if caller is None else caller

    # Messages are built as one space-separated string so the single-argument
    # print call behaves the same under Python 2 and Python 3.
    if debug > 0:
        print(' '.join(str(item) for item in (
            words,
            'rebinned result #bins,limits,lowerlimit, binsize, extrapoints',
            nmax, limits, lowerlimit, binsize, extrapoints)))
    if extrapoints > 0:
        print(' '.join(str(item) for item in (
            words, 'ERROR rebinning, extrapoints', extrapoints,
            '. It should be zero!')))
    return hist
def relativeFreq(data):
    ''' Show a relative frequency bar chart with one bin per distinct value. '''
    values = np.array(data)
    # The number of distinct values decides how many bins are used.
    distinct = Counter(data)
    histogram = stats.relfreq(values, numbins=len(distinct))

    bin_count = histogram.frequency.size
    x = histogram.lowerlimit + np.linspace(
        0, histogram.binsize * bin_count, bin_count)

    figure = plt.pyplot.figure(figsize=(5, 4))
    axes = figure.add_subplot(1, 1, 1)
    axes.bar(x, histogram.frequency, width=histogram.binsize)
    axes.set_title('Relative frequency histogram')
    axes.set_xlim([x.min(), x.max()])
    plt.pyplot.show()
def __get_property_stats(self, property, bin_count=10):
    """
    Gather the binned relative frequencies of a requested property.

    String properties get one bin per distinct value (the list is sorted in
    place); other properties are binned with ``relfreq`` into ``bin_count``
    equal-width bins.

    :param property: Property of data to summarise
    :param bin_count: Number of bins to use (set automatically for str properties)
    :return: tuple ``(property_lst, bin_count, bin_labels, binned_item_frequency)``
    """
    # --> Gather requested property
    property_lst = self.list_property(property)

    # --> Categorical case: one bin per distinct string
    if type(property_lst[0]) is str:
        bin_count = len(set(property_lst))
        # groupby only merges adjacent equal items, hence the sort.
        property_lst.sort()
        binned_item_frequency = [
            sum(1 for _ in members) / len(property_lst)
            for _, members in groupby(property_lst)
        ]
        bin_labels = sorted(set(property_lst))
        return property_lst, bin_count, bin_labels, binned_item_frequency

    # --> Numeric case: equal-width bins labelled "<lower> <-> <upper>"
    binned_item_frequency = relfreq(property_lst, numbins=bin_count).frequency
    highest = max(property_lst)
    lowest = min(property_lst)
    bin_size = (highest - lowest) / bin_count

    bin_labels = []
    lower = lowest
    for _ in range(bin_count):
        upper = lower + bin_size
        bin_labels.append("{} <-> {}".format(int(lower), int(upper)))
        lower = upper

    return property_lst, bin_count, bin_labels, binned_item_frequency
def def_stage3_(self, adv, pred_adv, passed):
    """
    Checking the class distribution of k nearest neighbours without
    predicting the inputs. Compute the likelihood using one-against-all
    approach.

    Parameters
    ----------
    adv : numpy.ndarray
        Inputs to check; only rows whose ``passed`` flag is 1 are examined.
    pred_adv : numpy.ndarray
        A dummy variable
    passed : numpy.ndarray
        Flags with 1 for inputs that are still accepted. Modified in place.

    Returns
    -------
    numpy.ndarray
        ``passed``, with the flag cleared for every examined input whose
        largest neighbour-label frequency is below
        ``self._s3_likelihood * confidence``.
    """
    passed_indices = np.where(passed == 1)[0]
    if len(passed_indices) == 0:
        return passed

    x = adv[passed_indices]
    kappa = self._params['kappa']
    # Cast to int, as in fit_stage3_, because the neighbour count must be
    # an integer even when kappa is fractional.
    k = int(self.num_classes * kappa)
    gamma = self._params['confidence']
    model = self._s3_model  # KNeighborsClassifier for entire train set

    neigh_indices = model.kneighbors(x, n_neighbors=k, return_distance=False)
    neigh_labels = np.array(
        [self.y_train_np[n_i] for n_i in neigh_indices], dtype=np.int16)

    # One relative-frequency histogram over the class labels per input.
    bins = np.zeros((len(x), self.num_classes), dtype=np.float32)
    for i in range(len(x)):
        bins[i] = stats.relfreq(neigh_labels[i],
                                numbins=self.num_classes,
                                defaultreallimits=(0, self.num_classes - 1))[0]

    likelihood = np.amax(bins, axis=1)
    logger.debug('Mean likelihood on adv: %f', likelihood.mean())

    threshold = self._s3_likelihood * gamma
    blocked_indices = np.where(likelihood < threshold)[0]
    # blocked_indices are positions within the examined subset ``x``; map
    # them back to positions in ``passed`` before clearing the flags.
    passed[passed_indices[blocked_indices]] = 0
    return passed
# Simulate 1,000 observations from a Binomial
# distribution with n = 5, prob = 0.5
random.seed(1234)
# The draws below come from NumPy's generator, so seed it explicitly to make
# the simulation reproducible regardless of which ``random`` module is bound.
np.random.seed(1234)
n, p, N = 5, 0.5, 1000
x = np.random.binomial(n=n, p=p, size=N)

# print the sample mean and variances
print(x.mean())
print(x.var())

# Print the head (first ten values) of simulated binomial variable
print(x[:10])

# Plot a histogram of simulated binomial observations
plt.hist(x)
plt.ylabel('Count')
plt.xlabel('$k$')
plt.title(r'Binomial with $n =$ {}, $\pi = ${}, and {} observations'.format(
    n, p, N))
plt.show()

# Plot a histogram of simulated binomial observations with relative frequency
# One bin per possible outcome 0..n, each centred on its integer value, so
# the bars line up with k even if an extreme outcome was never drawn.
k = np.arange(n + 1)
y = stats.relfreq(x, numbins=n + 1, defaultreallimits=(-0.5, n + 0.5))
plt.bar(k, y.frequency)
plt.ylabel('Relative frequency')
plt.xlabel('$k$')
plt.title(r'Binomial with $n =$ {}, $\pi = ${}, and {} observations'.format(
    n, p, N))
plt.show()
def plot_energy_efficiency(
        self, UAV_trajectory_ris, UAV_trajectory_ris_no_shift,
        UAV_trajectory_no_ris, GT_schedule_ris, GT_schedule_ris_no_shift,
        GT_schedule_no_ris, UAV_flight_time_ris, UAV_flight_time_ris_no_shift,
        UAV_flight_time_no_ris, eps, slot_ris, slot_ris_no_shift, slot_no_ris):
    """Plot, save and print the energy efficiency of three scenarios.

    For each scenario (RIS, RIS without shift, no RIS) the per-episode value
    ``1000 * sum(throughput) / sum(flight energy)`` is computed from
    ``self.env.throughput`` and ``self.env.flight_energy``.  The cumulative
    relative frequencies (25 bins) of the three series are plotted, saved to
    ``EE.eps`` and shown, and the per-scenario averages are printed.

    :param eps: number of episodes, i.e. rows read from each result array.
    The remaining arguments are forwarded unchanged to the environment calls.
    """

    def _efficiency(throughput, energy):
        # Per-episode ratio of total throughput to total flight energy.
        return [1000 * np.sum(throughput[i, :]) / np.sum(energy[i, :])
                for i in range(eps)]

    def _cumulative_frequency(samples):
        # x positions and cumulative relative frequencies over 25 bins.
        res = stats.relfreq(samples, numbins=25)
        x = res.lowerlimit + np.linspace(
            0, res.binsize * res.frequency.size, res.frequency.size)
        return x, np.cumsum(res.frequency)

    [Th_ris, _] = self.env.throughput(UAV_trajectory_ris,
                                      UAV_flight_time_ris, GT_schedule_ris,
                                      eps, 1, 1, slot_ris)
    [Th_ris_no_shift, _] = self.env.throughput(
        UAV_trajectory_ris_no_shift, UAV_flight_time_ris_no_shift,
        GT_schedule_ris_no_shift, eps, 1, 0, slot_ris_no_shift)
    [Th_no_ris, _] = self.env.throughput(UAV_trajectory_no_ris,
                                         UAV_flight_time_no_ris,
                                         GT_schedule_no_ris, eps, 0, 0,
                                         slot_no_ris)

    PEnergy_ris = self.env.flight_energy(UAV_trajectory_ris,
                                         UAV_flight_time_ris, eps, slot_ris)
    PEnergy_ris_shift = self.env.flight_energy(
        UAV_trajectory_ris_no_shift, UAV_flight_time_ris_no_shift, eps,
        slot_ris_no_shift)
    PEnergy_no_ris = self.env.flight_energy(UAV_trajectory_no_ris,
                                            UAV_flight_time_no_ris, eps,
                                            slot_no_ris)

    # np.float was removed from NumPy; the builtin float is the same dtype.
    plot_ee = np.zeros((3, eps), dtype=float)
    plot_ee[0, :] = _efficiency(Th_ris, PEnergy_ris)
    plot_ee[1, :] = _efficiency(Th_ris_no_shift, PEnergy_ris_shift)
    plot_ee[2, :] = _efficiency(Th_no_ris, PEnergy_no_ris)

    curves = (
        (0, 'g', '<', u"RIS-Assisted UAV"),
        (1, 'b', '>', u"UAV-R/P"),
        (2, 'r', 'o', u"UAV/R"),
    )
    for row, colour, marker, label in curves:
        x, y = _cumulative_frequency(plot_ee[row, :])
        plt.plot(x, y, c=colour, linestyle='-', marker=marker, label=label)

    font = {
        'family': 'Times New Roman',
        'weight': 'normal',
        'size': 12,
    }
    plt.xlabel(u'Energy-Efficiency(bits/J)', font)
    plt.ylabel(u'CDF', font)
    plt.legend(prop=font)
    plt.grid()
    plt.savefig('EE.eps')
    plt.show()

    ave_ris = np.sum(plot_ee[0, :]) / eps
    ave_ris_no_shift = np.sum(plot_ee[1, :]) / eps
    ave_no_ris = np.sum(plot_ee[2, :]) / eps
    print("Energy efficieny: : RIS:%f;RIS_NO_SHIFT:%f;NO_RIS:%f" %
          (ave_ris, ave_ris_no_shift, ave_no_ris))
    return
def entropy(list_ecg, numbins=100, base=None):
    """
    Return the entropy of the relative-frequency histogram of ``list_ecg``.

    numbins : number of bins to use for the histogram
    base : base of the log for entropy calculation
    """
    histogram = stats.relfreq(list_ecg, numbins)
    probabilities = histogram.frequency
    return stats.entropy(probabilities, qk=None, base=base)
# With density=True np.histogram returns probability densities, so multiplying
# them by the bin widths (np.diff of the edges) gives the fraction of samples
# that fall in each bin.
hist_perc, bin_edges_perc = np.histogram(iris_targets, bins=3, density=True)
# NOTE(review): `hist` is defined before this excerpt - presumably the plain
# count histogram of the same data; confirm.
print('np.histogram', hist)
print('np.histogram', hist_perc, hist_perc * np.diff(bin_edges_perc))

# https://numpy.org/doc/stable/reference/generated/numpy.unique.html
# Similar to bincount, but also works on a string array, which makes it handy
# for turning a string array into an int array (the inverse indices).
# It can also return the number of occurrences of every unique value.
targets, index_1st_occurrence, target_array, target_frequency = np.unique(
    data['target'], return_index=True, return_inverse=True, return_counts=True)
print('np.unique', targets, index_1st_occurrence, target_array,
      target_frequency)

# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.relfreq.html
# Similar to bincount, but returns each bin's share of the total
# (a fraction between 0 and 1) instead of a raw count.
frequency, lower_limit, binsize, extra_points = stats.relfreq(iris_targets,
                                                              numbins=3)
print('sp.stats.relfreq', frequency, lower_limit, binsize, extra_points)

# Location statistics of the 'sepal length' column.
print('\nMean, Median, Mode, Quantile, Percentile')
mean = np.mean(data['sepal length'])
print('np.mean', mean)
# Mean after cutting 25% of the observations from each end of the sorted data.
trimmed_mean = stats.trim_mean(data['sepal length'], .25)
print('sp.stats.trim_mean (25%)', trimmed_mean)
median = np.median(data['sepal length'])
print('np.median', median)
# First quartile (the 0.25 quantile).
quantile = np.quantile(data['sepal length'], .25)
print('np.quantile 1st (25%)', quantile)
def plot_propulsion_energy(self, UAV_trajectory_ris,
                           UAV_trajectory_ris_no_shift,
                           UAV_trajectory_no_ris, UAV_flight_time_ris,
                           UAV_flight_time_ris_no_shift,
                           UAV_flight_time_no_ris, eps, slot_ris,
                           slot_ris_no_shift, slot_no_ris):
    """Plot, save and print the propulsion energy of three scenarios.

    For each scenario (RIS, RIS without shift, no RIS) the per-episode value
    ``sum(flight energy) / 1000`` is computed from
    ``self.env.flight_energy``.  The cumulative relative frequencies
    (25 bins) of the three series are plotted, saved to ``PE.eps`` and
    shown, and the per-scenario averages are printed.

    :param eps: number of episodes, i.e. rows read from each result array.
    The remaining arguments are forwarded unchanged to the environment calls.
    """

    def _episode_energy(energy):
        # Per-episode total flight energy, scaled down by 1000.
        return [np.sum(energy[i, :]) / 1000 for i in range(eps)]

    def _cumulative_frequency(samples):
        # x positions and cumulative relative frequencies over 25 bins.
        res = stats.relfreq(samples, numbins=25)
        x = res.lowerlimit + np.linspace(
            0, res.binsize * res.frequency.size, res.frequency.size)
        return x, np.cumsum(res.frequency)

    PEnergy_ris = self.env.flight_energy(UAV_trajectory_ris,
                                         UAV_flight_time_ris, eps, slot_ris)
    PEnergy_ris_no_shift = self.env.flight_energy(
        UAV_trajectory_ris_no_shift, UAV_flight_time_ris_no_shift, eps,
        slot_ris_no_shift)
    PEnergy_no_ris = self.env.flight_energy(UAV_trajectory_no_ris,
                                            UAV_flight_time_no_ris, eps,
                                            slot_no_ris)

    # np.float was removed from NumPy; the builtin float is the same dtype.
    plot_energy = np.zeros((3, eps), dtype=float)
    plot_energy[0, :] = _episode_energy(PEnergy_ris)
    plot_energy[1, :] = _episode_energy(PEnergy_ris_no_shift)
    plot_energy[2, :] = _episode_energy(PEnergy_no_ris)

    curves = (
        (0, 'g', '<', u"RIS-Assisted UAV"),
        (1, 'b', '>', u"UAV-R/P"),
        (2, 'r', 'o', u"UAV/R"),
    )
    for row, colour, marker, label in curves:
        x, y = _cumulative_frequency(plot_energy[row, :])
        plt.plot(x, y, c=colour, linestyle='-', marker=marker, label=label)

    font = {
        'family': 'Times New Roman',
        'weight': 'normal',
        'size': 12,
    }
    plt.xlabel(u'Propulsion Energy(KJ)', font)
    plt.ylabel(u'CDF', font)
    plt.legend(prop=font)
    plt.grid()
    plt.savefig('PE.eps')
    plt.show()

    sum_ris = np.sum(plot_energy[0, :]) / eps
    sum_ris_no_shift = np.sum(plot_energy[1, :]) / eps
    sum_no_ris = np.sum(plot_energy[2, :]) / eps
    print("Propulsion Energy: RIS:%f;RIS_NO_SHIFT:%f;NO_RIS:%f" %
          (sum_ris, sum_ris_no_shift, sum_no_ris))
    return
# get the weekday index, between 0 and 6, then replace it by the day name
pdf['day_of_week'] = pdf['Date'].apply(lambda x: x.weekday())
pdf['day_of_week'] = pdf['day_of_week'].apply(lambda x: calendar.day_name[x])

fig, ax = plt.subplots()
pdf.groupby(['Date']).count()['Item'].plot(ax=ax)
ax.set_title('Sales by day')

# Part B Describe the customer
# How many times do they sell every item?
item_sold = pdf.groupby(['Item']).count()
item_sold = item_sold.drop(columns=['Time', 'DateTime', 'Date', 'day_of_week'])
# Single-argument call form: valid and identical on Python 2 and Python 3.
print(item_sold)

# What is the relative frequency of sales? (graph)
# NOTE(review): 9684 is hard-coded - presumably the number of transactions in
# the dataset; confirm, or derive it from the data.
res = stats.relfreq(pdf.Transaction, numbins=9684)
x = res.lowerlimit + np.linspace(0, res.binsize * res.frequency.size,
                                 res.frequency.size)
fig = plt.figure(figsize=(9, 7))
ax = fig.add_subplot(1, 1, 1)
ax.bar(x, res.frequency, width=res.binsize)
ax.set_title('Relative frequency histogram')
plt.show()

# How often do people buy tea with coffee? How about Coffee and croissant?
# Coffee and something from the bakery?
cof = pdf.loc[pdf['Item'] == 'Coffee']
tea = pdf.loc[pdf['Item'] == 'Tea']
cro = pdf.loc[pdf['Item'] == 'Croissant']
from scipy import stats
import matplotlib.pyplot as plot

# Loads information from a Json file; the context manager closes the file
# as soon as it has been parsed.
with open('Ch1Data.json') as input_file:
    data = json.load(input_file)

assign_name = 'Ch1.4'

# Parse the Ch1.4 Data into a local array
values = data[assign_name]

# scipy's relfreq builds a relative frequency histogram: it takes the values
# from above and the number of bins to split the data set into.
histogram = stats.relfreq(values, numbins=10)
x = histogram.lowerlimit + numpy.linspace(
    0, histogram.binsize * histogram.frequency.size,
    histogram.frequency.size)

# Configure the histogram that will be shown.
fig = plot.figure(figsize=(5, 4))
ax = fig.add_subplot(1, 1, 1)

# Creates the bars used in the Histogram.
ax.bar(x, histogram.frequency, width=histogram.binsize)
ax.set_title('Relative frequency histogram for {}'.format(assign_name))
ax.set_xlim([x.min(), x.max()])

mean = "mean : {}".format(numpy.mean(values))
# ddof=1 gives the sample (n - 1) variance.
variance = "variance : {}".format(numpy.var(values, ddof=1))
# compute histogram
# scipy.stats.histogram was removed from SciPy; np.histogram with the same
# padded range (half a bin beyond the data on both sides, 10 bins) yields
# the same binning.
numbins = 10
x_min, x_max = np.min(x), np.max(x)
pad = (x_max - x_min) / (2.0 * (numbins - 1))
n, bins = np.histogram(x, bins=numbins, range=(x_min - pad, x_max + pad))
n = n.astype(float)
low_range, upper_range = bins[0], bins[-1]
binsize = bins[1] - bins[0]
extrapoints = 0  # the padded range always covers every data point

# plot the histogram
plt.clf()
plt.bar(bins[:-1], n, width=0.4, color='red')
plt.xlabel('X', fontsize=20)
plt.ylabel('number of data points in the bin', fontsize=15)
plt.savefig('/home/tomer/my_books/python_in_hydrology/images/hist.png')

# compute and plot the relfreq
relfreqs, lowlim, binsize, extrapoints = st.relfreq(x)
plt.clf()
plt.bar(bins[:-1], relfreqs, width=0.4, color='magenta')
plt.xlabel('X', fontsize=20)
plt.ylabel('Relative frequencies', fontsize=15)
plt.savefig('/home/tomer/my_books/python_in_hydrology/images/relfreq.png')

# compute and plot pdf
plt.clf()
# `normed` was removed from Matplotlib; `density=True` is its replacement.
n, bins, patches = plt.hist(x, 10, density=True, facecolor='yellow', alpha=0.5)
plt.xlabel('X', fontsize=15)
plt.ylabel('PDF', fontsize=15)
plt.savefig('/home/tomer/my_books/python_in_hydrology/images/pdf.png')

# compute and plot cdf
cumfreqs, lowlim, binsize, extrapoints = st.cumfreq(x)
bins=bins[i], color='gray', hist_kws={'edgecolor': 'black'}, kde_kws={ 'linewidth': 1, 'color': 'blue' }, ax=ax) ax.set_xlim((-1.1 * span, 1.1 * span)) ax.set_ylim((0, 0.05)) ax.set_ylabel('Density') ax.set_xlabel(f'Change in PIP ({bins[i]} bins)') x = np.linspace(-span, span, 100) ax.plot(x, stats.norm.pdf(x, mu, sigma), color='red') qqplot(np.array(values), line='s', ax=axs[1, i], marker='.', color='gray') h = stats.relfreq(values, numbins=bins[i]) xp = h.lowerlimit + np.linspace(0, h.binsize * h.frequency.size, h.frequency.size) ax = axs[2, i] y = [] x = [] j = 0 for j in range(len(xp)): v = h.frequency[j] if v >= threshold: y.append(v) x.append(xp[j]) j += i ax.set_xlim((-1.1 * span, 1.1 * span)) ax.set_ylim((0, 0.7)) ax.set_ylabel('Relative frequency')
"cases", "infection", 'maskenpflicht', 'stayhomestaysafe', 'stayhome', 'distancing', 'covidiots' ] abstimmung = [ "kampfjets", 'armee', 'begrenzungsinitiative', 'svp', 'srf', 'initiative', 'abstimmung', 'streik' ] cleaned_tweets = keywordTweets(abstimmung, tweets) #print(cleaned_tweets) for i, data in enumerate(scores): overall = sum(data) d = stats.relfreq(data, numbins=20) #print(d) #plt.bar(np.arange(len(data)), d) #df = pd.DataFrame(data) #df.plot.hist(bins=20) #plt.plot(d.frequency) #plt.savefig(f'{i}.png') with open('TweetAnalysis/cleaned_tweets_en.json', 'w') as f: f.write(json.dumps(cleaned_tweets)) if False: tweets = tweet_query() with open(f'TweetAnalysis/tweets.json', 'w', encoding='utf-8') as f: json.dump(tweets, f, ensure_ascii=False)
def _as_floats(values):
    """Return the items of *values* converted to a concrete list of floats."""
    return [float(value) for value in values]


def main():
    """Apply one ``scipy.stats`` routine to every row of a tabular file.

    Command-line driven: ``--infile`` is read line by line, each line is split
    on tabs, the columns named by ``--sample_one_cols`` / ``--sample_two_cols``
    (1-based, comma separated) are converted to floats and handed to the
    ``scipy.stats`` function selected by ``--test_id``.  The values returned by
    that function are appended to the row's columns and the row is written,
    tab separated, to ``--outfile``.  Rows are written unchanged when
    ``--test_id`` matches none of the known names.

    ``--sample_cols`` (groups separated by ``;``, columns by ``,``) is turned
    into per-row samples by ``columns_to_values`` and is used by the
    multi-sample tests (``f_oneway``, ``kruskal``, ``bartlett``, ...).

    ``--mf`` / ``--nf`` are the lower / upper limits for the trimmed and
    binned statistics; passing ``0`` for both disables the limits.

    Exits through ``parser.error`` when ``--test_id`` is missing.

    NOTE(review): some of the dispatched functions and keywords (for example
    ``nanmean``, ``histogram``, ``threshold``, ``signaltonoise``) are not
    present in recent SciPy releases -- confirm the SciPy version targeted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta",
        action="store_true",
        default=False,
        help="Whether or not to return the internally computed a values.",
    )
    parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used")
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored")
    parser.add_argument(
        "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored"
    )
    parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored")
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    # The default must be the boolean False: the string "False" is truthy and
    # would switch the option on even when the flag is absent.
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default=False,
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ")
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram")
    parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction")
    parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds")
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")
    args = parser.parse_args()

    if args.test_id is None:
        parser.error("--test_id is required")
    test_id = args.test_id.strip()

    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    # Value comparison, not identity: the limits are floats, so `is 0` could
    # never be true.  Both limits at 0 means "do not pass limits at all".
    no_limits = nf == 0 and mf == 0

    # Column selections are parsed once.  They are kept as lists (not lazy
    # map objects) because they are reused for every input row.
    has_grouped_samples = args.sample_cols is not None
    grouped_columns = []
    if has_grouped_samples:
        for group in args.sample_cols.split(";"):
            grouped_columns.append([int(column) for column in group.split(",")])
    sample_one_cols = []
    if args.sample_one_cols is not None:
        sample_one_cols = args.sample_one_cols.split(",")
    has_sample_two = args.sample_two_cols is not None
    sample_two_cols = []
    if has_sample_two:
        sample_two_cols = args.sample_two_cols.split(",")

    # The output is opened first (as before) and both files are closed even
    # when a row fails to process.
    with open(args.outfile, "w+") as outfile, open(args.infile) as rows:
        for line in rows:
            cols = line.strip().split("\t")
            if has_grouped_samples:
                b_samples = columns_to_values(grouped_columns, line)
            # Column numbers on the command line are 1-based.
            sample_one = [cols[int(index) - 1] for index in sample_one_cols]
            sample_two = [cols[int(index) - 1] for index in sample_two_cols]

            if test_id == "describe":
                size, min_max, mean, uv, bs, bk = stats.describe(_as_floats(sample_one))
                cols.append(size)
                cols.append(min_max)
                cols.append(mean)
                cols.append(uv)
                cols.append(bs)
                cols.append(bk)
            elif test_id == "mode":
                vals, counts = stats.mode(_as_floats(sample_one))
                cols.append(vals)
                cols.append(counts)
            elif test_id == "nanmean":
                m = stats.nanmean(_as_floats(sample_one))
                cols.append(m)
            elif test_id == "nanmedian":
                m = stats.nanmedian(_as_floats(sample_one))
                cols.append(m)
            elif test_id == "kurtosistest":
                z_value, p_value = stats.kurtosistest(_as_floats(sample_one))
                cols.append(z_value)
                cols.append(p_value)
            elif test_id == "variation":
                ra = stats.variation(_as_floats(sample_one))
                cols.append(ra)
            elif test_id == "itemfreq":
                freq = stats.itemfreq(_as_floats(sample_one))
                # One output column per (value, count) row, comma joined.
                for row in freq:
                    cols.append(",".join(map(str, row)))
            elif test_id == "boxcox_llf":
                llf = stats.boxcox_llf(imbda, _as_floats(sample_one))
                cols.append(llf)
            elif test_id == "tiecorrect":
                fa = stats.tiecorrect(_as_floats(sample_one))
                cols.append(fa)
            elif test_id == "rankdata":
                r = stats.rankdata(_as_floats(sample_one), method=args.md)
                cols.append(r)
            elif test_id == "nanstd":
                s = stats.nanstd(_as_floats(sample_one), bias=args.bias)
                cols.append(s)
            elif test_id == "anderson":
                A2, critical, sig = stats.anderson(_as_floats(sample_one), dist=args.dist)
                cols.append(A2)
                cols.extend(critical)
                # A literal "," column separates critical values from
                # significance levels in the output row.
                cols.append(",")
                cols.extend(sig)
            elif test_id == "binom_test":
                p_value = stats.binom_test(_as_floats(sample_one), n=args.n, p=args.p)
                cols.append(p_value)
            elif test_id == "gmean":
                gm = stats.gmean(_as_floats(sample_one), dtype=args.dtype)
                cols.append(gm)
            elif test_id == "hmean":
                hm = stats.hmean(_as_floats(sample_one), dtype=args.dtype)
                cols.append(hm)
            elif test_id == "kurtosis":
                k = stats.kurtosis(_as_floats(sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias)
                cols.append(k)
            elif test_id == "moment":
                n_moment = stats.moment(_as_floats(sample_one), n=args.n)
                cols.append(n_moment)
            elif test_id == "normaltest":
                k2, p_value = stats.normaltest(_as_floats(sample_one))
                cols.append(k2)
                cols.append(p_value)
            elif test_id == "skew":
                skewness = stats.skew(_as_floats(sample_one), bias=args.bias)
                cols.append(skewness)
            elif test_id == "skewtest":
                z_value, p_value = stats.skewtest(_as_floats(sample_one))
                cols.append(z_value)
                cols.append(p_value)
            elif test_id == "sem":
                s = stats.sem(_as_floats(sample_one), ddof=args.ddof)
                cols.append(s)
            elif test_id == "zscore":
                z = stats.zscore(_as_floats(sample_one), ddof=args.ddof)
                cols.extend(z)
            elif test_id == "signaltonoise":
                s2n = stats.signaltonoise(_as_floats(sample_one), ddof=args.ddof)
                cols.append(s2n)
            elif test_id == "percentileofscore":
                p = stats.percentileofscore(_as_floats(sample_one), score=args.score, kind=args.kind)
                cols.append(p)
            elif test_id == "bayes_mvs":
                c_mean, c_var, c_std = stats.bayes_mvs(_as_floats(sample_one), alpha=args.alpha)
                cols.append(c_mean)
                cols.append(c_var)
                cols.append(c_std)
            elif test_id == "sigmaclip":
                c, c_low, c_up = stats.sigmaclip(_as_floats(sample_one), low=args.m, high=args.n)
                cols.append(c)
                cols.append(c_low)
                cols.append(c_up)
            elif test_id == "kstest":
                d, p_value = stats.kstest(
                    _as_floats(sample_one),
                    cdf=args.cdf,
                    N=args.N,
                    alternative=args.alternative,
                    mode=args.mode,
                )
                cols.append(d)
                cols.append(p_value)
            elif test_id == "chi2_contingency":
                chi2, p, dof, ex = stats.chi2_contingency(
                    _as_floats(sample_one), correction=args.correction, lambda_=args.lambda_
                )
                cols.append(chi2)
                cols.append(p)
                cols.append(dof)
                cols.append(ex)
            elif test_id == "tmean":
                if no_limits:
                    mean = stats.tmean(_as_floats(sample_one))
                else:
                    mean = stats.tmean(_as_floats(sample_one), (mf, nf), (inclusive1, inclusive2))
                cols.append(mean)
            elif test_id == "tmin":
                if mf == 0:
                    minimum = stats.tmin(_as_floats(sample_one))
                else:
                    minimum = stats.tmin(_as_floats(sample_one), lowerlimit=mf, inclusive=args.inclusive)
                cols.append(minimum)
            elif test_id == "tmax":
                if nf == 0:
                    maximum = stats.tmax(_as_floats(sample_one))
                else:
                    maximum = stats.tmax(_as_floats(sample_one), upperlimit=nf, inclusive=args.inclusive)
                cols.append(maximum)
            elif test_id == "tvar":
                if no_limits:
                    var = stats.tvar(_as_floats(sample_one))
                else:
                    var = stats.tvar(_as_floats(sample_one), (mf, nf), (inclusive1, inclusive2))
                cols.append(var)
            elif test_id == "tstd":
                if no_limits:
                    std = stats.tstd(_as_floats(sample_one))
                else:
                    std = stats.tstd(_as_floats(sample_one), (mf, nf), (inclusive1, inclusive2))
                cols.append(std)
            elif test_id == "tsem":
                if no_limits:
                    s = stats.tsem(_as_floats(sample_one))
                else:
                    s = stats.tsem(_as_floats(sample_one), (mf, nf), (inclusive1, inclusive2))
                cols.append(s)
            elif test_id == "scoreatpercentile":
                if no_limits:
                    s = stats.scoreatpercentile(
                        _as_floats(sample_one),
                        _as_floats(sample_two),
                        interpolation_method=args.interpolation,
                    )
                else:
                    s = stats.scoreatpercentile(
                        _as_floats(sample_one),
                        _as_floats(sample_two),
                        (mf, nf),
                        interpolation_method=args.interpolation,
                    )
                cols.extend(s)
            elif test_id == "relfreq":
                if no_limits:
                    rel, low_range, binsize, ex = stats.relfreq(_as_floats(sample_one), args.b)
                else:
                    rel, low_range, binsize, ex = stats.relfreq(_as_floats(sample_one), args.b, (mf, nf))
                # One column per bin, followed by the binning parameters.
                cols.extend(rel)
                cols.append(low_range)
                cols.append(binsize)
                cols.append(ex)
            elif test_id == "binned_statistic":
                if no_limits:
                    st, b_edge, b_n = stats.binned_statistic(
                        _as_floats(sample_one),
                        _as_floats(sample_two),
                        statistic=args.statistic,
                        bins=args.b,
                    )
                else:
                    st, b_edge, b_n = stats.binned_statistic(
                        _as_floats(sample_one),
                        _as_floats(sample_two),
                        statistic=args.statistic,
                        bins=args.b,
                        range=(mf, nf),
                    )
                cols.append(st)
                cols.append(b_edge)
                cols.append(b_n)
            elif test_id == "threshold":
                if no_limits:
                    o = stats.threshold(_as_floats(sample_one), newval=args.new)
                else:
                    o = stats.threshold(_as_floats(sample_one), mf, nf, newval=args.new)
                cols.extend(o)
            elif test_id == "trimboth":
                o = stats.trimboth(_as_floats(sample_one), proportiontocut=args.proportiontocut)
                cols.extend(o)
            elif test_id == "trim1":
                t1 = stats.trim1(_as_floats(sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
                cols.extend(t1)
            elif test_id == "histogram":
                if no_limits:
                    hi, low_range, binsize, ex = stats.histogram(_as_floats(sample_one), args.b)
                else:
                    hi, low_range, binsize, ex = stats.histogram(_as_floats(sample_one), args.b, (mf, nf))
                cols.append(hi)
                cols.append(low_range)
                cols.append(binsize)
                cols.append(ex)
            elif test_id == "cumfreq":
                if no_limits:
                    cum, low_range, binsize, ex = stats.cumfreq(_as_floats(sample_one), args.b)
                else:
                    cum, low_range, binsize, ex = stats.cumfreq(_as_floats(sample_one), args.b, (mf, nf))
                cols.append(cum)
                cols.append(low_range)
                cols.append(binsize)
                cols.append(ex)
            elif test_id == "boxcox_normmax":
                if no_limits:
                    ma = stats.boxcox_normmax(_as_floats(sample_one))
                else:
                    ma = stats.boxcox_normmax(_as_floats(sample_one), (mf, nf), method=args.method)
                cols.append(ma)
            elif test_id == "boxcox":
                # lambda == 0 means "let SciPy estimate lambda", which also
                # returns the fitted lambda and its confidence interval.
                if imbda == 0:
                    box, ma, ci = stats.boxcox(_as_floats(sample_one), alpha=args.alpha)
                    cols.append(box)
                    cols.append(ma)
                    cols.append(ci)
                else:
                    box = stats.boxcox(_as_floats(sample_one), imbda, alpha=args.alpha)
                    cols.append(box)
            elif test_id == "histogram2":
                h2 = stats.histogram2(_as_floats(sample_one), _as_floats(sample_two))
                cols.extend(h2)
            elif test_id == "ranksums":
                z_statistic, p_value = stats.ranksums(_as_floats(sample_one), _as_floats(sample_two))
                cols.append(z_statistic)
                cols.append(p_value)
            elif test_id == "ttest_1samp":
                t, prob = stats.ttest_1samp(_as_floats(sample_one), _as_floats(sample_two))
                cols.extend(t)
                cols.extend(prob)
            elif test_id == "ansari":
                AB, p_value = stats.ansari(_as_floats(sample_one), _as_floats(sample_two))
                cols.append(AB)
                cols.append(p_value)
            elif test_id == "linregress":
                slope, intercept, r_value, p_value, stderr = stats.linregress(
                    _as_floats(sample_one), _as_floats(sample_two)
                )
                cols.append(slope)
                cols.append(intercept)
                cols.append(r_value)
                cols.append(p_value)
                cols.append(stderr)
            elif test_id == "pearsonr":
                cor, p_value = stats.pearsonr(_as_floats(sample_one), _as_floats(sample_two))
                cols.append(cor)
                cols.append(p_value)
            elif test_id == "pointbiserialr":
                r, p_value = stats.pointbiserialr(_as_floats(sample_one), _as_floats(sample_two))
                cols.append(r)
                cols.append(p_value)
            elif test_id == "ks_2samp":
                d, p_value = stats.ks_2samp(_as_floats(sample_one), _as_floats(sample_two))
                cols.append(d)
                cols.append(p_value)
            elif test_id == "mannwhitneyu":
                mw_stats_u, p_value = stats.mannwhitneyu(
                    _as_floats(sample_one),
                    _as_floats(sample_two),
                    use_continuity=args.mwu_use_continuity,
                )
                cols.append(mw_stats_u)
                cols.append(p_value)
            elif test_id == "zmap":
                z = stats.zmap(_as_floats(sample_one), _as_floats(sample_two), ddof=args.ddof)
                cols.extend(z)
            elif test_id == "ttest_ind":
                mw_stats_u, p_value = stats.ttest_ind(
                    _as_floats(sample_one), _as_floats(sample_two), equal_var=args.equal_var
                )
                cols.append(mw_stats_u)
                cols.append(p_value)
            elif test_id == "ttest_rel":
                t, prob = stats.ttest_rel(_as_floats(sample_one), _as_floats(sample_two), axis=args.axis)
                cols.append(t)
                cols.append(prob)
            elif test_id == "mood":
                z, p_value = stats.mood(_as_floats(sample_one), _as_floats(sample_two), axis=args.axis)
                cols.append(z)
                cols.append(p_value)
            elif test_id == "shapiro":
                W, p_value, a = stats.shapiro(_as_floats(sample_one), _as_floats(sample_two), args.reta)
                cols.append(W)
                cols.append(p_value)
                cols.extend(a)
            elif test_id == "kendalltau":
                k, p_value = stats.kendalltau(
                    _as_floats(sample_one),
                    _as_floats(sample_two),
                    initial_lexsort=args.initial_lexsort,
                )
                cols.append(k)
                cols.append(p_value)
            elif test_id == "entropy":
                s = stats.entropy(_as_floats(sample_one), _as_floats(sample_two), base=args.base)
                cols.append(s)
            elif test_id == "spearmanr":
                if has_sample_two:
                    rho, p_value = stats.spearmanr(_as_floats(sample_one), _as_floats(sample_two))
                else:
                    rho, p_value = stats.spearmanr(_as_floats(sample_one))
                cols.append(rho)
                cols.append(p_value)
            elif test_id == "wilcoxon":
                if has_sample_two:
                    T, p_value = stats.wilcoxon(
                        _as_floats(sample_one),
                        _as_floats(sample_two),
                        zero_method=args.zero_method,
                        correction=args.correction,
                    )
                else:
                    T, p_value = stats.wilcoxon(
                        _as_floats(sample_one), zero_method=args.zero_method, correction=args.correction
                    )
                cols.append(T)
                cols.append(p_value)
            elif test_id == "chisquare":
                if has_sample_two:
                    rho, p_value = stats.chisquare(
                        _as_floats(sample_one), _as_floats(sample_two), ddof=args.ddof
                    )
                else:
                    rho, p_value = stats.chisquare(_as_floats(sample_one), ddof=args.ddof)
                cols.append(rho)
                cols.append(p_value)
            elif test_id == "power_divergence":
                if has_sample_two:
                    stat, p_value = stats.power_divergence(
                        _as_floats(sample_one),
                        _as_floats(sample_two),
                        ddof=args.ddof,
                        lambda_=args.lambda_,
                    )
                else:
                    stat, p_value = stats.power_divergence(
                        _as_floats(sample_one), ddof=args.ddof, lambda_=args.lambda_
                    )
                cols.append(stat)
                cols.append(p_value)
            elif test_id == "theilslopes":
                if has_sample_two:
                    mpe, met, lo, up = stats.theilslopes(
                        _as_floats(sample_one), _as_floats(sample_two), alpha=args.alpha
                    )
                else:
                    mpe, met, lo, up = stats.theilslopes(_as_floats(sample_one), alpha=args.alpha)
                cols.append(mpe)
                cols.append(met)
                cols.append(lo)
                cols.append(up)
            elif test_id == "combine_pvalues":
                if has_sample_two:
                    stat, p_value = stats.combine_pvalues(
                        _as_floats(sample_one), method=args.med, weights=_as_floats(sample_two)
                    )
                else:
                    stat, p_value = stats.combine_pvalues(_as_floats(sample_one), method=args.med)
                cols.append(stat)
                cols.append(p_value)
            elif test_id == "obrientransform":
                ob = stats.obrientransform(*b_samples)
                for row in ob:
                    cols.append(",".join(map(str, row)))
            elif test_id == "f_oneway":
                f_value, p_value = stats.f_oneway(*b_samples)
                cols.append(f_value)
                cols.append(p_value)
            elif test_id == "kruskal":
                h, p_value = stats.kruskal(*b_samples)
                cols.append(h)
                cols.append(p_value)
            elif test_id == "friedmanchisquare":
                fr, p_value = stats.friedmanchisquare(*b_samples)
                cols.append(fr)
                cols.append(p_value)
            elif test_id == "fligner":
                xsq, p_value = stats.fligner(
                    *b_samples, center=args.center, proportiontocut=args.proportiontocut
                )
                cols.append(xsq)
                cols.append(p_value)
            elif test_id == "bartlett":
                T, p_value = stats.bartlett(*b_samples)
                cols.append(T)
                cols.append(p_value)
            elif test_id == "levene":
                w, p_value = stats.levene(
                    *b_samples, center=args.center, proportiontocut=args.proportiontocut
                )
                cols.append(w)
                cols.append(p_value)
            elif test_id == "median_test":
                stat, p_value, m, table = stats.median_test(
                    *b_samples, ties=args.ties, correction=args.correction, lambda_=args.lambda_
                )
                cols.append(stat)
                cols.append(p_value)
                cols.append(m)
                # The contingency table is emitted whole and then once more
                # row by row, comma joined.
                cols.append(table)
                for row in table:
                    cols.append(",".join(map(str, row)))

            outfile.write("%s\n" % "\t".join(map(str, cols)))
def main(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--infile", required=True, help="Tabular file.") parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.") parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi") parser.add_argument( "--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;", ) parser.add_argument("--test_id", help="statistical test method") parser.add_argument( "--mwu_use_continuity", action="store_true", default=False, help= "Whether a continuity correction (1/2.) should be taken into account.", ) parser.add_argument( "--equal_var", action="store_true", default=False, help= "If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.", ) parser.add_argument( "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values.", ) parser.add_argument( "--fisher", action="store_true", default=False, help="if true then Fisher definition is used", ) parser.add_argument( "--bias", action="store_true", default=False, help= "if false,then the calculations are corrected for statistical bias", ) parser.add_argument( "--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored", ) parser.add_argument( "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored", ) parser.add_argument( "--inclusive", action="store_true", default=False, help="if false,limit will be ignored", ) parser.add_argument( "--printextras", action="store_true", default=False, help= "If True, if there are extra points a warning is raised saying how many of those points there are", ) parser.add_argument( "--initial_lexsort", action="store_true", default="False", help= "Whether 
to use lexsort or quicksort as the sorting method for the initial sort of the inputs.", ) parser.add_argument( "--correction", action="store_true", default=False, help="continuity correction ", ) parser.add_argument( "--axis", type=int, default=0, help= "Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)", ) parser.add_argument( "--n", type=int, default=0, help= "the number of trials. This is ignored if x gives both the number of successes and failures", ) parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram") parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction") parser.add_argument( "--score", type=int, default=0, help="Score that is compared to the elements in a.", ) parser.add_argument("--m", type=float, default=0.0, help="limits") parser.add_argument("--mf", type=float, default=2.0, help="lower limit") parser.add_argument("--nf", type=float, default=99.9, help="higher_limit") parser.add_argument( "--p", type=float, default=0.5, help= "The hypothesized probability of success. 0 <= p <= 1. 
The default value is p = 0.5", ) parser.add_argument("--alpha", type=float, default=0.9, help="probability") parser.add_argument( "--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds", ) parser.add_argument( "--proportiontocut", type=float, default=0.0, help="Proportion (in range 0-1) of total data set to trim of each end.", ) parser.add_argument( "--lambda_", type=float, default=1.0, help= "lambda_ gives the power in the Cressie-Read power divergence statistic", ) parser.add_argument( "--imbda", type=float, default=0, help= "If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.", ) parser.add_argument( "--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e", ) parser.add_argument("--dtype", help="dtype") parser.add_argument("--med", help="med") parser.add_argument("--cdf", help="cdf") parser.add_argument("--zero_method", help="zero_method options") parser.add_argument("--dist", help="dist options") parser.add_argument("--ties", help="ties options") parser.add_argument("--alternative", help="alternative options") parser.add_argument("--mode", help="mode options") parser.add_argument("--method", help="method options") parser.add_argument("--md", help="md options") parser.add_argument("--center", help="center options") parser.add_argument("--kind", help="kind options") parser.add_argument("--tail", help="tail options") parser.add_argument("--interpolation", help="interpolation options") parser.add_argument("--statistic", help="statistic options") args = parser.parse_args() infile = args.infile outfile = open(args.outfile, "w+") test_id = args.test_id nf = args.nf mf = args.mf imbda = args.imbda inclusive1 = args.inclusive1 inclusive2 = args.inclusive2 sample0 = 0 sample1 = 0 sample2 = 0 if args.sample_cols is not None: sample0 = 1 barlett_samples = [] for sample in 
args.sample_cols.split(";"): barlett_samples.append(map(int, sample.split(","))) if args.sample_one_cols is not None: sample1 = 1 sample_one_cols = args.sample_one_cols.split(",") if args.sample_two_cols is not None: sample_two_cols = args.sample_two_cols.split(",") sample2 = 1 for line in open(infile): sample_one = [] sample_two = [] cols = line.strip().split("\t") if sample0 == 1: b_samples = columns_to_values(barlett_samples, line) if sample1 == 1: for index in sample_one_cols: sample_one.append(cols[int(index) - 1]) if sample2 == 1: for index in sample_two_cols: sample_two.append(cols[int(index) - 1]) if test_id.strip() == "describe": size, min_max, mean, uv, bs, bk = stats.describe( map(float, sample_one)) cols.append(size) cols.append(min_max) cols.append(mean) cols.append(uv) cols.append(bs) cols.append(bk) elif test_id.strip() == "mode": vals, counts = stats.mode(map(float, sample_one)) cols.append(vals) cols.append(counts) elif test_id.strip() == "nanmean": m = stats.nanmean(map(float, sample_one)) cols.append(m) elif test_id.strip() == "nanmedian": m = stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "kurtosistest": z_value, p_value = stats.kurtosistest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "itemfreq": freq = stats.itemfreq(map(float, sample_one)) for list in freq: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "nanmedian": m = stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "boxcox_llf": IIf = stats.boxcox_llf(imbda, map(float, sample_one)) cols.append(IIf) elif test_id.strip() == "tiecorrect": fa = stats.tiecorrect(map(float, sample_one)) cols.append(fa) elif test_id.strip() == "rankdata": r = 
stats.rankdata(map(float, sample_one), method=args.md) cols.append(r) elif test_id.strip() == "nanstd": s = stats.nanstd(map(float, sample_one), bias=args.bias) cols.append(s) elif test_id.strip() == "anderson": A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist) cols.append(A2) for list in critical: cols.append(list) cols.append(",") for list in sig: cols.append(list) elif test_id.strip() == "binom_test": p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p) cols.append(p_value) elif test_id.strip() == "gmean": gm = stats.gmean(map(float, sample_one), dtype=args.dtype) cols.append(gm) elif test_id.strip() == "hmean": hm = stats.hmean(map(float, sample_one), dtype=args.dtype) cols.append(hm) elif test_id.strip() == "kurtosis": k = stats.kurtosis( map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias, ) cols.append(k) elif test_id.strip() == "moment": n_moment = stats.moment(map(float, sample_one), n=args.n) cols.append(n_moment) elif test_id.strip() == "normaltest": k2, p_value = stats.normaltest(map(float, sample_one)) cols.append(k2) cols.append(p_value) elif test_id.strip() == "skew": skewness = stats.skew(map(float, sample_one), bias=args.bias) cols.append(skewness) elif test_id.strip() == "skewtest": z_value, p_value = stats.skewtest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "sem": s = stats.sem(map(float, sample_one), ddof=args.ddof) cols.append(s) elif test_id.strip() == "zscore": z = stats.zscore(map(float, sample_one), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "signaltonoise": s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof) cols.append(s2n) elif test_id.strip() == "percentileofscore": p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind) cols.append(p) elif test_id.strip() == "bayes_mvs": c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha) 
cols.append(c_mean) cols.append(c_var) cols.append(c_std) elif test_id.strip() == "sigmaclip": c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n) cols.append(c) cols.append(c_low) cols.append(c_up) elif test_id.strip() == "kstest": d, p_value = stats.kstest( map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode, ) cols.append(d) cols.append(p_value) elif test_id.strip() == "chi2_contingency": chi2, p, dof, ex = stats.chi2_contingency( map(float, sample_one), correction=args.correction, lambda_=args.lambda_) cols.append(chi2) cols.append(p) cols.append(dof) cols.append(ex) elif test_id.strip() == "tmean": if nf == 0 and mf == 0: mean = stats.tmean(map(float, sample_one)) else: mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(mean) elif test_id.strip() == "tmin": if mf == 0: min = stats.tmin(map(float, sample_one)) else: min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive) cols.append(min) elif test_id.strip() == "tmax": if nf == 0: max = stats.tmax(map(float, sample_one)) else: max = stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive) cols.append(max) elif test_id.strip() == "tvar": if nf == 0 and mf == 0: var = stats.tvar(map(float, sample_one)) else: var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(var) elif test_id.strip() == "tstd": if nf == 0 and mf == 0: std = stats.tstd(map(float, sample_one)) else: std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(std) elif test_id.strip() == "tsem": if nf == 0 and mf == 0: s = stats.tsem(map(float, sample_one)) else: s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(s) elif test_id.strip() == "scoreatpercentile": if nf == 0 and mf == 0: s = stats.scoreatpercentile( map(float, sample_one), map(float, sample_two), 
interpolation_method=args.interpolation, ) else: s = stats.scoreatpercentile( map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation, ) for list in s: cols.append(list) elif test_id.strip() == "relfreq": if nf == 0 and mf == 0: rel, low_range, binsize, ex = stats.relfreq( map(float, sample_one), args.b) else: rel, low_range, binsize, ex = stats.relfreq( map(float, sample_one), args.b, (mf, nf)) for list in rel: cols.append(list) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "binned_statistic": if nf == 0 and mf == 0: st, b_edge, b_n = stats.binned_statistic( map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b, ) else: st, b_edge, b_n = stats.binned_statistic( map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b, range=(mf, nf), ) cols.append(st) cols.append(b_edge) cols.append(b_n) elif test_id.strip() == "threshold": if nf == 0 and mf == 0: o = stats.threshold(map(float, sample_one), newval=args.new) else: o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new) for list in o: cols.append(list) elif test_id.strip() == "trimboth": o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut) for list in o: cols.append(list) elif test_id.strip() == "trim1": t1 = stats.trim1( map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail, ) for list in t1: cols.append(list) elif test_id.strip() == "histogram": if nf == 0 and mf == 0: hi, low_range, binsize, ex = stats.histogram( map(float, sample_one), args.b) else: hi, low_range, binsize, ex = stats.histogram( map(float, sample_one), args.b, (mf, nf)) cols.append(hi) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "cumfreq": if nf == 0 and mf == 0: cum, low_range, binsize, ex = stats.cumfreq( map(float, sample_one), args.b) else: cum, low_range, binsize, ex = stats.cumfreq( map(float, 
sample_one), args.b, (mf, nf)) cols.append(cum) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "boxcox_normmax": if nf == 0 and mf == 0: ma = stats.boxcox_normmax(map(float, sample_one)) else: ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method) cols.append(ma) elif test_id.strip() == "boxcox": if imbda == 0: box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha) cols.append(box) cols.append(ma) cols.append(ci) else: box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha) cols.append(box) elif test_id.strip() == "histogram2": h2 = stats.histogram2(map(float, sample_one), map(float, sample_two)) for list in h2: cols.append(list) elif test_id.strip() == "ranksums": z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two)) cols.append(z_statistic) cols.append(p_value) elif test_id.strip() == "ttest_1samp": t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two)) for list in t: cols.append(list) for list in prob: cols.append(list) elif test_id.strip() == "ansari": AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two)) cols.append(AB) cols.append(p_value) elif test_id.strip() == "linregress": slope, intercept, r_value, p_value, stderr = stats.linregress( map(float, sample_one), map(float, sample_two)) cols.append(slope) cols.append(intercept) cols.append(r_value) cols.append(p_value) cols.append(stderr) elif test_id.strip() == "pearsonr": cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two)) cols.append(cor) cols.append(p_value) elif test_id.strip() == "pointbiserialr": r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two)) cols.append(r) cols.append(p_value) elif test_id.strip() == "ks_2samp": d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two)) cols.append(d) cols.append(p_value) elif test_id.strip() == "mannwhitneyu": mw_stats_u, p_value = 
stats.mannwhitneyu( map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity, ) cols.append(mw_stats_u) cols.append(p_value) elif test_id.strip() == "zmap": z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "ttest_ind": mw_stats_u, p_value = stats.ttest_ind(map(float, sample_one), map(float, sample_two), equal_var=args.equal_var) cols.append(mw_stats_u) cols.append(p_value) elif test_id.strip() == "ttest_rel": t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis) cols.append(t) cols.append(prob) elif test_id.strip() == "mood": z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis) cols.append(z) cols.append(p_value) elif test_id.strip() == "shapiro": W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta) cols.append(W) cols.append(p_value) for list in a: cols.append(list) elif test_id.strip() == "kendalltau": k, p_value = stats.kendalltau( map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort, ) cols.append(k) cols.append(p_value) elif test_id.strip() == "entropy": s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base) cols.append(s) elif test_id.strip() == "spearmanr": if sample2 == 1: rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two)) else: rho, p_value = stats.spearmanr(map(float, sample_one)) cols.append(rho) cols.append(p_value) elif test_id.strip() == "wilcoxon": if sample2 == 1: T, p_value = stats.wilcoxon( map(float, sample_one), map(float, sample_two), zero_method=args.zero_method, correction=args.correction, ) else: T, p_value = stats.wilcoxon( map(float, sample_one), zero_method=args.zero_method, correction=args.correction, ) cols.append(T) cols.append(p_value) elif test_id.strip() == "chisquare": if sample2 == 1: rho, p_value = stats.chisquare(map(float, 
sample_one), map(float, sample_two), ddof=args.ddof) else: rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof) cols.append(rho) cols.append(p_value) elif test_id.strip() == "power_divergence": if sample2 == 1: stat, p_value = stats.power_divergence( map(float, sample_one), map(float, sample_two), ddof=args.ddof, lambda_=args.lambda_, ) else: stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_) cols.append(stat) cols.append(p_value) elif test_id.strip() == "theilslopes": if sample2 == 1: mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha) else: mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha) cols.append(mpe) cols.append(met) cols.append(lo) cols.append(up) elif test_id.strip() == "combine_pvalues": if sample2 == 1: stat, p_value = stats.combine_pvalues( map(float, sample_one), method=args.med, weights=map(float, sample_two), ) else: stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med) cols.append(stat) cols.append(p_value) elif test_id.strip() == "obrientransform": ob = stats.obrientransform(*b_samples) for list in ob: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "f_oneway": f_value, p_value = stats.f_oneway(*b_samples) cols.append(f_value) cols.append(p_value) elif test_id.strip() == "kruskal": h, p_value = stats.kruskal(*b_samples) cols.append(h) cols.append(p_value) elif test_id.strip() == "friedmanchisquare": fr, p_value = stats.friedmanchisquare(*b_samples) cols.append(fr) cols.append(p_value) elif test_id.strip() == "fligner": xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples) cols.append(xsq) cols.append(p_value) elif test_id.strip() == "bartlett": T, p_value = stats.bartlett(*b_samples) cols.append(T) cols.append(p_value) elif test_id.strip() == "levene": w, p_value = stats.levene(center=args.center, 
proportiontocut=args.proportiontocut, *b_samples) cols.append(w) cols.append(p_value) elif test_id.strip() == "median_test": stat, p_value, m, table = stats.median_test( ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples) cols.append(stat) cols.append(p_value) cols.append(m) cols.append(table) for list in table: elements = ",".join(map(str, list)) cols.append(elements) outfile.write("%s\n" % "\t".join(map(str, cols))) outfile.close()
# Least-squares solution of X b = Y and the residual vector of that fit.
b = np.linalg.lstsq(X, Y, rcond=None)[0]
resid = Y - X.dot(b)

# Print each caption followed by its value; the trailer strings reproduce
# the blank lines emitted after every value.
for caption, value, trailer in (
        ("Коэффициенты линейной регрессии:", b, '\n'),
        ("Ожидаемое значение вектора ошибок:", np.mean(resid), '\n'),
        ("Изменение вектора ошибок:", np.var(resid, ddof=1), '\n\n\n'),
):
    print(caption)
    print(value, trailer)

print("Хи-квадрат критерий Пирсона:")
print("Хи-квадрат критерий Пирсона для k от 3 до 100")

# Compare the binned residuals against a fitted normal distribution for
# every bin count k in range(3, 100).
# NOTE(review): this part reads `residuals`, not the `resid` computed above;
# `residuals` is defined outside this fragment -- confirm they are the same.
for k in range(3, 100):
    print('k = ', k)
    lower, upper = np.amin(residuals), np.amax(residuals)

    # Observed counts: relative frequencies over [min, max] scaled back by
    # the sample size.
    res = ss.relfreq(residuals, numbins=k, defaultreallimits=(lower, upper))
    observed = res.frequency * len(residuals)

    # Normal distribution parameterised by the sample mean and the sample
    # standard deviation (ddof=1).
    mu = np.mean(residuals)
    sigma = np.std(residuals, ddof=1)

    # Expected counts: normal probability mass between consecutive bin
    # edges, scaled by the sample size.
    inter = np.linspace(lower, upper, k + 1)
    expected = np.diff(ss.norm.cdf(inter, mu, sigma)) * len(residuals)

    print('Встроенная функция хи-квадрат:')
    # NOTE(review): scipy's chisquare uses k - 1 - ddof degrees of freedom,
    # so ddof=k - 2 leaves exactly one degree of freedom for every k --
    # confirm this is intended.
    print(ss.chisquare(observed, expected, ddof=k - 2), '\n')
    print("Наш хи-квадрат:")
    stat, p_val = my_chi_square(observed, expected, ddof=k - 2)
def syncFLIR_diagnostics():
    """Write a per-camera PDF diagnostics report for a recording logfile.

    The logfile returned by ``read_csv_logfile()`` is grouped by its
    ``SerialNumber`` column. For every camera the function computes frame
    statistics from the ``FrameID`` and ``Timestamp`` columns, renders an
    inter-frame-interval time series and a relative-frequency histogram,
    and places the text and both images on the PDF created by
    ``initialize_pdf()``.

    Returns:
        The file name returned by ``initialize_pdf()``, or ``None`` when the
        logfile could not be read.
    """
    try:
        logfilepath, logfile = read_csv_logfile()
    except Exception:
        # Best effort: without a logfile there is nothing to report.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        return

    pdf, fileName = initialize_pdf(logfilepath)

    # Split logfile by serial number
    grouped = logfile.groupby(logfile.SerialNumber)
    # Highest frame ID over all cameras; the same for every camera, so it is
    # computed once.
    maxFrameID = logfile['FrameID'].max()

    # positioning for output text and figures in pdf file
    htext = 610
    wtext = 50
    whist = 30
    himage = 270

    print("Writing diagnostics report ...")

    # suppress output
    with contextlib.redirect_stderr(None):
        # analyze all cameras from csv; iterating the GroupBy yields the
        # (serial, rows) pairs without the deprecated ``grouper`` attribute.
        for serial, group in grouped:
            # Diagnose recording. ``sort_values`` returns a new frame, so the
            # columns added below never write into a slice of ``logfile``.
            group = group.sort_values(by=['FrameID'])
            lastFrame = group['FrameID'].max()
            # NOTE(review): dividing by 1e9 presumably converts nanosecond
            # timestamps to seconds -- confirm against the logger.
            timespan = (group['Timestamp'].max()
                        - group['Timestamp'].min()) / 1e9
            group['IntFramesInt'] = group['Timestamp'].diff() / 1e9
            group['FrameSkip'] = group['FrameID'].diff() - 1
            avgfps = lastFrame / timespan
            meanfps = 1 / group.IntFramesInt.mean()
            # Intervals above 0.04 (labelled "FPS = 25" in the plots) are
            # counted as critical.
            critFPS = group.IntFramesInt[group.IntFramesInt > .04].count()
            skipFrames = group.FrameSkip.sum()
            missingFrames = maxFrameID - lastFrame

            # save output
            textLinesReport = [
                'Camera: #' + str(serial),
                'Total frames: ' + str(lastFrame),
                'Recording time: ' + time.strftime(
                    "%M:%S", time.gmtime(timespan)),
                'Frames/Time: ' + "{:.2f}".format(avgfps),
                'Mean FPS: ' + "{:.2f}".format(meanfps),
                'Critical frames: ' + str(critFPS),
                'Skipped frames: ' + str(int(skipFrames)),
                'Missing frames: ' + str(int(missingFrames)),
            ]

            # Plot FPS time series
            plt.rcParams['font.size'] = '12'
            timeseries = 'timeseries-' + str(serial) + '.png'
            fig, ax = plt.subplots(figsize=(10, 2))
            ax.plot(group.FrameID, group.IntFramesInt, marker='.',
                    alpha=0.3, color='black', linestyle='solid')
            ax.axhline(y=.04, color='r', linestyle='-', lw=2)
            plt.text(0, 0.045, 'FPS = 25', color='r', rotation=0)
            ax.axhline(y=.02, color='y', linestyle='-', lw=2)
            plt.text(0, 0.025, 'FPS = 50', color='y', rotation=0)
            ax.axhline(y=.005, color='g', linestyle='-', lw=2)
            plt.text(0, .01, 'FPS = 200', color='g', rotation=0)
            plt.ylabel('Inter Frame Interval')
            plt.xlabel('Frame ID')
            plt.title(serial)
            plt.savefig(timeseries)
            # Release the figure; otherwise every camera leaves two open
            # figures behind.
            plt.close(fig)

            # Plot FPS Histogram
            plt.rcParams['font.size'] = '34'
            histogram = 'histogram-' + str(serial) + '.png'
            res = stats.relfreq(group.IntFramesInt.dropna(), numbins=30)
            x = res.lowerlimit + np.linspace(
                0, res.binsize * res.frequency.size, res.frequency.size)
            fig, ax = plt.subplots(figsize=(18, 12))
            ax.bar(x, res.frequency, width=res.binsize)
            ax.axvline(x=.04, color='r', linestyle='-', lw=1)
            plt.text(.05, .4, 'FPS = 25', color='r', rotation=0)
            ax.axvline(x=.02, color='y', linestyle='-', lw=1)
            plt.text(.05, .45, 'FPS = 50', color='y', rotation=0)
            ax.axvline(x=.005, color='g', linestyle='-', lw=1)
            plt.text(.05, .5, 'FPS = 200', color='g', rotation=0)
            plt.xlabel('Inter Frame Interval')
            plt.xlim(0, 0.075)
            plt.ylabel('Relative Frequency')
            plt.title(serial)
            plt.savefig(histogram)
            plt.close(fig)

            # write diagnostics to pdf
            text = pdf.beginText(wtext, htext)
            text.setFont("Times-Roman", 11)
            for line in textLinesReport:
                text.textLine(line)
            pdf.drawText(text)

            # write timeseries figures to pdf; the temporary image file is
            # deleted once it has been placed
            pdf.drawInlineImage(timeseries, 0, himage, width=600, height=120)
            os.remove(timeseries)

            # write histograms to pdf
            pdf.drawInlineImage(histogram, whist, 390, width=180, height=120)
            os.remove(histogram)

            # move next text to the right
            wtext = wtext + 180
            # move histogram to the right
            whist = whist + 180
            # move image down
            himage = himage - 125

    pdf.save()
    return fileName
%matplotlib inline import numpy as np import matplotlib.pyplot as plt from scipy import stats as st testscores = np.random.normal(9.5, 2.5, 500) print('describe = ', st.describe(testscores)) print('mean = ', np.mean(testscores)) print('mode = ', st.mode(testscores)) print('tmean = ', st.tmean(testscores,[5,15])) print('variation = ', st.variation(testscores)) print('skewness = ', st.skew(testscores)) print('kurtosis = ', st.kurtosis(testscores)) print('zscore = ', st.zscore(testscores)[:20]) relfr = st.relfreq(testscores, 20) print('relfreq = ', relfr) x1 = relfr.lowerlimit + np.linspace(0, relfr.binsize*relfr.frequency.size, relfr.frequency.size) cumfr = st.cumfreq(testscores, 20) print('cumfreq = ', cumfr) x2 = cumfr.lowerlimit + np.linspace(0, cumfr.binsize*cumfr.cumcount.size, cumfr.cumcount.size) plt.subplot(2,2,1) plt.hist(testscores, 20) plt.title('Histogram') plt.xticks(()) plt.subplot(2,2,2) plt.bar(x1, relfr.frequency, width = relfr.binsize) plt.title('Relative histogram')
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

plt.figure(num=1, figsize=[10, 7])

# Sample size is kept as a float, as in the original script; randn needs
# integer dimensions, so it is converted at the call.
ns = 1000.
dist = np.random.randn(int(ns))
dmin = np.amin(dist)
dmax = np.amax(dist)

# Relative frequencies over 10 bins.
nbins = 10
relfreqs, lowlim, binsize, extrapoints = stats.relfreq(dist, nbins)
# print(x) with a single argument produces the same output on Python 2
# and Python 3.
print(relfreqs.shape)
print(lowlim)
print(binsize)
# NOTE(review): the x positions span nbins + 1 bin widths for nbins points
# -- confirm this placement is intended.
relfreq_x = np.linspace(lowlim, binsize * (nbins + 1) + lowlim, nbins)
print(relfreq_x.shape)
plt.plot(relfreq_x, relfreqs)

# histogram 1
nbins = 9
# The ``hold`` keyword was dropped: matplotlib 3.0 removed it, and drawing
# onto the existing axes is already the default behaviour.
n1, bins, patches = plt.hist(dist, bins=nbins, histtype='step',
                             range=(dmin, dmax), color='white')