def accuracy_pdf():
    df = pd.read_csv(
        "/Users/maksim/dev_projects/merf/figures/accuracy_results/precision_recall.csv"
    )
    prec = df['cpu_precision'] - df['gpu_precision']
    rec = df['cpu_recall'] - df['gpu_recall']
    print(stats.describe(prec))
    print(stats.describe(rec))
    plt.hist(prec, bins=20, label="$CPU_p - GPU_p$", alpha=.5)
    plt.hist(rec, bins=100, label="$CPU_r - GPU_r$", alpha=.5)
    plt.legend()
    tikzplotlib.save("accuracy.tex")
def get_freq_distn_stats(self):
    pred_list = self.pred_info[2]  # obtains data from predicted contact dictionary
    pred_list.sort(key=lambda x: float(x[3]), reverse=True)
    # get native information on contact
    nat_list, nat_dict = get_epitopes_above_ths(self.nat_info[2])
    tp_reslist, tp_resind = [], []
    # get true predictions and their indices
    for ind, res in enumerate(pred_list):
        if float(res[3]) <= 0.0:
            continue
        res_name = '%s_%s_%s' % (res[0], res[1], res[2])
        if res_name in nat_dict:  # if pred res with >0 score is in native epitope
            tp_reslist.append(res[3])
            tp_resind.append(ind)
    yfreq = [float(i[3]) for i in pred_list if float(i[3]) > 0.0]
    xfreq = [i for i in range(1, len(yfreq) + 1)]
    if len(yfreq) == 0:
        return xfreq, yfreq, tp_reslist, tp_resind, 0, 0, 0
    curr_stats = stats.describe(yfreq)
    kur, skew, var = curr_stats.kurtosis, curr_stats.skewness, curr_stats.variance
    return xfreq, yfreq, tp_reslist, tp_resind, kur, skew, var
def generate_csv():
    cases_list = unpickle_data()
    csv_name = 'complete_data.csv'
    FULL_CSV = pd.DataFrame(columns=CSV_COLS)
    for c in cases_list:
        print(f" > Case {c._case_name}")
        for r in c:
            print(f"\t\t + RECORD {r.name}", end="")
            values = list()
            for k, v in r.N_LINEAR.items():
                s = stats.describe(v)
                values.extend([
                    s[2],  # Mean
                    s[3],  # Variance
                    s[4],  # Skewness
                    spectral_entropy(v, sf=r.fs, method='fft')  # Spectral Entropy
                ])
            row_data = [
                c._case_name,          # Case
                r.name,                # Record
                c.pathology,           # Condition
                COND_ID[c.pathology],  # Condition ID
                len(r.rr),             # RR Length
            ] + values
            FULL_CSV = FULL_CSV.append(
                pd.Series(data=row_data, index=CSV_COLS),
                ignore_index=True)
            print("[v]")
    FULL_CSV.to_csv(csv_name, index=False)
def edit_row(row):
    rr = row['rr']
    s = stats.describe(rr)
    new_row = row[['record', 'condition']]
    new_row['mean'] = s[2]
    new_row['variance'] = s[3]
    new_row['skewness'] = s[4]
    new_row['kurtosis'] = s[5]
    return new_row
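A hedged sketch of applying edit_row over a DataFrame whose 'rr' column holds RR-interval sequences; the frame contents below are invented for illustration and assume pandas as pd and scipy.stats as stats are already imported:

# Toy input frame (values are made up, not project data):
df_demo = pd.DataFrame({
    'record': ['r1', 'r2'],
    'condition': ['AF', 'SR'],
    'rr': [[0.80, 0.82, 0.79, 0.81], [0.60, 0.65, 0.61, 0.63]],
})
features_demo = df_demo.apply(edit_row, axis=1)  # one feature row per record
print(features_demo)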
def compute(pred_scores):
    describe = stats.describe(pred_scores)
    metrics = {
        "min": describe.minmax[0],
        "max": describe.minmax[1],
        "mean": describe.mean,
        "variance": describe.variance,
        "skewness": describe.skewness,
        "kurtosis": describe.kurtosis
    }
    return metrics
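A minimal usage sketch for compute, assuming scipy's stats module is imported and pred_scores is any 1-D sequence of numeric scores; the sample values are made up:

# Illustrative call with made-up scores:
sample_scores = [0.12, 0.56, 0.33, 0.91, 0.47]
print(compute(sample_scores))
# -> dict with min, max, mean, variance, skewness and kurtosis of the scores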
def process_row(row: pd.Series) -> pd.Series:
    data = dict(row[[m["tag"] for m in NL_METHODS]])
    for tag, vec in data.items():
        s = stats.describe(vec)
        values = [
            s[2],  # mean
            s[3],  # variance
            s[4],  # skewness
            spectral_entropy(vec, sf=row['fs'], method='fft')
        ]
        for n, v in zip(punctual_names, values):
            row[tag + n] = v
    return row
def save_test():
    TEST_DIRS = list(Path('.').glob('Test_*ws/'))
    for td in TEST_DIRS:
        t_cases = test_unpickle(td)
        pdir = "Test/"
        csv_name = pdir + td.stem + '.csv'
        pkl_name = pdir + td.stem + '.pkl'
        csv_data = pd.DataFrame(columns=CSV_COLS)
        pkl_data = pd.DataFrame(columns=CSV_COLS[:5])
        for c in t_cases:
            for r in c:
                # Process for CSV
                values = list()
                row_data = [
                    c._case_name,
                    r.name,
                    c.pathology,
                    COND_ID[c.pathology],
                    len(r.rr_int),
                ]
                for k, v in r.N_LINEAR.items():
                    s = stats.describe(v)
                    row_data.extend([
                        s[2],  # mean
                        s[3],  # variance
                        s[4],  # skewness
                        spectral_entropy(v, sf=r.fs, method='fft')
                    ])
                csv_data = csv_data.append(pd.Series(
                    data=row_data,
                    index=CSV_COLS,
                ), ignore_index=True)
                # Process for pickle
                pkl_row = {
                    'case': c._case_name,
                    'record': r.name,
                    'condition': c.pathology,
                    'cond_id': COND_ID[c.pathology],
                    'length': len(r.rr_int)
                }
                pkl_row.update(r.N_LINEAR)
                pkl_data = pkl_data.append(pd.DataFrame(pkl_row))
        # Data is saved in both formats
        csv_data.to_csv(csv_name, index=False)
        with open(pkl_name, 'wb') as pf:
            pickle.dump(pkl_data, pf)
def linearWindowing(rr_signal: np.ndarray):
    """
    Evaluates rr with linear functions based on a rolling window.

    rr_signal :: RR vector of time in seconds
    """
    means, var, skew, kurt = list(), list(), list(), list()
    for idx in range(0, len(rr_signal) - RR_WLEN, RR_STEP):
        window_slice = slice(idx, idx + RR_WLEN)
        rr_window = rr_signal[window_slice]
        ds = stats.describe(rr_window)
        means.append(ds[2])  # mean
        var.append(ds[3])    # variance
        skew.append(ds[4])   # skewness
        kurt.append(ds[5])   # kurtosis
    return means, var, skew, kurt
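A minimal usage sketch for linearWindowing; RR_WLEN and RR_STEP are module-level constants the function expects, and the values below are illustrative assumptions, not the project's real configuration:

# Assumed windowing constants (illustrative only):
RR_WLEN = 50   # samples per window
RR_STEP = 10   # hop between consecutive windows
# Synthetic RR series (~0.8 s intervals) just to exercise the function;
# assumes numpy as np and scipy.stats as stats are already imported.
rng = np.random.default_rng(0)
rr_demo = 0.8 + 0.05 * rng.standard_normal(300)
m, v, sk, ku = linearWindowing(rr_demo)
print(len(m), m[0], v[0])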
def compute_pairwise_embedding_distance_features(a_mat, b_mat) -> List:
    """
    Computes pairwise embedding distance features.

    :param a_mat:
    :param b_mat:
    :return:

    Note that the order of values is the same as in `create_feature_names`
    (mean, variance, min, max)
    """
    if a_mat is None or b_mat is None or a_mat.size == 0 or b_mat.size == 0:
        return [None] * 4
    else:
        dists = cdist(a_mat, b_mat, "cosine")
        if dists.size == 1:
            # scipy would raise "FloatingPointError: invalid value encountered in
            # double_scalars" when calling describe on a 1x1 matrix, so we use
            # this workaround
            return [dists.item(), 0, dists.item(), dists.item()]
        else:
            dists_stats = stats.describe(dists, axis=None)  # type: DescribeResult
            return [dists_stats.mean,
                    0 if dists_stats.variance is None else dists_stats.variance,
                    dists_stats.minmax[0],
                    dists_stats.minmax[1]]
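A small usage sketch for compute_pairwise_embedding_distance_features with two random embedding matrices; the shapes are arbitrary and the call assumes numpy as np, scipy.spatial.distance.cdist, and scipy.stats as stats are imported as the function above requires:

# Two random embedding matrices (shapes chosen arbitrarily for illustration):
a_demo = np.random.rand(3, 8)
b_demo = np.random.rand(5, 8)
print(compute_pairwise_embedding_distance_features(a_demo, b_demo))  # [mean, variance, min, max] of cosine distances
print(compute_pairwise_embedding_distance_features(None, b_demo))    # -> [None, None, None, None]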
def extract_from_instance(instance_file, features_file):
    aset = AuctionSet.load(instance_file)

    # shorthand variables:
    b = aset.bid_set.values
    r = aset.bid_set.quantities
    a = aset.ask_set.values
    s = aset.ask_set.quantities

    ### stats for average bid prices
    nobs, b_minmax, b_mean, b_var, b_skew, b_kurt = st.describe(
        b / np.sum(r, axis=1), ddof=0)
    ### stats for average ask prices
    nobs, a_minmax, a_mean, a_var, a_skew, a_kurt = st.describe(
        a / np.sum(s, axis=1), ddof=0)
    ### stats for bid bundle size
    nobs, r_minmax, r_mean, r_var, r_skew, r_kurt = st.describe(
        np.sum(r, axis=1), ddof=0)
    ### stats for ask bundle size
    nobs, s_minmax, s_mean, s_var, s_skew, s_kurt = st.describe(
        np.sum(s, axis=1), ddof=0)

    ####### heterogeneity -> resource type axis (stats inside a bundle)
    # stats for resource quantities demanded for each resource type:
    # sum, mean, min, max per res type, then describe
    nobs, rt_sum_minmax, rt_sum_mean, rt_sum_var, rt_sum_skew, rt_sum_kurt = st.describe(
        np.sum(r, axis=0), ddof=0)
    nobs, rt_mean_minmax, rt_mean_mean, rt_mean_var, rt_mean_skew, rt_mean_kurt = st.describe(
        np.mean(r, axis=0), ddof=0)
    nobs, rt_min_minmax, rt_min_mean, rt_min_var, rt_min_skew, rt_min_kurt = st.describe(
        np.min(r, axis=0), ddof=0)
    nobs, rt_max_minmax, rt_max_mean, rt_max_var, rt_max_skew, rt_max_kurt = st.describe(
        np.max(r, axis=0), ddof=0)

    # stats for resource quantities offered for each resource type
    nobs, st_sum_minmax, st_sum_mean, st_sum_var, st_sum_skew, st_sum_kurt = st.describe(
        np.sum(s, axis=0), ddof=0)
    nobs, st_mean_minmax, st_mean_mean, st_mean_var, st_mean_skew, st_mean_kurt = st.describe(
        np.mean(s, axis=0), ddof=0)
    nobs, st_min_minmax, st_min_mean, st_min_var, st_min_skew, st_min_kurt = st.describe(
        np.min(s, axis=0), ddof=0)
    nobs, st_max_minmax, st_max_mean, st_max_var, st_max_skew, st_max_kurt = st.describe(
        np.max(s, axis=0), ddof=0)

    # stats for demand/supply ratio by resource types: total, mean
    nobs, qratio_sum_minmax, qratio_sum_mean, qratio_sum_var, qratio_sum_skew, qratio_sum_kurt = st.describe(
        np.sum(r, axis=0) / np.sum(s, axis=0), ddof=0)
    nobs, qratio_mean_minmax, qratio_mean_mean, qratio_mean_var, qratio_mean_skew, qratio_mean_kurt = st.describe(
        np.mean(r, axis=0) / np.mean(s, axis=0), ddof=0)

    # stats for surplus quantity by resource types
    nobs, qsurplus_sum_minmax, qsurplus_sum_mean, qsurplus_sum_var, qsurplus_sum_skew, qsurplus_sum_kurt = st.describe(
        np.sum(s, axis=0) - np.sum(r, axis=0), ddof=0)

    # quantity spread by resource type
    # (max requested quantity of resource k - min offered quantity of resource k)
    nobs, qspread_minmax, qspread_mean, qspread_var, qspread_skew, qspread_kurt = st.describe(
        np.max(r, axis=0) - np.min(s, axis=0), ddof=0)

    # mid price
    bid_max = (b / r.sum(axis=1)).max()
    ask_min = (a / s.sum(axis=1)).min()
    mid_price = (bid_max + ask_min) / 2
    # bid-ask spread
    ba_spread = bid_max - ask_min
    # total demand quantity
    r_total = r.sum()
    # total supply quantity
    s_total = s.sum()
    # total demand value
    b_total = b.sum()
    # total supply value
    a_total = a.sum()
    # surplus value per surplus unit
    surplus_value_per_surplus_unit = 0 if r_total == s_total else (
        b_total - a_total) / (r_total - s_total)

    ### append features
    features = np.array([
        ## instance name to be used as index
        instance_file
        ### group 1: instance - price related
        , b_mean                 # average_bid_price_mean
        , math.sqrt(b_var)       # average_bid_price_stddev
        , b_skew                 # average_bid_price_skewness
        , b_kurt                 # average_bid_price_kurtosis
        , a_mean                 # average_ask_price_mean
        , math.sqrt(a_var)       # average_ask_price_stddev
        , a_skew                 # average_ask_price_skewness
        , a_kurt                 # average_ask_price_kurtosis
        , bid_max                # average_bid_price_max
        , ask_min                # average_ask_price_min
        , mid_price              # mid_price
        , ba_spread              # bid_ask_spread
        , ba_spread / mid_price  # bid_ask_spread_over_mid_price
        ### group 2: instance - quantity related
        , r_mean                 # bid_bundle_size_mean
        , math.sqrt(r_var)       # bid_bundle_size_stddev
        , r_skew                 # bid_bundle_size_skewness
        , r_kurt                 # bid_bundle_size_kurtosis
        , s_mean                 # ask_bundle_size_mean
        , math.sqrt(s_var)       # ask_bundle_size_stddev
        , s_skew                 # ask_bundle_size_skewness
        , s_kurt                 # ask_bundle_size_kurtosis
        ### group 3: instance - quantity per resource related (measure of heterogeneity)
        # --> demand side
        , rt_sum_mean             # total_demand_per_resource_mean
        , math.sqrt(rt_sum_var)   # total_demand_per_resource_stddev
        , rt_sum_skew             # total_demand_per_resource_skewness
        , rt_sum_kurt             # total_demand_per_resource_kurtosis
        , rt_mean_mean            # average_demand_per_resource_mean
        , math.sqrt(rt_mean_var)  # average_demand_per_resource_stddev
        , rt_mean_skew            # average_demand_per_resource_skewness
        , rt_mean_kurt            # average_demand_per_resource_kurtosis
        , rt_min_mean             # minimum_demand_per_resource_mean
        , math.sqrt(rt_min_var)   # minimum_demand_per_resource_stddev
        , rt_min_skew             # minimum_demand_per_resource_skewness
        , rt_min_kurt             # minimum_demand_per_resource_kurtosis
        , rt_max_mean             # maximum_demand_per_resource_mean
        , math.sqrt(rt_max_var)   # maximum_demand_per_resource_stddev
        , rt_max_skew             # maximum_demand_per_resource_skewness
        , rt_max_kurt             # maximum_demand_per_resource_kurtosis
        # --> supply side
        , st_sum_mean             # total_supply_per_resource_mean
        , math.sqrt(st_sum_var)   # total_supply_per_resource_stddev
        , st_sum_skew             # total_supply_per_resource_skewness
        , st_sum_kurt             # total_supply_per_resource_kurtosis
        , st_mean_mean            # average_supply_per_resource_mean
        , math.sqrt(st_mean_var)  # average_supply_per_resource_stddev
        , st_mean_skew            # average_supply_per_resource_skewness
        , st_mean_kurt            # average_supply_per_resource_kurtosis
        , st_min_mean             # minimum_supply_per_resource_mean
        , math.sqrt(st_min_var)   # minimum_supply_per_resource_stddev
        , st_min_skew             # minimum_supply_per_resource_skewness
        , st_min_kurt             # minimum_supply_per_resource_kurtosis
        , st_max_mean             # maximum_supply_per_resource_mean
        , math.sqrt(st_max_var)   # maximum_supply_per_resource_stddev
        , st_max_skew             # maximum_supply_per_resource_skewness
        , st_max_kurt             # maximum_supply_per_resource_kurtosis
        ### group 4: instance - demand-supply balance related
        , surplus_value_per_surplus_unit  # surplus_value_per_surplus_unit
        , b_total / a_total       # demand_supply_ratio_value
        , r_total / s_total       # demand_supply_ratio_quantity
        , qratio_sum_mean             # demand_supply_ratio_total_quantity_per_resource_mean
        , math.sqrt(qratio_sum_var)   # demand_supply_ratio_total_quantity_per_resource_stddev
        , qratio_sum_skew             # demand_supply_ratio_total_quantity_per_resource_skewness
        , qratio_sum_kurt             # demand_supply_ratio_total_quantity_per_resource_kurtosis
        , qratio_mean_mean            # demand_supply_ratio_mean_quantity_per_resource_mean
        , math.sqrt(qratio_mean_var)  # demand_supply_ratio_mean_quantity_per_resource_stddev
        , qratio_mean_skew            # demand_supply_ratio_mean_quantity_per_resource_skewness
        , qratio_mean_kurt            # demand_supply_ratio_mean_quantity_per_resource_kurtosis
        , s_total - r_total           # surplus_quantity
        , qsurplus_sum_mean             # surplus_total_quantity_per_resource_mean
        , math.sqrt(qsurplus_sum_var)   # surplus_total_quantity_per_resource_stddev
        , qsurplus_sum_skew             # surplus_total_quantity_per_resource_skewness
        , qsurplus_sum_kurt             # surplus_total_quantity_per_resource_kurtosis
        , qspread_mean            # quantity_spread_per_resource_mean
        , math.sqrt(qspread_var)  # quantity_spread_per_resource_stddev
        , qspread_skew            # quantity_spread_per_resource_skewness
        , qspread_kurt            # quantity_spread_per_resource_kurtosis
        , b_mean / a_mean         # ratio_average_price_bid_to_ask
        , r_mean / s_mean         # ratio_bundle_size_bid_to_ask
    ])

    fpi = pd.DataFrame(
        features.reshape((1, features.shape[0])),
        columns=["instance", *[x.name for x in Feature_Names]]).set_index('instance')
    with open(features_file, "a") as f:
        fpi.to_csv(f, header=False, float_format='%g')
data_n = data[[0, 4, 8]]
print('Data info :\n', data_n.info())
print('Data describe:\n', data_n.describe())
print('Data :\n', data_n.head(), '\n')

# Assign column headers
data_n.columns = ['Tempt', 'Gender', 'Beats']
print('Data :\n', data_n.head(), '\n')

# 2. Plot a histogram of the Tempt column.
plt.figure(figsize=(6, 6))
sns.distplot(data_n.Tempt)
plt.show()

# 3. Summary statistics of Tempt.
print('\nSummary statistics:\n', stats.describe(data_n.Tempt))

# 4. Find mean, median, mode => comment on the result
mean_T = data_n.Tempt.mean()
print('Mean: ', mean_T)
median_T = data_n.Tempt.median()
print('Median:', median_T)
mode_T = data_n.Tempt.mode()
print('Mode: ', mode_T)
# Observation: the distribution is roughly normal

# 5. Tempt values at the [0, 1, 2, 2.5, 97.5, 98, 99, 100] percentiles
percentiles = np.array([0, 1, 2, 2.5, 97.5, 98, 99, 100])
x = np.percentile(data_n.Tempt, percentiles)
print('Percentiles[]:', x)
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn.metrics import confusion_matrix, accuracy_score
from scipy import stats

iris = datasets.load_iris()
df_data = iris.data
df_class = iris.target

clusters, values = np.unique(df_class, return_counts=True)
# or: pd.DataFrame({'col': df_class})['col'].value_counts()
# or: Counter(df_class)

stats.describe(df_data)

model = KMeans(len(clusters))
model.fit(df_data)
model.cluster_centers_

previsoes = model.labels_
confusion_matrix(df_class, previsoes)
accuracy_score(df_class, previsoes)

# plot the points assigned to the first cluster
# (the remaining clusters can be plotted the same way)
plt.scatter(df_data[previsoes == 0, 0], df_data[previsoes == 0, 1], c='green',
            label='Cluster 0')
plt.legend()
plt.show()
def summary(self):
    self.to1d()
    self.summ = stats.describe(self._oneDdata)
    return
def process_annotations(args):
    li = []
    for filename in args["source_csvs"]:
        df = pandas.read_csv(filename, index_col=None, header=0)
        li.append(df)
    source_csv = pandas.concat(li, ignore_index=True, sort=False, axis=0)
    # print(tabulate(source_csv, headers='keys', tablefmt='psql'))
    print(source_csv.columns)

    accept_time_col = source_csv["AcceptTime"]
    submit_time_col = source_csv["SubmitTime"]

    # Highlight those that are too short.
    suspiciously_quick = []
    for accept_time, submit_time in zip(accept_time_col, submit_time_col):
        accept_time = accept_time.replace("PDT", "").strip()
        submit_time = submit_time.replace("PDT", "").strip()
        mturk_date_format = "%a %b %d %H:%M:%S %Y"
        accept_time = datetime.datetime.strptime(accept_time, mturk_date_format)
        submit_time = datetime.datetime.strptime(submit_time, mturk_date_format)
        time_taken = submit_time - accept_time
        if time_taken.seconds / 60.0 < args["min_time"]:
            suspiciously_quick.append(True)
        else:
            suspiciously_quick.append(False)
    source_csv = source_csv.assign(too_quick=pandas.Series(suspiciously_quick))

    # Story summary
    token_length = []
    too_short = []
    for summary in source_csv["Answer.storySummary"]:
        num_tokens = len(summary.split(" "))
        token_length.append(num_tokens)
        if num_tokens < args["min_tokens"]:
            too_short.append(True)
        else:
            too_short.append(False)
    source_csv = source_csv.assign(num_summary_tokens=pandas.Series(token_length))
    source_csv = source_csv.assign(too_short=pandas.Series(too_short))

    genres = []
    for index, row in source_csv.iterrows():
        added = False
        for g in genre_categories:
            if row[g] == True and not added:
                added = True
                genre_name = g.split(".")[1]
                genres.append(genre_name)
        if not added:
            genres.append("other")
    source_csv = source_csv.assign(genre=pandas.Series(genres))

    source_csv.to_csv(f"{args['target']}_processed.csv")

    # print(f"Prefiltered: {len(source_csv)}")
    # source_csv = source_csv.loc[(source_csv['too_quick'] == True) & (source_csv['too_short'] == True)]
    # print(f"Postfiltered: {len(source_csv)}")

    stats_dict = defaultdict(dict)
    for col in stats_columns:
        figures = source_csv[col]
        nobs, minmax, mean, variance, skewness, kurtosis = stats.describe(figures)
        stats_dict[col]["nobs"] = nobs
        stats_dict[col]["min"] = minmax[0]
        stats_dict[col]["max"] = minmax[1]
        stats_dict[col]["mean"] = mean
        stats_dict[col]["variance"] = variance
        stats_dict[col]["skewness"] = skewness
        stats_dict[col]["kurtosis"] = kurtosis
        stats_dict[col]["25_perc"] = numpy.percentile(figures, 25)
        stats_dict[col]["median"] = numpy.percentile(figures, 50)
        stats_dict[col]["75_perc"] = numpy.percentile(figures, 75)

        triples = []
        for index, row in source_csv.iterrows():
            worker = row[worker_id_col]
            story = row[story_id_col]
            metrics_col = row[col]
            triples.append((str(worker), str(story), int(metrics_col)))
            print(worker, story, metrics_col)

        t = AnnotationTask(data=triples, distance=interval_distance)
        stats_dict[col]["krippendorff_alpha"] = t.alpha()
        stats_dict[col]["average_agreement"] = t.alpha()

    pandas.DataFrame.from_dict(
        stats_dict, orient="index").to_csv(f"{args['target']}_stats.csv")

    genre_dict = defaultdict(dict)
    genre_desc_count = source_csv[genre_column].value_counts(normalize=False)
    genre_desc = source_csv[genre_column].value_counts(normalize=True)
    for (n, v), (nc, vc) in zip(genre_desc.items(), genre_desc_count.items()):
        genre_dict[n]["count"] = vc
        genre_dict[n]["proportion"] = v
    pandas.DataFrame.from_dict(
        genre_dict, orient="index").to_csv(f"{args['target']}_genres.csv")

    corr_cov_df = source_csv[stats_columns]
    for method in ('pearson', 'kendall', 'spearman'):
        correlation_df = corr_cov_df.corr(method=method)
        correlation_df.to_csv(f"{args['target']}_{method}_corr.csv")

    covariance_df = corr_cov_df.cov()
    covariance_df.to_csv(f"{args['target']}_cov.csv")

    print(source_csv.columns)
def __init__(self, data):
    self.N, (self.min, self.max), self.mean, self.variance, self.skewness, self.kurtosis = describe(data)
    self.median = median(data)
    self.std = std(data)
    # quartiles (Q2 is the median, Q3 the 75th percentile)
    self.q1 = percentile(data, 25)
    self.q2 = self.median
    self.q3 = percentile(data, 75)
    # percentiles
    self.p01 = percentile(data, 1)
    self.p025 = percentile(data, 2.5)
    self.p05 = percentile(data, 5)
    self.p10 = percentile(data, 10)
    self.p90 = percentile(data, 90)
    self.p95 = percentile(data, 95)
    self.p975 = percentile(data, 97.5)
    self.p99 = percentile(data, 99)
import math
import seaborn as sns
from scipy.stats import binom
from scipy import stats

# n large enough, p = 0.5 ==> approximately a normal distribution
n = 12
p = 0.5
size = 1000
probs = [0.3, 0.5, 0.8]
# data_binom = [binom.rvs(n = n, p = p, size = size) for p in probs]
data_binom = binom.rvs(n=n, p=p, size=size)

ax = sns.distplot(data_binom,
                  kde=False,
                  color='blue',
                  hist_kws={'linewidth': 15, 'alpha': 1})
ax.set(xlabel='Binomial Distribution', ylabel='Frequency')
print('\nSummary statistics:\n', stats.describe(data_binom))

# Coin-toss experiment: heads or tails
# - Suppose a 'fair' coin is tossed 12 times. Compute the probability of getting 7 heads.
#
# P(X = k) = n! / (k! * (n - k)!) * p^k * (1 - p)^(n - k)
k = 7
C_n_k = math.factorial(n) / (math.factorial(k) * math.factorial(n - k))
P_X_k = C_n_k * math.pow(p, k) * math.pow(1 - p, n - k)
print('P(X = 7) = %.4f' % P_X_k)
# Using the Python function
print('P(X = 7) = %.4f (PYTHON)' % binom.pmf(k, n, p, loc=0))
# try and find correlations between columns in test and train data
# now we have a_test 4041 x 859
# aX_household_train is 8203 x 859
temp = np.shape(bX_household_train)
temp = temp[1]
train_stat = np.zeros((temp, 6))
test_stat = np.zeros((temp, 6))
feature_corr = np.zeros(temp)
for j in range(0, temp):
    A = bX_household_train[bX_household_train.columns[j]]
    B = b_test[b_test.columns[j]]
    # describe each column once and unpack min, max, mean, variance, skewness, kurtosis
    dA = describe(A)
    dB = describe(B)
    train_stat[j, :] = [
        dA.minmax[0], dA.minmax[1], dA.mean,
        dA.variance, dA.skewness, dA.kurtosis
    ]
    test_stat[j, :] = [
        dB.minmax[0], dB.minmax[1], dB.mean,
        dB.variance, dB.skewness, dB.kurtosis
    ]
times = {system: [] for system in systems}
with open('results/times') as f:
    for line in f.readlines():
        values = line.split()
        times[values[0]].append(float(values[2]))

for system in systems:
    folder = 'results/{}/'.format(system)
    totals = []
    maxs = []
    for file in os.listdir(folder):
        values = []
        with open(os.path.join(folder, file)) as f:
            values = [
                float(v) for i, v in enumerate(f.readlines()[1:])
                if i > 0 and float(v) != 0]
        totals.append(describe(values).mean)
        maxs.append(max(values))
    ddata = describe(totals)
    dmaxs = describe(maxs)
    print('Simulation time for {}'.format(system))
    print('\tAvg: {:.0f}'.format(describe(times[system]).mean))
    print('\tMax (stdev): {:.1f}'.format(sqrt(describe(times[system]).variance)))
    print('Memory Consumption for {}'.format(system))
    print('\tAvg: {:.0f}'.format(ddata.mean))
    print('\tstdev: {:.1f}'.format(sqrt(ddata.variance)))
    print('\tMax (avg): {:.0f}'.format(dmaxs.mean))
    print('\tMax (stdev): {:.1f}'.format(sqrt(dmaxs.variance)))
    print()
def describe_stats(x):
    # describe every column of the DataFrame
    for col in x.columns:
        my_stats = stats.describe(x[col])
        print(col, '\n', my_stats, '\n')
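A quick way to exercise describe_stats on a toy DataFrame; the column names and values are invented, and pandas as pd plus scipy.stats as stats are assumed to be imported:

# Toy DataFrame (contents are illustrative):
demo_df = pd.DataFrame({'a': [1.0, 2.0, 3.0, 4.0], 'b': [2.0, 4.0, 6.0, 8.0]})
describe_stats(demo_df)  # prints scipy's DescribeResult for each column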