def test_2D(self): a = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)), mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0))) actual = mstats.hmean(a) desired = ma.array((1, 2, 3, 4)) assert_array_almost_equal(actual, desired, decimal=14) actual1 = mstats.hmean(a, axis=-1) desired = (4.0 / (1 / 1.0 + 1 / 2.0 + 1 / 3.0 + 1 / 4.0), 2.0 / (1 / 2.0 + 1 / 3.0), 2.0 / (1 / 1.0 + 1 / 4.0)) assert_array_almost_equal(actual1, desired, decimal=14)
def test_1D(self): a = (1,2,3,4) actual = mstats.hmean(a) desired = 4. / (1./1 + 1./2 + 1./3 + 1./4) assert_almost_equal(actual, desired, decimal=14) desired1 = mstats.hmean(ma.array(a),axis=-1) assert_almost_equal(actual, desired1, decimal=14) a = ma.array((1,2,3,4),mask=(0,0,0,1)) actual = mstats.hmean(a) desired = 3. / (1./1 + 1./2 + 1./3) assert_almost_equal(actual, desired,decimal=14) desired1 = mstats.hmean(a,axis=-1) assert_almost_equal(actual, desired1, decimal=14)
def test_1D_float96(self): a = ma.array((1,2,3,4), mask=(0,0,0,1)) actual_dt = mstats.hmean(a, dtype=np.float96) desired_dt = np.asarray(3. / (1./1 + 1./2 + 1./3), dtype=np.float96) assert_almost_equal(actual_dt, desired_dt, decimal=14) assert_(actual_dt.dtype == desired_dt.dtype)
def main():
    """Fit several base classifiers, then cross-validate a stacked model.

    Each base classifier is fitted on the training split to predict the
    validation split, then re-fitted on the validation split to predict
    the training split.  The predictions, plus their row-wise geometric
    and harmonic means, become the features of an XGBoost grid search
    whose ROC-AUC cross-validation scores are printed.
    """
    X_train, X_val, y_train, y_val = common.load_train_dummies()

    # ``slr`` is only referenced by a disabled candidate below.
    slr = make_pipeline(MinMaxScaler(), LogisticRegression())
    plr = make_pipeline(PCA(), LogisticRegression())
    nb_bag = BaggingClassifier(base_estimator=GaussianNB())

    plr_grid = dict(pca__n_components=[None, 3, 8],
                    logisticregression__C=[1.0, 0.7])
    bag_grid = dict(max_samples=[0.2, 0.4, 0.6], max_features=[0.3, 0.7])
    clfs = (
        GaussianNB(),
        #GridSearchCV(slr, dict(logisticregression__C=[1.0, 0.8])),
        make_pipeline(PCA(), GaussianNB()),
        GridSearchCV(plr, plr_grid, scoring='roc_auc'),
        GridSearchCV(nb_bag, bag_grid, scoring='roc_auc'),
        xgb.XGBClassifier(n_estimators=20, max_depth=3, colsample_bytree=0.7,
                          subsample=0.6, learning_rate=0.1),
        #make_pipeline(KMeans(), GaussianNB()),
        #GridSearchCV(
        #    BaggingClassifier(),
        #    dict(base_estimator=[None, GaussianNB(), LogisticRegression()],
        #         n_estimators=[7, 10, 14],
        #         max_samples=[0.3, 0.6])),
        #GridSearchCV(xgb.XGBClassifier(), dict(n_estimators=[2, 3, 4], learning_rate=[0.01, 0.1], subsample=[0.5, 0.9])),
        #BaggingClassifier(base_estimator=SVC(), max_features=0.8, max_samples=2500, n_estimators=5),
    )

    # The print calls below are written so that they emit the same text
    # under the Python 2 print statement and the Python 3 print function.
    preds = []
    for model in clfs:
        print(model)
        model.fit(X_train, y_train)
        held_out_val = model.predict(X_val)
        print(roc_auc_score(y_val, held_out_val))
        model.fit(X_val, y_val)
        held_out_train = model.predict(X_train)
        preds.append(np.concatenate((held_out_train, held_out_val)))
        print(roc_auc_score(y_train, held_out_train))
        print('')

    y_all = np.concatenate((y_train, y_val))
    preds = np.column_stack(preds)
    gm = gmean(preds, axis=1)
    # Presumably the +1 shift keeps zero-valued predictions out of the
    # harmonic mean -- TODO confirm.
    hm = hmean(preds + 1, axis=1)
    preds = np.column_stack((preds, gm, hm))
    print('GM %s' % roc_auc_score(y_all, gm))
    print('HM %s' % roc_auc_score(y_all, hm))

    meta = GaussianNB()
    meta_grid = dict(max_depth=[2, 3, 4],
                     learning_rate=[0.01, 0.05, 0.1],
                     n_estimators=[20, 40, 60])
    meta = GridSearchCV(xgb.XGBClassifier(), meta_grid, scoring='roc_auc')
    meta.fit(preds, y_all)
    scores = cross_val_score(meta, preds, y_all, scoring='roc_auc', cv=5)
    print(scores)
    print(scores.mean())
# Columns 0 and 2 of `cent` are used as the two field coordinates
# (presumably x and z of the mesh cell centres -- TODO confirm against the
# code that builds `cent`).
x = cent[:, 0]
z = cent[:, 2]
# generate the random field
cov_model = Gaussian(dim=dim, var=var, len_scale=len_scale)
srf = SRF(cov_model, mean=mean, seed=seed)
# use unstructured for a 2D vertical mesh
field = srf((x, z), mesh_type='unstructured', force_moments=True) #, mode_no=100)
# conductivities as log-normal distributed from the field data
cond = np.exp(field)
from scipy.stats.mstats import gmean, hmean
# Arithmetic, harmonic and geometric mean of the conductivities; only the
# geometric mean is printed in this part of the script.
arimean = np.mean(cond)
harmean = hmean(cond)
geomean = gmean(cond)
print("The geometric mean is: " + str(geomean))
#plt.hist(field)
# show the heterogeneous field
# The figure height is scaled by thickness / length.
plt.figure(figsize=(20, thickness / length * 20))
cond_log = np.log10(cond)
plt.tricontourf(x, z, cond_log.T)
# Colour-bar ticks at the minimum, mean and maximum of log10(cond).
plt.colorbar(ticks=[ np.min(cond_log), np.mean(cond_log), np.max(cond_log) ])
plt.title("log-normal K field [log10 K]")
# NOTE(review): this call is cut off in the visible source; the remaining
# arguments and the closing parenthesis are outside this view.
plt.savefig(dire + '/' + name + '.png', dpi=300,
def multiresolution_analysis_reconstruction(x, y, error_x, error_y, final_scale):
    '''
    Refine the samples ``(x, y)`` scale by scale up to ``final_scale`` and
    overwrite the refined values at the abscissas listed in ``error_x``
    with the matching entries of ``error_y``.

    Parameters
    ----------
    x : array-like
        Abscissas of the coarse samples; converted to a float array. The
        initial scale is ``int(log2(len(x)))``.
    y : array-like
        Ordinates of the coarse samples; converted to a float array. The
        per-scale reshape only succeeds when ``len(y) == 2**scale + 1``.
    error_x : array-like
        Abscissas at which a stored value replaces the interpolated one.
    error_y : array-like
        Replacement values, paired by index with ``error_x``.
    final_scale : int-like
        Scale to reach; must be greater than the initial scale.

    Raises
    ------
    Exception
        If ``final_scale`` is not greater than the initial scale.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``'x'`` and ``'y'``, holding the refined samples.
    '''
    x = np.array(x, float)
    y = np.array(y, float)
    error_x = np.array(error_x, float)
    error_y = np.array(error_y, float)
    final_scale = int(final_scale)
    initial_scale = int(np.log2(len(x)))
    if final_scale <= initial_scale:
        raise Exception(
            'final_scale should be greater than initial_scale. The value of initial_scale is: {}'
            .format(initial_scale))
    else:
        # Each pass inserts one new value between every pair of neighbours.
        for scale in range(initial_scale, final_scale):
            xplt = np.array([], float)
            yplt = np.array([], float)
            yp = sign = 0
            # Number of samples after this refinement pass.
            nfinal = (2**(scale + 1) + 1)
            for i in range(len(y)):
                if i == 0:
                    # Left boundary: one-sided 4-point formula on y[0..3].
                    yp = (5 / 16) * y[int(i)] + (15 / 16) * y[int(i + 1)] - (
                        5 / 16) * y[int(i + 2)] + (1 / 16) * y[int(i + 3)]
                    yplt = np.append(yplt, y[i])
                    yplt = np.append(yplt, yp)
                elif 0 < i < len(y) - 2:
                    # Interior: midpoint of y[i] and y[i + 1], corrected by
                    # 1/8 of the harmonic mean of the absolute second
                    # differences on either side, weighted by the average
                    # of their sign_pph values.
                    sign = (
                        (pph_interpolation_sign.sign_pph(
                            y[int(i - 1)] - 2 * y[int(i)] + y[int(i + 1)]) +
                         pph_interpolation_sign.sign_pph(
                             y[int(i)] - 2 * y[int(i + 1)] + y[int(i + 2)])) / 2)
                    yp = (y[int(i)] + y[int(i + 1)]) / 2 - (
                        1 / 8
                    ) * sign * hmean(
                        np.array([
                            abs(y[int(i - 1)] - 2 * y[int(i)] + y[int(i + 1)]),
                            abs(y[int(i)] - 2 * y[int(i + 1)] + y[int(i + 2)])
                        ], float))
                    yplt = np.append(yplt, y[i])
                    yplt = np.append(yplt, yp)
                elif i == len(y) - 2:
                    # Right boundary: mirrored one-sided 4-point formula.
                    yp = (1 / 16) * y[int(i - 2)] - (5 / 16) * y[int(i - 1)] + (
                        15 / 16) * y[int(i)] + (5 / 16) * y[int(i + 1)]
                    yplt = np.append(yplt, y[i])
                    yplt = np.append(yplt, yp)
                else:
                    # Last sample: copied, nothing is inserted after it.
                    yplt = np.append(yplt, y[i])
            # NOTE(review): neither loop variable is used and every
            # iteration recomputes the same xplt from x; a single pass
            # looks sufficient -- confirm before simplifying.
            for i, j in zip(x, y):
                x = pd.DataFrame(x)
                # Mean of each pair of consecutive abscissas.
                xm = x.rolling(2, center=True).mean()
                # Merge the abscissas with those means and sort ascending.
                xplt = pd.concat([x, xm], axis=1).stack().sort_values(
                    ascending=True).reset_index(drop=True).to_numpy().reshape(
                        nfinal, 1)
            x = xplt
            yplt = np.array([yplt], float)
            y = yplt.reshape(nfinal, 1)
            # NOTE(review): the nesting of this overwrite loop was
            # reconstructed from whitespace-collapsed source; it is placed
            # inside the scale loop here, but it may belong after it --
            # confirm against the original file.
            for i, item in enumerate(x):
                for e in range(len(error_x)):
                    if item == error_x[e]:
                        # Exact float comparison of abscissas.
                        y[i] = error_y[e]
        ma_reconstructed = pd.DataFrame(np.column_stack((x, y)),
                                        columns=['x', 'y'])
        return ma_reconstructed
def pph_interpolation(x, y, final_scale):
    """
    Refine the samples ``(x, y)`` with the PPH 4-point subdivision scheme.

    Parameters
    ----------
    x : x values of the coordinates. The initial scale is
        ``int(log2(len(x)))``.
    y : y values of the coordinates. The per-scale reshape only succeeds
        when ``len(y) == 2**scale + 1``.
    final_scale : Final scale to reach.

    Raises
    ------
    Exception
        final_scale should be greater than initial_scale.

    Returns
    -------
    pph_interpolation : Coordinates of the PPH 4 points interpolatory
        subdivision scheme at the level of discretization indicated over a
        regular grid. To build the values x=1 and x=n-1, Lagrange
        interpolatory subdivision scheme S(1,3) and S(3,1) respectively
        are applied. Data Frame type with columns ``'x'`` and ``'y'``.

    """
    x = np.array(x, float)
    y = np.array(y, float)
    final_scale = int(final_scale)
    initial_scale = int(np.log2(len(x)))
    if final_scale <= initial_scale:
        raise Exception(
            'final_scale should be greater than initial_scale. The value of initial_scale is: {}'
            .format(initial_scale))

    # Each pass inserts one new value between every pair of neighbours.
    for scale in range(initial_scale, final_scale):
        # Number of samples after this refinement pass.
        nfinal = 2**(scale + 1) + 1
        yplt = np.array([], float)
        for i in range(len(y)):
            if i == 0:
                # Left boundary: one-sided 4-point formula on y[0..3].
                yp = (5 / 16) * y[int(i)] + (15 / 16) * y[int(i + 1)] - (
                    5 / 16) * y[int(i + 2)] + (1 / 16) * y[int(i + 3)]
                yplt = np.append(yplt, y[i])
                yplt = np.append(yplt, yp)
            elif 0 < i < len(y) - 2:
                # Interior: midpoint of y[i] and y[i + 1], corrected by 1/8
                # of the harmonic mean of the absolute second differences
                # on either side, weighted by the average of their
                # sign_pph values.
                sign = (
                    (pph_interpolation_sign.sign_pph(
                        y[int(i - 1)] - 2 * y[int(i)] + y[int(i + 1)]) +
                     pph_interpolation_sign.sign_pph(
                         y[int(i)] - 2 * y[int(i + 1)] + y[int(i + 2)])) / 2)
                yp = (y[int(i)] + y[int(i + 1)]) / 2 - (
                    1 / 8
                ) * sign * hmean(
                    np.array([
                        abs(y[int(i - 1)] - 2 * y[int(i)] + y[int(i + 1)]),
                        abs(y[int(i)] - 2 * y[int(i + 1)] + y[int(i + 2)])
                    ], float))
                yplt = np.append(yplt, y[i])
                yplt = np.append(yplt, yp)
            elif i == len(y) - 2:
                # Right boundary: mirrored one-sided 4-point formula.
                yp = (1 / 16) * y[int(i - 2)] - (5 / 16) * y[int(i - 1)] + (
                    15 / 16) * y[int(i)] + (5 / 16) * y[int(i + 1)]
                yplt = np.append(yplt, y[i])
                yplt = np.append(yplt, yp)
            else:
                # Last sample: copied, nothing is inserted after it.
                yplt = np.append(yplt, y[i])

        # Refine the abscissas once per scale.  This used to sit inside a
        # loop over zip(x, y) that recomputed the same result for every
        # sample.  The pandas pipeline itself is unchanged so the values
        # stay bit-identical.
        x_frame = pd.DataFrame(x)
        x_pair_means = x_frame.rolling(2, center=True).mean()
        x = pd.concat([x_frame, x_pair_means], axis=1).stack().sort_values(
            ascending=True).reset_index(drop=True).to_numpy().reshape(
                nfinal, 1)
        y = np.array([yplt], float).reshape(nfinal, 1)

    return pd.DataFrame(np.column_stack((x, y)), columns=['x', 'y'])
def main(name="res1_p4.txt"):
    """Plot the number of RTT samples found per station in a results file.

    Every line of the file that contains one of the addresses ``10.0.0.1``
    to ``10.0.0.6`` is scanned for decimal numbers.  The count of those
    numbers and their harmonic mean are recorded per matching line, and the
    counts are shown as a bar chart.  ``Nothing!`` is printed for every
    line that matches no address.

    Parameters
    ----------
    name : str, optional
        Path of the results file. Defaults to ``"res1_p4.txt"``.
    """
    station_addresses = ['10.0.0.{}'.format(n) for n in range(1, 7)]
    number_pattern = re.compile(r'\d+\.\d+')

    N = 5
    ind = np.arange(N)  # the x locations for the groups
    width = 0.35  # the width of the bars: can also be len(x) sequence

    # Only referenced by the disabled RTT plot below, but still computed so
    # that an invalid sample list is reported by hmean as before.
    stations_means = []
    number_packets = []

    # A single context-managed handle; the earlier extra open() was never
    # closed.
    with open(name, "r") as handle:
        for line in handle:
            # Addresses are tried in order and the first substring match
            # wins, as in the earlier if/elif chain.
            for address in station_addresses:
                if address in line:
                    line = line.strip()
                    # preprocess line
                    line = line.replace(address + ' : ', '')
                    # A real list (not a lazy map) so len() also works on
                    # Python 3.
                    samples = [float(token)
                               for token in number_pattern.findall(line)]
                    number_packets.append(len(samples))
                    stations_means.append(hmean(samples))
                    break
            else:
                print("Nothing!")

    # NOTE(review): the chart has N = 5 bar positions; a file with a
    # different number of matching lines makes plt.bar receive mismatched
    # lengths -- confirm the expected file layout.
    #p1 = plt.bar(ind, stations_means, width)
    #p2 = plt.bar(ind, number_packets, width, bottom=stations_means)
    p2 = plt.bar(ind, number_packets, width, color=('orange'))
    plt.ylabel('Number of packets')
    #plt.ylabel('Harmonic mean of RTT in ms ')
    plt.title('Number of packets sent from STA6 to all stations')
    #plt.title('RTT of traffic from STA1 to all stations')
    plt.xticks(ind, ('STA2', 'STA3', 'STA4', 'STA5', 'STA6'))
    plt.yticks(np.arange(0, 2001, 500))  # ticks from 0 to 2000 in steps of 500
    #plt.yticks(np.arange(0, 61, 10))
    #plt.legend((p1[0], p2[0]), ('RTT Mean', 'Num of sent packets'))
    plt.show()
import math
import statistics

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats.mstats import gmean
from scipy.stats.mstats import hmean

from media_geometrica_harmonica_quadratica import quadratic_mean

# Summary statistics of the 'age' column of census.csv.
dataset = pd.read_csv('census.csv')
dados = dataset['age']

media = sum(dados) / len(dados)            # arithmetic mean
mediana = dados.median()                   # median
moda = statistics.mode(dados)              # mode
media_harmonica = hmean(dados)             # harmonic mean
media_geometrica = gmean(dados)            # geometric mean
media_quadratica = quadratic_mean(dados)   # quadratic mean

# One value per line, in the order computed above.
for estatistica in (media, mediana, moda, media_harmonica,
                    media_geometrica, media_quadratica):
    print(estatistica)
import math
import statistics

import numpy as np
from scipy.stats.mstats import gmean
from scipy.stats.mstats import hmean


def quadratic_mean(dados):
    """Return the quadratic mean (root mean square) of ``dados``."""
    soma_dos_quadrados = sum(n * n for n in dados)
    return math.sqrt(soma_dos_quadrados / len(dados))


dados = np.array([
    150, 151, 152, 152, 153, 154, 155, 155, 155, 155,
    156, 156, 156, 157, 158, 158, 160, 160, 160, 160,
    160, 161, 161, 161, 161, 162, 163, 163, 164, 164,
    164, 165, 166, 167, 168, 168, 169, 170, 172, 173,
])

# geometric mean
print(gmean(dados))

# harmonic mean
print(hmean(dados))

# quadratic mean
print(quadratic_mean(dados))
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats.mstats import gmean, hmean

# Zr column of the 'Supp_traces' sheet.
my_dataset = pd.read_excel('Smith_glass_post_NYT_data.xlsx',
                           sheet_name='Supp_traces')

a_mean = my_dataset.Zr.mean()
g_mean = gmean(my_dataset['Zr'])
h_mean = hmean(my_dataset['Zr'])

# Report the three means, each followed by a separator line.
print('-------')
for label, value in (('arithmetic mean', a_mean),
                     ('geometric mean', g_mean),
                     ('harmonic mean', h_mean)):
    print(label)
    print("{0:.1f} [ppm]".format(value))
    print('-------')

# Density-normalised histogram of the Zr measurements.
fig, ax = plt.subplots()
ax.hist(my_dataset.Zr,
        bins='auto',
        density=True,
        edgecolor='k',
        label='Measurements Hist',
        alpha=0.8)
D_out_wn_00001, color="orange", linestyle="", marker="*", markersize="10", label="D out : white noise, Ss = 0.0001")
# NOTE(review): the line above is the tail of a call whose opening lies
# outside the visible source.
plt.legend()
plt.ylabel("Diffusivity [m^2/s]")
plt.xlabel("model")
plt.savefig("/Users/houben/Desktop/baseflow_sa/in_vs_out.png", dpi=300)
# calculate the geomean, harmean, arimean from the derived values:
from scipy.stats.mstats import gmean, hmean
# Geometric, harmonic and arithmetic mean of each series.
geomean_D_in_001 = gmean(D_in_001)
harmean_D_in_001 = hmean(D_in_001)
arimean_D_in_001 = np.mean(D_in_001)
geomean_D_in_00001 = gmean(D_in_00001)
harmean_D_in_00001 = hmean(D_in_00001)
arimean_D_in_00001 = np.mean(D_in_00001)
geomean_D_out_mhm_001 = gmean(D_out_mhm_001)
harmean_D_out_mhm_001 = hmean(D_out_mhm_001)
arimean_D_out_mhm_001 = np.mean(D_out_mhm_001)
geomean_D_out_wn_001 = gmean(D_out_wn_001)
harmean_D_out_wn_001 = hmean(D_out_wn_001)
arimean_D_out_wn_001 = np.mean(D_out_wn_001)
geomean_D_out_mhm_00001 = gmean(D_out_mhm_00001)
harmean_D_out_mhm_00001 = hmean(D_out_mhm_00001)
arimean_D_out_mhm_00001 = np.mean(D_out_mhm_00001)
geomean_D_out_wn_00001 = gmean(D_out_wn_00001)
harmean_D_out_wn_00001 = hmean(D_out_wn_00001)
# Weighted mean of `sono`, using the 'bodywt' column rounded up to whole
# numbers as the weights.
peso = df_mamiferos['bodywt']
peso = np.ceil(peso)
mp = (sono * peso).sum() / peso.sum()
mp
# Geometric mean of `sono`.
from scipy.stats.mstats import gmean
mg = gmean(sono)
mg
# Note that the geometric mean tends to be smaller than the arithmetic mean.
# Harmonic mean of `sono`.
from scipy.stats.mstats import hmean
mh = hmean(sono)
mh
# Note that the harmonic mean tends to be smaller than the geometric mean:
# the harmonic mean is the smallest of the three and the arithmetic mean is
# the largest.
# The data must not contain zero for the harmonic mean, because it would
# require a division by zero.
# Another way to compute the harmonic mean, using a different library.
import statistics as sss
sss.harmonic_mean(sono)
def engine(args):
    """Time hashing, encryption or decryption over a range of file sizes.

    For every size from ``args.start_size`` to ``args.end_size`` (step
    ``args.step_size``, in MB) the operation selected by ``args.function``
    ("h", "e" or "d") is run ``args.ntrial`` times.  The trials are
    averaged with the mean selected by ``args.mean`` ("AM", "GM", anything
    else selects the harmonic mean) and reported.  With ``args.display``
    the averaged times are plotted against the sizes.
    """
    check_condition(args)
    if not args.remove:
        create_file.engine(args)

    sizes = list(range(args.start_size, args.end_size + 1, args.step_size))
    sizes_in_str = ["{}MB".format(size) for size in sizes]
    TIMER = Timer()

    if args.function == "h":
        NAME_OF_FUNCTION = "hash"
    elif args.function == "e":
        NAME_OF_FUNCTION = "encryption"
    else:
        NAME_OF_FUNCTION = "decryption"

    # Encryption and decryption write a scratch file that is removed after
    # every trial.
    writes_scratch_file = args.function in ["e", "d"]

    elapsed_time = []
    for size, size_str in zip(sizes, sizes_in_str):
        file_name = FILE_NAME_FORMAT.format(size_str)
        # In remove mode the input file is created on demand and deleted
        # again after the trials.
        if args.remove and not os.path.isfile(file_name):
            create_file.create_a_file(file_name, size * 1024 ** 2,
                                      3 * 1024 ** 2)

        tmp_elapsed_time = []
        for _ in range(args.ntrial):
            TIMER.start(size_str)
            if args.function == "e":
                __try_encrypting_file__(file_name, ".tmp")
            elif args.function == "d":
                __try_decrypting_file__(file_name, ".tmp")
            elif args.function == "h":
                __hash_a_file__(file_name)
            TIMER.end(size_str)
            if writes_scratch_file and os.path.isfile(".tmp"):
                os.remove(".tmp")
            tmp_elapsed_time.append(TIMER.get(size_str))

        if args.remove and os.path.isfile(file_name):
            os.remove(file_name)

        if args.mean == "AM":
            averaged = mean(tmp_elapsed_time)
        elif args.mean == "GM":
            averaged = gmean(tmp_elapsed_time)
        else:
            averaged = hmean(tmp_elapsed_time)
        elapsed_time.append(averaged)

        print("Elapsed time for {} of {} is {:.2f}s".format(
            NAME_OF_FUNCTION, file_name, elapsed_time[-1]))
        print("Variation of elapsed time is {:.2f}".format(
            variation(tmp_elapsed_time)))
        print("-----------------------------------------------------------")

    print("Variation of all elapsed time is {:.2f}".format(
        variation(elapsed_time)))

    if not args.display:
        return

    plt.plot(sizes, elapsed_time)
    plt.xticks(sizes[::4], sizes_in_str[::4])
    plt.ylabel("{} time (s)".format(NAME_OF_FUNCTION))
    plt.xlabel("Size of data (MB)")

    # Replace the ticks set above with roughly `nticks` evenly spaced ones.
    nticks = 5
    mintick = args.start_size
    maxtick = max(sizes)
    steptick = __round_int__((maxtick - mintick) // nticks, 1)
    ticks = range(mintick, maxtick + 1, steptick)
    labelticks = ["{}".format(tick) for tick in ticks]
    plt.xticks(ticks, labelticks)
    plt.show()
from functools import reduce

import numpy as np
from scipy.stats.mstats import gmean, hmean

# Geometric and harmonic mean of a small sample.
x = gmean([1, 3, 9])
y = hmean([1, 3, 9])


# Small callables
def lm1(a):
    """Return ``a`` plus ten."""
    return a + 10


def lm2(a, b):
    """Return the product of ``a`` and ``b``."""
    return a * b


print(lm1(5))
print(lm2(2, 3))


def lm3(n):
    """Return a function that multiplies its argument by ``n``."""
    def multiply(a):
        return a * n
    return multiply


lm4 = lm3(4)
print(lm4(11))

li = [5, 7, 22, 97, 54, 62, 77, 23, 73, 61, 73]
# Every element doubled.
final_list = [value * 2 for value in li]
# Sum of all elements.
sum1 = sum(li)
# XOR of all elements: values that occur an even number of times cancel.
odd_time = reduce((lambda acc, value: acc ^ value), li)
print(odd_time)
list2 = ["geeks", "geeg", "keek", "practice", "aa"]
# Finish and save the current figure (set up before this part of the
# script) as the histogram of the kf values.
plt.tick_params(rotation=20)
plt.title("histogramm of kf values")
plt.savefig(CWD + "/kf_values" + "/hist_kf.png", dpi=300)
plt.close()

# Sorted kf values on a logarithmic y axis.
plt.semilogy(sorted(kf_list))
plt.title("kf values")
plt.savefig(CWD + "/kf_values" + "/plot_kf.png", dpi=300)
plt.close()

# Density estimate of the kf values with a rug plot.
sns.distplot(kf_list, hist=False, rug=True)
plt.title("kerne density of kf values")
plt.tick_params(rotation=45)
plt.savefig(CWD + "/kf_values" + "/kde_kf.png", dpi=300)

# Write the three means followed by the raw values.  The context manager
# closes the file even when one of the mean computations or writes raises;
# `kf_list_file` stays bound to the closed file afterwards, as before.
with open(CWD + "/kf_values" + "/kf_list_file.txt", "w") as kf_list_file:
    kf_list_file.write("geomean, harmean, arimean\n")
    kf_list_file.write(
        ", ".join(
            str(value)
            for value in (gmean(kf_list), hmean(kf_list), np.mean(kf_list))
        ) + "\n")
    kf_list_file.write("list of kf values\n")
    kf_list_file.write("\n".join([str(i) for i in kf_list]))

# Set a start value for "overall_count", which is the index. It is
# recommended to use as many digits as the number of new ogs models that
# will be generated, to end up with a consistent naming. I.e. if more than
# 100, take 1001 as start.
start = 1001
overall_count = start
# -------------------- model configurations
# Specify the PROCESS and PRIMARY_VARIABLE
pcs_type_flow = "GROUNDWATER_FLOW"
var_name_flow = "HEAD"
# Give it a name.
import matplotlib.pyplot as plt
from scipy.stats.mstats import gmean, hmean

# Arithmetic, harmonic and geometric mean of log-normal samples of growing
# size.  `arimean` is expected to exist already; it is not created here.
harmean = []
geomean = []
realizations = 10

# Parameters of the log-normal distribution and the sample-size increment.
mean = -10
sigma = 1
size = 100

for i in range(realizations):
    #np.random.seed(1337)
    # NOTE(review): the first realization (i == 0) draws an empty sample.
    kf_list = np.random.lognormal(mean=mean, sigma=sigma, size=i * size)
    arimean.append(np.mean(kf_list))
    harmean.append(hmean(kf_list))
    geomean.append(gmean(kf_list))

# Sample size of every realization: 0, size, 2 * size, ...
x = list(range(0, realizations * size, size))
#plt.plot(x, arimean, label="arimean")
#plt.plot(x, harmean, label="harmean")
#plt.plot(x, geomean, label="geomean")
for series, series_label in ((arimean, "arimean"),
                             (harmean, "harmean"),
                             (geomean, "geomean")):
    plt.semilogy(x, series, label=series_label)
# Sorted values of the last (largest) sample.
plt.semilogy(sorted(kf_list), label="kf values")
#plt.ylim(0.00004,0.00005)
plt.ylabel("mean of samples")
import pandas as pd
import scipy.stats.mstats as sc

data = pd.read_csv('data/CARS.csv')

# (first line prefix, column name, text appended to the last line)
report_layout = (
    ("SPEED:\nArithmetic Mean = ", 'speed', "\n"),
    ("Distance:\nArithmetic Mean = ", 'dist', ""),
)

# Arithmetic, geometric and harmonic mean of each column.
for arithmetic_prefix, column, closing in report_layout:
    print(arithmetic_prefix + str(data[column].mean()))
    print("Geometric Mean = " + str(sc.gmean(data[column])))
    print("Harmonic Mean = " + str(sc.hmean(data[column])) + closing)
def main():
    """Print a table of precision, recall and F1 per species and method.

    Reads the YAML file given with ``--species``, looks up one entry per
    (category, name) pair, parses each entry's configuration file and
    pulls precision and recall from the statistics files it lists.  F1 is
    the harmonic mean of the two.  The table is printed with ``tabulate``
    in the format chosen with ``--format``.

    NOTE(review): the indentation of this function was reconstructed from
    whitespace-collapsed source; confirm the nesting against the original.
    """
    parser = argparse.ArgumentParser(
        __doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--species",
                        type=argparse.FileType("r"),
                        required=True)
    # NOTE(review): args.out is never used below; the table goes to
    # standard output through print().
    parser.add_argument("--out",
                        type=argparse.FileType("w"),
                        default=sys.stdout)
    parser.add_argument("--format",
                        choices=tabulate._table_formats.keys(),
                        default="latex")
    parser.add_argument("--level",
                        default="transcript",
                        choices=line_correspondence.keys())
    args = parser.parse_args()
    # NOTE(review): yaml.load is called without an explicit Loader; on
    # untrusted input this is unsafe -- consider yaml.safe_load.
    species = yaml.load(args.species)
    # Names are the SPECIES
    names = species.pop("names")
    # Categories are Real vs Simulated data
    categories = species.pop("categories")
    # We want to create a table of the form
    # Species | Method || Category || Category || ..
    # species | Name | Prec | Rec | F1 ||Prec | Rec | F1 || ..
    # Collect, category by category, the key of the single entry that
    # matches each (name, category) pair.
    name_ar = []
    for category in categories:
        for name in names:
            # print(category, name)
            key = [
                _ for _ in species.keys()
                if isinstance(species[_], dict) and species[_]["name"] == name
                and species[_]["category"] == category
            ]
            # print(key)
            # Exactly one entry must match.
            assert len(key) == 1
            key = key.pop()
            name_ar.append(key)
    # Regroup the flat key list into rows so that it can be indexed as
    # name_ar[category index, name index] below.
    name_ar = np.array(list(grouper(name_ar, ceil(len(species) / 2), None)))
    # NOTE(review): the last header element is itself a list (append, not
    # extend) -- confirm this is what tabulate is meant to receive.
    header = [""] + list(categories)
    header.append(["Species", "Aligner", "Method"] +
                  ["Precision", "Recall", "F1"] * 2)
    rows = []
    # print(rows)
    key = None
    # Filled from the first configuration that is parsed and reused for
    # every species when the rows are built.
    methods = None
    divisions = None
    for yrow, name in enumerate(names):
        # (method, division) -> {category: (precision, recall, f1)}
        new_rows = OrderedDict()
        for xrow, category in enumerate(categories):
            try:
                key = name_ar[xrow, yrow]
            except IndexError:
                raise IndexError(name_ar, xrow, yrow)
            if key is None:
                raise IndexError(name_ar, xrow, category, yrow, name)
                # continue
            with open(species[key]["configuration"]) as configuration:
                options = parse_configuration(configuration,
                                              prefix=species[key]["folder"])
            # Assembler
            if methods is None:
                methods = list(options["methods"])
                divisions = list(options["divisions"])
            for method in options["methods"]:
                # Aligner
                for division in options["divisions"]:
                    meth_key = (method, division)
                    if meth_key not in new_rows:
                        new_rows[meth_key] = OrderedDict()
                    try:
                        orig, filtered = options["methods"][method][
                            division]
                    except TypeError:
                        # The entry cannot be unpacked into two paths:
                        # warn and store -10 placeholders.
                        warnings.warn(
                            "Something went wrong for {}, {}; continuing".
                            format(method, division))
                        new_rows[meth_key][category] = (-10, -10, -10)
                        continue
                    # NOTE(review): both files are opened without being
                    # closed explicitly.
                    orig_lines = [line.rstrip() for line in open(orig)]
                    filtered_lines = [
                        line.rstrip() for line in open(filtered)
                    ]
                    # One-element list: only the line for the requested
                    # level is read; `index` is not used.
                    for index, line_index in enumerate(
                            [line_correspondence[args.level]]):
                        # Precision is the second number after the colon
                        # in the `orig` file; recall is the first number
                        # after the colon in the `filtered` file.
                        precision = float(orig_lines[line_index].split(":")
                                          [1].split()[1])
                        recall = float(filtered_lines[line_index].split(
                            ":")[1].split()[0])
                        try:
                            # F1 is the harmonic mean of precision and
                            # recall.
                            f1 = hmean(np.array([precision, recall]))
                        except TypeError as exc:
                            raise TypeError("\n".join([
                                str(_) for _ in [(
                                    precision,
                                    type(precision)), (recall,
                                                       type(recall)), exc]
                            ]))
                        # print(level, method, division, (precision, recall, f1))
                        new_rows[meth_key][category] = (precision, recall,
                                                        f1)
        # Build the rows for this species.  The species name appears on
        # its first row only, and each division name on the first row of
        # its group.
        begun = False
        for division in divisions:
            division_done = False
            for method in methods:
                meth_key = (method, division)
                if not begun:
                    row = [name]
                    begun = True
                else:
                    row = [""]
                if not division_done:
                    row.append(division)
                    division_done = True
                else:
                    row.append("")
                row.append(method)
                # row.append(meth_key)
                # print(new_rows[meth_key].keys())
                for category in new_rows[meth_key]:
                    row.extend(new_rows[meth_key][category])
                rows.append(row)
    print(tabulate.tabulate(rows, headers=header, tablefmt=args.format))
    # print(categories)
    return