def test_Norm_PQN_Values_Ref_Sample():
    """Probabilistic Quotient Normalization, using the first sample of the dataset as the reference sample."""
    imputed = sca.NaN_Imputation(MetAna_O2, 0)
    result = sca.Norm_PQN(imputed, ref_sample=imputed.columns[0])
    assert_frame_equal(MetAna_PQN, result)
else: df = pd.read_csv(filename, header=0, sep=',', index_col=0).rename_axis('sample', axis='columns') df = df.rename_axis('mz/rt', axis='index') # these may exist, repeating information df = df.drop(columns=["mz", "rt"], errors='ignore') if not has_labels: # force labels df = mtl.add_labels(df, labels=['KO', 'WT']) return df aligned_all_neg = reading_MetAna_file('aligned_1ppm_min2_1ppm_negative.csv', has_labels=True) preprocessed = sca.NaN_Imputation(aligned_all_neg, 0).pipe(sca.ParetoScal) dist_euclidian = dist.pdist(preprocessed.T, metric='euclidean') Z_euc = hier.linkage(dist_euclidian, method='average') # global_dist, discrims = ma.dist_discrim(aligned_all_neg, Z_euc, method='average') # print(global_dist, discrims) def test_dist_dicrim_average(): global_dist, discrims = ma.dist_discrim(aligned_all_neg, Z_euc, method='average') # assert str(discrim_ave[0]) == str(np.array(list(discrim_ave[1].values())).mean()) assert global_dist == approx(np.array(list(discrims.values())).mean())
def test_Norm_RF_Values():
    """Normalization by a single reference feature (arbitrarily chosen: 301/2791.68)."""
    imputed = sca.NaN_Imputation(MetAna_O2, 0)
    normalized = sca.Norm_Feat(imputed, "301/2791.68")
    # Reference frame MetAna_N is stored at 1/1000 scale.
    assert_frame_equal(MetAna_N, normalized * 1000)
def test_Norm_TI_Values():
    """Normalization by the total peak area."""
    # Reference frame MetAna_NTI is stored at 1/1000 scale.
    normalized = sca.Norm_TotalInt(sca.NaN_Imputation(MetAna_O2, 0))
    assert_frame_equal(MetAna_NTI, normalized * 1000)
def test_NaN_MinValue():
    """Missing values are imputed with half of the global minimum of the dataset."""
    filled = sca.NaN_Imputation(MetAna_O2, minsample=0)
    half_min = MetAna_O2.min().min() / 2
    # Average over the originally-missing cells only; they should all equal half_min.
    mean_over_missing = filled[MetAna_O2.isnull()].mean().mean()
    assert mean_over_missing == approx(half_min)
def test_NaN_FeatRemove():
    """A feature with more than 50% missing samples is removed during imputation."""
    kept = sca.NaN_Imputation(MetAna_O2, 0.5 + 0.00001)
    expected_count = len(MetAna_I2T.columns) - 1
    assert len(kept) == expected_count
def test_ParetoScal_values():
    """Pareto scaling after minimum-value imputation matches the reference frame."""
    scaled = sca.ParetoScal(sca.NaN_Imputation(MetAna_O2, 0))
    assert_frame_equal(MetAna_P, scaled)
def test_Norm_QN_Values():
    """Quantile Normalization with the mean as the reference sample type."""
    imputed = sca.NaN_Imputation(MetAna_O2, 0)
    quantile_normalized = sca.Norm_Quantile(imputed, ref_type='mean')
    assert_frame_equal(MetAna_QN, quantile_normalized)
def test_glog_values(): glog = sca.glog(MetAna_O2_I) # assert str(MetAna_G.data) == str(glog.data) assert_frame_equal(MetAna_G, glog) def test_glog_lamb(): lamb = 100000 y = MetAna_O2_I.copy() y = np.log2((y + (y**2 + lamb**2)**0.5)/2) assert (y == (sca.glog(MetAna_O2_I, lamb = 100000))).all().all() # Tests for ParetoScal MetAna_O2_I = sca.NaN_Imputation(MetAna_O2, 0) def test_ParetoScal_values(): Imputated = sca.NaN_Imputation(MetAna_O2, 0) pareto = sca.ParetoScal(Imputated) assert_frame_equal(MetAna_P, pareto) # Tests for MeanCentering # If Pareto and AutoScale work, MeanCentering should work # Tests for AutoScal MetAna_AS = reading_MetAna_file('MetAnalyst/MetAna_AutoScal.csv') # Auto Scaling only def test_AutoScal_values():