def test_read_open_ms(self):
    """Read an OpenMS table raw, normalized, and sparse, and check each result.

    Checks that:
    * 'MZ' and 'RT' are present in the feature metadata and the second
      feature carries the expected values,
    * with ``normalize=10000`` every entry of ``exp.data.sum(axis=1)``
      is 10000,
    * with ``sparse=True`` the experiment reports itself as sparse.
    """
    exp = ca.read_ms(self.openms_csv, normalize=None, data_file_type='openms')

    # test we get the MZ and RT correct
    self.assertIn('MZ', exp.feature_metadata)
    self.assertIn('RT', exp.feature_metadata)
    # MZ/RT are non-integer floats: compare with a tolerance rather than
    # exact equality, as test_read_open_ms_samples_rows does.
    self.assertAlmostEqual(exp.feature_metadata['MZ'].iloc[1], 118.0869)
    self.assertAlmostEqual(exp.feature_metadata['RT'].iloc[1], 23.9214)

    # test normalizing
    exp = ca.read_ms(self.openms_csv, normalize=10000, data_file_type='openms')
    assert_array_almost_equal(exp.data.sum(axis=1), np.ones(exp.shape[0]) * 10000)

    # test load sparse
    exp = ca.read_ms(self.openms_csv, sparse=True, normalize=None, data_file_type='openms')
    self.assertEqual(exp.sparse, True)
def test_read_gnps_ms(self):
    """Read a GNPS MS2 export with its sample metadata: raw, normalized, sparse."""
    # arguments shared by all three loads below
    common = dict(sample_metadata_file=self.gnps_map, data_file_type='gnps-ms2')

    # raw load of the gnps exported table: spot-check one data entry
    raw = ca.read_ms(self.gnps_table, normalize=None, **common)
    self.assertEqual(raw.data[2, 3], 139692)

    # normalized load: every entry of data.sum(axis=1) should be 10000
    normalized = ca.read_ms(self.gnps_table, normalize=10000, **common)
    expected_sums = np.ones(normalized.shape[0]) * 10000
    assert_array_almost_equal(normalized.data.sum(axis=1), expected_sums)

    # sparse load: the experiment must report itself as sparse
    as_sparse = ca.read_ms(self.gnps_table, normalize=None, sparse=True, **common)
    self.assertEqual(as_sparse.sparse, True)
def test_get_spurious_duplicates(self):
    """Check how many features get_spurious_duplicates() keeps for several tolerances."""
    # load an mzmine2 metabolomics table, and associated gnps clusterinfo file
    table = ca.read_ms(
        self.mzmine2_csv,
        sample_metadata_file=self.gnps_map,
        data_file_type='mzmine2',
        use_gnps_id_from_AllFiles=False,
        normalize=None,
    )
    # get rid of the all 0s metabolite (to get rid of std=0 warning)
    table = table.filter_sum_abundance(0.1)

    found = table.get_spurious_duplicates()
    # no samples filtered away
    self.assertEqual(found.shape[0], 6)
    # default parameters don't identify any suspicious features
    self.assertEqual(found.shape[1], 0)

    # (keyword arguments, expected size of the second axis of the result),
    # evaluated in this order
    scenarios = (
        ({'mz_tolerance': 100, 'rt_tolerance': 0.5}, 0),
        ({'rt_tolerance': 1}, 0),
        ({'mz_tolerance': 100, 'rt_tolerance': 1}, 2),
        ({'mz_tolerance': 100, 'rt_tolerance': 1, 'corr_thresh': 0.2}, 4),
    )
    for params, n_expected in scenarios:
        found = table.get_spurious_duplicates(**params)
        self.assertEqual(found.shape[1], n_expected)
def test_read_open_ms_samples_rows(self):
    """Read an OpenMS table with sample_in_row=True and check MZ/RT metadata."""
    loaded = ca.read_ms(
        self.openms_samples_rows_csv,
        normalize=None,
        sample_in_row=True,
        data_file_type='openms',
    )

    # both columns must be present in the feature metadata
    for column in ('MZ', 'RT'):
        self.assertIn(column, loaded.feature_metadata)

    # the second feature must carry the expected MZ and RT values
    mz_second = loaded.feature_metadata['MZ'].iloc[1]
    self.assertAlmostEqual(mz_second, 118.0869)
    rt_second = loaded.feature_metadata['RT'].iloc[1]
    self.assertAlmostEqual(rt_second, 23.9214)
def test_read_biom_ms(self):
    """Read a biom table and check that MZ and RT end up in the feature metadata.

    Checks that 'MZ' and 'RT' are present in the feature metadata, that the
    second feature's MZ is 899.53 and that the first feature's RT is 314.
    """
    # load a biom table with MZ/RT in featureID, and associated gnps clusterinfo file
    exp = ca.read_ms(
        self.ms_biom_table,
        sample_metadata_file=self.gnps_map,
        data_file_type='biom',
        use_gnps_id_from_AllFiles=False,
        normalize=None,
    )
    self.assertIn('MZ', exp.feature_metadata)
    self.assertIn('RT', exp.feature_metadata)
    # 899.53 is a non-integer float: compare with a tolerance rather than
    # exact equality, as test_read_open_ms_samples_rows does for MZ/RT.
    self.assertAlmostEqual(exp.feature_metadata['MZ'].iloc[1], 899.53)
    self.assertEqual(exp.feature_metadata['RT'].iloc[0], 314)
def test_read_mzmine2_ms_with_idstr(self):
    """Read an mzmine2 table whose sample ids carry extra text, using cut_sample_id_sep='_'.

    data_file_type is not passed here, so read_ms uses its default.
    """
    # load an mzmine2 metabolomics table with the sampleids inflated with additional info
    loaded = ca.read_ms(
        self.mzmine2_with_idstr_csv,
        sample_metadata_file=self.gnps_map,
        use_gnps_id_from_AllFiles=False,
        cut_sample_id_sep='_',
        normalize=None,
    )

    # feature metadata: MZ of the second feature, RT of the first
    mz_column = loaded.feature_metadata['MZ']
    self.assertEqual(mz_column.iloc[1], 200)
    rt_column = loaded.feature_metadata['RT']
    self.assertEqual(rt_column.iloc[0], 1)

    # sample metadata was attached to the loaded samples
    self.assertEqual(loaded.sample_metadata['field2'][0], 'f')

    # data matrix: one spot value and the overall shape
    self.assertEqual(loaded.data[2, 1], 35900)
    self.assertEqual(loaded.data.shape, (6, 6))
def test_read_mzmine2_ms(self):
    """Read an mzmine2 table with sample metadata and spot-check metadata and data."""
    # load an mzmine2 metabolomics table, and associated gnps clusterinfo file
    loaded = ca.read_ms(
        self.mzmine2_csv,
        sample_metadata_file=self.gnps_map,
        data_file_type='mzmine2',
        use_gnps_id_from_AllFiles=False,
        normalize=None,
    )

    # both columns must be present in the feature metadata
    for column in ('MZ', 'RT'):
        self.assertIn(column, loaded.feature_metadata)

    # MZ of the second feature, RT of the first
    mz_column = loaded.feature_metadata['MZ']
    self.assertEqual(mz_column.iloc[1], 200)
    rt_column = loaded.feature_metadata['RT']
    self.assertEqual(rt_column.iloc[0], 1)

    # one spot value from the data matrix
    self.assertEqual(loaded.data[2, 1], 35900)
def test_merge_similar_features(self):
    """Check merge_similar_features() at default, moderate, and wide tolerances."""

    def merge_ids(result, feature_id):
        # value of the '_calour_merge_ids' column for one feature of the result
        return result.feature_metadata.at[feature_id, '_calour_merge_ids']

    # load an mzmine2 metabolomics table, and associated gnps clusterinfo file
    table = ca.read_ms(
        self.mzmine2_csv,
        sample_metadata_file=self.gnps_map,
        data_file_type='mzmine2',
        use_gnps_id_from_AllFiles=False,
        normalize=None,
    )

    # no merging since features are far away
    untouched = table.merge_similar_features()
    self.assertEqual(untouched.shape[1], 6)

    # a little merging
    some_merged = table.merge_similar_features(mz_tolerance=100, rt_tolerance=1)
    self.assertEqual(some_merged.shape[1], 3)
    self.assertEqual(merge_ids(some_merged, 85022), '85022;93277')

    # a lot of merging
    most_merged = table.merge_similar_features(mz_tolerance=400, rt_tolerance=6)
    self.assertEqual(most_merged.shape[1], 2)
    self.assertEqual(merge_ids(most_merged, 121550), '121550')
def test_filter_mz_rt(self):
    """Check filter_mz_rt() when filtering by MZ, by RT, and by both together.

    Array-valued expectations are compared with ``npt.assert_array_equal``.
    ``assertEqual`` on a NumPy array only works for a single-element array,
    and raises an ambiguous-truth ``ValueError`` instead of a readable
    failure for any other length.
    """
    # load an mzmine2 metabolomics table, and associated gnps clusterinfo file
    exp = ca.read_ms(
        self.mzmine2_csv,
        sample_metadata_file=self.gnps_map,
        data_file_type='mzmine2',
        use_gnps_id_from_AllFiles=False,
        normalize=None,
    )

    # mz filtering
    res = exp.filter_mz_rt(100)
    self.assertEqual(len(res.feature_metadata), 1)
    npt.assert_array_equal(res.feature_metadata['MZ'].values, [100])

    res = exp.filter_mz_rt([100, 201])
    self.assertEqual(len(res.feature_metadata), 1)
    npt.assert_array_equal(res.feature_metadata['MZ'].values, [100])

    res = exp.filter_mz_rt([100, 201], mz_tolerance=1)
    self.assertEqual(len(res.feature_metadata), 2)
    npt.assert_array_equal(res.feature_metadata['MZ'].values, [100, 200])

    res = exp.filter_mz_rt([100, 201], negate=True)
    self.assertEqual(len(res.feature_metadata), 5)

    # rt filtering
    res = exp.filter_mz_rt(rt=[1, 2.5])
    self.assertEqual(len(res.feature_metadata), 1)
    npt.assert_array_equal(res.feature_metadata['RT'].values, [1])

    res = exp.filter_mz_rt(rt=[1, 2.5], rt_tolerance=0.5)
    self.assertEqual(len(res.feature_metadata), 3)
    npt.assert_array_equal(res.feature_metadata['RT'].values, [1, 2, 3])

    # complex - both mz and rt
    res = exp.filter_mz_rt([101, 200, 400, 505], [1, 3, 4, 5], mz_tolerance=2)
    self.assertEqual(res.shape[1], 2)