def test_special_peaklists(self):
    """Align six identical peaklists under two different alignment settings.

    Each peaklist is built from ten m/z values of 10 and ten intensities of
    1. For both settings the assertions expect a single merged peak at
    m/z 10, an intensity column sum of 6 and an ``intra_count`` column sum
    of 60.
    """
    peaklists = [
        PeakList('peaklist_' + str(idx), np.ones(10) * 10, np.ones(10))
        for idx in range(6)
    ]

    # First a loose tolerance with non-fixed blocks, then an extremely tight
    # tolerance with single-peak fixed blocks.
    settings = (
        dict(ppm=2.0, block_size=5, fixed_block=False, edge_extend=10),
        dict(ppm=1e-10, block_size=1, fixed_block=True, edge_extend=1),
    )

    for options in settings:
        try:
            matrix = align_peaks(peaklists, ncpus=2, **options)
        except Exception as err:
            self.fail('alignment failed: ' + str(err))

        merged_mz = matrix.to_peaklist('merged').mz
        self.assertTrue(np.allclose(merged_mz, [10.]))

        intensity_sums = np.sum(matrix.intensity_matrix, axis=0)
        self.assertTrue(np.allclose(intensity_sums, [6]))

        intra_sums = np.sum(matrix.attr_matrix('intra_count'), axis=0)
        self.assertTrue(np.allclose(intra_sums, [60]))
def average_replicate_peaklists(pls, ppm, min_peaks, rsd_thres=None, block_size=5000, ncpus=None):
    """Align replicate peaklists and collapse them into one merged peaklist.

    :param pls: peaklists to merge; each item must expose an ``ID`` string
    :param ppm: tolerance forwarded to ``align_peaks``
    :param min_peaks: a peak gets ``present_flag`` set when the value
        reported by ``pm.present`` is at least this number
    :param rsd_thres: when not None, an ``rsd_flag`` is added that is True
        only for peaks whose RSD is not NaN and strictly below this value
    :param block_size: forwarded to ``align_peaks``
    :param ncpus: forwarded to ``align_peaks``
    :return: the merged peaklist produced by ``pm.to_peaklist``
    """
    # Pass the options by keyword. The other callers of align_peaks use the
    # keyword order ppm, block_size, fixed_block, edge_extend, ncpus, so a
    # positional ``ncpus`` would presumably be bound to ``fixed_block``.
    # NOTE(review): confirm against the align_peaks signature.
    pm = align_peaks(pls, ppm=ppm, block_size=block_size, ncpus=ncpus)

    # Merged ID = common prefix + "_"-joined remainders (extension stripped).
    prefix = os.path.commonprefix([p.ID for p in pls])
    suffixes = [str(p.ID.replace(prefix, "").split(".")[0]) for p in pls]
    merged_id = "{}{}".format(prefix, "_".join(suffixes))

    pl = pm.to_peaklist(ID=merged_id)
    if "snr" in pm.attributes:
        pl.add_attribute("snr", pm.attr_mean_vector("snr"), on_index=2)
    pl.add_attribute("rsd", pm.rsd(flagged_only=False), on_index=5)
    pl.add_attribute("present_flag", pm.present >= min_peaks, is_flag=True)

    if rsd_thres is not None:
        # Build a concrete list: on Python 3 ``map`` returns a lazy iterator,
        # which is not a usable sequence of flag values.
        rsd_flag = [
            not np.isnan(x) and x < rsd_thres
            for x in pl.get_attribute("rsd", flagged_only=False)
        ]
        pl.add_attribute("rsd_flag", rsd_flag, flagged_only=False, is_flag=True)
    return pl
def test_peak_matrix_portal(self):
    """Round-trip a masked, flagged peak matrix through the HDF5 portal.

    The matrix is aligned from the fixture peaklists, masked on the ``qc``
    tag and given two flags before it is saved and loaded again. Every
    compared property of the reloaded matrix must match the original.
    """
    hdf5_path = '.test_peak_matrix.hdf5'

    original = align_peaks(self._createPeaklists(), ppm=2.0, block_size=10, ncpus=2)
    original.mask_tags('qc')

    # Flag patterns are sized from the full (unflagged) number of peaks.
    pnum = original.full_shape[1]
    odd_values = ([0, 1] * int(pnum / 2. + 1))[:pnum]
    original.add_flag('odd_flag', odd_values)
    qua_values = ([0, 0, 0, 1] * int(pnum / 4. + 1))[:pnum]
    original.add_flag('qua_flag', qua_values, flagged_only=False)

    save_peak_matrix_as_hdf5(original, hdf5_path)
    restored = load_peak_matrix_from_hdf5(hdf5_path)

    # Shapes and attribute names.
    self.assertEqual(original.shape, restored.shape)
    self.assertEqual(original.full_shape, restored.full_shape)
    self.assertTupleEqual(original.attributes, restored.attributes)

    # Numeric matrices.
    for matrix_name in ('mz_matrix', 'intensity_matrix'):
        self.assertTrue(
            np.allclose(getattr(original, matrix_name), getattr(restored, matrix_name)))
    self.assertTrue(
        np.allclose(original.attr_matrix('snr'), restored.attr_matrix('snr')))

    # Non-numeric attribute and presence matrix are compared element-wise.
    self.assertTrue(
        np.all(original.attr_matrix('lab') == restored.attr_matrix('lab')))
    present_before = original.property('present_matrix', flagged_only=False)
    present_after = restored.property('present_matrix', flagged_only=False)
    self.assertTrue(np.all(present_before == present_after))

    # Tags, mask and flags.
    self.assertEqual(original.peaklist_tag_types, restored.peaklist_tag_types)
    self.assertEqual(original.peaklist_tag_values, restored.peaklist_tag_values)
    self.assertTrue(np.all(original.mask == restored.mask))
    for flag_name in ('odd_flag', 'qua_flag'):
        self.assertTrue(
            np.all(original.flag_values(flag_name) == restored.flag_values(flag_name)))
    self.assertTrue(np.all(original.flags == restored.flags))
def test_ppm(self):
    """Check alignment at the two extremes of the ppm tolerance.

    With ``ppm=1e+10`` the assertions expect one merged peak whose m/z is
    the mean of the per-peaklist means. With ``ppm=1e-10`` they expect
    every input peak to stay separate (``intra_count`` column sums of 1).
    """
    peaklists = self._createPeakLists()

    # --- huge tolerance: everything collapses into one peak ---
    try:
        matrix = align_peaks(peaklists, ppm=1e+10, block_size=5, fixed_block=True,
                             edge_extend=10, ncpus=2)
    except Exception as err:
        self.fail('alignment failed: ' + str(err))

    merged_mz = matrix.to_peaklist('merged').mz
    mean_of_means = np.mean([np.mean(values) for values in self.mz])
    self.assertTrue(np.allclose(merged_mz, [mean_of_means]))

    flat_intensity = matrix.intensity_matrix.flatten()
    mean_intensities = [np.mean(values) for values in self.ints]
    self.assertTrue(np.allclose(flat_intensity, mean_intensities))

    flat_counts = matrix.attr_matrix('intra_count').flatten()
    peak_counts = [len(values) for values in self.mz]
    self.assertTrue(np.allclose(flat_counts, peak_counts))

    # --- tiny tolerance: no two peaks are merged ---
    try:
        matrix = align_peaks(peaklists, ppm=1e-10, block_size=5, fixed_block=True,
                             edge_extend=10, ncpus=2)
    except Exception as err:
        self.fail('alignment failed: ' + str(err))

    merged_mz = matrix.to_peaklist('merged').mz
    all_mz = reduce(lambda x, y: x + y, [list(values) for values in self.mz])
    self.assertTrue(np.allclose(merged_mz, np.sort(all_mz)))

    intensity_sums = np.sort(np.sum(matrix.intensity_matrix, axis=0))
    all_ints = reduce(lambda x, y: x + y, self.ints)
    self.assertTrue(np.allclose(intensity_sums, np.sort(all_ints)))

    count_sums = np.sum(matrix.attr_matrix('intra_count'), axis=0)
    self.assertTrue(np.allclose(count_sums, np.ones(matrix.shape[1])))
def test_block_size(self):
    """Smoke-test alignment of the fixture peaklists with ``block_size=1``.

    The test only verifies that ``align_peaks`` does not raise; any
    exception is turned into a test failure carrying its message.
    """
    pkls = self._createPeakLists()
    try:
        align_peaks(pkls, ppm=2.0, block_size=1, fixed_block=True,
                    edge_extend=10, ncpus=2)
    # ``except X as e`` is valid on Python 2.6+ and Python 3, whereas the
    # old ``except X, e`` form is a SyntaxError on Python 3.
    except Exception as e:
        self.fail('alignment failed: ' + str(e))
def test_peak_matrix_portal(self):
    """Round-trip a tagged, flagged peak matrix through the text portal.

    Six peaklists with random peaks (four samples, two QCs) are tagged,
    aligned and flagged, written with ``save_peak_matrix_as_txt`` and read
    back with ``load_peak_matrix_from_txt``. Shapes, flags, intensities and
    tags of the reloaded matrix must match the original.
    """
    def random_mzs():
        # 100 sorted m/z values drawn uniformly from [100, 1200).
        return sorted(np.random.uniform(100, 1200, size=100))

    def random_ints():
        # 100 non-negative intensities around 100 (sd 10).
        return np.abs(np.random.normal(100, 10, size=100))

    ids = ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2')
    pkls = [PeakList(pkl_id, random_mzs(), random_ints()) for pkl_id in ids]

    # One tag tuple per peaklist, in the same order as ``ids``.
    tag_sets = (
        ('sample', Tag('compound_1', 'treatment'), Tag('1hr', 'time_point'), Tag(1, 'plate')),
        ('sample', Tag('compound_1', 'treatment'), Tag('6hr', 'time_point'), Tag(1, 'plate')),
        ('qc', Tag(1, 'plate')),
        ('sample', Tag('compound_2', 'treatment'), Tag('1hr', 'time_point'), Tag(2, 'plate')),
        ('sample', Tag('compound_2', 'treatment'), Tag('6hr', 'time_point'), Tag(2, 'plate')),
        ('qc', Tag(2, 'plate')),
    )
    for pkl, tags in zip(pkls, tag_sets):
        for tag in tags:
            pkl.tags.add_tag(tag)

    pm = align_peaks(pkls, ppm=2e+4, block_size=10, ncpus=2)

    width = pm.shape[1]
    pm.add_flag('odd_flag', ([0, 1] * int(width / 2 + 1))[:width])
    # Re-read the shape here: the original sizes the second flag from
    # ``pm.shape`` as it is after the first flag has been added.
    width = pm.shape[1]
    pm.add_flag('qua_flag', ([0, 0, 1, 1] * int(width / 4 + 1))[:width])

    txt_path = '.test_peak_matrix.txt'
    rsd_tags = ('qc', Tag('compound_1', 'treatment'), Tag('compound_2', 'treatment'))
    save_peak_matrix_as_txt(pm, txt_path, samples_in_rows=True, comprehensive=True,
                            rsd_tags=rsd_tags)
    npm = load_peak_matrix_from_txt(txt_path, samples_in_rows=True, comprehensive='auto')

    self.assertEqual(pm.shape, npm.shape)
    self.assertEqual(pm.full_shape, npm.full_shape)
    self.assertTrue(np.all(pm.flags == npm.flags))
    self.assertTrue(np.all(pm.flag_names == npm.flag_names))
    self.assertTrue(np.allclose(pm.intensity_matrix, npm.intensity_matrix))
    self.assertEqual(pm.peaklist_tag_types, npm.peaklist_tag_types)
    self.assertEqual(pm.peaklist_tag_values, npm.peaklist_tag_values)
def test_normal_alignment(self):
    """Smoke-test alignment of the fixture peaklists with default-like settings.

    The test only verifies that ``align_peaks`` does not raise; any
    exception is turned into a test failure carrying its message.
    """
    pkls = self._createPeakLists()
    try:
        align_peaks(pkls, ppm=2.0, block_size=5, fixed_block=True,
                    edge_extend=10, ncpus=2)
    # ``except X as e`` is valid on Python 2.6+ and Python 3, whereas the
    # old ``except X, e`` form is a SyntaxError on Python 3.
    except Exception as e:
        self.fail('alignment failed: ' + str(e))
def average_replicate_scans(name, pls, ppm=2.0, min_fraction=0.8, rsd_thres=30.0, block_size=5000, ncpus=None):
    """Align replicate scans and average them into a single peaklist.

    :param name: ID given to the averaged peaklist
    :param pls: scan peaklists; items whose ``size`` is 0 are dropped with a
        warning before alignment
    :param ppm: tolerance forwarded to ``align_peaks``
    :param min_fraction: when not None, a ``fraction_flag`` is added that is
        True where ``pm.present / number_of_scans >= min_fraction``
    :param rsd_thres: when not None, an ``rsd_flag`` is added that is True
        only for peaks whose RSD is not NaN and strictly below this value
    :param block_size: forwarded to ``align_peaks``
    :param ncpus: forwarded to ``align_peaks``
    :return: the averaged peaklist with ``snr``, ``snr_flag``, ``rsd`` and
        the optional flag attributes added
    """
    # A list comprehension (not ``map``) so that numpy receives a real
    # sequence on Python 3 as well as Python 2.
    emlst = np.array([x.size == 0 for x in pls])
    if emlst.any():
        # ``str.join`` replaces the Python-2-only ``string.join`` function;
        # the emitted message text is unchanged.
        empty_ids = ','.join(str(p.ID) for e, p in zip(emlst, pls) if e)
        logging.warning('No scan data available for [%s]', empty_ids)
        pls = [p for e, p in zip(emlst, pls) if not e]

    pm = align_peaks(pls, ppm=ppm, block_size=block_size, ncpus=ncpus)
    pl_avg = pm.to_peaklist(ID=name)

    # meta data: collect the non-None values of every key across all scans
    for pl in pls:
        for k, v in pl.metadata.items():
            if k not in pl_avg.metadata:
                pl_avg.metadata[k] = []
            if v is not None:
                pl_avg.metadata[k].append(v)

    pl_avg.add_attribute("snr", pm.attr_mean_vector('snr'), on_index=2)
    pl_avg.add_attribute("snr_flag", np.ones(pl_avg.full_size), flagged_only=False, is_flag=True)
    pl_avg.add_attribute("rsd", pm.rsd(flagged_only=False), on_index=5)

    if min_fraction is not None:
        pl_avg.add_attribute("fraction_flag", (pm.present / float(pm.shape[0])) >= min_fraction,
                             flagged_only=False, is_flag=True)

    if rsd_thres is not None:
        if pm.shape[0] == 1:
            logging.warning('applying RSD filter on single scan, all peaks removed')
        # Concrete list instead of a lazy ``map`` iterator (Python 3).
        rsd_flag = [
            not np.isnan(x) and x < rsd_thres
            for x in pl_avg.get_attribute("rsd", flagged_only=False)
        ]
        pl_avg.add_attribute("rsd_flag", rsd_flag, flagged_only=False, is_flag=True)
    return pl_avg
def test_special_peaklists(self):
    """Smoke-test alignment of six identical peaklists with non-fixed blocks.

    Each peaklist is built from ten m/z values of 10 and ten intensities of
    1. The test only verifies that ``align_peaks`` does not raise.
    """
    pkls = [
        PeakList('peaklist_' + str(i), np.ones(10) * 10, np.ones(10))
        for i in range(6)
    ]
    try:
        align_peaks(pkls, ppm=2.0, block_size=5, fixed_block=False,
                    edge_extend=10, ncpus=2)
    # ``except X as e`` is valid on Python 2.6+ and Python 3, whereas the
    # old ``except X, e`` form is a SyntaxError on Python 3.
    except Exception as e:
        self.fail('alignment failed: ' + str(e))
def test_single_peaklist(self):
    """Smoke-test alignment when only one peaklist is supplied.

    The peaklist has m/z values 10, 20, ..., 100 and intensities 11..20.
    The test only verifies that ``align_peaks`` does not raise.
    """
    pkls = [
        PeakList('peaklist_0', np.arange(10, 110, step=10), np.arange(10) + 11)
    ]
    try:
        align_peaks(pkls, ppm=2.0, block_size=5, fixed_block=True,
                    edge_extend=10, ncpus=2)
    # ``except X as e`` is valid on Python 2.6+ and Python 3, whereas the
    # old ``except X, e`` form is a SyntaxError on Python 3.
    except Exception as e:
        self.fail('alignment failed: ' + str(e))
def test_single_peaklist(self):
    """Align a single peaklist and expect its peaks back unchanged.

    The peaklist has m/z values 10, 20, ..., 100 and intensities 11..20.
    After alignment the merged m/z values and the one-row intensity matrix
    must match those inputs.
    """
    only_peaklist = PeakList('peaklist_0',
                             np.arange(10, 110, step=10),
                             np.arange(10) + 11)
    try:
        matrix = align_peaks([only_peaklist], ppm=2.0, block_size=5,
                             fixed_block=True, edge_extend=10, ncpus=2)
    except Exception as err:
        self.fail('alignment failed: ' + str(err))

    merged_mz = matrix.to_peaklist('merged').mz
    mz_matches = np.allclose(merged_mz, np.arange(10, 110, step=10))
    self.assertTrue(mz_matches)

    intensity_matches = np.allclose(matrix.intensity_matrix, [np.arange(10) + 11])
    self.assertTrue(intensity_matches)
def average_replicate_scans(name, pls, ppm=2.0, min_fraction=0.8, rsd_thres=30.0, rsd_on="intensity", block_size=5000, ncpus=None):
    """Align replicate scans and average them into a single peaklist.

    :param name: ID given to the averaged peaklist
    :param pls: scan peaklists; items whose ``size`` is 0 are dropped with a
        warning before alignment
    :param ppm: tolerance forwarded to ``align_peaks``
    :param min_fraction: when not None, a ``fraction_flag`` is added that is
        True where ``pm.present / number_of_scans >= min_fraction``
    :param rsd_thres: when not None, a ``<rsd label>_flag`` is added that is
        True only for peaks whose RSD is not NaN and strictly below it
    :param rsd_on: attribute the RSD is computed on; for anything other than
        ``"intensity"`` the mean of that attribute is added as well and the
        RSD attribute is named ``rsd_<rsd_on>``
    :param block_size: forwarded to ``align_peaks``
    :param ncpus: forwarded to ``align_peaks``
    :return: the averaged peaklist
    """
    is_empty = np.array([scan.size == 0 for scan in pls])
    if np.sum(is_empty) > 0:
        missing_ids = [scan.ID for empty, scan in zip(is_empty, pls) if empty]
        logging.warning('No scan data available for {}'.format(str(missing_ids)))
        pls = [scan for empty, scan in zip(is_empty, pls) if not empty]

    pm = align_peaks(pls, ppm=ppm, block_size=block_size, ncpus=ncpus)
    pl_avg = pm.to_peaklist(ID=name)

    # Gather the non-None metadata values of every key across all scans.
    for scan in pls:
        for key, value in list(scan.metadata.items()):
            if key not in pl_avg.metadata:
                pl_avg.metadata[key] = []
            if value is not None:
                pl_avg.metadata[key].append(value)

    # An extra averaged attribute shifts the later column indices by one.
    if rsd_on == "intensity":
        rsd_label, shift = "rsd", 0
    else:
        pl_avg.add_attribute(rsd_on, pm.attr_mean_vector(rsd_on), on_index=2)
        rsd_label, shift = "rsd_{}".format(rsd_on), 1

    snr_means = pm.attr_mean_vector('snr')
    pl_avg.add_attribute("snr", snr_means, on_index=2 + shift)
    pl_avg.add_attribute("snr_flag", np.ones(pl_avg.full_size),
                         flagged_only=False, is_flag=True)

    rsd_values = pm.rsd(on_attr=rsd_on, flagged_only=False)
    pl_avg.add_attribute(rsd_label, rsd_values, on_index=5 + shift)

    if min_fraction is not None:
        present_fraction = pm.present / float(pm.shape[0])
        pl_avg.add_attribute("fraction_flag", present_fraction >= min_fraction,
                             flagged_only=False, is_flag=True)

    if rsd_thres is not None:
        if pm.shape[0] == 1:
            logging.warning('applying RSD filter on single scan, all peaks removed')

        rsd_flag = []
        for x in pl_avg.get_attribute(rsd_label, flagged_only=False):
            rsd_flag.append(not np.isnan(x) and x < rsd_thres)
        pl_avg.add_attribute("{}_flag".format(rsd_label), rsd_flag,
                             flagged_only=False, is_flag=True)

    return pl_avg
def tree2peaklist(tree_pth, adjust_mz=True, merge=True, ppm=5, ms1=True, out_pth='', name=''):
    """Convert msnpy spectral trees into peaklists.

    For every tree one peaklist is built per non-full-scan header. The
    peaklists of a tree can additionally be merged with ``align_peaks``,
    and the precursors met along the way can be returned as a separate
    peaklist.

    :param tree_pth: path handed to ``load_trees``
    :param adjust_mz: when True the peaklist m/z values are the masses from
        ``get_mf_details`` instead of the measured m/z
    :param merge: when True, also build one merged peaklist per tree
    :param ppm: tolerance forwarded to ``align_peaks`` when merging
    :param ms1: when True, also build the ``ms1_precursors`` peaklist
    :param out_pth: when non-empty, directory the HDF5 outputs are saved to
    :param name: prefix of the HDF5 file names
    :return: tuple ``(pls, merged_pls, ms1_precursors_pl)``; ``pls`` is the
        flat list of all non-merged peaklists, and the other two are ``''``
        when ``merge`` / ``ms1`` is False
    """
    ####################################################################################################################
    # Extract peaklists from msnpy
    ####################################################################################################################
    trees = load_trees(tree_pth)
    plsd = {}
    all_ms1_precursors = {}

    # get peaklist for each header
    for tree in trees:
        plsd[tree.graph['id']] = []

        # For each tree we look at each "header", i.e. the same mass
        # spectrometry data (processed prior by dimspy-msnpy), and create a
        # peaklist for each header.

        # add the node id to each node's attribute dict and collect the dicts
        tv = []
        for node_id, node_attrs in tree.nodes.items():
            node_attrs.update({'id': node_id})
            tv.append(node_attrs)

        # requires sorting for itertools.groupby to work properly
        tv = sorted(tv, key=lambda i: i['header'])

        for header, group in itertools.groupby(tv, key=lambda x: x['header']):
            # get mz, intensity, mass, molecular formula, adduct
            if re.search('.*Full ms .*', header):
                # full scan
                continue

            precursor_detail_track = []
            mz = []
            intensity = []
            mass = []
            mf = []
            adduct = []
            metad = {'tree_id': tree.graph['id'], 'header': header, 'parent': {}}

            for d in group:
                # get precursor details for each level
                for n in tree.predecessors(d['id']):
                    pd = tree.nodes.get(n)

                    # check if we already have this precursor details
                    if pd['mslevel'] in precursor_detail_track:
                        continue

                    metad['parent'][pd['mslevel']] = {}
                    metad['parent'][pd['mslevel']]['mz'] = pd['mz']
                    if 'mf' in pd:
                        mf_details_p = get_mf_details(pd)
                        metad['parent'][pd['mslevel']]['mass'] = mf_details_p['mass']
                        metad['parent'][pd['mslevel']]['adduct'] = mf_details_p['adduct']
                        metad['parent'][pd['mslevel']]['mf'] = mf_details_p['mf']

                    precursor_detail_track.append(pd['mslevel'])

                    if ms1:
                        if adjust_mz:
                            # NOTE(review): mf_details_p is only assigned when
                            # 'mf' is in pd; for a precursor without 'mf' this
                            # is unbound or left over from an earlier
                            # precursor - confirm intended handling.
                            all_ms1_precursors[mf_details_p['mass']] = pd['intensity']
                        else:
                            all_ms1_precursors[pd['mz']] = pd['intensity']

                mz.append(d['mz'])
                intensity.append(d['intensity'])
                if 'mf' in d:
                    mf_details = get_mf_details(d)
                    mass.append(mf_details['mass'])
                    mf.append(mf_details['mf'])
                    adduct.append(mf_details['adduct'])

            if not mz:
                continue

            mza = mass if adjust_mz else mz

            # create dimspy array object
            if mf:
                mza, intensity, mass, mf, adduct = sort_lists(mza, intensity, mass, mf, adduct)
            else:
                mza, intensity = sort_lists(mza, intensity)

            pl = PeakList(ID='{}: {}'.format(tree.graph['id'], header), mz=mza, intensity=intensity, **metad)
            print(pl)

            if mf:
                pl.add_attribute('mass', mass)
                pl.add_attribute('mz_original', mz)
                pl.add_attribute('mf', mf)
                pl.add_attribute('adduct', adduct)

            plsd[tree.graph['id']].append(pl)

    pls = [pl for tree_pls in plsd.values() for pl in tree_pls]

    if out_pth:
        save_peaklists_as_hdf5(pls, os.path.join(out_pth, '{}_non_merged_pls.hdf5'.format(name)))

    # Merge
    if merge:
        merged_pls = []
        # Use a dedicated loop variable: reusing ``pls`` here would overwrite
        # the flat list above, and the function would then return only the
        # last tree's peaklists.
        for tree_pls in plsd.values():
            if not tree_pls:
                continue
            merged_id = "<#>".join([pl.ID for pl in tree_pls])
            pm = align_peaks(tree_pls, ppm=ppm)
            plm = pm.to_peaklist(ID=merged_id)
            plm.metadata['parent'] = {1: tree_pls[0].metadata['parent'][1]}
            merged_pls.append(plm)

        if out_pth:
            save_peaklists_as_hdf5(merged_pls, os.path.join(out_pth, '{}_merged_pls.hdf5'.format(name)))
    else:
        merged_pls = ''

    if ms1:
        mz, intensity = sort_lists(list(all_ms1_precursors.keys()), list(all_ms1_precursors.values()))
        ms1_precursors_pl = [PeakList(ID='ms1_precursors', mz=mz, intensity=intensity)]
        if out_pth:
            save_peaklists_as_hdf5(ms1_precursors_pl,
                                   os.path.join(out_pth, '{}_ms1_precursors_pl.hdf5'.format(name)))
    else:
        ms1_precursors_pl = ''

    return pls, merged_pls, ms1_precursors_pl
try: pm = align_peaks(pkls, ppm=2.0, block_size=1, fixed_block=True, edge_extend=10, ncpus=2) except Exception, e: self.fail('alignment failed: ' + str(e)) self._checkAlignmentResults(pm) pkls = self._createPeakLists() try: pm = align_peaks(pkls, ppm=2.0, block_size=20, fixed_block=True, edge_extend=10, ncpus=2) except Exception, e: self.fail('alignment failed: ' + str(e)) self._checkAlignmentResults(pm) def test_ppm(self): pkls = self._createPeakLists() try: pm = align_peaks(pkls, ppm=1e+10, block_size=5, fixed_block=True, edge_extend=10,