Example #1
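This test aligns six identical peak lists (ten peaks each, all at m/z 10 with intensity 1) and checks that the merged peak matrix collapses to a single m/z value with the expected summed intensities and intra_count totals, for both non-fixed and fixed block settings.
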
    def test_special_peaklists(self):
        pkls = [
            PeakList('peaklist_' + str(i),
                     np.ones(10) * 10, np.ones(10)) for i in range(6)
        ]

        try:
            pm = align_peaks(pkls,
                             ppm=2.0,
                             block_size=5,
                             fixed_block=False,
                             edge_extend=10,
                             ncpus=2)
        except Exception as e:
            self.fail('alignment failed: ' + str(e))

        self.assertTrue(np.allclose(pm.to_peaklist('merged').mz, [10.]))
        self.assertTrue(np.allclose(np.sum(pm.intensity_matrix, axis=0), [6]))
        self.assertTrue(
            np.allclose(np.sum(pm.attr_matrix('intra_count'), axis=0), [60]))

        try:
            pm = align_peaks(pkls,
                             ppm=1e-10,
                             block_size=1,
                             fixed_block=True,
                             edge_extend=1,
                             ncpus=2)
        except Exception as e:
            self.fail('alignment failed: ' + str(e))

        self.assertTrue(np.allclose(pm.to_peaklist('merged').mz, [10.]))
        self.assertTrue(np.allclose(np.sum(pm.intensity_matrix, axis=0), [6]))
        self.assertTrue(
            np.allclose(np.sum(pm.attr_matrix('intra_count'), axis=0), [60]))
Example #2
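A helper that aligns replicate peak lists into a peak matrix and condenses them into a single averaged peak list, carrying over the mean SNR (if present), the RSD, a presence flag based on min_peaks and an optional RSD-threshold flag.
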
def average_replicate_peaklists(pls,
                                ppm,
                                min_peaks,
                                rsd_thres=None,
                                block_size=5000,
                                ncpus=None):

    pm = align_peaks(pls, ppm=ppm, block_size=block_size, ncpus=ncpus)

    prefix = os.path.commonprefix([p.ID for p in pls])
    merged_id = "{}{}".format(
        prefix, "_".join(
            map(str, [p.ID.replace(prefix, "").split(".")[0] for p in pls])))

    pl = pm.to_peaklist(ID=merged_id)
    if "snr" in pm.attributes:
        pl.add_attribute("snr", pm.attr_mean_vector("snr"), on_index=2)

    pl.add_attribute("rsd", pm.rsd(flagged_only=False), on_index=5)
    pl.add_attribute("present_flag", pm.present >= min_peaks, is_flag=True)

    if rsd_thres is not None:
        rsd_flag = [not np.isnan(x) and x < rsd_thres
                    for x in pl.get_attribute("rsd", flagged_only=False)]
        pl.add_attribute("rsd_flag",
                         rsd_flag,
                         flagged_only=False,
                         is_flag=True)

    return pl
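
A minimal usage sketch for the helper above (not part of the source): the PeakList construction mirrors the test examples elsewhere on this page, the m/z and intensity values are made up, and the dimspy import path is assumed.

import numpy as np
from dimspy.models.peaklist import PeakList  # import path assumed

# three replicate peak lists on the same m/z grid (illustrative values only)
replicates = [
    PeakList('rep_{}.raw'.format(i),
             np.array([100.0, 200.0, 300.0]),
             np.array([1.0e4, 2.0e4, 3.0e4]) * (1.0 + 0.05 * i))
    for i in range(3)
]

# keep peaks present in at least two replicates; flag peaks whose RSD exceeds 30%
avg_pl = average_replicate_peaklists(replicates, ppm=2.0, min_peaks=2, rsd_thres=30.0)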
Example #3
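A round-trip test for the HDF5 portal: a peak matrix with masked 'qc' tags and two custom flags is written with save_peak_matrix_as_hdf5, reloaded, and compared attribute by attribute against the original.
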
    def test_peak_matrix_portal(self):
        pkls = self._createPeaklists()
        pm = align_peaks(pkls, ppm=2.0, block_size=10, ncpus=2)

        pm.mask_tags('qc')

        pnum = pm.full_shape[1]
        pm.add_flag('odd_flag', ([0, 1] * int(pnum / 2. + 1))[:pnum])
        pm.add_flag('qua_flag', ([0, 0, 0, 1] * int(pnum / 4. + 1))[:pnum],
                    flagged_only=False)

        save_peak_matrix_as_hdf5(pm, '.test_peak_matrix.hdf5')
        npm = load_peak_matrix_from_hdf5('.test_peak_matrix.hdf5')

        self.assertEqual(pm.shape, npm.shape)
        self.assertEqual(pm.full_shape, npm.full_shape)
        self.assertTupleEqual(pm.attributes, npm.attributes)
        self.assertTrue(np.allclose(pm.mz_matrix, npm.mz_matrix))
        self.assertTrue(np.allclose(pm.intensity_matrix, npm.intensity_matrix))
        self.assertTrue(
            np.allclose(pm.attr_matrix('snr'), npm.attr_matrix('snr')))
        self.assertTrue(
            np.all(pm.attr_matrix('lab') == npm.attr_matrix('lab')))
        self.assertTrue(
            np.all(
                pm.property('present_matrix', flagged_only=False) ==
                npm.property('present_matrix', flagged_only=False)))
        self.assertEqual(pm.peaklist_tag_types, npm.peaklist_tag_types)
        self.assertEqual(pm.peaklist_tag_values, npm.peaklist_tag_values)
        self.assertTrue(np.all(pm.mask == npm.mask))
        self.assertTrue(
            np.all(pm.flag_values('odd_flag') == npm.flag_values('odd_flag')))
        self.assertTrue(
            np.all(pm.flag_values('qua_flag') == npm.flag_values('qua_flag')))
        self.assertTrue(np.all(pm.flags == npm.flags))
Example #4
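A test of the ppm tolerance: with an extremely large tolerance every peak list collapses into a single merged peak (averaged m/z and intensities), while with an extremely small tolerance every input peak keeps its own column.
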
    def test_ppm(self):
        pkls = self._createPeakLists()

        try:
            pm = align_peaks(pkls,
                             ppm=1e+10,
                             block_size=5,
                             fixed_block=True,
                             edge_extend=10,
                             ncpus=2)
        except Exception as e:
            self.fail('alignment failed: ' + str(e))

        self.assertTrue(
            np.allclose(
                pm.to_peaklist('merged').mz,
                [np.mean(list(map(np.mean, self.mz)))]))
        self.assertTrue(
            np.allclose(pm.intensity_matrix.flatten(),
                        list(map(np.mean, self.ints))))
        self.assertTrue(
            np.allclose(
                pm.attr_matrix('intra_count').flatten(),
                list(map(len, self.mz))))

        try:
            pm = align_peaks(pkls,
                             ppm=1e-10,
                             block_size=5,
                             fixed_block=True,
                             edge_extend=10,
                             ncpus=2)
        except Exception as e:
            self.fail('alignment failed: ' + str(e))

        self.assertTrue(
            np.allclose(
                pm.to_peaklist('merged').mz,
                np.sort(reduce(lambda x, y: x + y, list(map(list, self.mz))))))
        self.assertTrue(
            np.allclose(np.sort(np.sum(pm.intensity_matrix, axis=0)),
                        np.sort(reduce(lambda x, y: x + y, self.ints))))
        self.assertTrue(
            np.allclose(np.sum(pm.attr_matrix('intra_count'), axis=0),
                        np.ones(pm.shape[1])))
Example #5
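A fragment of the block-size test: alignment is expected to succeed even with block_size=1.
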
    def test_block_size(self):
        pkls = self._createPeakLists()
        try:
            pm = align_peaks(pkls,
                             ppm=2.0,
                             block_size=1,
                             fixed_block=True,
                             edge_extend=10,
                             ncpus=2)
        except Exception as e:
            self.fail('alignment failed: ' + str(e))
Example #6
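A round-trip test for the text portal: tagged peak lists are aligned, flagged, written with save_peak_matrix_as_txt (samples in rows, comprehensive output with per-tag RSD columns) and reloaded with load_peak_matrix_from_txt for comparison.
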
    def test_peak_matrix_portal(self):
        _mzs = lambda: sorted(np.random.uniform(100, 1200, size=100))
        _ints = lambda: np.abs(np.random.normal(100, 10, size=100))

        pkls = [
            PeakList('sample_1_1', _mzs(), _ints()),
            PeakList('sample_1_2', _mzs(), _ints()),
            PeakList('QC_1', _mzs(), _ints()),
            PeakList('sample_2_1', _mzs(), _ints()),
            PeakList('sample_2_2', _mzs(), _ints()),
            PeakList('QC_2', _mzs(), _ints()),
        ]
        for t in ('sample', Tag('compound_1', 'treatment'),
                  Tag('1hr', 'time_point'), Tag(1, 'plate')):
            pkls[0].tags.add_tag(t)
        for t in ('sample', Tag('compound_1', 'treatment'),
                  Tag('6hr', 'time_point'), Tag(1, 'plate')):
            pkls[1].tags.add_tag(t)
        for t in ('qc', Tag(1, 'plate')):
            pkls[2].tags.add_tag(t)
        for t in ('sample', Tag('compound_2', 'treatment'),
                  Tag('1hr', 'time_point'), Tag(2, 'plate')):
            pkls[3].tags.add_tag(t)
        for t in ('sample', Tag('compound_2', 'treatment'),
                  Tag('6hr', 'time_point'), Tag(2, 'plate')):
            pkls[4].tags.add_tag(t)
        for t in ('qc', Tag(2, 'plate')):
            pkls[5].tags.add_tag(t)

        pm = align_peaks(pkls, ppm=2e+4, block_size=10, ncpus=2)
        pm.add_flag('odd_flag',
                    ([0, 1] * int(pm.shape[1] / 2 + 1))[:pm.shape[1]])
        pm.add_flag('qua_flag',
                    ([0, 0, 1, 1] * int(pm.shape[1] / 4 + 1))[:pm.shape[1]])

        save_peak_matrix_as_txt(pm,
                                '.test_peak_matrix.txt',
                                samples_in_rows=True,
                                comprehensive=True,
                                rsd_tags=('qc', Tag('compound_1', 'treatment'),
                                          Tag('compound_2', 'treatment')))
        npm = load_peak_matrix_from_txt('.test_peak_matrix.txt',
                                        samples_in_rows=True,
                                        comprehensive='auto')

        self.assertEqual(pm.shape, npm.shape)
        self.assertEqual(pm.full_shape, npm.full_shape)
        self.assertTrue(np.all(pm.flags == npm.flags))
        self.assertTrue(np.all(pm.flag_names == npm.flag_names))
        self.assertTrue(np.allclose(pm.intensity_matrix, npm.intensity_matrix))
        self.assertEqual(pm.peaklist_tag_types, npm.peaklist_tag_types)
        self.assertEqual(pm.peaklist_tag_values, npm.peaklist_tag_values)
Example #7
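A basic smoke test: a normal alignment run must complete without raising.
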
    def test_normal_alignment(self):
        pkls = self._createPeakLists()

        try:
            pm = align_peaks(pkls,
                             ppm=2.0,
                             block_size=5,
                             fixed_block=True,
                             edge_extend=10,
                             ncpus=2)
            # print(pm.attr_matrix('str_attr'))
            # print(pm.attr_mean_vector('str_attr'))
        except Exception as e:
            self.fail('alignment failed: ' + str(e))
Example #8
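An earlier, Python 2 style version of average_replicate_scans: empty scans are dropped with a warning, the remaining scans are aligned, and the averaged peak list gets SNR, RSD, fraction and RSD-threshold flags.
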
def average_replicate_scans(name,
                            pls,
                            ppm=2.0,
                            min_fraction=0.8,
                            rsd_thres=30.0,
                            block_size=5000,
                            ncpus=None):

    emlst = np.array([p.size == 0 for p in pls])
    if np.sum(emlst) > 0:
        logging.warning(
            'No scan data available for [%s]' %
            ', '.join(str(p.ID) for e, p in zip(emlst, pls) if e))
        pls = [p for e, p in zip(emlst, pls) if not e]

    pm = align_peaks(pls, ppm=ppm, block_size=block_size, ncpus=ncpus)

    pl_avg = pm.to_peaklist(ID=name)
    # meta data
    for pl in pls:
        for k, v in pl.metadata.items():
            if k not in pl_avg.metadata:
                pl_avg.metadata[k] = []
            if v is not None:
                pl_avg.metadata[k].append(v)

    pl_avg.add_attribute("snr", pm.attr_mean_vector('snr'), on_index=2)
    pl_avg.add_attribute("snr_flag",
                         np.ones(pl_avg.full_size),
                         flagged_only=False,
                         is_flag=True)

    pl_avg.add_attribute("rsd", pm.rsd(flagged_only=False), on_index=5)

    if min_fraction is not None:
        pl_avg.add_attribute("fraction_flag",
                             (pm.present / float(pm.shape[0])) >= min_fraction,
                             flagged_only=False,
                             is_flag=True)
    if rsd_thres is not None:
        if pm.shape[0] == 1:
            logging.warning(
                'applying RSD filter on single scan, all peaks removed')
        rsd_flag = [not np.isnan(x) and x < rsd_thres
                    for x in pl_avg.get_attribute("rsd", flagged_only=False)]
        pl_avg.add_attribute("rsd_flag",
                             rsd_flag,
                             flagged_only=False,
                             is_flag=True)
    return pl_avg
Example #9
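The smoke-test variant of the special-peaklists case: six identical peak lists must align without raising.
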
    def test_special_peaklists(self):
        pkls = [
            PeakList('peaklist_' + str(i),
                     np.ones(10) * 10, np.ones(10)) for i in range(6)
        ]

        try:
            pm = align_peaks(pkls,
                             ppm=2.0,
                             block_size=5,
                             fixed_block=False,
                             edge_extend=10,
                             ncpus=2)
        except Exception as e:
            self.fail('alignment failed: ' + str(e))
Example #10
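A smoke test for the degenerate case of a single input peak list.
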
    def test_single_peaklist(self):
        pkls = [
            PeakList('peaklist_0', np.arange(10, 110, step=10),
                     np.arange(10) + 11)
        ]

        try:
            pm = align_peaks(pkls,
                             ppm=2.0,
                             block_size=5,
                             fixed_block=True,
                             edge_extend=10,
                             ncpus=2)
        except Exception as e:
            self.fail('alignment failed: ' + str(e))
Example #11
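The same single-peaklist case with assertions: the merged m/z values and the intensity matrix must reproduce the input peak list exactly.
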
    def test_single_peaklist(self):
        pkls = [
            PeakList('peaklist_0', np.arange(10, 110, step=10),
                     np.arange(10) + 11)
        ]

        try:
            pm = align_peaks(pkls,
                             ppm=2.0,
                             block_size=5,
                             fixed_block=True,
                             edge_extend=10,
                             ncpus=2)
        except Exception as e:
            self.fail('alignment failed: ' + str(e))

        self.assertTrue(
            np.allclose(
                pm.to_peaklist('merged').mz, np.arange(10, 110, step=10)))
        self.assertTrue(np.allclose(pm.intensity_matrix, [np.arange(10) + 11]))
Example #12
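The current version of average_replicate_scans, which can additionally compute the RSD on an arbitrary attribute via rsd_on and shifts the inserted attribute columns accordingly.
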
def average_replicate_scans(name,
                            pls,
                            ppm=2.0,
                            min_fraction=0.8,
                            rsd_thres=30.0,
                            rsd_on="intensity",
                            block_size=5000,
                            ncpus=None):

    emlst = np.array([x.size == 0 for x in pls])
    if np.sum(emlst) > 0:
        logging.warning('No scan data available for {}'.format(
            str([p.ID for e, p in zip(emlst, pls) if e])))
        pls = [p for e, p in zip(emlst, pls) if not e]

    pm = align_peaks(pls, ppm=ppm, block_size=block_size, ncpus=ncpus)

    pl_avg = pm.to_peaklist(ID=name)
    # meta data
    for pl in pls:
        for k, v in list(pl.metadata.items()):
            if k not in pl_avg.metadata:
                pl_avg.metadata[k] = []
            if v is not None:
                pl_avg.metadata[k].append(v)

    if rsd_on != "intensity":
        pl_avg.add_attribute(rsd_on, pm.attr_mean_vector(rsd_on), on_index=2)
        rsd_label = "rsd_{}".format(rsd_on)
        shift = 1
    else:
        rsd_label = "rsd"
        shift = 0

    pl_avg.add_attribute("snr", pm.attr_mean_vector('snr'), on_index=2 + shift)
    pl_avg.add_attribute("snr_flag",
                         np.ones(pl_avg.full_size),
                         flagged_only=False,
                         is_flag=True)

    pl_avg.add_attribute(rsd_label,
                         pm.rsd(on_attr=rsd_on, flagged_only=False),
                         on_index=5 + shift)

    if min_fraction is not None:
        pl_avg.add_attribute("fraction_flag",
                             (pm.present / float(pm.shape[0])) >= min_fraction,
                             flagged_only=False,
                             is_flag=True)
    if rsd_thres is not None:
        if pm.shape[0] == 1:
            logging.warning(
                'applying RSD filter on single scan, all peaks removed')
        rsd_flag = [
            not np.isnan(x) and x < rsd_thres
            for x in pl_avg.get_attribute(rsd_label, flagged_only=False)
        ]
        pl_avg.add_attribute("{}_flag".format(rsd_label),
                             rsd_flag,
                             flagged_only=False,
                             is_flag=True)
    return pl_avg
Example #13
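A bridge between msnpy fragmentation trees and dimspy: each tree is converted into one peak list per header, the peak lists of a tree can be merged via align_peaks, and the MS1 precursor peaks can be collected into a separate peak list; intermediate results are optionally written to HDF5.
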
def tree2peaklist(tree_pth, adjust_mz=True, merge=True, ppm=5, ms1=True, out_pth='', name=''):
    ####################################################################################################################
    # Extract peaklists from msnpy
    ####################################################################################################################
    trees = load_trees(tree_pth)
    plsd = {}
    all_ms1_precursors = {}

    # get peaklist for each header
    for tree in trees:
        plsd[tree.graph['id']] = []

        # For each tree we look at each "header", i.e. the same mass spectrometry data (processed
        # previously by dimspy-msnpy), and create a peaklist for each header. (There is probably a
        # cleaner way of doing this, e.g. by iterating through the tree itself, but this works.)
        its = list(tree.nodes.items())
        # add the node id to each node's attribute dict
        for node_id, node_attrs in its:
            node_attrs['id'] = node_id
        tv = [node_attrs for _, node_attrs in its]
        # requires sorting for itertools.groupby to work properly
        tv = sorted(tv, key=lambda i: i['header'])

        for header, group in itertools.groupby(tv, key=lambda x: x['header']):
            # get mz, intensity, mass, molecular formula, adduct

            mtch = re.search('.*Full ms .*', header)
            if mtch:
                # full scan
                continue

            precursor_detail_track = []

            mz = []
            intensity = []
            mass = []
            mf = []
            adduct = []

            metad = {'tree_id': tree.graph['id'], 'header': header, 'parent': {}}

            for d in list(group):

                # get precursor details for each level
                for n in tree.predecessors(d['id']):

                    pd = tree.nodes.get(n)

                    # check if we already have this precursor details
                    if pd['mslevel'] in precursor_detail_track:
                        continue

                    metad['parent'][pd['mslevel']] = {}
                    metad['parent'][pd['mslevel']]['mz'] = pd['mz']
                    if 'mf' in pd:
                        mf_details_p = get_mf_details(pd)
                        metad['parent'][pd['mslevel']]['mass'] = mf_details_p['mass']
                        metad['parent'][pd['mslevel']]['adduct'] = mf_details_p['adduct']
                        metad['parent'][pd['mslevel']]['mf'] = mf_details_p['mf']

                    precursor_detail_track.append(pd['mslevel'])

                    if ms1:
                        # adjust_mz assumes the precursor node provides an 'mf' entry
                        # (i.e. mf_details_p was set above)
                        if adjust_mz:
                            all_ms1_precursors[mf_details_p['mass']] = pd['intensity']
                        else:
                            all_ms1_precursors[pd['mz']] = pd['intensity']

                mz.append(d['mz'])
                intensity.append(d['intensity'])

                if 'mf' in d:
                    mf_details = get_mf_details(d)
                    mass.append(mf_details['mass'])
                    mf.append(mf_details['mf'])
                    adduct.append(mf_details['adduct'])

            if len(mz) < 1:
                continue

            if adjust_mz:
                mza = mass
            else:
                mza = mz

            # create dimspy array object
            if mf:
                mza, intensity, mass, mf, adduct = sort_lists(mza, intensity, mass, mf, adduct)
            else:
                mza, intensity = sort_lists(mza, intensity)


            pl = PeakList(ID='{}: {}'.format(tree.graph['id'], header),
                          mz=mza,
                          intensity=intensity,
                          **metad)
            print(pl)
            if mf:
                pl.add_attribute('mass', mass)
                pl.add_attribute('mz_original', mz)
                pl.add_attribute('mf', mf)
                pl.add_attribute('adduct', adduct)

            plsd[tree.graph['id']].append(pl)

    pls = [y for x in list(plsd.values()) for y in x]

    if out_pth:
        save_peaklists_as_hdf5(pls, os.path.join(out_pth, '{}_non_merged_pls.hdf5'.format(name)))


    # Merge
    if merge:
        merged_pls = []
        # use a separate name for the per-tree peaklists so that the full 'pls'
        # list returned at the end is not overwritten
        for key, tree_pls in plsd.items():
            if not tree_pls:
                continue
            merged_id = "<#>".join([pl.ID for pl in tree_pls])
            pm = align_peaks(tree_pls, ppm=ppm)
            plm = pm.to_peaklist(ID=merged_id)
            plm.metadata['parent'] = {1: tree_pls[0].metadata['parent'][1]}

            merged_pls.append(plm)

        if out_pth:
            save_peaklists_as_hdf5(merged_pls, os.path.join(out_pth, '{}_merged_pls.hdf5'.format(name)))
    else:
        merged_pls = ''

    if ms1:
        mz, intensity = sort_lists(list(all_ms1_precursors.keys()), list(all_ms1_precursors.values()))
        ms1_precursors_pl = [PeakList(ID='ms1_precursors',
                                      mz=mz,
                                      intensity=intensity)]
        if out_pth:
            save_peaklists_as_hdf5(ms1_precursors_pl, os.path.join(out_pth, '{}_ms1_precursors_pl.hdf5'.format(name)))
    else:
        ms1_precursors_pl = ''

    return pls, merged_pls, ms1_precursors_pl
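
A hypothetical call sketch for tree2peaklist; the tree file, output directory and run name below are placeholders (not taken from the source) and simply exercise the signature defined above.

pls, merged_pls, ms1_pls = tree2peaklist(
    tree_pth='annotated_trees.json',  # msnpy tree file (placeholder path)
    adjust_mz=True,                   # use molecular-formula masses rather than measured m/z
    merge=True,                       # align and merge the peak lists of each tree
    ppm=5,
    ms1=True,                         # also collect the MS1 precursor peaks
    out_pth='output_dir',             # placeholder output directory for the HDF5 files
    name='run_01')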
Example #14
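A fragment covering further block-size cases (block sizes of 1 and 20) and the beginning of another ppm test.
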
        try:
            pm = align_peaks(pkls,
                             ppm=2.0,
                             block_size=1,
                             fixed_block=True,
                             edge_extend=10,
                             ncpus=2)
        except Exception as e:
            self.fail('alignment failed: ' + str(e))
        self._checkAlignmentResults(pm)

        pkls = self._createPeakLists()
        try:
            pm = align_peaks(pkls,
                             ppm=2.0,
                             block_size=20,
                             fixed_block=True,
                             edge_extend=10,
                             ncpus=2)
        except Exception as e:
            self.fail('alignment failed: ' + str(e))
        self._checkAlignmentResults(pm)

    def test_ppm(self):
        pkls = self._createPeakLists()

        try:
            pm = align_peaks(pkls,
                             ppm=1e+10,
                             block_size=5,
                             fixed_block=True,
                             edge_extend=10,