Beispiel #1
0
    def setUp(self):
        """Create seven peakels (f1..f7) with per-sample areas and link them.

        f2 is registered as an isotope of f1 and f3 as an adduct of f1; f1
        additionally receives a single annotation built from placeholder
        metabolite fields.
        """
        def build(value, areas):
            # The same number is passed as first and fourth constructor argument.
            peakel = Peakel(value, 0.0, 0.0, value)
            peakel.area_by_sample_name = areas
            return peakel

        self.f1 = build(
            1256.52,
            {'a': 102564, 'b': 130156, 'c': 150000, 'd': 10000},
        )
        self.f1.charge = 1
        self.f1.polarity = -1

        self.f2 = build(
            1258.52,
            {'a': 102564 / 10.0, 'b': 130156 / 10.0, 'c': 150000 / 10.0, 'd': 10000 / 10.0},
        )
        self.f3 = build(
            1261.52,
            {'a': 102564 / 2.0, 'b': 130156 / 2.3, 'c': 150000 / 2.25, 'd': 10000 / 1.89},
        )
        self.f4 = build(
            1262.52,
            {'a': 102564 * 3.0, 'b': 130156 / 1.8, 'c': 150000 / 4.1, 'd': 10000 * 0.5},
        )
        self.f5 = build(
            1274.52,
            {'a': 102564 * 12.23, 'b': 130156 * 30.0, 'c': 150000 * 33.0, 'd': 10000 * 44.0},
        )
        self.f6 = build(
            1275.52,
            {'a': 102564 * 44.0, 'b': 130156 * 60.0, 'c': 150000 * 66.0, 'd': 10000 * 88.0},
        )
        self.f7 = build(
            1281.52,
            {'a': 102564 * 6.0, 'b': 130156 * 4.56, 'c': 150000 / 78.0, 'd': 10000 / 1236.0},
        )

        self.features = [
            self.f1, self.f2, self.f3, self.f4, self.f5, self.f6, self.f7,
        ]

        # f2 -> isotope of f1
        self.f1.isotopes.add(self.f2)
        isotope_link = Attribution('isotope c13', self.f1.id, 1)
        self.f2.set_main_attribution(isotope_link)

        # f3 -> adduct of f1
        self.f1.adducts.add(self.f3)
        adduct_link = Attribution('[M+Na+]', self.f1.id, 1)
        self.f3.set_main_attribution(adduct_link)

        # Placeholder strings passed positionally to Metabolite.
        metabolite_fields = (
            "acession", "name", "formula", "inchi", "mono_mass",
            "average_mass", "description", "status", "origin",
            "kegg_id", "isotopic_pattern_pos", "isotopic_pattern_neg",
        )
        placeholder = Metabolite(*metabolite_fields)
        self.f1.annotations.append(Annotation(metabolite=placeholder, for_adduct='H2'))
Beispiel #2
0
    def _to_peakel_obj(self, d):
        """Convert one record of CSV data into a Peakel object.

        The values stored under the first six entries of
        ``PeakListReader.KEYS`` are converted to float and passed
        positionally to ``Peakel``. The polarity is taken from the "Mode"
        entry when the record has one (``'Positif'`` gives 1, any other
        value gives -1); otherwise ``self.exp_design.polarity`` is used.
        Every entry left once the non-sample keys are dropped is stored as
        ``sample name -> float(area)``, and ``area`` is set to the median
        of those values (or the mean when the median is falsy).

        Note: ``d`` is modified in place; the non-sample keys are removed.

        :param d: dict-like record mapping column names to values
        :return: the populated Peakel
        :raises KeyError: if one of the six required keys is missing
        :raises ValueError: if a value cannot be parsed as a float
        """
        keys = PeakListReader.KEYS
        p = Peakel(*(float(d[keys[i]]) for i in range(6)))

        # Set the polarity. The per-record "Mode" value wins over the
        # experiment-wide default; it must be read before "Mode" is dropped.
        if "Mode" in d:
            p.polarity = 1 if d["Mode"] == 'Positif' else -1
        else:
            p.polarity = self.exp_design.polarity

        # Drop every key that is not a sample column; absent keys are ignored.
        for k in (PeakListReader.KEYS + self.directories + [
                "", "BIO", "mzmed", "rt.minutes", "Var", "Blc.Ext", "BLC",
                "Mode", "Correlation_Dilution_Log", "NOT_M.QC", "NOT_M.Blc",
                "NOT_QC.Blc", "NOT_CV..", "NOT_CV", "NOT_Correl", "Correl",
                "NOT_BIO.Blc", "NOT_nom", "rt.min", "Negatifs"
        ]):
            d.pop(k, None)

        # Assign the area of each sample.
        p.area_by_sample_name.update({a: float(b) for a, b in d.items()})
        areas = list(p.area_by_sample_name.values())
        p.area = np.median(areas)

        if not p.area:
            # A zero median would hide the peak; fall back to the mean.
            p.area = np.mean(areas)
            logging.debug(
                "an elution peak has the median of "
                "its area equals to 0 ! Using mean instead: {}.".format(
                    p.area))
        return p
Beispiel #3
0
    def test_dbscan_clustering_for_alignment(self):
        """Run clusterize_dbscan on seven peakels and print every cluster.

        For each member of each returned cluster the moz, rt and sample
        label are printed; the test makes no assertions.
        """
        # Creation order is kept as is: f7 is built before f5 and f6.
        f1 = Peakel(1256.52, 0.0, 0.0, 100.0)
        f2 = Peakel(1258.52, 0.0, 0.0, 500.52)
        f3 = Peakel(1257.52, 0.0, 0.0, 101.52)
        f4 = Peakel(1600.52, 0.0, 0.0, 99.52)
        f7 = Peakel(1600.86, 0.0, 0.0, 107.12)
        f5 = Peakel(1600.52, 0.0, 0.0, 3205.52)
        f6 = Peakel(1456.52, 0.0, 0.0, 600.52)

        peakels = [f1, f2, f3, f4, f5, f6, f7]

        # Not read anywhere below; kept so the fixture stays the same.
        peakels_by_sample = {'a': {f1, f2, f4}, 'b': {f3, f5, f6}}

        # Map every peakel to the label of its sample.
        sample_by_peakel = {}
        for label, members in (('a', (f1, f2, f4)), ('b', (f3, f5, f6, f7))):
            for member in members:
                sample_by_peakel[member] = label

        # One [moz, rt] pair per peakel, in the same order as `peakels`.
        values = []
        for peakel in peakels:
            values.append([peakel.moz, peakel.rt])

        clusters = clusterize_dbscan(values, peakels, eps=5, min_samples=1)

        for cluster in clusters:
            for member in cluster:
                print(member.moz, member.rt, sample_by_peakel[member])
            print('\n')