Esempio n. 1
0
 def test_add_xic(self):
     """Persist one Xic with an intensity trace and read the trace back."""
     # Fixtures: a molecule with its standard, an adduct, and a dataset
     # that lists both of them as present.
     molecule = Molecule(name='TestMolecule1', sum_formula="C1H2O3")
     molecule.save()
     standard = Standard(molecule=molecule, inventory_id="0")
     standard.save()
     adduct = Adduct(nM=1, delta_formula='+H+K', charge=-2)
     adduct.save()
     dataset = Dataset(name='Dataset1')
     dataset.save()
     dataset.standards_present.add(standard)
     dataset.adducts_present.add(adduct)

     # Store a five-point trace on a new Xic that belongs to the dataset.
     intensities = [1.0, 2.0, 3.0, 4.0, 5.0]
     stored = Xic(mz='0.0', dataset=dataset)
     stored.set_xic(intensities)
     stored.save()

     # Exactly one row exists and the trace read back matches what was set.
     xic_total = Xic.objects.all().count()
     self.assertEqual(xic_total, 1)
     np.testing.assert_array_almost_equal(intensities, stored.xic)
Esempio n. 2
0
    def test_xic_and_standard_and_adduct(self):
        """Attach a standard and an adduct to a Xic and exercise the mass check."""
        # create some datasets
        d1 = Dataset(name='Dataset1')
        d1.save()
        a1 = Adduct(nM=1, delta_formula='-H', charge=-1)
        a1.save()
        m1 = Molecule(name='TestMolecule1', sum_formula="C1H2O3")
        m1.save()
        s1 = Standard(molecule=m1, inventory_id="0")
        s1.save()
        # create some xics
        x1 = Xic(mz=60.993, dataset=d1)
        xic = [1.0, 2.0, 3.0, 4.0, 5.0]
        x1.set_xic(xic)
        x1.standard = s1
        x1.adduct = a1
        x1.save()
        self.assertEqual(Xic.objects.all().count(), 1)
        self.assertEqual(Dataset.objects.all().count(), 1)
        self.assertEqual(Standard.objects.all().count(), 1)
        # mass check: after changing the m/z, either save() or check_mass()
        # is expected to raise ValueError (which of the two is not visible
        # from this test, so both stay inside the context manager).
        with self.assertRaises(ValueError):
            x1.mz = 123.993
            x1.save()
            x1.check_mass()

    def test_xic_mass_filter(self):
        """Select the Xics of one dataset whose m/z lies within +/-0.01 of a target.

        This used to be defined *inside* test_xic_and_standard_and_adduct,
        so the test runner never collected or executed it.
        """
        d1 = Dataset(name='dataset')
        d1.save()
        mz = 60.993
        # three larger
        Xic(mz=mz + 5., dataset=d1).save()
        Xic(mz=mz + 10., dataset=d1).save()
        Xic(mz=mz + 15., dataset=d1).save()
        # three approx equal
        Xic(mz=mz + 0.005, dataset=d1).save()
        Xic(mz=mz + 0.0, dataset=d1).save()
        Xic(mz=mz - 0.0015, dataset=d1).save()
        # three smaller
        Xic(mz=mz - 5., dataset=d1).save()
        Xic(mz=mz - 10., dataset=d1).save()
        Xic(mz=mz - 15., dataset=d1).save()
        # three approx equal from another dataset
        d2 = Dataset(name='dataset2')
        d2.save()
        Xic(mz=mz + 0.005, dataset=d2).save()
        Xic(mz=mz + 0.0, dataset=d2).save()
        Xic(mz=mz - 0.0015, dataset=d2).save()
        self.assertEqual(Xic.objects.all().count(), 12)
        # Window is [mz - 0.01, mz + 0.01]; the previous bounds were swapped
        # (>= mz + 0.01 and <= mz - 0.01), which no value can satisfy.
        xics = Xic.objects.filter(dataset=d1, mz__gte=mz - 0.01, mz__lte=mz + 0.01)
        # `xics` is already a QuerySet, so count it directly (a QuerySet has
        # no `.objects` manager). Only the three near-equal rows of d1 match.
        self.assertEqual(xics.count(), 3)
Esempio n. 3
0
def _instrument_entries(metadata, key, logger):
    """Return the set of cleaned, non-empty entries stored under ``metadata[key]``.

    The value is split on ', '; commas and surrounding whitespace are removed
    from every piece. A missing key is treated as an empty string.
    """
    try:
        raw = metadata[key]
    except LookupError:
        logger.debug('no instrument information supplied; using empty string instead')
        raw = ''
    entries = set()
    for piece in raw.split(', '):
        cleaned = piece.replace(',', '').strip()
        # Test after cleaning so whitespace-only pieces do not become
        # records with empty content.
        if cleaned:
            entries.add(cleaned)
    return entries


def _collision_energy_label(spectrum):
    """Return 'name value unitName' of the spectrum's MS:1000045 element, or ''.

    When several elements carry that accession, the last one is used.
    """
    energy = None
    for element in spectrum.xmlTree:
        if element.get('accession') == "MS:1000045":
            energy = dict(element.items())
    if energy is None:
        return ''
    return "{} {} {}".format(energy['name'], energy['value'], energy['unitName'])


def _window_sum(mzs, ints, centre, half_width):
    """Sum the intensities whose m/z lies in [centre - half_width, centre + half_width]."""
    lower = centre - half_width
    upper = centre + half_width
    return sum(ii for m, ii in zip(mzs, ints) if lower <= m <= upper)


def handle_uploaded_files(metadata, mzml_filepath, d):
    """Read an mzML file and store its Xics and matching fragmentation spectra for ``d``.

    For every standard/adduct pair selected in ``metadata`` this builds an
    extracted-ion trace from the 'ms level' 1 spectra (one summed intensity
    per spectrum, within +/- ppm of the target m/z) and saves a
    FragmentationSpectrum for every 'ms level' 2 spectrum whose precursor
    m/z is within the ppm tolerance or the quadrupole window of the target.

    :param metadata: mapping with the keys 'mass_accuracy_ppm',
        'quad_window_mz', 'standards' and 'adducts' (primary keys), and
        optionally 'lc_info' / 'ms_info' (', '-separated text).
    :param mzml_filepath: path handed to ``pymzml.run.Reader``.
    :param d: dataset object that is updated and that all created records
        reference.
    :return: True once everything has been saved.
    """
    logger = logging.getLogger(__file__ + str(d.id))
    logger.addHandler(DatabaseLogHandler(d, level=logging.ERROR))

    msrun = pymzml.run.Reader(mzml_filepath)
    ppm = float(metadata['mass_accuracy_ppm'])
    mz_tol_quad = float(metadata['quad_window_mz'])
    scan_time = []
    standards = Standard.objects.all().filter(pk__in=metadata['standards'])
    adducts = Adduct.objects.all().filter(pk__in=metadata['adducts'])
    logger.debug(standards.count())

    # One (standard, adduct, target m/z, ppm tolerance in m/z units) entry
    # per pair, in standard-major order.
    targets = []
    for standard in standards:
        for adduct in adducts:
            target_mz = standard.molecule.get_mz(adduct)
            logger.debug(standard)
            logger.debug(target_mz)
            targets.append((standard, adduct, target_mz, target_mz * ppm * 1e-6))

    logger.debug('adding dataset')
    # Each field is looked up on its own so that a missing 'ms_info' does not
    # throw away a supplied 'lc_info' (and vice versa).
    lc_info_stripped = _instrument_entries(metadata, 'lc_info', logger)
    ms_info_stripped = _instrument_entries(metadata, 'ms_info', logger)
    for lc in lc_info_stripped:
        lc_obj = LcInfo.objects.get_or_create(content=lc)[0]
        if lc_obj not in d.lc_info.all():
            d.lc_info.add(lc_obj)
    for ms in ms_info_stripped:
        ms_obj = MsInfo.objects.get_or_create(content=ms)[0]
        if ms_obj not in d.ms_info.all():
            d.ms_info.add(ms_obj)

    d.mass_accuracy_ppm = ppm
    d.save()
    for standard in standards:
        d.standards_present.add(standard)
    for adduct in adducts:
        d.adducts_present.add(adduct)
    d.save()

    logger.debug('adding msms')
    # Create every trace up front so a run without any spectra still yields
    # (empty) traces instead of a KeyError when the Xics are written.
    xics = {(standard, adduct): [] for standard, adduct, _, _ in targets}
    for spec_n, spectrum in enumerate(msrun, start=1):
        ms_level = spectrum['ms level']
        if ms_level == 1:
            scan_time.append(spectrum['scan start time'])
            peaks = spectrum.centroidedPeaks
            for standard, adduct, target_mz, mz_tol in targets:
                lower = target_mz - mz_tol
                upper = target_mz + mz_tol
                xics[(standard, adduct)].append(
                    sum(i for m, i in peaks if lower <= m <= upper))
        elif ms_level == 2:
            pre_mz = float(spectrum['precursors'][0]['mz'])
            for standard, adduct, target_mz, mz_tol in targets:
                offset = abs(pre_mz - target_mz)
                if not (offset <= mz_tol or offset <= mz_tol_quad):
                    continue  # precursor is not this target
                # frag spectrum probably the target
                trace = xics[(standard, adduct)]
                # No MS1 scan seen yet: there is no preceding intensity.
                ms1_intensity = trace[-1] if trace else 0
                mzs = spectrum.mz
                ints = spectrum.i
                quad_ints_sum = _window_sum(mzs, ints, pre_mz, mz_tol_quad)
                ppm_ints_sum = _window_sum(mzs, ints, pre_mz, mz_tol)
                # Guard the denominator as well: it is zero when nothing
                # falls inside the quadrupole window.
                if ppm_ints_sum == 0 or quad_ints_sum == 0:
                    pre_fraction = 0
                else:
                    pre_fraction = float(ppm_ints_sum) / quad_ints_sum
                ce_str = _collision_energy_label(spectrum)
                f = FragmentationSpectrum(precursor_mz=pre_mz,
                                          rt=spectrum['scan start time'], dataset=d, spec_num=spec_n,
                                          precursor_quad_fraction=pre_fraction, ms1_intensity=ms1_intensity,
                                          collision_energy=ce_str)
                f.set_centroid_mzs(spectrum.mz)
                f.set_centroid_ints(spectrum.i)
                f.collision = ce_str
                f.save()

    logger.debug("adding xics")
    # A Xic is stored for every pair, including all-zero traces.
    for standard, adduct, target_mz, _ in targets:
        x = Xic(mz=target_mz, dataset=d)
        x.set_xic(xics[(standard, adduct)])
        x.set_rt(scan_time)
        x.standard = standard
        x.adduct = adduct
        x.save()
    d.processing_finished = True
    d.save()
    logger.debug('done')
    logger.debug("added = True")
    return True