def test_is_not_empty_if_annotated(self):
    """A molecule whose standard has a fragmentation spectrum is listed and counted.

    Saves one molecule, one standard for it, one dataset and one
    fragmentation spectrum linking the dataset to the standard, then checks
    that ``get_table_and_count`` reports one table row and a count of one.
    """
    molecule = Molecule(name='test', sum_formula="C1H2O3")
    molecule.save()
    standard = Standard(molecule=molecule)
    standard.save()
    dataset = Dataset()
    dataset.save()
    spectrum = FragmentationSpectrum(
        ms1_intensity=42,
        dataset=dataset,
        standard=standard,
    )
    spectrum.save()

    table, annotated_count = self.get_table_and_count()

    self.assertEqual(len(table.rows), 1)
    self.assertEqual(annotated_count, 1)
def test_make_FragmentationSpectrum_with_centroids(self):
    """Centroid m/z values and intensities set on a spectrum read back unchanged.

    The values are assigned through ``set_centroid_mzs`` /
    ``set_centroid_ints`` and compared, after saving, against the
    ``centroid_mzs`` / ``centroid_ints`` attributes.
    """
    expected_mzs = [10., 20, 50]
    expected_ints = [1., 1., 1.]

    dataset = Dataset(name='Dataset1')
    dataset.save()

    spectrum = FragmentationSpectrum(
        precursor_mz='123.456',
        spec_num=0,
        dataset=dataset,
    )
    spectrum.set_centroid_mzs(expected_mzs)
    spectrum.set_centroid_ints(expected_ints)
    spectrum.save()

    np.testing.assert_array_almost_equal(expected_mzs, spectrum.centroid_mzs)
    np.testing.assert_array_almost_equal(expected_ints, spectrum.centroid_ints)
def handle_uploaded_files(metadata, mzml_filepath, d):
    """Import an mzML run into dataset ``d``.

    For every selected standard/adduct pair this extracts an ion
    chromatogram (XIC) from the MS1 scans and stores each MS2 scan whose
    precursor m/z lies within the ppm tolerance or the quadrupole window of
    the pair's theoretical m/z as a ``FragmentationSpectrum``.

    Args:
        metadata: Mapping read with the keys ``'mass_accuracy_ppm'``,
            ``'quad_window_mz'``, ``'standards'`` and ``'adducts'`` (the
            latter two are used as primary-key filters). ``'lc_info'`` and
            ``'ms_info'`` are optional comma-separated strings; if either
            one is missing, both are treated as empty.
        mzml_filepath: Path handed to ``pymzml.run.Reader``.
        d: Dataset instance that is updated and that all created spectra
            and XICs are attached to.

    Returns:
        Always ``True``.
    """
    logger = logging.getLogger(__file__ + str(d.id))
    # NOTE(review): a handler is attached on every call and never removed,
    # so repeated calls for the same dataset id stack handlers on the same
    # logger.
    logger.addHandler(DatabaseLogHandler(d, level=logging.ERROR))
    msrun = pymzml.run.Reader(mzml_filepath)
    ppm = float(metadata['mass_accuracy_ppm'])
    mz_tol_quad = float(metadata['quad_window_mz'])
    scan_time = []
    standards = Standard.objects.all().filter(pk__in=metadata['standards'])
    adducts = Adduct.objects.all().filter(pk__in=metadata['adducts'])

    # Theoretical m/z and the +/- ppm window for each standard/adduct pair.
    mz_upper = {}
    mz_lower = {}
    mz = {}
    logger.debug(standards.count())
    for standard in standards:
        mz_upper[standard] = {}
        mz_lower[standard] = {}
        mz[standard] = {}
        for adduct in adducts:
            target_mz = standard.molecule.get_mz(adduct)
            mz[standard][adduct] = target_mz
            logger.debug(standard)
            logger.debug(target_mz)
            delta_mz = target_mz * ppm * 1e-6
            mz_upper[standard][adduct] = target_mz + delta_mz
            mz_lower[standard][adduct] = target_mz - delta_mz

    logger.debug('adding dataset')
    try:
        lc_info = metadata['lc_info']
        ms_info = metadata['ms_info']
    except LookupError:
        logger.debug('no instrument information supplied; using empty string instead')
        lc_info = ms_info = ''
    for lc in _split_instrument_info(lc_info):
        lc_obj = LcInfo.objects.get_or_create(content=lc)[0]
        if lc_obj not in d.lc_info.all():
            d.lc_info.add(lc_obj)
    for ms in _split_instrument_info(ms_info):
        ms_obj = MsInfo.objects.get_or_create(content=ms)[0]
        if ms_obj not in d.ms_info.all():
            d.ms_info.add(ms_obj)
    d.mass_accuracy_ppm = ppm
    d.save()
    for standard in standards:
        d.standards_present.add(standard)
    for adduct in adducts:
        d.adducts_present.add(adduct)
    d.save()

    logger.debug('adding msms')
    # Pre-populate every pair so the XIC loop below also works when the run
    # contains no spectra at all (previously a KeyError).
    xics = {}
    for standard in standards:
        xics[standard] = {}
        for adduct in adducts:
            xics[standard][adduct] = []

    spec_n = 0
    for spectrum in msrun:
        spec_n += 1
        if spectrum['ms level'] == 1:
            scan_time.append(spectrum['scan start time'])
        for standard in standards:
            for adduct in adducts:
                trace = xics[standard][adduct]
                if spectrum['ms level'] == 1:
                    lower = mz_lower[standard][adduct]
                    upper = mz_upper[standard][adduct]
                    # One XIC point per MS1 scan: summed intensity of the
                    # centroided peaks inside the ppm window.
                    trace.append(sum(
                        intensity
                        for m, intensity in spectrum.centroidedPeaks
                        if lower <= m <= upper
                    ))
                if spectrum['ms level'] == 2:
                    pre_mz = float(spectrum['precursors'][0]['mz'])
                    mz_tol_this_adduct = mz[standard][adduct] * ppm * 1e-6
                    offset = abs(pre_mz - mz[standard][adduct])
                    if not (offset <= mz_tol_this_adduct or offset <= mz_tol_quad):
                        continue
                    # The fragmentation spectrum is probably the target.
                    # Use the latest MS1 intensity; fall back to 0 when no
                    # MS1 scan has been seen yet (previously an IndexError).
                    ms1_intensity = trace[-1] if trace else 0
                    pre_fraction = _precursor_fraction(
                        spectrum.mz, spectrum.i, pre_mz,
                        mz_tol_this_adduct, mz_tol_quad)
                    ce_str = _collision_energy_string(spectrum)
                    f = FragmentationSpectrum(
                        precursor_mz=pre_mz,
                        rt=spectrum['scan start time'],
                        dataset=d,
                        spec_num=spec_n,
                        precursor_quad_fraction=pre_fraction,
                        ms1_intensity=ms1_intensity,
                        collision_energy=ce_str)
                    f.set_centroid_mzs(spectrum.mz)
                    f.set_centroid_ints(spectrum.i)
                    f.collision = ce_str
                    f.save()

    logger.debug("adding xics")
    for standard in standards:
        for adduct in adducts:
            # An XIC is stored for every pair, even if it is all zeros.
            x = Xic(mz=standard.molecule.get_mz(adduct), dataset=d)
            x.set_xic(xics[standard][adduct])
            x.set_rt(scan_time)
            x.standard = standard
            x.adduct = adduct
            x.save()
    d.processing_finished = True
    d.save()
    logger.debug('done')
    logger.debug("added = True")
    return True


def _split_instrument_info(info):
    """Split a ``', '``-separated string into a set of comma-free, stripped entries."""
    return {part.replace(',', '').strip() for part in info.split(', ') if part}


def _collision_energy_string(spectrum):
    """Return ``'<name> <value> <unitName>'`` of the MS:1000045 element, or ``''``.

    If several such elements exist the last one wins. Returning ``''`` when
    none exists avoids the unbound / stale ``ce_str`` of the previous
    implementation.
    """
    ce_str = ''
    for element in spectrum.xmlTree:
        if element.get('accession') == "MS:1000045":
            ce_energy = dict(element.items())
            ce_str = "{} {} {}".format(
                ce_energy['name'], ce_energy['value'], ce_energy['unitName'])
    return ce_str


def _precursor_fraction(mzs, ints, pre_mz, ppm_tol, quad_tol):
    """Return the ppm-window intensity sum divided by the quad-window sum.

    Returns 0 when either sum is zero, so an empty quad window cannot cause
    a division by zero.
    """
    quad_sum = sum(
        ii for m, ii in zip(mzs, ints)
        if pre_mz - quad_tol <= m <= pre_mz + quad_tol)
    ppm_sum = sum(
        ii for m, ii in zip(mzs, ints)
        if pre_mz - ppm_tol <= m <= pre_mz + ppm_tol)
    if ppm_sum == 0 or quad_sum == 0:
        return 0
    return ppm_sum / quad_sum