def test_TreeDiaController_percentile(self, simple_dataset):
    """Run a tree-design DIA controller with percentile windows.

    The simulation covers RT 0-400 over m/z 100-1000 with 64 windows and no
    scan overlap. Afterwards the controller is checked for a non-empty MS2
    scan and the environment is handed to ``check_mzML``.
    """
    logger.info('Testing TreeDiaController percentile')

    # acquisition settings
    mz_range = (100, 1000)
    rt_range = (0, 400)
    scan_durations = {1: 0.12, 2: 0.06}

    # build the simulated instrument, the controller and the environment
    spectrometer = IndependentMassSpectrometer(
        POSITIVE, simple_dataset, scan_duration=scan_durations)
    controller = DiaController(
        mz_range[0], mz_range[1], 'percentile', 'tree', 64, scan_overlap=0)
    env = Environment(
        spectrometer, controller, rt_range[0], rt_range[1], progress_bar=True)
    set_log_level_warning()
    env.run()

    # there must be at least one non-empty MS2 scan
    check_non_empty_MS2(controller)

    # write the simulated output to an mzML file and check it
    check_mzML(env, OUT_DIR, 'tree_dia_percentile.mzml')
def test_multiple_adducts(self):
    """Check that no MS1 scan holds more peaks than chemicals x adducts.

    Five chemicals are sampled with three equally weighted positive-mode
    adducts, and every chemical is reduced to a single "Mono" isotope entry.
    """
    formula_sampler = DatabaseFormulaSampler(HMDB)
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=100, max_rt=101)
    chromatogram_sampler = ConstantChromatogramSampler()
    adduct_priors = {POSITIVE: {'M+H': 100, 'M+Na': 100, 'M+K': 100}}
    mixture_creator = ChemicalMixtureCreator(
        formula_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler,
        chromatogram_sampler=chromatogram_sampler,
        adduct_prior_dict=adduct_priors,
        adduct_proportion_cutoff=0.0)

    n_chems = 5
    # upper bound on peaks per scan: 5 chemicals x 3 adducts = 15
    max_peaks = n_chems * len(adduct_priors[POSITIVE])

    dataset = mixture_creator.sample(n_chems, 2)
    for chem in dataset:
        chem.isotopes = [(chem.mass, 1, "Mono")]

    # some adducts might not be sampled if the probability is less than 0.2,
    # so the peak count may be below the bound but never above it
    controller = SimpleMs1Controller()
    spectrometer = IndependentMassSpectrometer(POSITIVE, dataset)
    env = Environment(spectrometer, controller, 102, 110, progress_bar=True)
    set_log_level_warning()
    env.run()

    for ms1_scan in controller.scans[1]:
        assert len(ms1_scan.mzs) <= max_peaks
def test_swath(self, ten_chems):
    """Run SWATH (width 100, overlap 10) on ten chemicals with spike noise.

    The run covers RT 200-300. The controller is then checked for a
    non-empty MS2 scan and the environment is handed to ``check_mzML``.
    """
    controller = SWATH(100, 1000, 100, scan_overlap=10)

    noise = UniformSpikeNoise(0.1, 1)
    spectrometer = IndependentMassSpectrometer(
        POSITIVE, ten_chems,
        spike_noise=noise,
        scan_duration={1: 0.124, 2: 0.124})

    env = Environment(spectrometer, controller, 200, 300, progress_bar=True)
    set_log_level_warning()
    env.run()

    check_non_empty_MS2(controller)
    check_mzML(env, OUT_DIR, 'SWATH_ten_chems.mzML')
def test_mean_scan_time_from_mzml(self):
    """Simulate Top-N with scan durations taken as means from an mzML file.

    Both the chemicals and the scan timings are extracted from ``MZML_FILE``;
    the run covers RT 500-600 and the environment is handed to ``check_mzML``.
    """
    mode = POSITIVE
    controller = TopNController(
        mode,
        10,    # N
        0.7,   # isolation width
        0.01,  # m/z tolerance
        15,    # RT tolerance
        10)    # minimum MS1 intensity

    # extract chemicals from the mzML file
    mixture = ChemicalMixtureFromMZML(
        MZML_FILE, roi_params=RoiParams(min_intensity=10, min_length=5))
    chemicals = mixture.sample(None, 2)

    # extract the mean timing per scan level from the mzML file
    scan_time_sampler = MzMLScanTimeSampler(MZML_FILE, use_mean=True)
    spectrometer = IndependentMassSpectrometer(
        mode, chemicals, scan_duration=scan_time_sampler)

    # run the simulation
    env = Environment(spectrometer, controller, 500, 600, progress_bar=True)
    set_log_level_warning()
    env.run()

    check_mzML(env, OUT_DIR, 'test_scan_time_mean_from_mzml.mzML')
def test_negative_fixed(self):
    """Check MS1 and MS2 integer m/z values for fixed chemicals in negative mode.

    Three chemicals are sampled and run through a Top-N controller. The
    first MS1 scan is compared position by position with 98, 198, 298, and
    every MS2 peak must truncate to one of the expected fragment values.
    """
    formula_sampler = EvenMZFormulaSampler()
    ms2_sampler = FixedMS2Sampler()
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=100, max_rt=101)
    chromatogram_sampler = ConstantChromatogramSampler()
    mixture_creator = ChemicalMixtureCreator(
        formula_sampler,
        ms2_sampler=ms2_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler,
        chromatogram_sampler=chromatogram_sampler)
    dataset = mixture_creator.sample(3, 2)

    spectrometer = IndependentMassSpectrometer(NEGATIVE, dataset)
    controller = TopNController(
        NEGATIVE,
        10,    # N
        0.7,   # isolation width
        10,    # m/z tolerance
        15,    # RT tolerance
        MIN_MS1_INTENSITY)
    env = Environment(spectrometer, controller, 102, 110, progress_bar=True)
    set_log_level_warning()
    env.run()

    # precursors seen in the first MS1 scan, truncated to integers
    expected_precursors = [98, 198, 298]
    first_ms1 = controller.scans[1][0]
    observed_precursors = [int(mz) for mz in first_ms1.mzs]
    for position, observed in enumerate(observed_precursors):
        assert observed == expected_precursors[position]

    # every fragment peak must be one of the expected integer values
    expected_fragments = {88, 78, 188, 178, 288, 278}
    for ms2_scan in controller.scans[2]:
        for mz in ms2_scan.mzs:
            assert int(mz) in expected_fragments
def test_aif_with_fixed_chems(self):
    """Check the alternating MS1 / AIF scan contents for one fixed chemical.

    Scans stored under level 1 alternate: even positions are plain MS1 scans
    whose first peak is at integer m/z 101, odd positions are AIF scans whose
    two lowest peaks are at integer m/z 81 and 91.
    """
    formula_sampler = EvenMZFormulaSampler()
    ms2_sampler = FixedMS2Sampler(n_frags=2)
    chromatogram_sampler = ConstantChromatogramSampler()
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=0, max_rt=1)
    mixture_creator = ChemicalMixtureCreator(
        formula_sampler,
        ms2_sampler=ms2_sampler,
        chromatogram_sampler=chromatogram_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler)
    chemicals = mixture_creator.sample(1, 2)

    controller = AIF(30)  # MS1 source CID energy
    spectrometer = IndependentMassSpectrometer(POSITIVE, chemicals)
    env = Environment(spectrometer, controller, 10, 20, progress_bar=True)
    set_log_level_warning()
    env.run()

    for position, scan in enumerate(controller.scans[1]):
        int_mzs = [int(mz) for mz in scan.mzs]
        if position % 2 == 0:
            # even scan, MS1 - should have a single peak at integer value 101
            assert int_mzs[0] == 101
            continue
        # odd scan, AIF - should have two peaks at 81 and 91
        int_mzs.sort()
        assert int_mzs[0] == 81
        assert int_mzs[1] == 91
def test_fullscan_from_mzml(self, chems_from_mzml):
    """Run an MS1-only simulation (RT 500-600) on chemicals taken from an mzML.

    The finished environment is handed to ``check_mzML``.
    """
    controller = SimpleMs1Controller()
    spectrometer = IndependentMassSpectrometer(POSITIVE, chems_from_mzml)
    env = Environment(spectrometer, controller, 500, 600, progress_bar=True)

    set_log_level_warning()
    env.run()

    check_mzML(env, OUT_DIR, 'fullscan_from_mzml.mzML')
def run_environment(env):
    """Run a simulation environment with the log level lowered to WARNING.

    The log level is switched back to DEBUG afterwards, even when
    ``env.run()`` raises, so a failing simulation does not leave the rest
    of the session with logging reduced.

    :param env: the environment to run; only its ``run()`` method is used
    :return: None
    """
    # set the log level to WARNING so we don't see too many messages
    # when the environment is running
    set_log_level_warning()
    logger.info('Running simulation')
    try:
        env.run()
        logger.info('Done')
    finally:
        # always set the log level back to DEBUG, including on failure
        set_log_level_debug()
def test_acquisition(self, two_fixed_chems):
    """Check that a multi-precursor MS2 scan combines the single-precursor scans.

    The schedule is one MS1 scan, one MS2 scan per target and a final MS2
    scan isolating both targets at once. The final scan must hold as many
    peaks as the two single-target scans together.
    """
    ionisation_mode = POSITIVE
    targets = [chem.mass + 1.0 for chem in two_fixed_chems]

    controller = FixedScansController()
    spectrometer = IndependentMassSpectrometer(ionisation_mode, two_fixed_chems)
    env = Environment(spectrometer, controller, 110, 112)

    def ms2_params(precursor_mz, precursor_intensity):
        # DDA scan parameters sharing isolation width and tolerances
        return get_dda_scan_param(
            precursor_mz, precursor_intensity, None,
            DEFAULT_ISOLATION_WIDTH, 0.1, 15,
            polarity=ionisation_mode)

    schedule = [
        get_default_scan_params(polarity=ionisation_mode),
        ms2_params(targets[0], 0.0),
        ms2_params(targets[1], 0.0),
        ms2_params(targets, [0.0, 0.0]),  # both targets in one scan
    ]
    controller.set_tasks(schedule)

    set_log_level_warning()
    env.run()

    ms2_scans = controller.scans[2]
    assert len(ms2_scans) == 3

    peak_counts = [scan.num_peaks for scan in ms2_scans]
    assert peak_counts[0] > 0
    assert peak_counts[1] > 0
    assert peak_counts[2] == peak_counts[0] + peak_counts[1]

    env.write_mzML(OUT_DIR, 'multi_windows.mzML')
def test_swath_more(self, even_chems):
    """
    Tests SWATH on evenly spaced chemicals with three window widths: with
    width 100 each chemical falls in its own window, with width 200 each
    window holds two chemicals, and with width 400 one window holds them all
    """
    def build(width):
        # fresh controller and environment for one SWATH window width
        swath = SWATH(50, 460, width, scan_overlap=0)
        spectrometer = IndependentMassSpectrometer(
            POSITIVE, even_chems, scan_duration={1: 0.124, 2: 0.124})
        environment = Environment(
            spectrometer, swath, 200, 300, progress_bar=True)
        return swath, environment

    def n_children(position):
        return len(even_chems[position].children)

    # width 100: one chemical per window
    controller, env = build(100)
    set_log_level_warning()
    env.run()
    ms2_scans = controller.scans[2]
    for i in range(4):
        assert len(ms2_scans[i].mzs) == n_children(i)

    # width 200: two chemicals per window
    controller2, env = build(200)
    env.run()
    ms2_scans2 = controller2.scans[2]
    assert len(ms2_scans2[0].mzs) == n_children(0) + n_children(1)
    assert len(ms2_scans2[1].mzs) == n_children(2) + n_children(3)

    # width 400: every chemical in a single window
    controller3, env = build(400)
    env.run()
    ms2_scans3 = controller3.scans[2]
    first_window_peaks = len(ms2_scans3[0].mzs)
    assert first_window_peaks == sum(len(c.children) for c in even_chems)
    assert first_window_peaks == sum(len(s.mzs) for s in ms2_scans2[:2])
def test_topn_from_mzml(self, chems_from_mzml):
    """Run Top-N (RT 500-600) on chemicals taken from an mzML file.

    The controller is checked for a non-empty MS2 scan and the environment
    is handed to ``check_mzML``.
    """
    mode = POSITIVE
    controller = TopNController(
        mode,
        10,    # N
        0.7,   # isolation width
        0.01,  # m/z tolerance
        15,    # RT tolerance
        10)    # minimum MS1 intensity
    spectrometer = IndependentMassSpectrometer(mode, chems_from_mzml)
    env = Environment(spectrometer, controller, 500, 600, progress_bar=True)

    set_log_level_warning()
    env.run()

    check_non_empty_MS2(controller)
    check_mzML(env, OUT_DIR, 'topn_from_mzml.mzML')
def run_experiment(param):
    """
    Runs a Top-N experiment unless both of its output files already exist

    :param param: the experimental parameters (a dictionary)
    :return: the analysis name, whether the run was executed or skipped
    """
    analysis_name = param['analysis_name']
    mzml_out = param['mzml_out']
    pickle_out = param['pickle_out']
    N = param['N']
    rt_tol = param['rt_tol']

    # nothing to do when both outputs are already on disk
    already_done = os.path.isfile(mzml_out) and os.path.isfile(pickle_out)
    if already_done:
        logger.debug('Skipping %s' % analysis_name)
        return analysis_name

    logger.debug('Processing %s' % analysis_name)

    peak_sampler = param['peak_sampler']
    if peak_sampler is None:
        # extract density from the fragmentation file
        # NOTE(review): the resulting sampler is not used further down
        peak_sampler = get_peak_sampler(
            param['mzml_path'],
            param['fragfiles'][(N, rt_tol)],
            param['min_rt'],
            param['max_rt'])

    mass_spec = IndependentMassSpectrometer(
        param['ionisation_mode'], param['data'])
    controller = TopNController(
        param['ionisation_mode'], param['N'], param['isolation_width'],
        param['mz_tol'], param['rt_tol'], param['min_ms1_intensity'])

    # create an environment to run both the mass spec and controller
    env = Environment(
        mass_spec, controller, param['min_rt'], param['max_rt'],
        progress_bar=param['pbar'])

    # keep logging quiet only while the simulation is running
    set_log_level_warning()
    env.run()
    set_log_level_debug()

    env.write_mzML(None, mzml_out)
    save_obj(controller, pickle_out)
    return analysis_name
def test_FixedScansController(self, two_fixed_chems):
    """Check that a fixed schedule of one MS1 and three MS2 scans is acquired.

    All three MS2 scans target the first chemical (mass + 1.0). The run must
    produce exactly one MS1 scan and three MS2 scans, each with peaks.
    """
    logger.info('Testing FixedScansController')

    ionisation_mode = POSITIVE
    targets = [chem.mass + 1.0 for chem in two_fixed_chems]

    controller = FixedScansController(schedule=None)
    spectrometer = IndependentMassSpectrometer(ionisation_mode, two_fixed_chems)
    env = Environment(spectrometer, controller, 110, 112)

    def ms2_on_first_target():
        # DDA scan parameters for the first target
        return get_dda_scan_param(
            targets[0], 0.0, None,
            DEFAULT_ISOLATION_WIDTH, 0.1, 15,
            polarity=ionisation_mode)

    schedule = [get_default_scan_params(polarity=ionisation_mode)]
    schedule.extend(ms2_on_first_target() for _ in range(3))
    controller.set_tasks(schedule)

    set_log_level_warning()
    env.run()

    assert len(controller.scans[1]) == 1
    assert len(controller.scans[2]) == 3
    for ms2_scan in controller.scans[2]:
        assert ms2_scan.num_peaks > 0

    env.write_mzML(OUT_DIR, 'fixedScansController.mzML')
def test_targeted(self):
    """Check that every target is fragmented n_replicates times per energy.

    Two targets and three collision energies are given to a
    ``TargetedController``. Each MS2 scan must match exactly one target by
    RT and precursor m/z, and each (target, energy) pair must be seen
    ``n_replicates`` times.
    """
    formula_sampler = EvenMZFormulaSampler()
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=0, max_rt=10)
    chromatogram_sampler = ConstantChromatogramSampler()
    ms2_sampler = FixedMS2Sampler()
    mixture_creator = ChemicalMixtureCreator(
        formula_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler,
        chromatogram_sampler=chromatogram_sampler,
        ms2_sampler=ms2_sampler)
    chemicals = mixture_creator.sample(2, 2)  # sample chems with m/z = 100 and 200

    targets = [
        Target(101, 100, 102, 10, 20, adduct='M+H'),
        Target(201, 200, 202, 10, 20, metadata={'a': 1}),
    ]
    ce_values = [10, 20, 30]
    n_replicates = 4
    controller = TargetedController(
        targets, ce_values, n_replicates=n_replicates, limit_acquisition=True)

    spectrometer = IndependentMassSpectrometer(POSITIVE, chemicals)
    env = Environment(spectrometer, controller, 5, 25, progress_bar=True)
    set_log_level_warning()
    env.run()

    # check that we got scans at every MS level the controller recorded
    for ms_level in controller.scans:
        assert len(controller.scans[ms_level]) > 0
    set_log_level_debug()

    # count the MS2 scans per target and collision energy
    target_counts = {}
    for target in targets:
        target_counts[target] = {ce: 0 for ce in ce_values}

    for scan in controller.scans[2]:
        params = scan.scan_params
        pmz = params.get(ScanParameters.PRECURSOR_MZ)[0].precursor_mz
        matching = [
            t for t in targets
            if (t.from_rt <= scan.rt <= t.to_rt)
            and (t.from_mz <= pmz <= t.to_mz)
        ]
        assert len(matching) == 1
        ce = params.get(ScanParameters.COLLISION_ENERGY)
        target_counts[matching[0]][ce] += 1

    for per_energy in target_counts.values():
        for count in per_energy.values():
            assert count == n_replicates
def test_default_scan_time(self, chems_from_mzml):
    """Run Top-N (RT 500-600) using ``DEFAULT_SCAN_TIME_DICT`` as scan durations.

    The finished environment is handed to ``check_mzML``.
    """
    mode = POSITIVE
    controller = TopNController(
        mode,
        10,    # N
        0.7,   # isolation width
        0.01,  # m/z tolerance
        15,    # RT tolerance
        10)    # minimum MS1 intensity

    # run the simulation using the default scan times
    spectrometer = IndependentMassSpectrometer(
        mode, chems_from_mzml, scan_duration=DEFAULT_SCAN_TIME_DICT)
    env = Environment(spectrometer, controller, 500, 600, progress_bar=True)
    set_log_level_warning()
    env.run()

    check_mzML(env, OUT_DIR, 'test_scan_time_default.mzML')
def test_multiple_isolation(self):
    """Check that multi-precursor MS2 scans are super-positions of single ones.

    With N = 3 the first seven MS2 scans are read as: three single
    precursors, three pairs, then all three together. Each combined scan's
    set of m/z values must equal the union of its single-precursor scans.
    """
    N = 3
    formula_sampler = EvenMZFormulaSampler()
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=0, max_rt=10)
    chromatogram_sampler = ConstantChromatogramSampler()
    ms2_sampler = FixedMS2Sampler()
    mixture_creator = ChemicalMixtureCreator(
        formula_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler,
        chromatogram_sampler=chromatogram_sampler,
        ms2_sampler=ms2_sampler)
    chemicals = mixture_creator.sample(3, 2)  # sample three chemicals

    controller = MultiIsolationController(N)
    spectrometer = IndependentMassSpectrometer(POSITIVE, chemicals)
    env = Environment(spectrometer, controller, 10, 20, progress_bar=True)
    set_log_level_warning()
    env.run()

    assert len(controller.scans[1]) > 0
    assert len(controller.scans[2]) > 0

    # look at the first block of MS2 scans, in acquisition order:
    # individual precursors, then the pairs, then all three together
    combos = [(0, ), (1, ), (2, ), (0, 1), (0, 2), (1, 2), (0, 1, 2)]
    ms2_scans = controller.scans[2]
    scan_by_combo = {}
    for position, combo in enumerate(combos):
        scan_by_combo[combo] = ms2_scans[position]

    # each scan must be the super-position of its single-precursor scans
    for combo, scan in scan_by_combo.items():
        expected_mzs = set()
        for member in combo:
            expected_mzs.update(scan_by_combo[(member, )].mzs)
        assert expected_mzs == set(scan.mzs)
def test_AIF_controller_with_beer_chems(self):
    """Run the AIF controller on the QC beer chemicals without noise.

    MS1 scans use a default window of m/z 100-500. The run spans
    ``BEER_MIN_BOUND`` to ``BEER_MAX_BOUND`` and the finished environment is
    handed to ``check_mzML``.
    """
    # the message previously said "Top-N controller", copied from another test
    logger.info('Testing AIF controller with QC beer chemicals')

    ionisation_mode = POSITIVE
    min_mz = 100
    max_mz = 500

    # create a simulated mass spec without noise and an AIF controller
    scan_time_dict = {1: 0.124, 2: 0.124}
    mass_spec = IndependentMassSpectrometer(ionisation_mode, BEER_CHEMS,
                                            scan_duration=scan_time_dict)
    params = AdvancedParams(default_ms1_scan_window=[min_mz, max_mz])
    ms1_source_cid_energy = 30
    controller = AIF(ms1_source_cid_energy, params=params)

    # create an environment to run both the mass spec and controller
    env = Environment(mass_spec, controller, BEER_MIN_BOUND, BEER_MAX_BOUND,
                      progress_bar=True)

    # set the log level to WARNING so we don't see too many messages
    # when the environment is running
    set_log_level_warning()

    # run the simulation
    env.run()

    # set the log level back to DEBUG
    set_log_level_debug()

    # write simulated output to mzML file
    filename = 'AIF_qcbeer_chems_no_noise.mzML'
    check_mzML(env, OUT_DIR, filename)
def run_WeightedDEW(chems, scan_duration, params, out_dir):
    """
    Simulate the WeightedDEW controller over a grid of t0 and rt_tol values

    One parameter set is built for every combination of
    ``params['t0_values']`` and ``params['rt_tol_values']`` and passed to
    ``run_single_WeightedDEW``. The runs are dispatched to an ipyparallel
    cluster when one is available, and executed serially otherwise.

    :param chems: a list of UnknownChemicals present in the injection
    :param scan_duration: scan duration setting, copied into each parameter set
    :param params: a dictionary of parameters; must contain 't0_values'
        and 'rt_tol_values'
    :param out_dir: output directory, copied into each parameter set
    :return: None
    """
    logger.info('Running WeightedDEW simulation')
    logger.info(params)
    warn_handler_id = set_log_level_warning()

    t0_values = params['t0_values']
    rt_tol_values = params['rt_tol_values']
    params_list = []
    for t0 in t0_values:
        for r in rt_tol_values:
            # copy params and add the additional attributes we need
            copy_params = dict(params)
            copy_params['t0'] = t0
            copy_params['r'] = r
            copy_params['chems'] = chems
            copy_params['scan_duration'] = scan_duration
            copy_params['out_dir'] = out_dir
            params_list.append(copy_params)

    # Try to run the controllers in parallel. If that fails, run serially.
    # The flag must start as False: it used to be assigned only inside the
    # except branches, so a successful parallel run hit an unbound local.
    run_serial = False
    logger.warning('Running controllers in parallel, please wait ...')
    try:
        import ipyparallel as ipp
    except ImportError:
        # without ipyparallel there is no cluster to talk to; importing
        # separately also keeps `ipp` bound for the except clause below
        run_serial = True
    else:
        try:
            rc = ipp.Client()
            dview = rc[:]  # use all engines
            with dview.sync_imports():
                pass
            dview.map_sync(run_single_WeightedDEW, params_list)
        except OSError:  # cluster has not been started
            run_serial = True
        except ipp.error.TimeoutError:  # takes too long to run
            run_serial = True

    if run_serial:  # if any exception from above, try to run it serially
        logger.warning(
            'IPython cluster not found, running controllers in serial mode')
        for copy_params in params_list:
            run_single_WeightedDEW(copy_params)

    set_log_level_debug(remove_id=warn_handler_id)
def run_vimms(no_injections, rt_box_size, mz_box_size):
    """Simulate repeated injections with a NonOverlapController and collect ROI boxes.

    All injections use the same 2000 sampled chemicals and share one grid
    estimator.

    :param no_injections: number of injections to simulate
    :param rt_box_size: RT box size passed to the locator grid
    :param mz_box_size: m/z box size passed to the locator grid
    :return: one list of boxes per injection, built from the controller's ROIs
    """
    min_rt, max_rt = 0, 1440
    ionisation_mode = POSITIVE
    isolation_width = 1
    N = 10
    rt_tol = 15
    mz_tol = 10
    min_ms1_intensity = 5000
    min_roi_intensity = 500
    min_roi_length = 3
    min_roi_length_for_fragmentation = 3

    locator = LocatorGrid(min_rt, max_rt, rt_box_size, 0, 3000, mz_box_size)
    grid = GridEstimator(locator, IdentityDrift())

    # the compound database is resolved relative to the working directory
    hmdbpath = os.path.join(os.path.abspath(os.getcwd()), "..", "..", "tests",
                            "fixtures", "hmdb_compounds.p")
    hmdb = load_obj(hmdbpath)
    formula_sampler = DatabaseFormulaSampler(hmdb, min_mz=100, max_mz=1000)
    mixture_creator = ChemicalMixtureCreator(
        formula_sampler, adduct_prior_dict={POSITIVE: {"M+H": 1}})
    chemicals = mixture_creator.sample(2000, 1)

    def simulate_injection():
        # one injection: new noise, instrument and controller, same grid
        mz_noise = GaussianPeakNoise(0.1)
        mass_spec = IndependentMassSpectrometer(
            POSITIVE, chemicals, mz_noise=mz_noise)
        controller = NonOverlapController(
            ionisation_mode, isolation_width, mz_tol, min_ms1_intensity,
            min_roi_intensity, min_roi_length, N, grid,
            rt_tol=rt_tol,
            min_roi_length_for_fragmentation=min_roi_length_for_fragmentation)
        env = Environment(
            mass_spec, controller, min_rt, max_rt, progress_bar=True)
        set_log_level_warning()
        env.run()
        return [
            roi.to_box(0.01, 0.01)
            for roi in controller.roi_builder.get_rois()
        ]

    return [simulate_injection() for _ in range(no_injections)]
def test_TopNDEW_agent(self):
    """Reuse one ``TopNDEWAgent`` across three consecutive controllers.

    Each run builds a new controller, mass spec and environment around the
    same agent and chemicals. MS1 scans are checked after every run, MS2
    scans only after the first two.
    """
    set_log_level_debug()
    formula_sampler = UniformMZFormulaSampler()
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=0, max_rt=80)
    chromatogram_sampler = GaussianChromatogramSampler(sigma=1)
    ms2_sampler = FixedMS2Sampler()
    mixture_creator = ChemicalMixtureCreator(
        formula_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler,
        chromatogram_sampler=chromatogram_sampler,
        ms2_sampler=ms2_sampler)
    chemicals = mixture_creator.sample(500, 2)
    ionisation_mode = POSITIVE

    # Example shows how the same Agent object can be used in consecutive
    # controllers
    agent = TopNDEWAgent(ionisation_mode, 10, 0.7, 10, 15, 1500)
    spike_noise = UniformSpikeNoise(0.1, 1000)

    # (output file, whether MS2 scans are still expected)
    runs = [
        ('shell.mzML', True),
        ('shell2.mzML', True),
        ('shell3.mzML', False),  # ms2 scans have been exhausted at this point
    ]
    for out_name, expect_ms2 in runs:
        controller = AgentBasedController(agent)
        mass_spec = IndependentMassSpectrometer(
            ionisation_mode, chemicals, spike_noise=spike_noise)
        env = Environment(mass_spec, controller, 0, 100, progress_bar=True)
        set_log_level_warning()
        env.run()

        check_non_empty_MS1(controller)
        if expect_ms2:
            check_non_empty_MS2(controller)
        check_mzML(env, OUT_DIR, out_name)
def main():
    """Match query MS2 spectra against cached spectral libraries and write the hits.

    Command-line arguments:
      * ``input_file_names``: one file, or several separated by commas; all
        are assumed to share the extension of the first (.mzML or .mgf)
      * ``library_cache``: folder holding pickled libraries as ``<name>.p``
      * ``libraries``: one or more library names to load from that folder
      * options for score threshold, tolerances, minimum matched peaks,
        output CSV path, log level and the MGF id field

    Every hit is written as a row of
    ``spec_id, library, hit_id, score, inchikey`` to the output CSV, followed
    by a short logged summary.

    The process exits with status 1 when the input format is unknown, a
    library file is missing, or no library folder is given. These paths
    previously exited with status 0, which reported success to the caller.
    """
    # kept as a module-level name so code reading it after main() still works
    global file_spectra

    parser = argparse.ArgumentParser(description='Limited dataset creation')
    parser.add_argument('input_file_names', type=str)
    parser.add_argument('library_cache', type=str)
    parser.add_argument('libraries', type=str, nargs='+')
    parser.add_argument('--score_thresh', dest='score_thresh', type=float,
                        default=0.7)
    parser.add_argument('--ms1_tol', dest='ms1_tol', type=float, default=1.)
    parser.add_argument('--ms2_tol', dest='ms2_tol', type=float, default=0.2)
    parser.add_argument('--min_matched_peaks', dest='min_matched_peaks',
                        type=int, default=1)
    parser.add_argument('--output_csv_file', dest='output_csv_file', type=str,
                        default='hits.csv')
    parser.add_argument('--log_level', dest='log_level', type=str,
                        default='warning')
    parser.add_argument('--mgf_id_field', dest='mgf_id_field', type=str,
                        default='SCANS')
    args = parser.parse_args()

    # a comma separates multiple files; str.split already yields a
    # one-element list when there is no comma, so no special case is needed
    input_file_names = args.input_file_names.split(',')

    # assume all the files have the same extension as the first one
    ext = os.path.splitext(input_file_names[0])[1].lower()
    if ext not in ('.mzml', '.mgf'):
        logger.warning("Unknown input file format -- should be .mzML or .mgf")
        sys.exit(1)

    # load the MS2 scans of every input file
    query_spectra = {}
    for input_file_name in input_file_names:
        if ext == '.mzml':
            file_spectra = load_scans_from_mzml(input_file_name)
        else:
            file_spectra = load_mgf(input_file_name,
                                    id_field=args.mgf_id_field,
                                    spectra={})
        logger.warning("Loaded {} MS2 spectra from {}".format(
            len(file_spectra), input_file_name))
        query_spectra[input_file_name] = file_spectra

    if args.log_level == 'warning':
        set_log_level_warning()
    elif args.log_level == 'debug':
        set_log_level_debug()

    if args.library_cache is None:
        logger.warning("You must supply a library folder")
        sys.exit(1)

    # load every requested library from the cache folder
    spec_libraries = {}
    for library in args.libraries:
        lib_file = os.path.join(args.library_cache, library + '.p')
        if not os.path.isfile(lib_file):
            logger.warning("Could not find {}".format(lib_file))
            sys.exit(1)
        logger.warning("Loading {}".format(lib_file))
        spec_libraries[library] = load_obj(lib_file)
        logger.warning("Loaded {}".format(lib_file))

    # match every query spectrum against every library
    all_hits = []
    for input_file_name, file_spectra in query_spectra.items():
        logger.warning('Processing {}'.format(input_file_name))
        for spec_id in tqdm(file_spectra.keys()):
            for library in spec_libraries:
                hits = spec_libraries[library].spectral_match(
                    file_spectra[spec_id],
                    score_thresh=args.score_thresh,
                    ms2_tol=args.ms2_tol,
                    ms1_tol=args.ms1_tol,
                    min_match_peaks=args.min_matched_peaks)
                for hit in hits:
                    all_hits.append([
                        spec_id, library, hit[0], hit[1],
                        hit[2].metadata['inchikey']
                    ])

    if not all_hits:
        logger.warning("No hits found!")
        return

    logger.warning('Writing output to {}'.format(args.output_csv_file))
    with open(args.output_csv_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['spec_id', 'library', 'hit_id', 'score', 'inchikey'])
        writer.writerows(all_hits)

    # summary: the part of an inchikey before the first '-' identifies
    # the structure
    spectra_with_hits = {hit[0] for hit in all_hits}
    structures = {
        hit[4].split('-')[0] for hit in all_hits if hit[4] is not None
    }
    logger.warning("{} unique spectra got hits".format(len(spectra_with_hits)))
    logger.warning("{} unique structures were hit".format(len(structures)))
def test_ms2_matching(self):
    """Check MS2 matching on one file versus a folder of simulated mzML files.

    Four chemical lists (two 'control', two 'case') are simulated with Top-N,
    each run seeded with the exclusion items collected in the previous run.
    Matching the whole folder against the MSP library must give the same
    second return value as matching the first file alone, and a larger first
    return value.
    """
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=10, max_rt=20)
    formula_sampler = UniformMZFormulaSampler()
    mixture_creator = ChemicalMixtureCreator(
        formula_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler,
        adduct_prior_dict={POSITIVE: {'M+H': 1}})
    base_chems = mixture_creator.sample(300, 2)

    group_list = ['control', 'control', 'case', 'case']
    group_dict = {
        'control': {
            'missing_probability': 0.0,
            'changing_probability': 0.0
        },
        'case': {
            'missing_probability': 0.0,
            'changing_probability': 1.0
        },
    }
    multi_creator = MultipleMixtureCreator(base_chems, group_list, group_dict)
    chem_lists = multi_creator.generate_chemical_lists()

    # Top-N settings
    N = 10
    isolation_width = 0.7
    mz_tol = 0.001
    rt_tol = 30
    min_ms1_intensity = 0

    set_log_level_warning()
    output_folder = os.path.join(OUT_DIR, 'ms2_matching')
    write_msp(base_chems, 'mmm.msp', out_dir=output_folder)

    initial_exclusion_list = []
    for run_index, chem_list in enumerate(chem_lists):
        controller = TopNController(
            POSITIVE, N, isolation_width, mz_tol, rt_tol, min_ms1_intensity,
            initial_exclusion_list=initial_exclusion_list)
        spectrometer = IndependentMassSpectrometer(POSITIVE, chem_list)
        env = Environment(spectrometer, controller, 10, 30, progress_bar=True)
        env.run()
        env.write_mzML(output_folder, '{}.mzML'.format(run_index))

        # carry the exclusion items of this run over to the next one
        exclusion = controller.exclusion.exclusion_list
        unique_items_mz = {entry.data for entry in exclusion.boxes_mz.items()}
        unique_items_rt = {entry.data for entry in exclusion.boxes_rt.items()}
        assert len(unique_items_mz) == len(unique_items_rt)
        initial_exclusion_list = list(unique_items_mz)
        logger.warning(len(initial_exclusion_list))

    set_log_level_debug()
    msp_file = os.path.join(output_folder, 'mmm.msp')

    # check with just the first file
    single_first, single_second = ms2_main(
        os.path.join(output_folder, '0.mzML'), msp_file, 1, 0.7)

    # check with all
    all_first, all_second = ms2_main(
        output_folder, os.path.join(output_folder, 'mmm.msp'), 1, 0.7)

    assert single_second == all_second
    assert all_first > single_first
# Fragment: `args` and `dataset` are defined outside the visible code.
# Simulate a Top-N acquisition with spike noise and write it to mzML.
spike_noise = UniformSpikeNoise(0.01, args.spike_max)
ms = IndependentMassSpectrometer(POSITIVE_IONISATION_MODE, dataset, spike_noise=spike_noise)
# presumably N=10, isolation width 0.7, mz_tol 0.01, rt_tol 15,
# min MS1 intensity 1e3 -- TODO confirm against TopNController's signature
controller = TopNController(POSITIVE_IONISATION_MODE, 10, 0.7, 0.01, 15, 1e3)
# the simulated time range is widened by 50 on both sides of the requested RT range
env = Environment(ms, controller, min_time=args.min_rt - 50, max_time=args.max_rt + 50)
set_log_level_warning()
env.run()
env.write_mzML(None, args.output_mzml_file)
# Optionally repeat the simulation with a SWATH controller on a fresh mass
# spec that reuses the same dataset and spike noise.
if args.output_swath_file is not None:
    sw = SWATH(args.min_mz, args.max_mz, 100, 0.0)
    ms = IndependentMassSpectrometer(POSITIVE_IONISATION_MODE, dataset, spike_noise=spike_noise)
    env = Environment(ms, sw, min_time=args.min_rt - 50, max_time=args.max_rt + 50)
    env.run()
    env.write_mzML(None, args.output_swath_file)
def test_AIF_controller_with_simulated_chems(self, fragscan_dataset):
    """Run the AIF controller on simulated chemicals, without and with noise.

    Both runs use an MS1 window of m/z 100-500 and the RT bounds given by
    ``get_rt_bounds(fragscan_dataset, CENTRE_RANGE)``. Each finished
    environment is handed to ``check_mzML``.
    """
    # the message previously said "Top-N controller", copied from another test
    logger.info('Testing AIF controller with simulated chemicals')

    # the fixture must provide the expected number of chemicals
    assert len(fragscan_dataset) == N_CHEMS

    ionisation_mode = POSITIVE
    min_mz = 100
    max_mz = 500
    scan_time_dict = {1: 0.12, 2: 0.06}

    def run_and_check(mass_spec, filename):
        # build a fresh AIF controller and environment around mass_spec,
        # run the simulation and check the mzML written to filename
        params = AdvancedParams(default_ms1_scan_window=[min_mz, max_mz])
        ms1_source_cid_energy = 30
        controller = AIF(ms1_source_cid_energy, params=params)

        # create an environment to run both the mass spec and controller
        min_bound, max_bound = get_rt_bounds(fragscan_dataset, CENTRE_RANGE)
        env = Environment(mass_spec, controller, min_bound, max_bound,
                          progress_bar=True)

        # set the log level to WARNING so we don't see too many messages
        # when the environment is running, then back to DEBUG
        set_log_level_warning()
        env.run()
        set_log_level_debug()

        # write simulated output to mzML file
        check_mzML(env, OUT_DIR, filename)

    # simulated mass spec without noise
    logger.info('Without noise')
    mass_spec = IndependentMassSpectrometer(ionisation_mode, fragscan_dataset,
                                            scan_duration=scan_time_dict)
    run_and_check(mass_spec, 'AIF_simulated_chems_no_noise.mzML')

    # simulated mass spec with level-specific m/z and intensity noise
    logger.info('With noise')
    mz_noise = GaussianPeakNoiseLevelSpecific({2: 0.01})
    intensity_noise = GaussianPeakNoiseLevelSpecific({2: 1000.})
    mass_spec = IndependentMassSpectrometer(ionisation_mode, fragscan_dataset,
                                            mz_noise=mz_noise,
                                            intensity_noise=intensity_noise,
                                            scan_duration=scan_time_dict)
    run_and_check(mass_spec, 'AIF_simulated_chems_with_noise.mzML')