def run_TopN(chems, scan_duration, params, out_dir):
    """
    Simulate a Top-N controller on the given chemicals.

    :param chems: a list of UnknownChemicals present in the injection
    :param scan_duration: value passed as ``scan_duration`` to the
        IndependentMassSpectrometer
    :param params: a dictionary of parameters; the keys read here are
        'controller_name', 'sample_name', 'ionisation_mode', 'N',
        'isolation_width', 'mz_tol', 'rt_tol', 'min_ms1_intensity',
        'min_rt' and 'max_rt'
    :param out_dir: output directory, handed to the Environment together
        with the generated output file name
    :return: None
    """
    logger.info('Running TopN simulation')
    logger.info(params)

    # the mzML file name is derived from the controller and sample names
    out_file = '%s_%s.mzML' % (params['controller_name'],
                               params['sample_name'])

    controller = TopNController(
        params['ionisation_mode'],
        params['N'],
        params['isolation_width'],
        params['mz_tol'],
        params['rt_tol'],
        params['min_ms1_intensity'],
    )
    mass_spec = IndependentMassSpectrometer(
        params['ionisation_mode'], chems, scan_duration=scan_duration)
    env = Environment(
        mass_spec,
        controller,
        params['min_rt'],
        params['max_rt'],
        progress_bar=True,
        out_dir=out_dir,
        out_file=out_file,
    )

    logger.info('Generating %s' % out_file)
    env.run()
def test_TopN_controller_with_beer_chems_and_scan_duration_dict(self):
    """Run Top-N on the QC beer chemicals with scan durations given as a dict."""
    logger.info('Testing Top-N controller with QC beer chemicals '
                'passing in the scan durations')

    ionisation_mode = POSITIVE
    N = 10
    isolation_width = 1
    mz_tol = 10
    rt_tol = 15
    # presumably maps MS level -> scan duration; confirm against the mass spec
    scan_duration_dict = {1: 0.2, 2: 0.1}

    # simulated mass spec (no noise) that receives the scan_duration dict,
    # paired with a Top-N controller
    mass_spec = IndependentMassSpectrometer(
        ionisation_mode, BEER_CHEMS, scan_duration=scan_duration_dict)
    controller = TopNController(
        ionisation_mode, N, isolation_width, mz_tol, rt_tol,
        MIN_MS1_INTENSITY)

    # the environment drives both the mass spec and the controller
    env = Environment(
        mass_spec, controller, BEER_MIN_BOUND, BEER_MAX_BOUND,
        progress_bar=True)
    run_environment(env)

    # there must be at least one non-empty MS2 scan
    check_non_empty_MS2(controller)

    # write the simulated output to an mzML file and check it
    check_mzML(
        env, OUT_DIR,
        'topN_controller_qcbeer_chems_no_noise_with_scan_duration.mzML')
def test_mean_scan_time_from_mzml(self):
    """Run Top-N with scan durations taken from an MzMLScanTimeSampler (use_mean=True)."""
    ionisation_mode = POSITIVE
    controller = TopNController(
        ionisation_mode,
        10,    # N
        0.7,   # isolation_width
        0.01,  # mz_tol
        15,    # rt_tol
        10,    # min_ms1_intensity
    )

    # extract chemicals from the mzML file
    roi_params = RoiParams(min_intensity=10, min_length=5)
    mixture = ChemicalMixtureFromMZML(MZML_FILE, roi_params=roi_params)
    chems = mixture.sample(None, 2)

    # scan-time sampler built from the same mzML, using mean timings
    scan_time_sampler = MzMLScanTimeSampler(MZML_FILE, use_mean=True)
    mass_spec = IndependentMassSpectrometer(
        ionisation_mode, chems, scan_duration=scan_time_sampler)

    # run the simulation with reduced logging
    env = Environment(mass_spec, controller, 500, 600, progress_bar=True)
    set_log_level_warning()
    env.run()

    check_mzML(env, OUT_DIR, 'test_scan_time_mean_from_mzml.mzML')
def test_neg(self, even_chems):
    """Check that a negative-mode Top-N run produces negative-polarity scans,
    both in the controller and in the written mzML file."""
    N = 10
    mass_spec = IndependentMassSpectrometer(NEGATIVE, even_chems)
    controller = TopNController(NEGATIVE, N, 0.7, 10, 15, 0, force_N=True)
    env = Environment(mass_spec, controller, 200, 300, progress_bar=True)
    run_environment(env)

    # every scan stored by the controller must carry negative polarity
    for ms_level in controller.scans:
        for scan in controller.scans[ms_level]:
            polarity = scan.scan_params.get(ScanParameters.POLARITY)
            assert polarity == NEGATIVE

    # integer m/z values expected in the first MS1 scan
    first_ms1_mzs = sorted(int(mz) for mz in controller.scans[1][0].mzs)
    for expected_mz in (98, 198, 298, 398):
        assert expected_mz in first_ms1_mzs

    filename = 'topn_negative.mzML'
    check_mzML(env, OUT_DIR, filename)

    # load the file and check polarity in the mzml
    reader = pymzml.run.Reader(os.path.join(OUT_DIR, filename))
    for spectrum in reader:
        # this is the negative scan accession
        assert spectrum.get('MS:1000129')
def top_n_experiment(datasets, base_chemicals, rt_range, N, isolation_width,
                     mz_tol, rt_tol, min_ms1_intensity,
                     ionisation_mode=POSITIVE):
    """
    Run one Top-N simulation per dataset and evaluate all runs together.

    :param datasets: sequence of chemical datasets, one simulated run each
    :param base_chemicals: passed as ``base_chemicals`` to
        evaluate_multiple_simulated_env
    :param rt_range: indexable whose items 0 and 1 are handed to the
        Environment as its third and fourth arguments
    :param N: Top-N controller ``N``
    :param isolation_width: Top-N controller isolation width
    :param mz_tol: Top-N controller m/z tolerance
    :param rt_tol: Top-N controller RT tolerance
    :param min_ms1_intensity: Top-N controller minimum MS1 intensity
    :param ionisation_mode: ionisation mode used for both the mass spec and
        the controller
    :return: tuple ``(env_list, final_evaluation)``
    """
    env_list = []
    for dataset in datasets:
        mass_spec = IndependentMassSpectrometer(ionisation_mode, dataset)
        controller = TopNController(
            ionisation_mode, N, isolation_width, mz_tol, rt_tol,
            min_ms1_intensity,
            ms1_shift=0, initial_exclusion_list=None, force_N=False)
        env = Environment(
            mass_spec, controller, rt_range[0], rt_range[1],
            progress_bar=True)
        env.run()
        env_list.append(env)

    final_evaluation = evaluate_multiple_simulated_env(
        env_list, base_chemicals=base_chemicals)
    return env_list, final_evaluation
def test_negative_fixed(self):
    """Negative-mode Top-N on three sampled chemicals with fixed MS2 peaks:
    check the integer m/z values of the first MS1 scan and of all MS2 scans."""
    formula_sampler = EvenMZFormulaSampler()
    ms2_sampler = FixedMS2Sampler()
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=100, max_rt=101)
    chromatogram_sampler = ConstantChromatogramSampler()
    mixture = ChemicalMixtureCreator(
        formula_sampler,
        ms2_sampler=ms2_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler,
        chromatogram_sampler=chromatogram_sampler)
    dataset = mixture.sample(3, 2)

    N = 10
    isolation_width = 0.7
    mz_tol = 10
    rt_tol = 15
    mass_spec = IndependentMassSpectrometer(NEGATIVE, dataset)
    controller = TopNController(
        NEGATIVE, N, isolation_width, mz_tol, rt_tol, MIN_MS1_INTENSITY)
    env = Environment(mass_spec, controller, 102, 110, progress_bar=True)
    set_log_level_warning()
    env.run()

    # precursors seen in the first MS1 scan, compared position by position
    expected_vals = [98, 198, 298]
    ms1_mz_vals = [int(mz) for mz in controller.scans[1][0].mzs]
    for position, observed in enumerate(ms1_mz_vals):
        assert observed == expected_vals[position]

    # every fragment peak must be one of the expected integer m/z values
    expected_frags = {88, 78, 188, 178, 288, 278}
    for ms2_scan in controller.scans[2]:
        for mz in ms2_scan.mzs:
            assert int(mz) in expected_frags
def test_TopN_controller_with_simulated_chems(self, fragscan_dataset):
    """Run Top-N on the simulated ``fragscan_dataset`` chemicals without noise."""
    logger.info(
        'Testing Top-N controller with simulated chemicals -- no noise')
    assert len(fragscan_dataset) == N_CHEMS

    ionisation_mode = POSITIVE
    N = 10
    isolation_width = 1
    mz_tol = 10
    rt_tol = 15

    # simulated mass spec without noise plus a Top-N controller
    mass_spec = IndependentMassSpectrometer(ionisation_mode, fragscan_dataset)
    controller = TopNController(
        ionisation_mode, N, isolation_width, mz_tol, rt_tol,
        MIN_MS1_INTENSITY)

    # the environment runs both the mass spec and the controller
    min_bound, max_bound = get_rt_bounds(fragscan_dataset, CENTRE_RANGE)
    env = Environment(
        mass_spec, controller, min_bound, max_bound, progress_bar=True)
    run_environment(env)

    # there must be at least one non-empty MS2 scan
    check_non_empty_MS2(controller)

    check_mzML(env, OUT_DIR, 'topN_controller_simulated_chems_no_noise.mzML')
def test_TopN_controller_with_beer_chems(self):
    """Run Top-N on the QC beer chemicals (no noise) and check an mzML file is written."""
    logger.info('Testing Top-N controller with QC beer chemicals')

    ionisation_mode = POSITIVE
    N = 10
    isolation_width = 1
    mz_tol = 10
    rt_tol = 15

    # simulated mass spec without noise plus a Top-N controller
    mass_spec = IndependentMassSpectrometer(
        ionisation_mode, beer_chems, self.ps, add_noise=False)
    controller = TopNController(
        ionisation_mode, N, isolation_width, mz_tol, rt_tol,
        min_ms1_intensity)

    # the environment runs both the mass spec and the controller
    env = Environment(mass_spec, controller, min_rt, max_rt,
                      progress_bar=True)

    # log at WARNING while the simulation runs to keep the output short,
    # then switch back to DEBUG
    set_log_level_warning()
    env.run()
    set_log_level_debug()

    # write simulated output to mzML file and check that it exists
    filename = 'topN_controller_qcbeer_chems_no_noise.mzML'
    expected_path = os.path.join(out_dir, filename)
    env.write_mzML(out_dir, filename)
    self.assertTrue(os.path.exists(expected_path))
    print()
def test_mass_spec(self):
    """Run Top-N on the QC beer chemicals with a mass spec that is given a
    TaskManager(buffer_size=3)."""
    logger.info(
        'Testing mass spec using the Top-N controller and QC beer chemicals'
    )

    ionisation_mode = POSITIVE
    N = 10
    isolation_width = 1
    mz_tol = 10
    rt_tol = 15

    mass_spec = IndependentMassSpectrometer(
        ionisation_mode, BEER_CHEMS,
        task_manager=TaskManager(buffer_size=3))
    controller = TopNController(
        ionisation_mode, N, isolation_width, mz_tol, rt_tol,
        MIN_MS1_INTENSITY)

    # the environment runs both the mass spec and the controller; it is run
    # directly here rather than through run_environment
    env = Environment(
        mass_spec, controller, BEER_MIN_BOUND, BEER_MAX_BOUND,
        progress_bar=True)
    env.run()

    # there must be at least one non-empty MS2 scan
    check_non_empty_MS2(controller)

    # write simulated output to mzML file
    check_mzML(env, OUT_DIR, 'test_mass_spec.mzML')
def top_n_experiment_evaluation(datasets, min_rt, max_rt, N, isolation_window,
                                mz_tol, rt_tol, min_ms1_intensity,
                                base_chemicals=None, mzmine_files=None,
                                rt_tolerance=100, experiment_dir=None,
                                progress_bar=False):
    """
    Run a Top-N simulation for every dataset and evaluate the results.

    When ``base_chemicals`` is given, the simulated environments are
    evaluated against it. Otherwise each run is written to
    ``experiment_dir`` as ``sample_<i>.mzml`` and evaluated through a
    RoiAligner together with the matching entry of ``mzmine_files``.

    :param datasets: sequence of chemical datasets, one simulated run each
    :param min_rt: start RT passed to each Environment
    :param max_rt: end RT passed to each Environment
    :param N: Top-N controller ``N``
    :param isolation_window: Top-N controller isolation width
    :param mz_tol: Top-N controller m/z tolerance
    :param rt_tol: Top-N controller RT tolerance
    :param min_ms1_intensity: Top-N controller minimum MS1 intensity
    :param base_chemicals: chemicals to evaluate against, or None
    :param mzmine_files: picked-peak files, indexed in step with
        ``datasets``; only used when ``base_chemicals`` is None
    :param rt_tolerance: ``rt_tolerance`` of the RoiAligner
    :param experiment_dir: directory for the mzML files; required when
        ``base_chemicals`` is None
    :param progress_bar: passed to each Environment; when it is exactly
        False a line is printed after every processed dataset
    :return: ``(env_list, evaluation)``, or ``(None, None)`` when neither
        ``base_chemicals`` nor ``mzmine_files`` is given
    :raises ValueError: if files have to be written but ``experiment_dir``
        is None
    """
    # nothing to evaluate against
    if base_chemicals is None and mzmine_files is None:
        return None, None

    # Fail before running any simulation: without this check the first
    # os.path.join(experiment_dir, ...) below raises an opaque TypeError
    # only after a complete run has already been simulated.
    if base_chemicals is None and experiment_dir is None:
        raise ValueError(
            'experiment_dir must be provided when base_chemicals is None')

    env_list = []
    mzml_files = []
    source_files = ['sample_' + str(i) for i in range(len(datasets))]

    for i, dataset in enumerate(datasets):
        mass_spec = IndependentMassSpectrometer(POSITIVE, dataset)
        controller = TopNController(
            POSITIVE, N, isolation_window, mz_tol, rt_tol,
            min_ms1_intensity,
            ms1_shift=0, initial_exclusion_list=None, force_N=False)
        env = Environment(mass_spec, controller, min_rt, max_rt,
                          progress_bar=progress_bar)
        env.run()
        if progress_bar is False:
            print('Processed dataset ' + str(i))
        env_list.append(env)

        # the mzML files are only needed for the peak-picking evaluation
        if base_chemicals is None:
            mzml_name = source_files[i] + '.mzml'
            mzml_files.append(os.path.join(experiment_dir, mzml_name))
            env.write_mzML(experiment_dir, mzml_name)

    if base_chemicals is not None:
        evaluation = evaluate_multiple_simulated_env(
            env_list, base_chemicals=base_chemicals)
    else:
        roi_aligner = RoiAligner(rt_tolerance=rt_tolerance)
        for i, mzml_file in enumerate(mzml_files):
            roi_aligner.add_picked_peaks(
                mzml_file, mzmine_files[i], source_files[i], 'mzmine')
        evaluation = evaluate_multi_peak_roi_aligner(
            roi_aligner, source_files)
    return env_list, evaluation
def test_topn_from_mzml(self, chems_from_mzml):
    """Run Top-N on the ``chems_from_mzml`` chemicals and check MS2 scans and mzML output."""
    ionisation_mode = POSITIVE
    controller = TopNController(
        ionisation_mode,
        10,    # N
        0.7,   # isolation_width
        0.01,  # mz_tol
        15,    # rt_tol
        10,    # min_ms1_intensity
    )
    mass_spec = IndependentMassSpectrometer(ionisation_mode, chems_from_mzml)
    env = Environment(mass_spec, controller, 500, 600, progress_bar=True)

    # keep logging at WARNING while the simulation runs
    set_log_level_warning()
    env.run()

    check_non_empty_MS2(controller)
    check_mzML(env, OUT_DIR, 'topn_from_mzml.mzML')
def test_exclusion_simple_data(self):
    """Three chemicals are all fragmented in the first run; the exclusion
    items carried over from each run mean none are fragmented in the
    second and third runs."""
    formula_sampler = EvenMZFormulaSampler()
    chromatogram_sampler = ConstantChromatogramSampler()
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=0, max_rt=5)
    mixture = ChemicalMixtureCreator(
        formula_sampler,
        chromatogram_sampler=chromatogram_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler)
    n_chems = 3
    dataset = mixture.sample(n_chems, 2)

    ionisation_mode = POSITIVE
    N = 10
    isolation_width = 1
    mz_tol = 10
    rt_tol = 30
    min_ms1_intensity = 0

    initial_exclusion_list = []
    all_controllers = []
    for _ in range(3):
        mass_spec = IndependentMassSpectrometer(ionisation_mode, dataset)
        controller = TopNController(
            ionisation_mode, N, isolation_width, mz_tol, rt_tol,
            min_ms1_intensity,
            initial_exclusion_list=initial_exclusion_list)
        env = Environment(mass_spec, controller, 0, 20, progress_bar=True)
        run_environment(env)

        # the distinct exclusion items of this run seed the next run
        exclusion_list = controller.exclusion.exclusion_list
        unique_items_mz = {
            interval.data for interval in exclusion_list.boxes_mz.items()}
        unique_items_rt = {
            interval.data for interval in exclusion_list.boxes_rt.items()}
        assert len(unique_items_mz) == len(unique_items_rt)
        initial_exclusion_list = list(unique_items_mz)

        all_controllers.append(controller)

    first, second, third = all_controllers
    assert len(first.scans[2]) == n_chems
    assert len(second.scans[2]) == 0
    assert len(third.scans[2]) == 0
def run_experiment(param):
    """
    Runs a Top-N experiment, unless both of its output files already exist.

    :param param: the experimental parameters; the keys read here are
        'analysis_name', 'mzml_out', 'pickle_out', 'N', 'rt_tol',
        'peak_sampler', 'ionisation_mode', 'data', 'isolation_width',
        'mz_tol', 'min_ms1_intensity', 'min_rt', 'max_rt' and 'pbar', plus
        'mzml_path' and 'fragfiles' when 'peak_sampler' is None
    :return: the analysis name that has been successfully ran
    """
    analysis_name = param['analysis_name']
    mzml_out = param['mzml_out']
    pickle_out = param['pickle_out']
    N = param['N']
    rt_tol = param['rt_tol']

    # both outputs are already on disk, so there is nothing to do
    if os.path.isfile(mzml_out) and os.path.isfile(pickle_out):
        logger.debug('Skipping %s' % (analysis_name))
        return analysis_name

    logger.debug('Processing %s' % (analysis_name))

    # NOTE(review): peak_sampler is not used by anything below in this
    # function -- confirm whether this lookup can be removed
    peak_sampler = param['peak_sampler']
    if peak_sampler is None:
        # extract density from the fragmentation file
        fragfile = param['fragfiles'][(N, rt_tol)]
        peak_sampler = get_peak_sampler(
            param['mzml_path'], fragfile, param['min_rt'], param['max_rt'])

    mass_spec = IndependentMassSpectrometer(
        param['ionisation_mode'], param['data'])
    controller = TopNController(
        param['ionisation_mode'], N, param['isolation_width'],
        param['mz_tol'], rt_tol, param['min_ms1_intensity'])

    # create an environment to run both the mass spec and controller
    env = Environment(mass_spec, controller, param['min_rt'],
                      param['max_rt'], progress_bar=param['pbar'])

    # Log at WARNING only while the simulation runs. Restoring in `finally`
    # keeps a failed run from leaving the level at WARNING.
    set_log_level_warning()
    try:
        env.run()
    finally:
        set_log_level_debug()

    env.write_mzML(None, mzml_out)
    save_obj(controller, pickle_out)
    return analysis_name
def test_TopN_forceN(self, ten_chems):
    """With force_N=True, exactly N scans must lie between consecutive MS1 scans."""
    N = 20
    mass_spec = IndependentMassSpectrometer(POSITIVE, ten_chems)
    controller = TopNController(POSITIVE, N, 0.7, 10, 15, 0, force_N=True)
    env = Environment(mass_spec, controller, 200, 300, progress_bar=True)
    run_environment(env)

    # all MS1 and MS2 scans, ordered by RT
    all_scans = sorted(controller.scans[1] + controller.scans[2],
                       key=lambda scan: scan.rt)

    # positions of the MS1 scans within the RT-ordered list
    ms1_pos = [position for position, scan in enumerate(all_scans)
               if scan.ms_level == 1]

    # number of scans strictly between each pair of neighbouring MS1 scans
    for current, following in zip(ms1_pos, ms1_pos[1:]):
        assert following - (current + 1) == N
def test_TopN_controller_with_beer_chems_and_initial_exclusion_list(self):
    """Run Top-N three times on the QC beer chemicals, seeding each run with
    the exclusion items collected from the previous one."""
    logger.info('Testing Top-N controller with QC beer chemicals and '
                'an initial exclusion list')

    ionisation_mode = POSITIVE
    N = 10
    isolation_width = 1
    mz_tol = 10
    rt_tol = 15

    initial_exclusion_list = []
    for run_index in range(3):
        mass_spec = IndependentMassSpectrometer(ionisation_mode, BEER_CHEMS)
        controller = TopNController(
            ionisation_mode, N, isolation_width, mz_tol, rt_tol,
            MIN_MS1_INTENSITY,
            initial_exclusion_list=initial_exclusion_list)
        env = Environment(
            mass_spec, controller, BEER_MIN_BOUND, BEER_MAX_BOUND,
            progress_bar=True)
        run_environment(env)

        # the distinct exclusion items of this run seed the next run
        exclusion_list = controller.exclusion.exclusion_list
        unique_items_mz = {
            interval.data for interval in exclusion_list.boxes_mz.items()}
        unique_items_rt = {
            interval.data for interval in exclusion_list.boxes_rt.items()}
        assert len(unique_items_mz) == len(unique_items_rt)
        initial_exclusion_list = list(unique_items_mz)

        # there must be at least one non-empty MS2 scan
        check_non_empty_MS2(controller)

        # write simulated output to mzML file
        filename = 'topN_controller_qcbeer_exclusion_%d.mzML' % run_index
        check_mzML(env, OUT_DIR, filename)
def test_default_scan_time(self, chems_from_mzml):
    """Run Top-N with DEFAULT_SCAN_TIME_DICT as the scan duration and check the mzML output."""
    ionisation_mode = POSITIVE
    controller = TopNController(
        ionisation_mode,
        10,    # N
        0.7,   # isolation_width
        0.01,  # mz_tol
        15,    # rt_tol
        10,    # min_ms1_intensity
    )

    # run simulation using default scan times
    mass_spec = IndependentMassSpectrometer(
        ionisation_mode, chems_from_mzml,
        scan_duration=DEFAULT_SCAN_TIME_DICT)
    env = Environment(mass_spec, controller, 500, 600, progress_bar=True)
    set_log_level_warning()
    env.run()

    check_mzML(env, OUT_DIR, 'test_scan_time_default.mzML')
def create_controller(controller_method, param_dict):
    """
    Build the controller named by ``controller_method`` from ``param_dict``.

    :param controller_method: one of 'TopN_RoiController',
        'TopN_SmartRoiController', 'TopNController' or
        'WeightedDewController'
    :param param_dict: dictionary holding the constructor arguments; only
        the keys needed by the selected controller are read
    :return: the constructed controller, or None (after a warning has been
        logged) when ``controller_method`` is not recognised
    """
    # Default for the unrecognised case, so the warning below is followed by
    # a well-defined return value instead of an UnboundLocalError.
    controller = None

    # One single if/elif chain: 'TopN_RoiController' used to be tested in a
    # separate `if`, so it also fell through to the final `else` and logged
    # a spurious 'Invalid controller_method' warning.
    if controller_method == 'TopN_RoiController':
        controller = TopN_RoiController(
            param_dict['ionisation_mode'],
            param_dict['isolation_width'],
            param_dict['mz_tol'],
            param_dict['min_ms1_intensity'],
            param_dict['min_roi_intensity'],
            param_dict['min_roi_length'],
            param_dict['N'],
            param_dict['rt_tol'],
            param_dict['min_roi_length_for_fragmentation'],
            param_dict['length_units'],
            param_dict['ms1_shift'],
            param_dict['params'])
    elif controller_method == 'TopN_SmartRoiController':
        controller = TopN_SmartRoiController(
            param_dict['ionisation_mode'],
            param_dict['isolation_width'],
            param_dict['mz_tol'],
            param_dict['min_ms1_intensity'],
            param_dict['min_roi_intensity'],
            param_dict['min_roi_length'],
            param_dict['N'],
            param_dict['rt_tol'],
            param_dict['min_roi_length_for_fragmentation'],
            param_dict['reset_length_seconds'],
            param_dict['intensity_increase_factor'],
            param_dict['length_units'],
            param_dict['drop_perc'],
            param_dict['ms1_shift'],
            param_dict['params'])
    elif controller_method == 'TopNController':
        controller = TopNController(
            param_dict['ionisation_mode'],
            param_dict['N'],
            param_dict['isolation_width'],
            param_dict['mz_tol'],
            param_dict['rt_tol'],
            param_dict['min_ms1_intensity'],
            param_dict['ms1_shift'],
            param_dict['initial_exclusion_list'],
            param_dict['params'])
    elif controller_method == 'WeightedDewController':
        controller = WeightedDEWController(
            param_dict['ionisation_mode'],
            param_dict['N'],
            param_dict['isolation_width'],
            param_dict['mz_tol'],
            param_dict['rt_tol'],
            param_dict['min_ms1_intensity'],
            param_dict['ms1_shift'],
            param_dict['exclusion_t_0'],
            param_dict['log_intensity'],
            param_dict['params'])
    else:
        logger.warning('Invalid controller_method')
    return controller
def top_n_evaluation(param_dict):
    """
    Run a Top-N controller on a stored mass spec and report its coverage.

    :param param_dict: dictionary of settings; the keys read here are
        'mass_spec_file', 'params_file', 'ionisation_mode', 'N',
        'isolation_width', 'mz_tol', 'rt_tol', 'min_ms1_intensity',
        'min_rt', 'max_rt', 'save_file_name', 'box_file',
        'half_isolation_window' and 'coverage_type'
    :return: the result of run_coverage_evaluation when
        ``param_dict['coverage_type']`` equals 'coverage', otherwise the
        result of run_env
    """
    # both the mass spec and the advanced params are read back via load_obj
    mass_spec = load_obj(param_dict['mass_spec_file'])
    params = load_obj(param_dict['params_file'])

    controller = TopNController(
        param_dict['ionisation_mode'],
        param_dict['N'],
        param_dict['isolation_width'],
        param_dict['mz_tol'],
        param_dict['rt_tol'],
        param_dict['min_ms1_intensity'],
        params=params)

    chemical_coverage = run_env(
        mass_spec, controller,
        param_dict['min_rt'], param_dict['max_rt'],
        param_dict['save_file_name'])
    coverage = run_coverage_evaluation(
        param_dict['box_file'],
        param_dict['save_file_name'],
        param_dict['half_isolation_window'])

    print('coverage', coverage)
    print('chemical_coverage', chemical_coverage)

    wants_coverage = param_dict['coverage_type'] == 'coverage'
    return coverage if wants_coverage else chemical_coverage
def dsda_experiment_evaluation(datasets, base_dir, min_rt, max_rt, N,
                               isolation_window, mz_tol, rt_tol,
                               min_ms1_intensity, mzmine_files=None,
                               rt_tolerance=100, progress_bar=False):
    """
    Simulate a DsDA experiment over ``datasets`` and evaluate the runs.

    The first dataset is acquired with a Top-N controller. Every later
    dataset uses a FixedScansController whose schedule is obtained from
    ``get_schedule`` for that run index. Each run is written to
    ``<base_dir>/Data`` as ``sample_NNN.mzml``.

    :param datasets: sequence of chemical datasets, one simulated run each
    :param base_dir: base directory; 'Data', 'settings' and
        'DsDA_Timing_schedule.csv' are resolved relative to it
    :param min_rt: start RT passed to each Environment
    :param max_rt: end RT passed to each Environment
    :param N: ``N`` of the initial schedule and of the Top-N controller
    :param isolation_window: isolation width of the controllers
    :param mz_tol: m/z tolerance
    :param rt_tol: RT tolerance
    :param min_ms1_intensity: Top-N controller minimum MS1 intensity
    :param mzmine_files: picked-peak files indexed in step with
        ``datasets``; when None the simulated environments are evaluated
        directly
    :param rt_tolerance: ``rt_tolerance`` of the RoiAligner
    :param progress_bar: passed to each Environment; when it is exactly
        False a line is printed after every processed dataset
    :return: tuple ``(env_list, evaluation)``
    """
    data_dir = os.path.join(base_dir, 'Data')
    schedule_dir = os.path.join(base_dir, 'settings')

    # necessary to get timings for schedule
    mass_spec = IndependentMassSpectrometer(POSITIVE, datasets[0])
    create_dsda_schedule(mass_spec, N, min_rt, max_rt, base_dir)
    print('Please open and run R script now')
    time.sleep(1)
    template_file = os.path.join(base_dir, 'DsDA_Timing_schedule.csv')

    env_list = []
    mzml_files = []
    source_files = ['sample_' + "%03d" % i for i in range(len(datasets))]

    for i, dataset in enumerate(datasets):
        mass_spec = IndependentMassSpectrometer(POSITIVE, dataset)
        if i == 0:
            # the first run has no schedule yet, so use plain Top-N
            controller = TopNController(
                POSITIVE, N, isolation_window, mz_tol, rt_tol,
                min_ms1_intensity,
                ms1_shift=0, initial_exclusion_list=None, force_N=False)
        else:
            print('Looking for next schedule')
            new_schedule = get_schedule(i, schedule_dir)
            print('Found next schedule')
            time.sleep(1)
            schedule_param_list = dsda_get_scan_params(
                new_schedule, template_file, isolation_window, mz_tol,
                rt_tol)
            controller = FixedScansController(schedule=schedule_param_list)

        env = Environment(mass_spec, controller, min_rt, max_rt,
                          progress_bar=progress_bar)
        env.run()
        if progress_bar is False:
            print('Processed dataset ' + str(i))
        env_list.append(env)

        mzml_name = source_files[i] + '.mzml'
        mzml_files.append(os.path.join(data_dir, mzml_name))
        print("Processed ", i + 1, " files")
        env.write_mzML(data_dir, mzml_name)
        print("Waiting for R to process .mzML files")

    if mzmine_files is None:
        evaluation = evaluate_multiple_simulated_env(env_list)
    else:
        roi_aligner = RoiAligner(rt_tolerance=rt_tolerance)
        for i, mzml_file in enumerate(mzml_files):
            roi_aligner.add_picked_peaks(
                mzml_file, mzmine_files[i], source_files[i], 'mzmine')
        evaluation = evaluate_multi_peak_roi_aligner(
            roi_aligner, source_files)

    # The former trailing `else: return None, None` had no matching `if`.
    # The result is now returned on every path, and only after all picked
    # peaks have been added to the aligner.
    return env_list, evaluation
def test_TopN_controller_advanced_params(self):
    """Check that non-default AdvancedParams values given to the Top-N
    controller show up in the scan parameters of the MS1 and MS2 scans."""
    # set some values that are not the defaults, so we know they're passed
    # correctly
    params = AdvancedParams(
        default_ms1_scan_window=(10.0, 2000.0),
        ms1_agc_target=100000,
        ms1_max_it=500,
        ms1_collision_energy=200,
        ms1_orbitrap_resolution=100000,
        ms1_activation_type='CID',
        ms1_mass_analyser='IonTrap',
        ms1_isolation_mode='IonTrap',
        ms1_source_cid_energy=10,
        ms2_agc_target=50000,
        ms2_max_it=250,
        ms2_collision_energy=300,
        ms2_orbitrap_resolution=100000,
        ms2_activation_type='CID',
        ms2_mass_analyser='IonTrap',
        ms2_isolation_mode='IonTrap',
        ms2_source_cid_energy=20)

    ionisation_mode = POSITIVE
    N = 10
    isolation_width = 1
    mz_tol = 10
    rt_tol = 15

    # simulated mass spec without noise plus a Top-N controller
    mass_spec = IndependentMassSpectrometer(ionisation_mode, BEER_CHEMS)
    controller = TopNController(
        ionisation_mode, N, isolation_width, mz_tol, rt_tol,
        MIN_MS1_INTENSITY, params=params)

    # the environment runs both the mass spec and the controller
    env = Environment(
        mass_spec, controller, BEER_MIN_BOUND, BEER_MAX_BOUND,
        progress_bar=True)
    run_environment(env)

    # check that some of the scan parameters returned are actually what we
    # set; first on the first MS1 scan ...
    ms1_scan_params = controller.scans[1][0].scan_params
    ms1_expected = [
        (ScanParameters.FIRST_MASS, params.default_ms1_scan_window[0]),
        (ScanParameters.LAST_MASS, params.default_ms1_scan_window[1]),
        (ScanParameters.AGC_TARGET, params.ms1_agc_target),
        (ScanParameters.MAX_IT, params.ms1_max_it),
        (ScanParameters.COLLISION_ENERGY, params.ms1_collision_energy),
        (ScanParameters.ORBITRAP_RESOLUTION, params.ms1_orbitrap_resolution),
        (ScanParameters.ACTIVATION_TYPE, params.ms1_activation_type),
        (ScanParameters.MASS_ANALYSER, params.ms1_mass_analyser),
        (ScanParameters.ISOLATION_MODE, params.ms1_isolation_mode),
        (ScanParameters.SOURCE_CID_ENERGY, params.ms1_source_cid_energy),
    ]
    for key, expected in ms1_expected:
        assert ms1_scan_params.get(key) == expected

    # ... then on the first MS2 scan
    ms2_scan_params = controller.scans[2][0].scan_params
    ms2_expected = [
        (ScanParameters.AGC_TARGET, params.ms2_agc_target),
        (ScanParameters.MAX_IT, params.ms2_max_it),
        (ScanParameters.COLLISION_ENERGY, params.ms2_collision_energy),
        (ScanParameters.ORBITRAP_RESOLUTION, params.ms2_orbitrap_resolution),
        (ScanParameters.ACTIVATION_TYPE, params.ms2_activation_type),
        (ScanParameters.MASS_ANALYSER, params.ms2_mass_analyser),
        (ScanParameters.ISOLATION_MODE, params.ms2_isolation_mode),
        (ScanParameters.SOURCE_CID_ENERGY, params.ms2_source_cid_energy),
    ]
    for key, expected in ms2_expected:
        assert ms2_scan_params.get(key) == expected
def test_ms2_matching(self):
    """Simulate four Top-N runs (two 'control', two 'case') with a carried-over
    exclusion list, then compare ms2_main on the first mzML file alone
    against ms2_main on the whole output folder."""
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=10, max_rt=20)
    formula_sampler = UniformMZFormulaSampler()
    adduct_prior_dict = {POSITIVE: {'M+H': 1}}
    mixture = ChemicalMixtureCreator(
        formula_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler,
        adduct_prior_dict=adduct_prior_dict)
    base_chems = mixture.sample(300, 2)

    group_list = ['control', 'control', 'case', 'case']
    group_dict = {
        'control': {
            'missing_probability': 0.0,
            'changing_probability': 0.0
        },
        'case': {
            'missing_probability': 0.0,
            'changing_probability': 1.0
        },
    }
    multi_mixture = MultipleMixtureCreator(base_chems, group_list, group_dict)
    chemical_lists = multi_mixture.generate_chemical_lists()

    N = 10
    isolation_width = 0.7
    mz_tol = 0.001
    rt_tol = 30
    min_ms1_intensity = 0

    set_log_level_warning()

    output_folder = os.path.join(OUT_DIR, 'ms2_matching')
    write_msp(base_chems, 'mmm.msp', out_dir=output_folder)

    initial_exclusion_list = []
    for run_index, chem_list in enumerate(chemical_lists):
        controller = TopNController(
            POSITIVE, N, isolation_width, mz_tol, rt_tol,
            min_ms1_intensity,
            initial_exclusion_list=initial_exclusion_list)
        mass_spec = IndependentMassSpectrometer(POSITIVE, chem_list)
        env = Environment(mass_spec, controller, 10, 30, progress_bar=True)
        env.run()
        env.write_mzML(output_folder, '{}.mzML'.format(run_index))

        # the distinct exclusion items of this run seed the next run
        exclusion_list = controller.exclusion.exclusion_list
        unique_items_mz = {
            interval.data for interval in exclusion_list.boxes_mz.items()}
        unique_items_rt = {
            interval.data for interval in exclusion_list.boxes_rt.items()}
        assert len(unique_items_mz) == len(unique_items_rt)
        initial_exclusion_list = list(unique_items_mz)
        logger.warning(len(initial_exclusion_list))

    set_log_level_debug()

    msp_file = os.path.join(output_folder, 'mmm.msp')

    # check with just the first file
    single_first, single_second = ms2_main(
        os.path.join(output_folder, '0.mzML'), msp_file, 1, 0.7)

    # check with all
    all_first, all_second = ms2_main(
        output_folder, os.path.join(output_folder, 'mmm.msp'), 1, 0.7)

    assert single_second == all_second
    assert all_first > single_first
if args.print_chems: logger.debug("Sampled chems") for chem in dataset: logger.debug(chem) if args.output_msp_file is not None: write_msp(dataset, args.output_msp_file) spike_noise = UniformSpikeNoise(0.01, args.spike_max) ms = IndependentMassSpectrometer(POSITIVE_IONISATION_MODE, dataset, spike_noise=spike_noise) controller = TopNController(POSITIVE_IONISATION_MODE, 10, 0.7, 0.01, 15, 1e3) env = Environment(ms, controller, min_time=args.min_rt - 50, max_time=args.max_rt + 50) set_log_level_warning() env.run() env.write_mzML(None, args.output_mzml_file) if args.output_swath_file is not None: sw = SWATH(args.min_mz, args.max_mz, 100, 0.0) ms = IndependentMassSpectrometer(POSITIVE_IONISATION_MODE, dataset,
def topn_processor():
    """
    Simulate a Top-N run from the ROIs of a real beer mzML file, compare the
    simulated and real files, and save the comparison plots.

    :return: list of output paths: the simulated mzML file followed by the
        three generated plot files
    """
    pathlist = []

    # --- input and output locations ---
    base_dir = 'documents/simple_ms1/example_data'
    results_dir = os.path.join(base_dir, 'results')
    mzml_path = os.path.join(base_dir, 'beers', 'fragmentation', 'mzML')
    file_name = 'Beer_multibeers_1_T10_POS.mzML'
    experiment_name = 'mzml_compare'
    experiment_out_dir = os.path.join(results_dir, experiment_name)

    # --- RT range and density/ROI extraction settings ---
    min_rt = 0
    max_rt = 1441
    kde_min_ms1_intensity = 0  # min intensity to be selected for kdes
    kde_min_ms2_intensity = 0
    roi_mz_tol = 10
    roi_min_length = 1
    roi_min_intensity = 0
    roi_start_rt = min_rt
    roi_stop_rt = max_rt

    # --- Top-N controller settings ---
    # the (full) isolation width in Dalton around a selected precursor m/z
    isolation_width = 1
    ionisation_mode = POSITIVE
    N = 10
    rt_tol = 15
    mz_tol = 10
    min_ms1_intensity = 1.75E5  # minimum ms1 intensity to fragment

    mzml_filename = 'simulated.mzML'
    mzml_out = os.path.join(experiment_out_dir, mzml_filename)
    pathlist.append(mzml_out)

    print('#' * 10, 'Train densities')
    ds = DataSource()
    ds.load_data(mzml_path, file_name=file_name)
    bandwidth_mz_intensity_rt = 1.0
    bandwidth_n_peaks = 1.0
    ps = get_spectral_feature_database(
        ds, file_name, kde_min_ms1_intensity, kde_min_ms2_intensity,
        min_rt, max_rt, bandwidth_mz_intensity_rt, bandwidth_n_peaks)

    print('#' * 10, 'Extract all ROIs')
    mzml_file = os.path.join(mzml_path, file_name)
    good_roi, junk = make_roi(
        mzml_file, mz_tol=roi_mz_tol, mz_units='ppm',
        min_length=roi_min_length, min_intensity=roi_min_intensity,
        start_rt=roi_start_rt, stop_rt=roi_stop_rt)
    all_roi = good_roi + junk
    print('#' * 10, len(all_roi))

    # keep only ROIs having at least one point above the MS1 threshold
    all_roi = [
        roi for roi in all_roi
        if np.count_nonzero(
            np.array(roi.intensity_list) > min_ms1_intensity) > 0
    ]

    set_log_level_debug()
    rtcc = RoiToChemicalCreator(ps, all_roi)
    data = rtcc.chemicals
    save_obj(data, os.path.join(experiment_out_dir, 'dataset.p'))

    set_log_level_warning()
    mass_spec = IndependentMassSpectrometer(ionisation_mode, data, ps)
    controller = TopNController(
        ionisation_mode, N, isolation_width, mz_tol, rt_tol,
        min_ms1_intensity)

    # create an environment to run both the mass spec and controller
    env = Environment(mass_spec, controller, min_rt, max_rt,
                      progress_bar=True)

    # log at WARNING so we don't see too many messages while the
    # environment is running, then go back to DEBUG
    set_log_level_warning()
    env.run()
    set_log_level_debug()
    env.write_mzML(experiment_out_dir, mzml_filename)

    print('#' * 10, 'Compare Results')
    matplotlib.use('agg')

    (simulated_mzs, simulated_rts, simulated_intensities,
     simulated_cumsum_ms1, simulated_cumsum_ms2) = count_stuff(
        mzml_out, min_rt, max_rt)
    (real_mzs, real_rts, real_intensities,
     real_cumsum_ms1, real_cumsum_ms2) = count_stuff(
        mzml_file, min_rt, max_rt)

    plt.rcParams.update({'font.size': 14})
    out_file = os.path.join(results_dir, 'topN_num_scans.png')
    pathlist.append(out_file)
    plot_num_scans(real_cumsum_ms1, real_cumsum_ms2, simulated_cumsum_ms1,
                   simulated_cumsum_ms2, out_file)

    # Match real against simulated peaks at increasing RT tolerances (in
    # seconds). The m/z tolerance is in ppm; if None, then 2 decimal places
    # is used for matching the m/z. The matches of the last tolerance feed
    # the plots below.
    match_mz_tol = None
    for match_rt_tol in (5, 10, 15):
        matches = match_peaklist(
            real_mzs, real_rts, real_intensities,
            simulated_mzs, simulated_rts, simulated_intensities,
            match_mz_tol, match_rt_tol)
        check_found_matches(matches, 'Real', 'Simulated')

    # split the intensities (third item of each key) by whether the peak
    # found a match
    unmatched_intensities = []
    matched_intensities = []
    for peak, match in matches.items():
        bucket = unmatched_intensities if match is None else matched_intensities
        bucket.append(peak[2])

    plt.rcParams.update({'font.size': 18})
    out_file = os.path.join(results_dir, 'topN_matched_intensities.png')
    plot_matched_intensities(matched_intensities, unmatched_intensities,
                             out_file)
    pathlist.append(out_file)

    out_file = os.path.join(results_dir, 'topN_matched_precursors.png')
    plot_matched_precursors(matches, 50, 1000, 180, 1260, out_file)
    pathlist.append(out_file)

    return pathlist
def test_TopN_controller_with_simulated_chems(self):
    """Run Top-N on freshly sampled chemicals, once without and once with
    noise, and check that each run writes its mzML file."""
    logger.info('Testing Top-N controller with simulated chemicals')

    # create some chemical objects
    chems = ChemicalCreator(self.ps, ROI_Sources, hmdb)
    dataset = chems.sample(mz_range, rt_range, min_ms1_intensity, n_chems,
                           self.ms_level,
                           get_children_method=GET_MS2_BY_PEAKS)
    self.assertEqual(len(dataset), n_chems)

    ionisation_mode = POSITIVE
    N = 10
    isolation_width = 1
    mz_tol = 10
    rt_tol = 15

    # (add_noise flag, log message, output file name) for each simulated run
    runs = (
        (False, 'Without noise',
         'topN_controller_simulated_chems_no_noise.mzML'),
        (True, 'With noise',
         'topN_controller_simulated_chems_with_noise.mzML'),
    )
    for add_noise, message, filename in runs:
        # simulated mass spec (with or without noise) plus Top-N controller
        logger.info(message)
        mass_spec = IndependentMassSpectrometer(
            ionisation_mode, dataset, self.ps, add_noise=add_noise)
        controller = TopNController(
            ionisation_mode, N, isolation_width, mz_tol, rt_tol,
            min_ms1_intensity)

        # the environment runs both the mass spec and the controller
        env = Environment(mass_spec, controller, min_rt, max_rt,
                          progress_bar=True)

        # log at WARNING while the simulation runs to keep the output
        # short, then switch back to DEBUG
        set_log_level_warning()
        env.run()
        set_log_level_debug()

        # write simulated output to mzML file and check that it exists
        expected_path = os.path.join(out_dir, filename)
        env.write_mzML(out_dir, filename)
        self.assertTrue(os.path.exists(expected_path))

    print()