Beispiel #1
0
def top_n_roi_experiment_evaluation(datasets,
                                    min_rt,
                                    max_rt,
                                    N,
                                    isolation_window,
                                    mz_tol,
                                    rt_tol,
                                    min_ms1_intensity,
                                    min_roi_intensity,
                                    min_roi_length,
                                    base_chemicals=None,
                                    mzmine_files=None,
                                    rt_tolerance=100,
                                    experiment_dir=None,
                                    progress_bar=False):
    """Simulate a TopN ROI run for every dataset and evaluate the results.

    One ``Environment`` driven by a ``TopN_RoiController`` is run per entry
    of ``datasets``. The runs are then scored either against
    ``base_chemicals`` or, when that is ``None``, against peaks loaded from
    ``mzmine_files`` via a ``RoiAligner``.

    Args:
        datasets: indexable collection; each entry is handed to
            ``IndependentMassSpectrometer``.
        min_rt, max_rt: forwarded to ``Environment``.
        N, isolation_window, mz_tol, rt_tol, min_ms1_intensity,
        min_roi_intensity, min_roi_length: forwarded to
            ``TopN_RoiController``.
        base_chemicals: reference passed to
            ``evaluate_multiple_simulated_env``; takes priority over
            ``mzmine_files`` when both are given.
        mzmine_files: per-sample picked-peak files, indexed in step with
            ``datasets``; only used when ``base_chemicals`` is ``None``.
        rt_tolerance: forwarded to ``RoiAligner``.
        experiment_dir: directory the mzML files are written to; only used
            when ``base_chemicals`` is ``None``.
        progress_bar: forwarded to ``Environment``; when ``False`` a line is
            printed after each dataset instead.

    Returns:
        ``(env_list, evaluation)``, or ``(None, None)`` when neither
        ``base_chemicals`` nor ``mzmine_files`` is supplied.
    """
    # Nothing to evaluate against: bail out before running any simulation.
    if base_chemicals is None and mzmine_files is None:
        return None, None

    env_list = []
    mzml_paths = []
    sample_names = ['sample_' + str(idx) for idx in range(len(datasets))]

    for idx, sample_name in enumerate(sample_names):
        mass_spec = IndependentMassSpectrometer(POSITIVE, datasets[idx])
        controller = TopN_RoiController(POSITIVE,
                                        isolation_window,
                                        mz_tol,
                                        min_ms1_intensity,
                                        min_roi_intensity,
                                        min_roi_length,
                                        N=N,
                                        rt_tol=rt_tol)
        env = Environment(mass_spec,
                          controller,
                          min_rt,
                          max_rt,
                          progress_bar=progress_bar)
        env.run()
        if progress_bar is False:
            print('Processed dataset ' + str(idx))
        env_list.append(env)

        # mzML output is only needed for the picked-peak evaluation path.
        if base_chemicals is None:
            mzml_name = sample_name + '.mzml'
            mzml_paths.append(os.path.join(experiment_dir, mzml_name))
            env.write_mzML(experiment_dir, mzml_name)

    if base_chemicals is not None:
        evaluation = evaluate_multiple_simulated_env(
            env_list, base_chemicals=base_chemicals)
    else:
        roi_aligner = RoiAligner(rt_tolerance=rt_tolerance)
        for idx, mzml_path in enumerate(mzml_paths):
            roi_aligner.add_picked_peaks(mzml_path, mzmine_files[idx],
                                         sample_names[idx], 'mzmine')
        evaluation = evaluate_multi_peak_roi_aligner(roi_aligner,
                                                     sample_names)
    return env_list, evaluation
Beispiel #2
0
 def _update_grid(self):
     """Reset the grid's boxes and re-register boxes from the observed ROIs.

     The grid's boxes are re-initialised, every injection in
     ``self.observed_rois`` is added to a fresh ``RoiAligner`` together with
     per-ROI RT shifts taken from that injection's drift model, and the boxes
     the aligner produces are registered with the grid.
     """
     grid = self.grid
     grid.boxes = grid.init_boxes(grid.rtboxes, grid.mzboxes)

     aligner = RoiAligner(rt_tolerance=self.rt_tolerance)
     for injection_index, injection_rois in enumerate(self.observed_rois):
         estimator = self.drift_models[injection_index].get_estimator(
             injection_index)
         # The first element of the estimator's result is negated to give
         # the shift handed to the aligner.
         shifts = []
         for roi in injection_rois:
             shifts.append(-estimator(roi, injection_index)[0])
         # NOTE(review): the full ``self.observed_rois`` is passed here, not
         # just this injection's ROIs, while ``shifts`` is per-injection --
         # confirm this is intended.
         aligner.add_sample(self.observed_rois,
                            self.grid.sample_number,
                            rt_shifts=shifts)

     # TODO might need to add intensity here
     for aligned_box in aligner.get_boxes(method=self.box_method):
         self.grid.register_box(aligned_box)
Beispiel #3
0
def top_n_box_experiment(datasets,
                         base_chemicals,
                         rt_range,
                         boxes_params,
                         dataset_group_list,
                         isolation_width,
                         mz_tol,
                         min_ms1_intensity,
                         min_roi_intensity,
                         min_roi_length,
                         N,
                         rt_tol,
                         ionisation_mode=POSITIVE):
    """Run a TopN box controller over the datasets, one after another.

    After each run the controller's live and dead ROIs are added to a shared
    ``RoiAligner``; the boxes and max fragmentation intensities it then
    reports are handed to the controller built for the next dataset. The
    first controller receives ``None`` for both.

    Args:
        datasets: indexable collection; each entry is handed to
            ``IndependentMassSpectrometer``.
        base_chemicals: reference passed to
            ``evaluate_multiple_simulated_env``.
        rt_range: pair whose two elements are forwarded to ``Environment``.
        boxes_params: forwarded to ``TopNBoxRoiController``.
        dataset_group_list: indexed in step with ``datasets``; each entry is
            passed to ``RoiAligner.add_sample`` with that sample's ROIs.
        isolation_width, mz_tol, min_ms1_intensity, min_roi_intensity,
        min_roi_length, N, rt_tol: forwarded to ``TopNBoxRoiController``.
        ionisation_mode: forwarded to both the mass spectrometer and the
            controller.

    Returns:
        ``(env_list, final_evaluation)``.
    """
    start_rt, end_rt = rt_range[0], rt_range[1]
    aligner = RoiAligner()
    env_list = []

    # State carried from one sample to the next; empty for the first run.
    known_boxes = None
    known_intensities = None

    for sample_index in range(len(datasets)):
        spectrometer = IndependentMassSpectrometer(ionisation_mode,
                                                   datasets[sample_index])
        box_controller = TopNBoxRoiController(
            ionisation_mode,
            isolation_width,
            mz_tol,
            min_ms1_intensity,
            min_roi_intensity,
            min_roi_length,
            boxes_params=boxes_params,
            boxes=known_boxes,
            boxes_intensity=known_intensities,
            N=N,
            rt_tol=rt_tol)
        env = Environment(spectrometer,
                          box_controller,
                          start_rt,
                          end_rt,
                          progress_bar=True)
        env.run()
        env_list.append(env)

        # Feed everything this run observed back into the aligner so that
        # the next controller sees the updated boxes.
        observed_rois = env.controller.live_roi + env.controller.dead_roi
        aligner.add_sample(observed_rois, 'sample_' + str(sample_index),
                           dataset_group_list[sample_index])
        known_boxes = aligner.get_boxes()
        known_intensities = aligner.get_max_frag_intensities()

    final_evaluation = evaluate_multiple_simulated_env(
        env_list, base_chemicals=base_chemicals)
    return env_list, final_evaluation
Beispiel #4
0
def dsda_experiment_evaluation(datasets,
                               base_dir,
                               min_rt,
                               max_rt,
                               N,
                               isolation_window,
                               mz_tol,
                               rt_tol,
                               min_ms1_intensity,
                               mzmine_files=None,
                               rt_tolerance=100,
                               progress_bar=False):
    """Run a DsDA experiment over all datasets and evaluate the results.

    The first dataset is acquired with a ``TopNController``. Every later
    dataset is acquired with a ``FixedScansController`` whose schedule is
    read from ``<base_dir>/settings`` via ``get_schedule``. Each run is
    written as mzML into ``<base_dir>/Data``. Once all datasets have been
    processed, the runs are evaluated together.

    Args:
        datasets: indexable collection; each entry is handed to
            ``IndependentMassSpectrometer``.
        base_dir: root directory holding the ``Data`` and ``settings``
            sub-directories and the ``DsDA_Timing_schedule.csv`` template.
        min_rt, max_rt: forwarded to ``create_dsda_schedule`` and
            ``Environment``.
        N, isolation_window, mz_tol, rt_tol, min_ms1_intensity: forwarded to
            the controllers / schedule helpers.
        mzmine_files: per-sample picked-peak files, indexed in step with
            ``datasets``. When ``None`` the runs are scored with
            ``evaluate_multiple_simulated_env`` instead.
        rt_tolerance: forwarded to ``RoiAligner``.
        progress_bar: forwarded to ``Environment``; when ``False`` a line is
            printed after each dataset instead.

    Returns:
        ``(env_list, evaluation)``, or ``(None, None)`` when ``datasets`` is
        empty.
    """
    # With no datasets there is nothing to schedule or evaluate; this keeps
    # the (None, None) outcome the original loop's ``else`` clause aimed for.
    if len(datasets) == 0:
        return None, None

    data_dir = os.path.join(base_dir, 'Data')
    schedule_dir = os.path.join(base_dir, 'settings')
    mass_spec = IndependentMassSpectrometer(
        POSITIVE, datasets[0])  # necessary to get timings for schedule
    create_dsda_schedule(mass_spec, N, min_rt, max_rt, base_dir)
    print('Please open and run R script now')
    time.sleep(1)
    template_file = os.path.join(base_dir, 'DsDA_Timing_schedule.csv')
    env_list = []
    mzml_files = []
    source_files = ['sample_' + "%03d" % i for i in range(len(datasets))]
    for i in range(len(datasets)):
        mass_spec = IndependentMassSpectrometer(POSITIVE, datasets[i])
        if i == 0:
            # No schedule exists yet for the first injection.
            controller = TopNController(POSITIVE,
                                        N,
                                        isolation_window,
                                        mz_tol,
                                        rt_tol,
                                        min_ms1_intensity,
                                        ms1_shift=0,
                                        initial_exclusion_list=None,
                                        force_N=False)
        else:
            print('Looking for next schedule')
            new_schedule = get_schedule(i, schedule_dir)
            print('Found next schedule')
            time.sleep(1)
            schedule_param_list = dsda_get_scan_params(new_schedule,
                                                       template_file,
                                                       isolation_window,
                                                       mz_tol, rt_tol)
            controller = FixedScansController(schedule=schedule_param_list)
        env = Environment(mass_spec,
                          controller,
                          min_rt,
                          max_rt,
                          progress_bar=progress_bar)
        env.run()
        if progress_bar is False:
            print('Processed dataset ' + str(i))
        env_list.append(env)
        mzml_name = source_files[i] + '.mzml'
        mzml_files.append(os.path.join(data_dir, mzml_name))
        print("Processed ", i + 1, " files")
        env.write_mzML(data_dir, mzml_name)
        print("Waiting for R to process .mzML files")

    # Evaluate only after every dataset has been run. Doing this inside the
    # loop returned after the first dataset and skipped all scheduled runs.
    if mzmine_files is None:
        evaluation = evaluate_multiple_simulated_env(env_list)
    else:
        roi_aligner = RoiAligner(rt_tolerance=rt_tolerance)
        for j, mzml_file in enumerate(mzml_files):
            roi_aligner.add_picked_peaks(mzml_file, mzmine_files[j],
                                         source_files[j], 'mzmine')
        evaluation = evaluate_multi_peak_roi_aligner(
            roi_aligner, source_files)
    return env_list, evaluation
Beispiel #5
0
def flexible_non_overlap_experiment_evaluation(
        datasets,
        min_rt,
        max_rt,
        N,
        isolation_window,
        mz_tol,
        rt_tol,
        min_ms1_intensity,
        min_roi_intensity,
        min_roi_length,
        rt_box_size,
        mz_box_size,
        min_roi_length_for_fragmentation,
        scoring_params=None,
        base_chemicals=None,
        mzmine_files=None,
        rt_tolerance=100,
        experiment_dir=None,
        roi_type=ROI_TYPE_NORMAL,
        reset_length_seconds=1e6,
        intensity_increase_factor=10,
        drop_perc=0.1 / 100,
        exclusion_method=ROI_EXCLUSION_DEW,
        exclusion_t_0=None,
        progress_bar=False):
    """Run a flexible non-overlap controller per dataset and evaluate.

    A single ``GridEstimator`` (wrapping an ``AllOverlapGrid`` and an
    ``IdentityDrift``) is shared by the controllers of all datasets. The runs
    are then scored either against ``base_chemicals`` or, when that is
    ``None``, against peaks loaded from ``mzmine_files`` via a
    ``RoiAligner``.

    Args:
        datasets: indexable collection; each entry is handed to
            ``IndependentMassSpectrometer``.
        min_rt, max_rt: forwarded to ``AllOverlapGrid`` and ``Environment``.
        N, isolation_window, mz_tol, rt_tol, min_ms1_intensity,
        min_roi_intensity, min_roi_length,
        min_roi_length_for_fragmentation, roi_type, reset_length_seconds,
        intensity_increase_factor, drop_perc, exclusion_method,
        exclusion_t_0: forwarded to ``FlexibleNonOverlapController``.
        rt_box_size, mz_box_size: forwarded to ``AllOverlapGrid``.
        scoring_params: mapping forwarded to the controller; its ``'theta3'``
            entry decides ``register_all_roi`` (true when non-zero).
        base_chemicals: reference passed to
            ``evaluate_multiple_simulated_env``; takes priority over
            ``mzmine_files`` when both are given.
        mzmine_files: per-sample picked-peak files, indexed in step with
            ``datasets``; only used when ``base_chemicals`` is ``None``.
        rt_tolerance: forwarded to ``RoiAligner``.
        experiment_dir: directory the mzML files are written to; only used
            when ``base_chemicals`` is ``None``.
        progress_bar: forwarded to ``Environment``; when ``False`` a line is
            printed after each dataset instead.

    Returns:
        ``(env_list, evaluation)``, or ``(None, None)`` when neither
        ``base_chemicals`` nor ``mzmine_files`` is supplied.
    """
    # Nothing to evaluate against: bail out before running any simulation.
    if base_chemicals is None and mzmine_files is None:
        return None, None

    env_list = []
    shared_grid = GridEstimator(
        AllOverlapGrid(min_rt, max_rt, rt_box_size, 0, 3000, mz_box_size),
        IdentityDrift())
    mzml_paths = []
    sample_names = ['sample_' + str(idx) for idx in range(len(datasets))]

    # NOTE(review): ``scoring_params`` defaults to None, and subscripting
    # None raises TypeError here -- callers apparently must always pass a
    # mapping containing 'theta3'; confirm and consider a real default.
    register_all_roi = bool(scoring_params['theta3'] != 0)

    for idx, sample_name in enumerate(sample_names):
        mass_spec = IndependentMassSpectrometer(POSITIVE, datasets[idx])
        controller = FlexibleNonOverlapController(
            POSITIVE,
            isolation_window,
            mz_tol,
            min_ms1_intensity,
            min_roi_intensity,
            min_roi_length,
            N,
            shared_grid,
            rt_tol=rt_tol,
            register_all_roi=register_all_roi,
            min_roi_length_for_fragmentation=(
                min_roi_length_for_fragmentation),
            scoring_params=scoring_params,
            roi_type=roi_type,
            reset_length_seconds=reset_length_seconds,
            intensity_increase_factor=intensity_increase_factor,
            drop_perc=drop_perc,
            exclusion_method=exclusion_method,
            exclusion_t_0=exclusion_t_0)
        env = Environment(mass_spec,
                          controller,
                          min_rt,
                          max_rt,
                          progress_bar=progress_bar)
        env.run()
        if progress_bar is False:
            print('Processed dataset ' + str(idx))
        env_list.append(env)

        # mzML output is only needed for the picked-peak evaluation path.
        if base_chemicals is None:
            mzml_name = sample_name + '.mzml'
            mzml_paths.append(os.path.join(experiment_dir, mzml_name))
            env.write_mzML(experiment_dir, mzml_name)

    if base_chemicals is not None:
        evaluation = evaluate_multiple_simulated_env(
            env_list, base_chemicals=base_chemicals)
    else:
        roi_aligner = RoiAligner(rt_tolerance=rt_tolerance)
        for idx, mzml_path in enumerate(mzml_paths):
            roi_aligner.add_picked_peaks(mzml_path, mzmine_files[idx],
                                         sample_names[idx], 'mzmine')
        evaluation = evaluate_multi_peak_roi_aligner(roi_aligner,
                                                     sample_names)
    return env_list, evaluation