예제 #1
0
def test_molecular_formula_search_db():
    """Run a first-hit formula search on a test spectrum and report the outcome.

    Adduct and radical searches are switched off on the global
    ``MSParameters`` before the spectrum is created. The elapsed search time
    and the number of assigned / unassigned peaks are printed.
    """
    search_settings = MSParameters.molecular_search
    search_settings.isAdduct = False
    search_settings.isRadical = False

    mass_spec_obj = create_mass_spectrum()

    started_at = time.time()
    SearchMolecularFormulas(mass_spec_obj, first_hit=True).run_worker_mass_spectrum()
    print('searching molecular formulas took %.3f seconds' % (time.time() - started_at))

    assigned_count = 0
    unassigned_count = 0
    # One entry per (peak, candidate formula) pair; collected but not used
    # further inside this test.
    errors, masses, abundances = [], [], []

    for mspeak in mass_spec_obj.sort_by_abundance():
        if not mspeak.is_assigned:
            unassigned_count += 1
            continue

        assigned_count += 1
        for mformula in mspeak:
            masses.append(mspeak.mz_exp)
            errors.append(mformula.mz_error)
            abundances.append(mspeak.abundance)

    print('%i peaks assigned and %i peaks not assigned' % (assigned_count, unassigned_count))
예제 #2
0
def test_run_molecular_formula_search():
    """Search molecular formulas for a single synthetic centroid peak.

    Builds a one-peak spectrum at m/z 215.09269, runs the search on that peak
    only and prints every candidate formula with its m/z error.
    """
    search_settings = MSParameters.molecular_search

    # F, P and Cl get a (0, 0) entry (presumably a (min, max) count range,
    # i.e. the elements are excluded -- confirm against the settings class).
    for element in ('F', 'P', 'Cl'):
        search_settings.usedAtoms[element] = (0, 0)
    search_settings.isAdduct = False
    search_settings.isRadical = False

    for element in ('P', 'F', 'Cl'):
        search_settings.used_atom_valences[element] = 0

    mass_spectrum_obj = ms_from_array_centroid(
        [215.09269],  # m/z
        [1],          # abundance
        [1],          # resolving power
        [1],          # signal to noise
        'one peak',
    )

    SearchMolecularFormulas(mass_spectrum_obj).run_worker_ms_peaks([mass_spectrum_obj[0]])

    ms_peak = mass_spectrum_obj[0]
    print(ms_peak.mz_exp)
    if not ms_peak.is_assigned:
        return

    for formula in ms_peak:
        print(formula.string_formated, formula.mz_error)
예제 #3
0
def search_ms1_data(icrfile: str, dict_metal_eicdata: Dict[str, EIC_Data],
                    parameters: LCMSParameters):
    """Average the FT-MS scans under each metal EIC peak and search formulas.

    For every peak listed in ``eic_data.apexes`` the nearest TIC scans are
    located, the scans between the first and the last one are averaged into a
    mass spectrum, the spectrum is processed and searched with exactly one
    atom of the metal allowed, and the result is written with ``to_csv``.

    Parameters
    ----------
    icrfile : str
        Location of the raw file handed to ``run_thermo``.
    dict_metal_eicdata : Dict[str, EIC_Data]
        Metal label -> EIC data. Digits in the label are stripped to get the
        element symbol used as the ``usedAtoms`` key.
    parameters : LCMSParameters
        Source of the mass spectrum, MS1 molecular search and peak settings.
    """
    lcms_obj, parser = run_thermo(icrfile, parameters)

    tic_data, ax_tic = lcms_obj.get_tic(
        ms_type='MS !d', peak_detection=True, smooth=False, plot=True)
    plt.show()

    for metal, eic_data in dict_metal_eicdata.items():

        print(metal, eic_data.apexes)

        # Label without its digits (e.g. an isotope number).
        metal_atom = ''.join(char for char in metal if not char.isdigit())

        for apex_indexes in eic_data.apexes:

            nearest_tic_indexes = [
                find_nearest_scan(eic_data.time[idx], tic_data)
                for idx in apex_indexes
            ]
            ftms_scans = [tic_data.scans[idx] for idx in nearest_tic_indexes]
            ftms_times = [tic_data.time[idx] for idx in nearest_tic_indexes]

            # The second entry is used as the retention time (presumably the
            # apex of a (start, apex, end) triple -- confirm with the caller).
            retention_time = ftms_times[1]

            print(ftms_scans)
            print(ftms_times)

            chromatogram_settings = parser.chromatogram_settings
            chromatogram_settings.start_scan = ftms_scans[0]
            chromatogram_settings.end_scan = ftms_scans[-1]

            mass_spec = parser.get_average_mass_spectrum_in_scan_range(
                auto_process=False)
            mass_spec.retention_time = retention_time

            # Settings are attached before processing because the spectrum
            # was created with auto_process=False.
            mass_spec.settings = parameters.mass_spectrum
            mass_spec.molecular_search_settings = parameters.ms1_molecular_search
            mass_spec.mspeaks_settings = parameters.ms_peak
            mass_spec.process_mass_spec()

            mass_spec.molecular_search_settings.usedAtoms[metal_atom] = (1, 1)

            mass_spec.plot_profile_and_noise_threshold()

            searcher = SearchMolecularFormulas(mass_spec, first_hit=False)
            searcher.run_worker_mass_spectrum()

            # Reset the metal entry once the search is done.
            mass_spec.molecular_search_settings.usedAtoms[metal_atom] = (0, 0)

            mass_spec.percentile_assigned(report_error=True)
            print(metal)

            raw_name = '{}_rt{}_{}'.format(metal, retention_time,
                                           mass_spec.sample_name)
            filename = raw_name.replace(".", "_")
            print(filename)
            mass_spec.to_csv(filename, write_metadata=False)
예제 #4
0
def run_molecular_formula_search(mz, out, parameters_filepath):
    """Search molecular formulas for one m/z value and echo a result table.

    A single-peak centroid spectrum is built from ``mz``, parameters are
    loaded from ``parameters_filepath``, and the formula search (with
    isotopologues) is run on that peak. Candidates are printed as a table via
    ``click.echo``; when the peak has no candidates a message is printed
    instead.

    Parameters
    ----------
    mz
        The m/z value to search (formatted with ``%.5f`` in the failure
        message, so it is expected to be numeric).
    out
        Passed as ``file=`` to ``click.echo``; its string form is also used
        as the data name of the spectrum.
    parameters_filepath
        Forwarded to ``parameter_from_json.load_and_set_parameters_ms``.
    """
    mz_values = [mz]
    abundance = [1]
    rp, s2n = [1], [1]
    dataname = Path(str(out))

    mass_spectrum_obj = ms_from_array_centroid(mz_values, abundance, rp, s2n,
                                               dataname)

    parameter_from_json.load_and_set_parameters_ms(
        mass_spectrum_obj, parameters_path=parameters_filepath)

    search_settings = mass_spectrum_obj.molecular_search_settings

    # A one-peak spectrum cannot satisfy a minimum-peaks-per-class filter,
    # so the filter is switched off.
    search_settings.use_min_peaks_filter = False
    # FIX: the original assigned 10 to `use_min_peaks_filter`, which replaced
    # the False above with a truthy value and re-enabled the filter.
    # NOTE(review): the threshold is assumed to belong in
    # `min_peaks_per_class` -- confirm against the settings class.
    search_settings.min_peaks_per_class = 10
    search_settings.use_isotopologue_filter = False

    click.echo('Searching for molecular formulas within %.3f and %.3f ppm' %
               (search_settings.min_ppm_error, search_settings.max_ppm_error))

    SearchMolecularFormulas(mass_spectrum_obj,
                            find_isotopologues=True).run_worker_ms_peaks(
                                [mass_spectrum_obj[0]])

    ms_peak = mass_spectrum_obj[0]

    # The peak is used as a truth value to decide between the table and the
    # "no match" message.
    if ms_peak:

        header = [
            'Molecular Formula', 'Calculated m/z', 'Mass Error', 'DBE',
            'Ion Type'
        ]

        results = [[
            formula.to_string, formula.mz_calc, formula.mz_error,
            formula.dbe, formula.ion_type
        ] for formula in ms_peak]

        click.echo(tabulate(results,
                            headers=header,
                            floatfmt=("s", ".5f", ".5f", ".1f", "s")),
                   file=out)
        click.echo('', file=out)

    else:

        click.echo(
            "Could not find a possible molecular formula match for the m/z %.5f"
            % mz_values[0],
            file=out)
        click.echo('', file=out)
예제 #5
0
def search_sox(mass_spectrum_obj):
    """Run a first-hit formula search with O (1-10) and S (1-3) enabled.

    N and Cl are set to (0, 0) on the global ``MSParameters`` search settings
    before the search is started on ``mass_spectrum_obj``.
    """
    filter_by_resolving_power()

    atom_ranges = {
        'O': (1, 10),
        'N': (0, 0),
        'S': (1, 3),
        'Cl': (0, 0),
    }
    for element, count_range in atom_ranges.items():
        MSParameters.molecular_search.usedAtoms[element] = count_range

    searcher = SearchMolecularFormulas(mass_spectrum_obj, first_hit=True)
    searcher.run_worker_mass_spectrum()
예제 #6
0
def assign_mf_pox(mass_spectrum_obj):
    """Run a first-hit formula search with O (4-20) and exactly one P.

    N, S and Cl are set to (0, 0); protonated, radical and adduct ion types
    are all enabled on the global ``MSParameters`` search settings.
    """
    atom_ranges = {
        'O': (4, 20),
        'N': (0, 0),
        'S': (0, 0),
        'Cl': (0, 0),
        'P': (1, 1),
    }
    for element, count_range in atom_ranges.items():
        MSParameters.molecular_search.usedAtoms[element] = count_range

    for ion_flag in ('isProtonated', 'isRadical', 'isAdduct'):
        setattr(MSParameters.molecular_search, ion_flag, True)

    searcher = SearchMolecularFormulas(mass_spectrum_obj, first_hit=True)
    searcher.run_worker_mass_spectrum()
예제 #7
0
def run_assignment(file_location):
    """Load a spectrum with ``run_thermo``, assign CHO formulas and plot classes.

    The search uses a +/-5 ppm window, DBE 0-50, no database URL and only
    protonated ions. After the search the score methods are set to
    ``"prob_score"`` and three classification plots are shown.

    Returns
    -------
    The mass spectrum object returned by ``run_thermo`` after assignment.
    """
    # Alternative loaders used by the author: run_bruker, get_masslist.
    mass_spectrum = run_thermo(file_location)
    search_settings = mass_spectrum.molecular_search_settings

    search_settings.error_method = 'None'
    search_settings.min_ppm_error = -5
    search_settings.max_ppm_error = 5

    search_settings.url_database = None
    search_settings.min_dbe = 0
    search_settings.max_dbe = 50

    atom_ranges = {
        'C': (1, 100),
        'H': (4, 200),
        'O': (1, 30),
        'N': (0, 0),
        'S': (0, 0),
        'Cl': (0, 0),
        'Br': (0, 0),
        'P': (0, 0),
        'Na': (0, 0),
    }
    for element, count_range in atom_ranges.items():
        search_settings.usedAtoms[element] = count_range

    search_settings.isProtonated = True
    search_settings.isRadical = False
    search_settings.isAdduct = False

    searcher = SearchMolecularFormulas(mass_spectrum, first_hit=False)
    searcher.run_worker_mass_spectrum()

    mass_spectrum.percentile_assigned(report_error=True)

    # Score methods are set after the search, before the classification.
    for score_setting in ("score_method", "output_score_method"):
        setattr(mass_spectrum.molecular_search_settings, score_setting,
                "prob_score")

    mass_spectrum_by_classes = HeteroatomsClassification(
        mass_spectrum, choose_molecular_formula=True)

    # Each figure is shown before the next one is drawn.
    mass_spectrum_by_classes.plot_ms_assigned_unassigned()
    plt.show()
    mass_spectrum_by_classes.plot_mz_error()
    plt.show()
    mass_spectrum_by_classes.plot_ms_class("O2")
    plt.show()

    return mass_spectrum
예제 #8
0
def test_search_imported_ref_files():
    """Search a spectrum against formulas imported from the SRFA Bruker ref file.

    The reference file is looked up under ``tests/tests_data/`` relative to
    the current working directory. The test passes when at least one peak is
    assigned.
    """
    mass_spectrum_obj = get_mass_spectrum()

    data_dir = os.path.join(os.getcwd(), os.path.normcase("tests/tests_data/"))
    ref_file_location = data_dir + "SRFA.ref"

    importer = ImportMassListRef(ref_file_location)
    mf_references_list = importer.from_bruker_ref_file()

    for reference in mf_references_list:
        print(reference.mass, reference.classe)

    searcher = SearchMolecularFormulas(mass_spectrum_obj)
    ms_peaks_assigned = searcher.search_mol_formulas(
        mf_references_list, find_isotopologues=False)

    assert len(ms_peaks_assigned) > 0
예제 #9
0
def run_assignment(file_location, field_strength=12):
    """Load a Bruker spectrum, assign molecular formulas and export to CSV.

    Parameters
    ----------
    file_location
        Passed to ``run_bruker``.
    field_strength : int, optional
        Forwarded to ``set_parameters`` and to the resolving power filter.
    """
    spectrum, transient_time = run_bruker(file_location)

    set_parameters(spectrum, field_strength=field_strength, pos=False)
    spectrum.filter_by_max_resolving_power(field_strength, transient_time)

    searcher = SearchMolecularFormulas(spectrum, first_hit=False)
    searcher.run_worker_mass_spectrum()

    spectrum.percentile_assigned(report_error=True)

    # Score methods are set after the search, before the export.
    for score_setting in ("score_method", "output_score_method"):
        setattr(spectrum.molecular_search_settings, score_setting, "prob_score")

    spectrum.to_csv(spectrum.sample_name, write_metadata=False)
예제 #10
0
def search_nsox(mass_spectrum_obj):
    """Run a first-hit formula search with O (1-10), N (1-3) and S (1-3).

    Cl is set to (0, 0), the DBE window is 0-50, and protonated, radical and
    adduct ion types are enabled on the global ``MSParameters`` settings.
    """
    filter_by_resolving_power()

    atom_ranges = {
        'O': (1, 10),
        'N': (1, 3),
        'S': (1, 3),
        'Cl': (0, 0),
    }
    for element, count_range in atom_ranges.items():
        MSParameters.molecular_search.usedAtoms[element] = count_range

    MSParameters.molecular_search.min_dbe = 0
    MSParameters.molecular_search.max_dbe = 50

    for ion_flag in ('isProtonated', 'isRadical', 'isAdduct'):
        setattr(MSParameters.molecular_search, ion_flag, True)

    searcher = SearchMolecularFormulas(mass_spectrum_obj, first_hit=True)
    searcher.run_worker_mass_spectrum()
예제 #11
0
    def find_most_abundant_formula(self, mass_spectrum_obj):
        """Find the best formula candidate of the most abundant, non-outlier peak.

        Peaks whose abundance exceeds ``mean + 7 * std`` of all abundances are
        ranked as 0 when picking the most abundant peak. The formula search is
        then run on that single peak using ``self.sql_db``.

        Returns
        -------
        MolecularFormula class obj
            ``best_molecular_formula_candidate`` of the selected peak.

        Raises
        ------
        Exception
            If the selected peak is falsy after the search.
        """
        # TODO: find a better way to cut off outliers (e.g. inspect a
        # histogram of mass_spectrum_obj.abundance).
        abundances = mass_spectrum_obj.abundance
        upper_limit = average(abundances, axis=0) + 7 * std(abundances, axis=0)

        highest_peak = max(mass_spectrum_obj, key=lambda m: m.abundance)
        print(
            "Maximum abundance limit  = %s and max abundance kendrick cluster = %s"
            % (upper_limit, highest_peak.abundance))

        def capped_abundance(mspeak):
            # Peaks above the limit are ranked as if they had zero abundance.
            if mspeak.abundance <= upper_limit:
                return mspeak.abundance
            return 0

        mspeak_most_abundant = max(mass_spectrum_obj, key=capped_abundance)

        print("Searching molecular formulas")

        searcher = SearchMolecularFormulas(mass_spectrum_obj, self.sql_db)
        searcher.run_worker_ms_peaks([mspeak_most_abundant])

        print("Finished searching molecular formulas")

        if not mspeak_most_abundant:
            raise Exception(
                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
                % mspeak_most_abundant.mz_exp)

        return mspeak_most_abundant.best_molecular_formula_candidate
예제 #12
0
def search_sx(mass_spectrum_obj):
    """Run a first-hit formula search with S (1-3) and no O, N or Cl.

    The DBE window is set to 0-50 on the global ``MSParameters`` settings
    before the search is started on ``mass_spectrum_obj``.
    """
    filter_by_resolving_power()

    atom_ranges = {
        'O': (0, 0),
        'N': (0, 0),
        'S': (1, 3),
        'Cl': (0, 0),
    }
    for element, count_range in atom_ranges.items():
        MSParameters.molecular_search.usedAtoms[element] = count_range

    MSParameters.molecular_search.min_dbe = 0
    MSParameters.molecular_search.max_dbe = 50

    searcher = SearchMolecularFormulas(mass_spectrum_obj, first_hit=True)
    searcher.run_worker_mass_spectrum()
예제 #13
0
def run_assignment(file_location):
    """Load a mass list, assign CHO formulas via the database and export results.

    The search uses a +/-1 ppm window, DBE 0-50 and only protonated ions. The
    assigned spectrum and its calculated isotopologues are exported, and the
    assigned/unassigned plot is drawn (not shown).

    Returns
    -------
    tuple
        ``(mass_spectrum, mass_spectrum_by_classes)``.
    """
    # Alternative loader used by the author: run_bruker.
    mass_spectrum = get_masslist(file_location)
    search_settings = mass_spectrum.molecular_search_settings

    search_settings.error_method = 'None'
    search_settings.min_ppm_error = -1
    search_settings.max_ppm_error = 1

    search_settings.url_database = "postgres://*****:*****@localhost:5432/molformula"
    search_settings.min_dbe = 0
    search_settings.max_dbe = 50

    atom_ranges = {
        'C': (1, 100),
        'H': (4, 200),
        'O': (1, 22),
        'N': (0, 0),
        'S': (0, 0),
        'Cl': (0, 0),
        'Br': (0, 0),
        'P': (0, 0),
        'Na': (0, 0),
    }
    for element, count_range in atom_ranges.items():
        search_settings.usedAtoms[element] = count_range

    search_settings.isProtonated = True
    search_settings.isRadical = False
    search_settings.isAdduct = False

    searcher = SearchMolecularFormulas(mass_spectrum, first_hit=False)
    searcher.run_worker_mass_spectrum()
    mass_spectrum.percentile_assigned(report_error=True)

    mass_spectrum.to_csv("15T_Neg_ESI_SRFA")
    export_calc_isotopologues(mass_spectrum,
                              "15T_Neg_ESI_SRFA_Calc_Isotopologues")

    mass_spectrum_by_classes = HeteroatomsClassification(
        mass_spectrum, choose_molecular_formula=True)
    mass_spectrum_by_classes.plot_ms_assigned_unassigned()

    return (mass_spectrum, mass_spectrum_by_classes)
예제 #14
0
def test_mspeak_search():
    """Search formulas for the most abundant peak and print its filtered picks.

    When the peak is assigned, the formula string selected by each filter /
    scoring method is printed, followed by the best candidate and the first
    candidate's error.
    """
    mass_spec_obj = create_mass_spectrum()

    print("OK")

    mspeak_obj = mass_spec_obj.most_abundant_mspeak
    SearchMolecularFormulas(mass_spec_obj).run_worker_ms_peaks([mspeak_obj])

    print("OK2")
    if not mspeak_obj.is_assigned:
        return

    # Each of these methods returns a formula object exposing `.string`.
    selector_names = (
        'molecular_formula_earth_filter',
        'molecular_formula_water_filter',
        'molecular_formula_air_filter',
        'cia_score_S_P_error',
        'cia_score_N_S_P_error',
    )
    for selector_name in selector_names:
        print(getattr(mspeak_obj, selector_name)().string)

    print(mspeak_obj.best_molecular_formula_candidate.string)

    first_candidate = mspeak_obj[0]
    print(first_candidate.mz_error, first_candidate.string_formated)
예제 #15
0
    def find_most_abundant_formula_test(self, mass_spectrum_obj, settings):
        """Return the best formula candidate of the most abundant peak (test only).

        ``settings`` is accepted but not used by this method.

        Intended follow-up noted by the author: sort by Kendrick to select the
        most abundant series and pick its most abundant peak, or let the user
        choose the reference peak in the GUI.

        Raises
        ------
        Exception
            If the most abundant peak is falsy after the search.
        """
        reference_peak = mass_spectrum_obj.most_abundant_mspeak

        searcher = SearchMolecularFormulas(mass_spectrum_obj, self.sql_db)
        searcher.run_worker_ms_peaks([reference_peak])

        if not reference_peak:
            raise Exception(
                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
                % reference_peak.mz_exp)

        return reference_peak.best_molecular_formula_candidate
예제 #16
0
def assign_mf_nsox(mass_spectrum_obj):
    """Run a first-hit formula search with O (4-20), N (1-3) and S (1-5).

    Cl is set to (0, 0), the DBE window is 0-36, and protonated, radical and
    adduct ion types are enabled on the global ``MSParameters`` settings.

    Parameters
    ----------
    mass_spectrum_obj
        Mass spectrum handed to ``SearchMolecularFormulas``.
    """
    filter_by_resolving_power()

    search_settings = MSParameters.molecular_search

    atom_ranges = {
        'O': (4, 20),
        'N': (1, 3),
        'S': (1, 5),
        'Cl': (0, 0),
    }
    for element, count_range in atom_ranges.items():
        search_settings.usedAtoms[element] = count_range

    search_settings.min_dbe = 0
    search_settings.max_dbe = 36

    search_settings.isProtonated = True
    search_settings.isRadical = True
    search_settings.isAdduct = True

    # FIX: the spectrum is already bound through the constructor. The
    # original passed it again to run_worker_mass_spectrum(), unlike every
    # other caller, which passes no arguments.
    SearchMolecularFormulas(mass_spectrum_obj, first_hit=True).run_worker_mass_spectrum()
예제 #17
0
def run_nmdc_workflow(args):
    """Load, optionally calibrate and assign one spectrum.

    Parameters
    ----------
    args : tuple
        ``(file_location, ref_calibration_file, field_strength)``. A field
        strength of 21 selects ``run_thermo``; anything else ``run_bruker``.

    Returns
    -------
    tuple
        ``("too few peaks", None)`` when the spectrum has fewer than 30 peaks,
        otherwise ``("all_good", mass_spectrum)``.
    """
    file_location, ref_calibration_file, field_strength = args

    # Both loader branches print the same progress line before loading.
    loader = run_thermo if field_strength == 21 else run_bruker
    print("{} {}  {}".format("processing", field_strength, file_location))
    mass_spectrum, transient_time = loader(file_location)

    is_pos = bool(mass_spectrum.polarity > 0)

    if len(mass_spectrum) < 30:
        print("{}   {}".format("too few peaks", file_location))
        return "too few peaks", None

    set_parameters(mass_spectrum, field_strength=field_strength, pos=is_pos)

    if ref_calibration_file:
        calspec(mass_spectrum, ref_calibration_file)

    searcher = SearchMolecularFormulas(mass_spectrum, first_hit=False)
    searcher.run_worker_mass_spectrum()
    mass_spectrum.percentile_assigned(report_error=True)

    # Score methods are set after the search.
    for score_setting in ("score_method", "output_score_method"):
        setattr(mass_spectrum.molecular_search_settings, score_setting,
                "prob_score")

    return "all_good", mass_spectrum
예제 #18
0
def test_heteroatoms_classification():
    """Assign CHO formulas to a test spectrum and exercise the classification API.

    The global ``MSParameters`` search settings are configured first
    (+/-10 ppm, protonated ions only, C/H/O ranges), then every listed
    ``HeteroatomsClassification`` accessor is called once.
    """
    search_settings = MSParameters.molecular_search

    search_settings.error_method = 'None'
    search_settings.min_ppm_error = -10
    search_settings.max_ppm_error = 10
    search_settings.mz_error_range = 1
    search_settings.isProtonated = True
    search_settings.isRadical = False
    search_settings.isAdduct = False

    for element, count_range in (('C', (1, 100)), ('H', (4, 200)), ('O', (1, 18))):
        search_settings.usedAtoms[element] = count_range

    mass_spec_obj = create_mass_spectrum()

    SearchMolecularFormulas(mass_spec_obj).run_worker_mass_spectrum()

    # Classification smoke test: each call only needs to run without raising.
    mass_spec_obj.percentile_assigned()

    by_classes = HeteroatomsClassification(mass_spec_obj)

    by_classes.plot_ms_assigned_unassigned()
    by_classes.atoms_ratio_all("H", "C")
    by_classes.dbe_all()
    by_classes.carbon_number_all()
    by_classes.abundance_assigned()
    by_classes.mz_exp_assigned()
    by_classes.abundance_count_percentile(Labels.unassigned)
    by_classes.peaks_count_percentile(Labels.unassigned)
예제 #19
0
def run_assignment(file_location, workflow_params):
    """Load a spectrum by file type, optionally calibrate it and assign formulas.

    Parameters
    ----------
    file_location
        Path of the input; the suffix selects the loader
        (``.raw``, ``.d``, ``.txt`` or ``.csv``).
    workflow_params
        Object providing ``corems_json_path``, ``calibrate`` and, depending on
        the loader, ``raw_file_start_scan`` / ``raw_file_final_scan``,
        ``polarity`` / ``is_centroid`` and ``calibration_ref_file_path``.

    Returns
    -------
    The assigned mass spectrum object.

    Raises
    ------
    ValueError
        If the file suffix is not one of the supported types.
    """
    file_path = Path(file_location)
    suffix = file_path.suffix

    if suffix == '.raw':

        first_scan = workflow_params.raw_file_start_scan
        last_scan = workflow_params.raw_file_final_scan
        mass_spectrum = run_thermo_reduce_profile(file_location,
                                                  workflow_params, first_scan,
                                                  last_scan)

    elif suffix == '.d':

        mass_spectrum = run_bruker_transient(file_location,
                                             workflow_params.corems_json_path)

    # FIX: Path.suffix includes the leading dot, so the original comparison
    # against 'csv' could never match and CSV mass lists were never loaded.
    elif suffix in ('.txt', '.csv'):

        mass_spectrum = get_masslist(file_location,
                                     workflow_params.corems_json_path,
                                     polarity=workflow_params.polarity,
                                     is_centroid=workflow_params.is_centroid)

    else:
        # FIX: previously fell through to an UnboundLocalError on
        # `mass_spectrum`; fail with an explicit message instead.
        raise ValueError(
            "Unsupported file type %r for %s; expected .raw, .d, .txt or .csv"
            % (suffix, file_location))

    mass_spectrum.set_parameter_from_json(workflow_params.corems_json_path)

    if workflow_params.calibrate:

        ref_file_location = Path(workflow_params.calibration_ref_file_path)

        MzDomainCalibration(mass_spectrum, ref_file_location).run()

    # force it to one job. daemon child can not have child process
    mass_spectrum.molecular_search_settings.db_jobs = 1

    SearchMolecularFormulas(mass_spectrum,
                            first_hit=False).run_worker_mass_spectrum()

    return mass_spectrum
예제 #20
0
    list_dict = []

    dirnames = get_dirnames()

    if dirnames:

        for file_location in dirnames:

            print(file_location)

            mass_spectrum = get_mass_spectrum(file_location)

            set_settings_for_bromothymol_blue(mass_spectrum)
            #set_settings_for_chlorophenol_red(mass_spectrum)

            SearchMolecularFormulas(mass_spectrum,
                                    first_hit=True).run_worker_mass_spectrum()

            #mass_error_prediction = MassErrorPrediction(mass_spectrum)

            #mass_error_prediction.get_results()

            ax = mass_spectrum.plot_mz_domain_profile()
            #plt.show()

            for mspeak in mass_spectrum:

                if mspeak:

                    for mf in mspeak:

                        ax.plot(mspeak.mz_exp,
예제 #21
0
# Access the transient object through the Bruker reader
# (`bruker_reader` is created earlier in the script, outside this excerpt).
bruker_transient_obj = bruker_reader.get_transient()

# Transient duration time, as reported by the transient object.
T =  bruker_transient_obj.transient_time

# Get the mass spectrum object from the transient, without plotting and with
# automatic processing enabled.
mass_spectrum_obj = bruker_transient_obj.get_mass_spectrum(plot_result=False, auto_process=True)

# What the search below does, as described by the original author:
# - searches monoisotopic molecular formulas for all mass spectral peaks
# - calculates fine isotopic structure based on the monoisotopic molecular
#   formulas found and the current dynamic range
# - searches molecular formulas of the corresponding calculated isotopologues
# - settings are stored in SearchConfig.json and can be changed directly in
#   the file or inside the framework class

SearchMolecularFormulas(mass_spectrum_obj, first_hit=False).run_worker_mass_spectrum()

# Iterate over the mass spectral peak objects in the order given by
# sort_by_abundance().
for mspeak in mass_spectrum_obj.sort_by_abundance():

    # Per the original author: a peak is truthy when at least one molecular
    # formula is associated with it (same as mspeak.is_assigned --> bool).
    if  mspeak:

        # Molecular formula with the highest mass accuracy (lowest error).
        # Not used further within this excerpt.
        molecular_formula = mspeak.molecular_formula_lowest_error

        # Plot m/z against peak height as a green circle marker.
        # Per the original author: use mass_spectrum_obj.mz_exp to access all
        # m/z values and mass_spectrum_obj.mz_exp_profile for m/z with all
        # available data points.
        pyplot.plot(mspeak.mz_exp, mspeak.abundance, 'o', c='g')
예제 #22
0
def single_process(mf_references_dict: Dict[str, Dict[float,
                                                      List[MolecularFormula]]],
                   datapath: Path, current_mix: str, mf_results_dic: dict):

    plt.rcParams["figure.figsize"] = (16, 8)

    #get target compounds mz and molecular formulas
    dict_tarrget_mzs = mf_references_dict.get(current_mix)

    target_mzs = dict_tarrget_mzs.keys()

    lcms_obj, parser = run_thermo(datapath, target_mzs)

    target_mzs = parser.selected_mzs

    #TODO need to convert this to a lcms object
    scan_number_mass_spectrum = {}

    results_list = []
    # mz is from calculate mz

    tic_data, ax_tic = lcms_obj.get_tic(ms_type='MS !d',
                                        peak_detection=True,
                                        smooth=True,
                                        plot=False)

    eics_data, ax_eic = lcms_obj.get_eics(tic_data,
                                          smooth=True,
                                          plot=False,
                                          legend=False,
                                          peak_detection=True,
                                          ax=ax_tic)

    lcms_obj.process_ms1(dict_tarrget_mzs)

    #_write_frame_to_new_sheet(path_to_file="HILIC NEG Results.xlsx", sheet_name='all_eic_results', data=results_list)
    # TODO: create lcms and add dependent scans based on scan number
    # Add Adducts search, right now only working for de or protonated species
    # Export function with csv files

    precision_decimals = 0

    ms_peaks_assigned = SearchMolecularFormulasLC(
        lcms_obj).run_target_worker_ms1()

    for eic_peak in lcms_obj:

        dependent_scans = parser.iRawDataPlus.GetScanDependents(
            eic_peak.apex_scan, precision_decimals)

        mass_spectcrum_obj = eic_peak.mass_spectrum

        percursordata = {}

        for scan_dependent_detail in dependent_scans.ScanDependentDetailArray:

            for precursor_mz in scan_dependent_detail.PrecursorMassArray:

                percursordata[precursor_mz] = scan_dependent_detail.ScanIndex

        #print(scan, [(mf.name, mf.mz_calc) for mf in mf_references_list], percursordata)
        #print()
        #print(scan, mass_spectcrum_obj.retention_time)
        #print(mf_references_list)
        #SearchMolecularFormulas(mass_spectcrum_obj).run_worker_ms1()

        #for precursor_mz in percursordata.keys():

        #ax = mass_spectcrum_obj.plot_mz_domain_profile()
        is_assigned = False
        #target_title = 'Target Molecule(s) = '

        #for peak in mass_spectcrum_obj:

        #    for mf in peak:
        #        is_assigned = True

        #        if not mf.is_isotopologue:
        #            target_title += "{}-{} m/z = {:.4f}".format(mf.name, mf.string_formated, mf.protonated_mz)

        #        annotation = "Mol. Form = {}\nm\z = {:.4f}\nerror = {:.4f}\nconfidence score = {:.2f}\nisotopologue score = {:.2f}".format(mf.string_formated, peak.mz_exp, mf.mz_error, mf.confidence_score, mf.isotopologue_similarity)

        #        ax.annotate(annotation , xy=(peak.mz_exp, peak.abundance),
        #                                    xytext=(+3, np.sign(peak.abundance)*-40), textcoords="offset points",
        #                                    horizontalalignment="left",
        #                                    verticalalignment="bottom" if peak.abundance > 0 else "top")

        #if is_assigned:

        #    dir = Path(str(datapath.parent).replace('RAW Files', 'Results MS2 Noise Threshould'))
        #    if not dir.exists():
        #        dir.mkdir(parents=True, exist_ok=True)

        #    ms1_output_file = '{}_{}_{}'.format(scan, 'MS1', datapath.stem)

        #    ax.set_title("Retention Time = {:.3f} {}".format(mass_spectcrum_obj.retention_time, target_title), fontsize=9,)
        #    plt.tight_layout()
        #    #plt.show()
        #    plt.savefig(str(dir) + '/' + ms1_output_file + '.png')
        #    plt.clf()

        #    mass_spectcrum_obj.to_csv(str(dir) + '/' + ms1_output_file)

        #else:

        #    plt.clf()

        scan = eic_peak.apex_scan
        for peak in mass_spectcrum_obj:

            for mf in peak:

                if not mf.is_isotopologue:

                    #error = MZSearch.calc_mz_error(mf.mz_calc, precursor_mz)

                    #check_error = MZSearch.check_ppm_error(LCMSParameters.lcms_obj.eic_tolerance_ppm, error)

                    #if check_error:
                    print(scan, mass_spectcrum_obj.retention_time, mf.name,
                          mf.mz_calc, mf.mz_error, mf.confidence_score,
                          mf.isotopologue_similarity)
                    #print(peak.mz_exp, precursor_mz, percursordata.get(peak.mz_exp))

                    dependent_scans = parser.iRawDataPlus.GetScanDependents(
                        scan, precision_decimals)

                    selected_for_ms2 = False

                    for scan_dependent_detail in dependent_scans.ScanDependentDetailArray:

                        for index, precursor_mz in enumerate(
                                scan_dependent_detail.PrecursorMassArray):

                            error_ppm_window = (scan_dependent_detail.
                                                IsolationWidthArray[index] /
                                                precursor_mz) * 1000000

                            error = MZSearch.calc_mz_error(
                                mf.mz_calc, precursor_mz)

                            check_error = MZSearch.check_ppm_error(
                                error_ppm_window, error)

                            if check_error:

                                selected_for_ms2 = True

                                print(
                                    precursor_mz,
                                    scan_dependent_detail.ScanIndex,
                                    scan_dependent_detail.
                                    IsolationWidthArray[index],
                                    scan_dependent_detail.FilterString)

                                parser.chromatogram_settings.start_scan = scan_dependent_detail.ScanIndex
                                parser.chromatogram_settings.end_scan = scan_dependent_detail.ScanIndex

                                ms2_mass_spec = parser.get_centroid_msms_data(
                                    scan_dependent_detail.ScanIndex)
                                ax = ms2_mass_spec.plot_mz_domain_profile()

                                ax.set_title(
                                    "Retention Time = {:.2f}, Precursor m/z = {:.4f}, Isolation window m/z = {:.1f} \
                                                 Target Molecule = {} m/z = {:.4f} Molecular formula {}\n  "
                                    .format(
                                        eic_peak.retention_time, precursor_mz,
                                        scan_dependent_detail.
                                        IsolationWidthArray[index], mf.name,
                                        mf.mz_calc, mf.string_formated),
                                    fontsize=9,
                                )

                                #ms_peaks_assigned = SearchMolecularFormulas(mass_spectcrum_obj).search_mol_formulas( mf_references_list, ion_type, find_isotopologues=True)
                                used_atoms = {
                                    'C': (1, mf.get('C')),
                                    'H': (1, mf.get('H'))
                                }

                                for atoms, value in mf.class_dict.items():
                                    used_atoms[atoms] = (0, value)

                                print(used_atoms)

                                ms2_mass_spec.molecular_search_settings.usedAtoms = used_atoms
                                ms2_mass_spec.molecular_search_settings.min_ppm_error = -15  #parser.chromatogram_settings.eic_tolerance_ppm
                                ms2_mass_spec.molecular_search_settings.max_ppm_error = 15  #parser.chromatogram_settings.eic_tolerance_ppm
                                ms2_mass_spec.molecular_search_settings.use_min_peaks_filter = False
                                ms2_mass_spec.molecular_search_settings.use_runtime_kendrick_filter = False
                                ms2_mass_spec.molecular_search_settings.min_hc_filter = -np.inf
                                ms2_mass_spec.molecular_search_settings.max_hc_filter = np.inf

                                ms2_mass_spec.molecular_search_settings.min_oc_filter = -np.inf
                                ms2_mass_spec.molecular_search_settings.max_oc_filter = np.inf

                                ms2_mass_spec.molecular_search_settings.isRadical = False
                                SearchMolecularFormulas(
                                    ms2_mass_spec, find_isotopologues=False
                                ).run_worker_mass_spectrum()

                                fragment_mz = []
                                fragment_formulas = []
                                fragment_error = []
                                cumulative_neutral_loss = []

                                for msmspeak in ms2_mass_spec:

                                    for mf_msms in msmspeak:

                                        fragment_mz.append(
                                            round(msmspeak.mz_exp, 6))
                                        fragment_formulas.append(
                                            mf_msms.string)
                                        fragment_error.append(mf_msms.mz_error)
                                        cumulative_neutral_loss.append(
                                            mf.subtract_formula(mf_msms))

                                        annotation = "{} {:.4f}".format(
                                            mf_msms.string, mf_msms.mz_error)
                                        ax.annotate(
                                            annotation,
                                            xy=(msmspeak.mz_exp,
                                                msmspeak.abundance),
                                            xytext=(
                                                -3,
                                                np.sign(msmspeak.abundance) *
                                                -3),
                                            textcoords="offset points",
                                            horizontalalignment="left",
                                            verticalalignment="bottom" if
                                            msmspeak.abundance > 0 else "top")
                                        print(mf_msms, mf_msms.mz_error,
                                              mf.subtract_formula(mf_msms))

                                ms2_output_file = '{}_{}_{}'.format(
                                    scan_dependent_detail.ScanIndex, 'MS2',
                                    datapath.stem)

                                result = {
                                    'Mix Name':
                                    current_mix,
                                    'Data Set':
                                    datapath.stem,
                                    'Compound Name':
                                    mf.name,
                                    'MS1 Scan':
                                    scan,
                                    'Retention Time':
                                    mass_spectcrum_obj.retention_time,
                                    'm/z':
                                    peak.mz_exp,
                                    'm/z Calculated':
                                    mf.mz_calc,
                                    'Mol. Formula':
                                    mf.string,
                                    'm/z Error':
                                    mf.mz_error,
                                    'Ion Type':
                                    mf.ion_type,
                                    'Confidence Score':
                                    mf.confidence_score,
                                    'Isotopologue Score':
                                    mf.isotopologue_similarity,
                                    'm/z Precursor':
                                    precursor_mz,
                                    'Isolation Window':
                                    scan_dependent_detail.
                                    IsolationWidthArray[index],
                                    'MS2 Scan':
                                    scan_dependent_detail.ScanIndex,
                                    'MS2 m/z':
                                    fragment_mz,
                                    'MS2 Mol. Formulas':
                                    fragment_formulas,
                                    'MS2 m/z error':
                                    fragment_error,
                                    'Cumulative Neutral Loss':
                                    cumulative_neutral_loss,
                                    'MS1 Output':
                                    'ms1_output_file',
                                    'MS2 Output':
                                    ms2_output_file
                                }

                                dir = Path(
                                    str(datapath.parent).replace(
                                        'RAW Files',
                                        'Results MS2 Noise Threshould'))

                                if not dir.exists():
                                    dir.mkdir(parents=True, exist_ok=True)

                                ms2_mass_spec.to_csv(
                                    str(dir) + '/' + ms2_output_file)

                                if mf.name not in mf_results_dic.keys():

                                    mf_results_dic[mf.name] = [result]

                                else:

                                    mf_results_dic[mf.name].append(result)

                                plt.tight_layout()
                                plt.savefig(
                                    str(dir) + '/' + ms2_output_file + '.png')
                                #plt.show()
                                plt.clf()

                    # save results without the fragmentation
                    if not selected_for_ms2:

                        result = {
                            'Mix Name': current_mix,
                            'Data Set': datapath.stem,
                            'Compound Name': mf.name,
                            'MS1 Scan': scan,
                            'Retention Time':
                            mass_spectcrum_obj.retention_time,
                            'm/z': peak.mz_exp,
                            'm/z Calculated': mf.mz_calc,
                            'Mol. Formula': mf.string,
                            'm/z Error': mf.mz_error,
                            'Ion Type': mf.ion_type,
                            'Confidence Score': mf.confidence_score,
                            'Isotopologue Score': mf.isotopologue_similarity,
                            'm/z Precursor': None,
                            'Isolation Window': None,
                            'MS2 Scan': None,
                            'MS2 m/z': None,
                            'MS2 Mol. Formulas': None,
                            'MS2 m/z error': None,
                            'Cumulative Neutral Loss': None,
                            'MS1 Output': 'ms1_output_file',
                            'MS2 Output': None
                        }

                        if mf.name not in mf_results_dic.keys():

                            mf_results_dic[mf.name] = [result]

                        else:

                            mf_results_dic[mf.name].append(result)

    return mf_results_dic

    for molecule_name, data in dict_res.items():

        _write_frame_to_new_sheet(path_to_file='C18 Results.xlsx',
                                  sheet_name='molecular_formula_results',
                                  data=data)
예제 #23
0
def test_old_calibration():
    """Run the frequency-domain calibration workflow followed by a formula search.

    Steps, in the order they are executed:
        - configure the global molecular-search parameters (C/H/O only,
          +/- 5 ppm) and create a mass spectrum
        - collect reference peaks with ``FindOxygenPeaks``
        - run the ``FreqDomain_Calibration`` fits: linear, step fit,
          iterative quadratic and Ledford
        - reconfigure the search parameters (+/- 3 ppm, wider atom set)
        - apply ``ClusteringFilter.filter_kendrick``, search molecular
          formulas for the whole spectrum and remove assignments by mass
          error

    Returns
    -------
    None
        Nothing is asserted or returned; the results live on the mass
        spectrum object and in the global ``MSParameters`` settings.
    """

    def configure_search(**options):
        # Resolve MSParameters.molecular_search on every assignment, exactly
        # like a plain ``MSParameters.molecular_search.x = y`` statement.
        for option, value in options.items():
            setattr(MSParameters.molecular_search, option, value)

    # Settings used while locating the calibration reference peaks.
    configure_search(
        error_method='None',
        min_ppm_error=-5,
        max_ppm_error=5,
        mz_error_range=1,
        isProtonated=True,
        isRadical=True,
        usedAtoms={'C': (1, 100), 'H': (4, 200), 'O': (1, 10)},
    )

    spectrum = create_mass_spectrum()

    oxygen_finder = FindOxygenPeaks(spectrum)
    oxygen_finder.run()
    reference_peaks = oxygen_finder.get_list_found_peaks()

    calibration = FreqDomain_Calibration(spectrum, reference_peaks)
    calibration.linear()
    calibration.step_fit()
    calibration.quadratic(iteration=True)
    calibration.ledford_calibration()

    # Settings used for the full molecular formula search after calibration.
    configure_search(
        error_method='symmetrical',
        min_ppm_error=-3,
        max_ppm_error=3,
        mz_error_range=1,
        mz_error_average=0,
        min_abun_error=-30,  # percentage
        max_abun_error=70,  # percentage
        isProtonated=True,
        isRadical=True,
        usedAtoms={
            'C': (1, 100),
            'H': (4, 200),
            'O': (0, 20),
            'N': (0, 1),
            'S': (0, 0),
            'P': (0, 0),
        },
    )

    ClusteringFilter().filter_kendrick(spectrum)

    SearchMolecularFormulas(spectrum).run_worker_mass_spectrum()
    ClusteringFilter().remove_assignment_by_mass_error(spectrum)
예제 #24
0
def _ms1_result_row(current_mix, datapath, scan, mass_spectrum, peak, mf,
                    ms1_output_file):
    """Return the result row for one MS1 assignment.

    Every MS2-related field is set to None, so rows with and without a
    matching dependent scan share the same keys in the same order.
    """
    return {
        'Mix Name': current_mix,
        'Data Set': datapath.stem,
        'Compound Name': mf.name,
        'MS1 Scan': scan,
        'Retention Time': mass_spectrum.retention_time,
        'm/z': peak.mz_exp,
        'm/z Calculated': mf.mz_calc,
        'Mol. Formula': mf.string,
        'm/z Error': mf.mz_error,
        'Ion Type': mf.ion_type,
        'Confidence Score': mf.confidence_score,
        'Isotopologue Score': mf.isotopologue_similarity,
        'm/z Precursor': None,
        'Isolation Window': None,
        'MS2 Scan': None,
        'MS2 m/z': None,
        'MS2 Mol. Formulas': None,
        'MS2 m/z error': None,
        'Cumulative Neutral Loss': None,
        'MS1 Output': ms1_output_file,
        'MS2 Output': None,
    }


def _search_ms2_fragments(parser, scan_dependent_detail, index, precursor_mz,
                          mf, retention_time, datapath):
    """Assign fragment formulas in one dependent scan and save its outputs.

    Loads the centroid MS/MS spectrum of ``scan_dependent_detail``, runs a
    molecular formula search whose ``usedAtoms`` are derived from the
    precursor formula ``mf`` (C and H via ``mf.get``, the remaining entries
    from ``mf.class_dict``), annotates the plot and writes the spectrum (CSV)
    and the figure (PNG) to the MS2 results directory.

    Returns a dict holding the MS2 fields of a result row.
    """
    isolation_width = scan_dependent_detail.IsolationWidthArray[index]
    ms2_scan = scan_dependent_detail.ScanIndex

    print(precursor_mz, ms2_scan, isolation_width,
          scan_dependent_detail.FilterString)

    parser.chromatogram_settings.start_scan = ms2_scan
    parser.chromatogram_settings.end_scan = ms2_scan

    ms2_mass_spec = parser.get_centroid_msms_data(ms2_scan)
    ax = ms2_mass_spec.plot_mz_domain_profile()

    # The backslash continuation is inside the literal on purpose: it keeps
    # the title text (including its run of spaces) identical to what was
    # emitted before.
    ax.set_title("Retention Time = {:.2f}, Precursor m/z = {:.4f}, Isolation window m/z = {:.1f} \
                                                 Target Molecule = {} m/z = {:.4f} Molecular formula {}\n  ".format(
        retention_time, precursor_mz, isolation_width,
        mf.name, mf.mz_calc, mf.string_formated), fontsize=9)

    # Fragments are searched with atom limits taken from the precursor.
    used_atoms = {'C': (1, mf.get('C')), 'H': (1, mf.get('H'))}
    for atom, count in mf.class_dict.items():
        used_atoms[atom] = (0, count)

    print(used_atoms)

    settings = ms2_mass_spec.molecular_search_settings
    settings.usedAtoms = used_atoms
    settings.min_ppm_error = -15
    settings.max_ppm_error = 15
    settings.use_min_peaks_filter = False
    settings.use_runtime_kendrick_filter = False
    settings.min_hc_filter = -np.inf
    settings.max_hc_filter = np.inf
    settings.min_oc_filter = -np.inf
    settings.max_oc_filter = np.inf
    settings.isRadical = False

    SearchMolecularFormulas(
        ms2_mass_spec, find_isotopologues=False).run_worker_mass_spectrum()

    fragment_mz = []
    fragment_formulas = []
    fragment_error = []
    cumulative_neutral_loss = []

    for msmspeak in ms2_mass_spec:
        for mf_msms in msmspeak:
            neutral_loss = mf.subtract_formula(mf_msms)

            fragment_mz.append(round(msmspeak.mz_exp, 6))
            fragment_formulas.append(mf_msms.string)
            fragment_error.append(mf_msms.mz_error)
            cumulative_neutral_loss.append(neutral_loss)

            annotation = "{} {:.4f}".format(mf_msms.string, mf_msms.mz_error)
            ax.annotate(
                annotation,
                xy=(msmspeak.mz_exp, msmspeak.abundance),
                xytext=(-3, np.sign(msmspeak.abundance) * -3),
                textcoords="offset points",
                horizontalalignment="left",
                verticalalignment="bottom" if msmspeak.abundance > 0 else "top")
            print(mf_msms, mf_msms.mz_error, neutral_loss)

    ms2_output_file = '{}_{}_{}'.format(ms2_scan, 'MS2', datapath.stem)

    output_dir = Path(str(datapath.parent).replace(
        'RAW Files', 'Results MS2 Noise Threshould'))
    output_dir.mkdir(parents=True, exist_ok=True)

    ms2_mass_spec.to_csv(str(output_dir / ms2_output_file))

    plt.tight_layout()
    plt.savefig(str(output_dir / (ms2_output_file + '.png')))
    plt.clf()

    return {
        'm/z Precursor': precursor_mz,
        'Isolation Window': isolation_width,
        'MS2 Scan': ms2_scan,
        'MS2 m/z': fragment_mz,
        'MS2 Mol. Formulas': fragment_formulas,
        'MS2 m/z error': fragment_error,
        'Cumulative Neutral Loss': cumulative_neutral_loss,
        'MS2 Output': ms2_output_file,
    }


def single_process(mf_references_dict: Dict[str, Dict[float, List[MolecularFormula]]], datapath: Path, current_mix: str, mf_results_dic: dict):
    """Search one raw file for the target compounds of a mix at MS1 and MS2 level.

    Workflow:
        1. Extract ion chromatograms for the target m/z values of
           ``current_mix`` with ``run_thermo`` and write a per-target summary
           to the 'all_eic_results' sheet of "HILIC NEG Results.xlsx".
        2. For every scan at an EIC apex, search the spectrum against the
           reference formulas, annotate and save the plot and the CSV.
        3. For every non-isotopologue assignment, look for dependent scans
           whose precursor lies within the isolation window of the
           calculated m/z and, when found, assign fragment formulas.

    Args:
        mf_references_dict: mix name -> {target m/z -> reference formulas}.
        datapath: path of the raw data file; its stem and parent directory
            are used to name the outputs.
        current_mix: key selecting the targets inside ``mf_references_dict``.
        mf_results_dic: compound name -> list of result rows; extended in
            place.

    Returns:
        ``mf_results_dic`` with one row appended per assignment (one per
        matching dependent scan, or a single row with empty MS2 fields when
        no dependent scan matched).
    """
    plt.rcParams["figure.figsize"] = (16, 8)

    # get target compounds mz and molecular formulas
    dict_tarrget_mzs = mf_references_dict.get(current_mix)
    target_mzs = dict_tarrget_mzs.keys()

    eics_data, parser = run_thermo(datapath, target_mzs)

    # TODO need to convert this to a lcms object
    # scan number -> [mass spectrum, reference formulas expected in that scan]
    scan_number_mass_spectrum = {}

    results_list = []
    # mz is from calculate mz
    for mz, eic_data in eics_data.items():

        # all possible m/z from the same mix, should be one per m/z as per current lib
        possible_mf = dict_tarrget_mzs.get(mz)

        if not eic_data.apexes:
            continue

        names = [mf_obj.name for mf_obj in possible_mf]
        molecular_formulae = [mf_obj.string for mf_obj in possible_mf]

        results_list.append({
            "Mix Name": current_mix,
            "Dataset": datapath.stem,
            "Compound Name": names[0],
            "Neutral Formula": molecular_formulae[0],
            "Target m/z (de)protonated": round(mz, 6),
            "Retention Times": [eic_data.time[apex[1]] for apex in eic_data.apexes],
            "Scans": [eic_data.scans[apex[1]] for apex in eic_data.apexes],
            "Peak Height": [eic_data.eic[apex[1]] for apex in eic_data.apexes],
        })

        for peak_index in eic_data.apexes:

            apex_index = peak_index[1]
            retention_time = eic_data.time[apex_index]
            original_scan = eic_data.scans[apex_index]

            if original_scan in scan_number_mass_spectrum:
                # Several targets can share a scan: search them together.
                scan_number_mass_spectrum[original_scan][1].extend(possible_mf)
                continue

            parser.chromatogram_settings.start_scan = original_scan
            parser.chromatogram_settings.end_scan = original_scan

            mass_spec = parser.get_average_mass_spectrum_in_scan_range()

            # NOTE(review): these are set on the spectrum itself, while the
            # MS2 search configures `molecular_search_settings`; confirm the
            # MS1 search actually reads them from here.
            mass_spec.min_ppm_error = -5
            mass_spec.max_ppm_error = 5

            mass_spec.retention_time = retention_time
            scan_number_mass_spectrum[original_scan] = [mass_spec, list(possible_mf)]

    _write_frame_to_new_sheet(path_to_file="HILIC NEG Results.xlsx", sheet_name='all_eic_results', data=results_list)
    # TODO: create lcms and add dependent scans based on scan number
    # Add Adducts search, right now only working for de or protonated species
    # Export function with csv files

    ion_type = Labels.protonated_de_ion

    precision_decimals = 0

    for scan, ms_mf in scan_number_mass_spectrum.items():

        # Fetched once per MS1 scan and reused for every assigned formula.
        dependent_scans = parser.iRawDataPlus.GetScanDependents(scan, precision_decimals)

        mass_spectcrum_obj, mf_references_list = ms_mf

        # Assigns the matching reference formulas to the peaks in place.
        SearchMolecularFormulas(mass_spectcrum_obj).search_mol_formulas(
            mf_references_list, ion_type, find_isotopologues=True)

        ax = mass_spectcrum_obj.plot_mz_domain_profile()
        is_assigned = False
        target_title = 'Target Molecule(s) = '

        for peak in mass_spectcrum_obj:

            for mf in peak:
                is_assigned = True

                if not mf.is_isotopologue:
                    target_title += "{}-{} m/z = {:.4f}".format(mf.name, mf.string_formated, mf.protonated_mz)

                annotation = "Mol. Form = {}\nm/z = {:.4f}\nerror = {:.4f}\nconfidence score = {:.2f}\nisotopologue score = {:.2f}".format(
                    mf.string_formated, peak.mz_exp, mf.mz_error, mf.confidence_score, mf.isotopologue_similarity)

                ax.annotate(annotation, xy=(peak.mz_exp, peak.abundance),
                            xytext=(+3, np.sign(peak.abundance) * -40), textcoords="offset points",
                            horizontalalignment="left",
                            verticalalignment="bottom" if peak.abundance > 0 else "top")

        if not is_assigned:
            # Nothing was assigned in this scan: discard the plot; there is
            # no formula left to report either.
            plt.clf()
            continue

        output_dir = Path(str(datapath.parent).replace('RAW Files', 'Results No Mix Overlap'))
        output_dir.mkdir(parents=True, exist_ok=True)

        ms1_output_file = '{}_{}_{}'.format(scan, 'MS1', datapath.stem)

        ax.set_title("Retention Time = {:.3f} {}".format(mass_spectcrum_obj.retention_time, target_title), fontsize=9)
        plt.tight_layout()
        plt.savefig(str(output_dir / (ms1_output_file + '.png')))
        plt.clf()

        mass_spectcrum_obj.to_csv(str(output_dir / ms1_output_file))

        for peak in mass_spectcrum_obj:

            for mf in peak:

                if mf.is_isotopologue:
                    continue

                print('YEAHHHHH')
                print(scan, mass_spectcrum_obj.retention_time, mf.name, mf.mz_calc, mf.mz_error, mf.confidence_score, mf.isotopologue_similarity)

                selected_for_ms2 = False

                for scan_dependent_detail in dependent_scans.ScanDependentDetailArray:

                    for index, precursor_mz in enumerate(scan_dependent_detail.PrecursorMassArray):

                        # Isolation width expressed in ppm of the precursor m/z.
                        error_ppm_window = (scan_dependent_detail.IsolationWidthArray[index] / precursor_mz) * 1000000

                        error = MZSearch.calc_mz_error(mf.mz_calc, precursor_mz)

                        if not MZSearch.check_ppm_error(error_ppm_window, error):
                            continue

                        selected_for_ms2 = True

                        # Use the retention time of the spectrum being
                        # processed, not of whichever spectrum was created
                        # last while collecting the EIC apexes.
                        ms2_fields = _search_ms2_fragments(
                            parser, scan_dependent_detail, index, precursor_mz,
                            mf, mass_spectcrum_obj.retention_time, datapath)

                        result = _ms1_result_row(
                            current_mix, datapath, scan, mass_spectcrum_obj,
                            peak, mf, ms1_output_file)
                        result.update(ms2_fields)

                        mf_results_dic.setdefault(mf.name, []).append(result)

                # save results without the fragmentation
                if not selected_for_ms2:
                    result = _ms1_result_row(
                        current_mix, datapath, scan, mass_spectcrum_obj,
                        peak, mf, ms1_output_file)
                    mf_results_dic.setdefault(mf.name, []).append(result)

    return mf_results_dic
예제 #25
0
    def find_series_mspeaks(self,
                            mass_spectrum_obj,
                            molecular_formula_obj_reference,
                            deltamz=14):
        """Pick one peak per nominal mass of a ``deltamz`` series and search formulas.

        Starting from the nominal m/z of ``molecular_formula_obj_reference``,
        nominal masses are generated in steps of ``deltamz`` upwards (while
        the current mass is <= the spectrum maximum m/z) and downwards (while
        it is >= the minimum m/z). The reference nominal mass itself is not
        part of the series. For each nominal mass with peaks, the peak with
        the highest abundance is kept; peaks whose abundance exceeds
        mean + 7 * std of all abundances are ranked as 0.

        A molecular formula search is then run on the selected peaks.

        Returns
        -------
        list
            The selected peaks that evaluate as truthy after the search
            (presumably those with at least one assigned formula -- confirm
            against the peak class).
        """
        all_abundances = mass_spectrum_obj.abundance
        upper_limit = (average(all_abundances, axis=0) +
                       7 * std(all_abundances, axis=0))

        lowest_mz = mass_spectrum_obj.min_mz_exp
        highest_mz = mass_spectrum_obj.max_mz_exp
        reference_nominal = molecular_formula_obj_reference.mz_nominal_calc

        def _walk(step, within_range):
            # Step away from the reference; the value is emitted after the
            # increment, so the last one lies just outside the range.
            current = reference_nominal
            while within_range(current):
                current += step
                yield current

        def _capped_abundance(peak):
            # Outliers above the limit must lose against any regular peak.
            return peak.abundance if peak.abundance <= upper_limit else 0

        series = sorted([
            *_walk(deltamz, lambda value: value <= highest_mz),
            *_walk(-deltamz, lambda value: value >= lowest_mz),
        ])

        selected_peaks = []
        for nominal_mass in series:
            start, stop = mass_spectrum_obj.get_nominal_mz_first_last_indexes(
                nominal_mass)
            window = mass_spectrum_obj[start:stop]
            if not window:
                continue
            selected_peaks.append(max(window, key=_capped_abundance))

        print('Start molecular formula search')
        searcher = SearchMolecularFormulas(mass_spectrum_obj, self.sql_db)
        searcher.run_worker_ms_peaks(selected_peaks)
        print('Done molecular formula search')

        return list(filter(None, selected_peaks))