Ejemplo n.º 1
0
def test_heatmap():
    """Compare get_heatmap output against stored reference values, then plot it.

    The first call uses only the file handle and the value 1000; the second
    adds ``min_mz=50`` and checks that the first m/z bin moves accordingly.
    The result of the second call is handed to plot_heatmap.
    """
    full_range = get_heatmap(fid, 1000)

    mean_intensity = full_range['arr'].mean()
    assert np.allclose(mean_intensity, 3790.08673939)

    lowest_mz_bin = full_range['mz_bins'][0]
    assert np.allclose(lowest_mz_bin, 30.004)

    highest_rt_bin = full_range['rt_bins'][-1]
    assert np.allclose(highest_rt_bin, 19.2667)

    # Same query, but with a lower bound on m/z.
    clipped = get_heatmap(fid, 1000, min_mz=50)
    lowest_mz_bin = clipped['mz_bins'][0]
    assert np.allclose(lowest_mz_bin, 50.0002)

    plot_heatmap(clipped['arr'], clipped['rt_bins'], clipped['mz_bins'])
Ejemplo n.º 2
0
def test_heatmap():
    """Compare get_heatmap output against stored reference values, then plot it.

    The first call uses only the file handle and the value 1000; the second
    adds ``min_mz=50`` and checks that the first m/z bin moves accordingly.
    The result of the second call is handed to plot_heatmap.
    """
    full_range = get_heatmap(fid, 1000)

    mean_intensity = full_range['arr'].mean()
    assert np.allclose(mean_intensity, 3790.08673939)

    lowest_mz_bin = full_range['mz_bins'][0]
    assert np.allclose(lowest_mz_bin, 30.004)

    highest_rt_bin = full_range['rt_bins'][-1]
    assert np.allclose(highest_rt_bin, 19.2667)

    # Same query, but with a lower bound on m/z.
    clipped = get_heatmap(fid, 1000, min_mz=50)
    lowest_mz_bin = clipped['mz_bins'][0]
    assert np.allclose(lowest_mz_bin, 50.0002)

    plot_heatmap(clipped['arr'], clipped['rt_bins'], clipped['mz_bins'])
def get_data_for_a_compound(mz_ref, rt_ref, what_to_get, h5file, extra_time):
    """
    A helper function to query the various metatlas data selection
    commands for a compound defined in an experimental atlas.

    Parameters
    ----------
    mz_ref : a MetAtlas Object for a m/z reference Class
        The attributes read here are ``mz``, ``mz_tolerance``,
        ``mz_tolerance_units`` and ``detected_polarity``. A tolerance whose
        units are not 'ppm' is treated as an absolute m/z width and is
        converted to ppm relative to ``mz``.
    rt_ref : a MetAtlas Object for a retention time reference Class
        The attributes read here are ``rt_min`` and ``rt_max``. They are
        passed to the queries as stored; no unit conversion is applied.
    what_to_get : a list of strings
        this contains one or more of [ 'ms1_summary', 'eic', '2dhist', 'msms' ]
    h5file : str
        Path to the HDF5 input file
    extra_time : number
        Padding subtracted from ``rt_min`` and added to ``rt_max`` for the
        'eic' query only.

    Returns
    -------
    dict
        One entry per requested item found in ``what_to_get``:

        * 'ms1_summary' : dict with keys 'polarity', 'mz_centroid',
          'rt_centroid', 'mz_peak', 'rt_peak', 'peak_height', 'peak_area'
        * 'eic' : dict with keys 'rt', 'intensity', 'polarity'
        * '2dhist' : the result of ``h5q.get_heatmap`` plus a 'polarity' key
        * 'msms' : dict with keys 'most_intense_precursor', 'data', 'polarity'

        When the 'ms1_summary', 'eic' or 'msms' query raises, every key of
        that entry is set to an empty list instead. Errors from the '2dhist'
        query are not caught and propagate to the caller.
    """
    #TODO : polarity should be handled in the experiment and not a loose parameter
    import numpy as np
    from metatlas import h5_query as h5q
    import tables

    ms1_summary_keys = ('polarity', 'mz_centroid', 'rt_centroid', 'mz_peak',
                        'rt_peak', 'peak_height', 'peak_area')
    eic_keys = ('rt', 'intensity', 'polarity')
    msms_keys = ('most_intense_precursor', 'data', 'polarity')

    #get a pointer to the hdf5 file
    fid = tables.open_file(h5file)
    try:
        # polarity is encoded as 1 for 'positive' and 0 for anything else
        polarity = 1 if mz_ref.detected_polarity == 'positive' else 0

        mz_theor = mz_ref.mz
        if mz_ref.mz_tolerance_units == 'ppm':
            ppm_uncertainty = mz_ref.mz_tolerance
        else:
            # absolute tolerance -> ppm relative to the reference m/z
            ppm_uncertainty = mz_ref.mz_tolerance / mz_ref.mz * 1e6

        # NOTE: rt bounds are used exactly as stored on rt_ref
        rt_min = rt_ref.rt_min
        rt_max = rt_ref.rt_max

        mz_min = mz_theor - mz_theor * ppm_uncertainty / 1e6
        mz_max = mz_theor + mz_theor * ppm_uncertainty / 1e6

        return_data = {}

        if 'ms1_summary' in what_to_get:
            #Get Summary Data from the MS1 raw data
            try:
                ms1_data = h5q.get_data(fid,
                                        ms_level=1,
                                        polarity=polarity,
                                        min_mz=mz_min,
                                        max_mz=mz_max,
                                        min_rt=rt_min,
                                        max_rt=rt_max)
                intensity = ms1_data['i']
                total_intensity = np.sum(intensity)
                mz_centroid = np.sum(
                    np.multiply(intensity, ms1_data['mz'])) / total_intensity
                rt_centroid = np.sum(
                    np.multiply(intensity, ms1_data['rt'])) / total_intensity
                # np.argmax raises on empty data, which selects the
                # placeholder result below
                idx = np.argmax(intensity)
                summary = {
                    'polarity': polarity,
                    'mz_centroid': mz_centroid,
                    'rt_centroid': rt_centroid,
                    'mz_peak': ms1_data['mz'][idx],
                    'rt_peak': ms1_data['rt'][idx],
                    'peak_height': intensity[idx],
                    'peak_area': total_intensity,
                }
            except Exception:
                # best effort: a failed query yields empty placeholders
                summary = {key: [] for key in ms1_summary_keys}
            return_data['ms1_summary'] = summary

        if 'eic' in what_to_get:
            #Get Extracted Ion Chromatogram
            # TODO : If a person calls for summary, then they will already have the MS1 raw data
            try:
                # ms_level is given explicitly: it used to be defined only
                # when 'ms1_summary' had also been requested, so asking for
                # 'eic' alone silently produced an empty result.
                rt, intensity = h5q.get_chromatogram(
                    fid,
                    mz_min,
                    mz_max,
                    ms_level=1,
                    polarity=polarity,
                    min_rt=rt_min - extra_time,
                    max_rt=rt_max + extra_time)
                eic = {'rt': rt, 'intensity': intensity, 'polarity': polarity}
            except Exception:
                eic = {key: [] for key in eic_keys}
            return_data['eic'] = eic

        if '2dhist' in what_to_get:
            #Get 2D histogram of intensity values in m/z and retention time
            #TODO : number of mz bins should be an optional parameter
            mz_edges = np.logspace(np.log10(100), np.log10(1000), 10000)
            #TODO : number of rt bins should be an optional parameter. When not provided, it should default to unique bins
            rt_edges = np.linspace(rt_min, rt_max, 100)
            ms_level = 1  #TODO : ms_level should be a parameter
            return_data['2dhist'] = h5q.get_heatmap(fid, mz_edges, rt_edges,
                                                    ms_level, polarity)
            return_data['2dhist']['polarity'] = polarity

        if 'msms' in what_to_get:
            #Get Fragmentation Data
            try:
                # The precursor window is widened by 0.015 on each side
                # because Thermo doesn't store accurate precursor m/z.
                #TODO : max_mz offset, precursor intensity and collision energy limits need to be parameters
                fragmentation_data = h5q.get_data(
                    fid,
                    ms_level=2,
                    polarity=polarity,
                    min_mz=0,
                    max_mz=mz_theor + 2,
                    min_rt=rt_min,
                    max_rt=rt_max,
                    min_precursor_MZ=mz_min - 0.015,
                    max_precursor_MZ=mz_max + 0.015)
                msms = {
                    'most_intense_precursor':
                    retrieve_most_intense_msms_scan(fragmentation_data),
                    'data': fragmentation_data,
                    'polarity': polarity,
                }
            except Exception:
                msms = {key: [] for key in msms_keys}
            return_data['msms'] = msms
    finally:
        # always release the file handle, even when a query raises
        fid.close()

    return return_data
def get_data_for_a_compound(mz_ref,rt_ref,what_to_get,h5file,extra_time):
    """
    A helper function to query the various metatlas data selection
    commands for a compound defined in an experimental atlas.

    Parameters
    ----------
    mz_ref : a MetAtlas Object for a m/z reference Class
        The attributes read here are ``mz``, ``mz_tolerance``,
        ``mz_tolerance_units`` and ``detected_polarity``. A tolerance whose
        units are not 'ppm' is treated as an absolute m/z width and is
        converted to ppm relative to ``mz``.
    rt_ref : a MetAtlas Object for a retention time reference Class
        The attributes read here are ``rt_min`` and ``rt_max``. They are
        passed to the queries as stored; no unit conversion is applied.
    what_to_get : a list of strings
        this contains one or more of [ 'ms1_summary', 'eic', '2dhist', 'msms' ]
    h5file : str
        Path to the HDF5 input file
    extra_time : number
        Padding subtracted from ``rt_min`` and added to ``rt_max`` for the
        'eic' query only.

    Returns
    -------
    dict
        One entry per requested item found in ``what_to_get``:

        * 'ms1_summary' : dict with keys 'polarity', 'mz_centroid',
          'rt_centroid', 'mz_peak', 'rt_peak', 'peak_height', 'peak_area'
        * 'eic' : dict with keys 'rt', 'intensity', 'polarity'
        * '2dhist' : the result of ``h5q.get_heatmap`` plus a 'polarity' key
        * 'msms' : dict with keys 'most_intense_precursor', 'data', 'polarity'

        When the 'ms1_summary', 'eic' or 'msms' query raises, every key of
        that entry is set to an empty list instead. Errors from the '2dhist'
        query are not caught and propagate to the caller.
    """
    #TODO : polarity should be handled in the experiment and not a loose parameter
    import numpy as np
    from metatlas import h5_query as h5q
    import tables

    ms1_summary_keys = ('polarity', 'mz_centroid', 'rt_centroid', 'mz_peak',
                        'rt_peak', 'peak_height', 'peak_area')
    eic_keys = ('rt', 'intensity', 'polarity')
    msms_keys = ('most_intense_precursor', 'data', 'polarity')

    #get a pointer to the hdf5 file
    fid = tables.open_file(h5file)
    try:
        # polarity is encoded as 1 for 'positive' and 0 for anything else
        polarity = 1 if mz_ref.detected_polarity == 'positive' else 0

        mz_theor = mz_ref.mz
        if mz_ref.mz_tolerance_units == 'ppm':
            ppm_uncertainty = mz_ref.mz_tolerance
        else:
            # absolute tolerance -> ppm relative to the reference m/z
            ppm_uncertainty = mz_ref.mz_tolerance / mz_ref.mz * 1e6

        # NOTE: rt bounds are used exactly as stored on rt_ref
        rt_min = rt_ref.rt_min
        rt_max = rt_ref.rt_max

        mz_min = mz_theor - mz_theor * ppm_uncertainty / 1e6
        mz_max = mz_theor + mz_theor * ppm_uncertainty / 1e6

        return_data = {}

        if 'ms1_summary' in what_to_get:
            #Get Summary Data from the MS1 raw data
            try:
                ms1_data = h5q.get_data(fid,
                                        ms_level=1,
                                        polarity=polarity,
                                        min_mz=mz_min,
                                        max_mz=mz_max,
                                        min_rt=rt_min,
                                        max_rt=rt_max)
                intensity = ms1_data['i']
                total_intensity = np.sum(intensity)
                mz_centroid = np.sum(
                    np.multiply(intensity, ms1_data['mz'])) / total_intensity
                rt_centroid = np.sum(
                    np.multiply(intensity, ms1_data['rt'])) / total_intensity
                # np.argmax raises on empty data, which selects the
                # placeholder result below
                idx = np.argmax(intensity)
                summary = {
                    'polarity': polarity,
                    'mz_centroid': mz_centroid,
                    'rt_centroid': rt_centroid,
                    'mz_peak': ms1_data['mz'][idx],
                    'rt_peak': ms1_data['rt'][idx],
                    'peak_height': intensity[idx],
                    'peak_area': total_intensity,
                }
            except Exception:
                # best effort: a failed query yields empty placeholders
                summary = {key: [] for key in ms1_summary_keys}
            return_data['ms1_summary'] = summary

        if 'eic' in what_to_get:
            #Get Extracted Ion Chromatogram
            # TODO : If a person calls for summary, then they will already have the MS1 raw data
            try:
                # ms_level is given explicitly: it used to be defined only
                # when 'ms1_summary' had also been requested, so asking for
                # 'eic' alone silently produced an empty result.
                rt, intensity = h5q.get_chromatogram(
                    fid,
                    mz_min,
                    mz_max,
                    ms_level=1,
                    polarity=polarity,
                    min_rt=rt_min - extra_time,
                    max_rt=rt_max + extra_time)
                eic = {'rt': rt, 'intensity': intensity, 'polarity': polarity}
            except Exception:
                eic = {key: [] for key in eic_keys}
            return_data['eic'] = eic

        if '2dhist' in what_to_get:
            #Get 2D histogram of intensity values in m/z and retention time
            #TODO : number of mz bins should be an optional parameter
            mz_edges = np.logspace(np.log10(100), np.log10(1000), 10000)
            #TODO : number of rt bins should be an optional parameter. When not provided, it should default to unique bins
            rt_edges = np.linspace(rt_min, rt_max, 100)
            ms_level = 1  #TODO : ms_level should be a parameter
            return_data['2dhist'] = h5q.get_heatmap(fid, mz_edges, rt_edges,
                                                    ms_level, polarity)
            return_data['2dhist']['polarity'] = polarity

        if 'msms' in what_to_get:
            #Get Fragmentation Data
            try:
                # The precursor window is widened by 0.015 on each side
                # because Thermo doesn't store accurate precursor m/z.
                #TODO : max_mz offset, precursor intensity and collision energy limits need to be parameters
                fragmentation_data = h5q.get_data(
                    fid,
                    ms_level=2,
                    polarity=polarity,
                    min_mz=0,
                    max_mz=mz_theor + 2,
                    min_rt=rt_min,
                    max_rt=rt_max,
                    min_precursor_MZ=mz_min - 0.015,
                    max_precursor_MZ=mz_max + 0.015)
                msms = {
                    'most_intense_precursor':
                    retrieve_most_intense_msms_scan(fragmentation_data),
                    'data': fragmentation_data,
                    'polarity': polarity,
                }
            except Exception:
                msms = {key: [] for key in msms_keys}
            return_data['msms'] = msms
    finally:
        # always release the file handle, even when a query raises
        fid.close()

    return return_data