コード例 #1
0
def call_peaks(im, tic, smooth, args):
    print "calling peaks"
    if smooth:
        print "Smoothing IM first..."
        im.crop_mass(args.lowmass, args.highmass)
        print "cropped masses..."
        # get the size of the intensity matrix
        n_scan, n_mz = im.get_size()
        print "# masses in intensity matrix: ", n_mz
        # smooth data
        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            #print "got ic for mass ", ii
            # ic1 = savitzky_golay(ic)
            ic_smooth = savitzky_golay(ic, window=args.window,
                                       degree=4)  #JT: changed to 4 from 2
            #print "savitky golay ran "
            ic_base = tophat(ic_smooth, struct="1.5m")
            #print "tophat ran "
            im.set_ic_at_index(ii, ic_base)
            #print "smoothed mass ", ii
        print "smoothed IM..."
        # noise level calc
        tic1 = savitzky_golay(tic)
        tic2 = tophat(tic1, struct="1.5m")  #JT: How does struct size work?
        noise_level = window_analyzer(tic2)
        print "Noise level in TIC: ", noise_level

    # get the list of Peak objects using BB peak detection / deconv
    pl = BillerBiemann(im, args.window, args.scans)
    print "Initial number of Peaks found:", len(pl)

    # filter down the peaks.
    #   - First: remove any masses from each peak that have intensity less than r percent of the max intensity in that peak
    #   - Second: remove any peak where there are less than n ions with intensity above the cutoff
    pl2 = rel_threshold(pl, percent=args.minintensity)
    pl3 = num_ions_threshold(
        pl2, n=args.minions, cutoff=100000
    )  #100000 for pegBT  #200 for peg3 #minions maybe 3 instead of 4?

    #JT: Was getting very different noise cutoff values so just made it 10^5
    # Which was decided on by looking at chromatograms to find baseline noise lvl
    print "Peaks remaining after filtering:", len(pl3)

    for peak in pl3:
        #peak.null_mass(73)
        #peak.null_mass(207)     # column bleed
        #peak.null_mass(84)      # solvent tailing

        area = peak_sum_area(im, peak)  # get the TIC area for this peak
        peak.set_area(area)
        area_dict = peak_top_ion_areas(
            im, peak, args.topions)  # get top n ion areas for this peak
        peak.set_ion_areas(area_dict)

    return pl3
コード例 #2
0
ファイル: GCMSalign.py プロジェクト: dkainer/easyGC
def call_peaks(im, tic, smooth, args):
    print "calling peaks"
    if smooth:
        print "Smoothing IM first..."
        im.crop_mass(args.lowmass, args.highmass)
        print "cropped masses..."
        # get the size of the intensity matrix
        n_scan, n_mz = im.get_size()
        print "# masses in intensity matrix: ", n_mz
        # smooth data
        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            #print "got ic for mass ", ii
            # ic1 = savitzky_golay(ic)
            ic_smooth = savitzky_golay(ic, window=args.window, degree=2)
            #print "savitky golay ran "
            ic_base = tophat(ic_smooth, struct="1.5m")
            #print "tophat ran "
            im.set_ic_at_index(ii, ic_base)
            #print "smoothed mass ", ii
        print "smoothed IM..."
        # noise level calc
        tic1 = savitzky_golay(tic)
        tic2 = tophat(tic1, struct="1.5m")
        noise_level = window_analyzer(tic2)
        print "Noise level in TIC: ", noise_level


    # get the list of Peak objects using BB peak detection / deconv
    pl = BillerBiemann(im, args.window, args.scans)
    print "Initial number of Peaks found:", len(pl)


    # filter down the peaks.
    #   - First: remove any masses from each peak that have intensity less than r percent of the max intensity in that peak
    #   - Second: remove any peak where there are less than n ions with intensity above the cutoff
    pl2 = rel_threshold(pl, percent=args.minintensity)
    pl3 = num_ions_threshold(pl2, n=args.minions, cutoff=noise_level * args.noisemult)
    print "Peaks remaining after filtering:", len(pl3)

    for peak in pl3:
        # peak.null_mass(73)
        peak.null_mass(207)     # column bleed
        peak.null_mass(84)      # solvent tailing

        area = peak_sum_area(im, peak)  # get the TIC area for this peak
        peak.set_area(area)
        area_dict = peak_top_ion_areas(im, peak, args.topions)  # get top n ion areas for this peak
        peak.set_ion_areas(area_dict)

    return pl3
コード例 #3
0
ファイル: reader.py プロジェクト: connerCpowell/PyMS_PDA
def Preprocess_IntensityMatrixes(matrixes):
    """
    Noise removal and baseline correction of intensity matrices.

    Every ion chromatogram of every matrix is passed through savitzky_golay
    and then tophat, and the result is written back into the matrix, so the
    input matrices are modified in place.

    @param matrixes: Iterable of intensity matrices
    @return: The same 'matrixes' object that was passed in
    """
    # Units are numbered from 1 in the progress message. The previous
    # hand-rolled counter was incremented before its first use, so the first
    # matrix was reported as "Unit 2".
    for unit, im in enumerate(matrixes, 1):
        _, n_mz = im.get_size()

        for ii in range(n_mz):
            print("Working on IC#", ii + 1, " Unit", unit)
            ic = im.get_ic_at_index(ii)
            ic_smoof = savitzky_golay(ic)
            ic_bc = tophat(ic_smoof, struct='1.5m')
            im.set_ic_at_index(ii, ic_bc)

    return matrixes
コード例 #4
0
def Preprocess_IntensityMatrices(matrices):
    """
    Baseline correction and smoothing of Intensity Matrices.

    Every ion chromatogram of every matrix is passed through savitzky_golay
    and then tophat, and the result is written back into the matrix, so the
    input matrices are modified in place.

    @param matrices: List of matrices generated by the matrix_from_cdf method
    @return: The same list, with each matrix baseline corrected & smoothed
             for peak detection
    """
    for im in matrices:
        # Only the number of masses is needed; the scan count is ignored.
        _, n_mz = im.get_size()

        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            ic_smoof = savitzky_golay(ic)
            ic_bc = tophat(ic_smoof, struct='1.5m')
            im.set_ic_at_index(ii, ic_bc)

    return matrices
コード例 #5
0
ファイル: ppB.py プロジェクト: connerCpowell/bootutils
def Preprocess_IntensityMatrixes(matrixes):
    '''
    Noise removal and baseline correction of intensity matrices.

    Every ion chromatogram of every matrix is passed through savitzky_golay
    and then tophat, and the result is written back into the matrix, so the
    input matrices are modified in place.

    @param matrixes: List of matrixes generated by the matrix_from_cdf method
    @return: The same list, with each matrix 'cleansed'
    '''
    for im in matrixes:
        # Only the number of masses is needed; the scan count is ignored.
        _, n_mz = im.get_size()

        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            ic_smoof = savitzky_golay(ic)
            ic_bc = tophat(ic_smoof, struct='1.5m')
            im.set_ic_at_index(ii, ic_bc)

    return matrixes
コード例 #6
0
# Script fragment: read the raw data, build and clean an intensity matrix,
# load a stored experiment's peak list, and collect every ion chromatogram
# into a list. 'andi_file' and 'expr_file' are defined outside this fragment.
data = ANDI_reader(andi_file)
# Optional trimming of the data, currently disabled:
#data.trim(2431, 2469)

# IntensityMatrix
# default, float masses with interval (bin interval) of one from min mass
print "default intensity matrix, bin interval = 1, boundary +/- 0.5"
im = build_intensity_matrix(data)
# Null masses 73 and 147 before smoothing.
im.null_mass(73)
im.null_mass(147)

n_scan, n_mz = im.get_size()

# Smooth each ion chromatogram with savitzky_golay, filter it with tophat,
# and write the result back into the matrix.
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_base = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_base)

# Load the experiment
exper = load_expr(expr_file)

# Load the peak list
peak_list = exper.get_peak_list()

# Pass Ion Chromatograms into a list of ICs
# NOTE: 'n_mz' is recomputed from the mass list and 'ic' is rebound here from
# a single chromatogram (loop variable above) to the list of all of them.
n_mz = len(im.get_mass_list())
ic = []

for m in range(n_mz):
    ic.append(im.get_ic_at_index(m))
コード例 #7
0
ファイル: Function.py プロジェクト: jontay81/easyGC
def missing_peak_finder(sample, andi_file, points=7, null_ions=[73, 207],\
                            crop_ions=[45,300], threshold=100000, rt_window=10):
    """
    @summary: Integrates raw data around missing peak locations
              to fill in NAs in the data matrix

    @param  sample: The sample object containing missing peaks
    @type sample: pyms.MissingPeak.Class.Sample

    @param  andi_file: Name of the raw data file
    @type andi_file: stringType

    @param  points: Peak finding - Peak if maxima over 'points' \
                    number of scans (Default 3)
    @type points: intType

    @param  null_ions: Ions to be deleted in the matrix
    @type null_ions: listType

    @param crop_ions: Range of Ions to be considered
    @type crop_ions: listType

    @param threshold: Minimum intensity of IonChromatogram allowable to fill\
                      missing peak
    @type threshold: intType

    @param  rt_window: Window in seconds around average RT to look for \
                       missing peak
    @type rt_window: floatType

    @author: Sean O'Callaghan
    """

    ### some error checks on null and crop ions

    ### a for root,files,dirs in os.path.walk(): loop
    print "Sample:", sample.get_name(), "andi_file:", andi_file

    data = ANDI_reader(andi_file)

    # build integer intensity matrix
    im = build_intensity_matrix_i(data)

    for null_ion in null_ions:
        im.null_mass(null_ion)

    im.crop_mass(crop_ions[0], crop_ions[1])

    # get the size of the intensity matrix
    n_scan, n_mz = im.get_size()

    # smooth data
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic, points)
        ic_smooth = savitzky_golay(ic1, points)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    for mp in sample.get_missing_peaks():
        #JT: Debug peak attributes
        #attrs = vars(mp)
        #print ', '.join("%s: %s" % item for item in attrs.items())
        mp_rt = mp.get_rt()
        #print(repr(mp_rt))
        common_ion = mp.get_ci()
        qual_ion_1 = float(mp.get_qual_ion1())
        qual_ion_2 = float(mp.get_qual_ion2())

        ci_ion_chrom = im.get_ic_at_mass(common_ion)
        #print "ci = ",common_ion
        qi1_ion_chrom = im.get_ic_at_mass(qual_ion_1)
        #print "qi1 = ", qual_ion_1
        qi2_ion_chrom = im.get_ic_at_mass(qual_ion_2)
        #print "qi2 = ", qual_ion_2
        ######
        # Integrate the CI around that particular RT
        #######

        #Convert time to points
        # How long between scans?

        points_1 = ci_ion_chrom.get_index_at_time(float(mp_rt))
        points_2 = ci_ion_chrom.get_index_at_time(float(mp_rt) - rt_window)
        #print "rt_window = ", points_1 - points_2

        rt_window_points = points_1 - points_2

        maxima_list = get_maxima_list_reduced(ci_ion_chrom, mp_rt, \
                                                  rt_window_points)

        large_peaks = []

        for rt, intens in maxima_list:
            if intens > threshold:
                q1_index = qi1_ion_chrom.get_index_at_time(rt)
                q2_index = qi2_ion_chrom.get_index_at_time(rt)

                q1_intensity = qi1_ion_chrom.get_intensity_at_index(q1_index)
                q2_intensity = qi2_ion_chrom.get_intensity_at_index(q2_index)

                if q1_intensity > threshold / 2 and q2_intensity > threshold / 2:
                    large_peaks.append([rt, intens])

        #print('found %d peaks above threshold'%len(large_peaks))

        areas = []
        for peak in large_peaks:
            apex = ci_ion_chrom.get_index_at_time(peak[0])
            ia = ci_ion_chrom.get_intensity_array().tolist()
            area, left, fight, l_share, r_share = ion_area(ia, apex, 0)
            areas.append(area)

        ########################
        areas.sort()
        if len(areas) > 0:
            biggest_area = areas[-1]
            mp.set_ci_area(biggest_area)
            #print "found area:", biggest_area, "at rt:", mp_rt
        else:
            #print "Missing peak at rt = ", mp_rt
            mp.set_ci_area('NA')
コード例 #8
0
ファイル: proc.py プロジェクト: ma-bio21/pyms-test
# Script fragment: build an intensity matrix from an ANDI-MS file, clean it,
# and run Biller-Biemann peak detection with two different settings.

# read the raw data as a GCMS_data object
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)

# build the integer intensity matrix
im = build_intensity_matrix_i(data)

n_scan, n_mz = im.get_size()

print "Intensity matrix size (scans, masses):", (n_scan, n_mz)

# noise filter and baseline correct each ion chromatogram, writing the
# result back into the matrix
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_bc)

# Use Biller and Biemann technique to find apexing ions at a scan
# default is maxima over three scans and not to combine with any neighbouring
# scan.
peak_list = BillerBiemann(im)

print "Number of peaks found: ", len(peak_list)


# Find apex over 9 points and combine with neighbouring peak if two scans apex
# next to each other. This replaces the peak list found above.
peak_list = BillerBiemann(im, points=9, scans=2)

print "Number of peaks found: ", len(peak_list)
コード例 #9
0
ファイル: proc.py プロジェクト: jontay81/pyms-test
# Script fragment: trim the raw data, build and clean an intensity matrix,
# and detect peaks on it.

# read in raw data
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)

# Restrict the data to a sub-range before building the matrix
# (presumably scan numbers 4101-4350 -- confirm against data.trim).
data.trim(4101, 4350)

# Build Intensity Matrix
real_im = build_intensity_matrix_i(data)

n_scan, n_mz = real_im.get_size()

# perform necessary pre filtering: smooth each ion chromatogram with
# savitzky_golay, filter it with tophat, and write it back into the matrix
for ii in range(n_mz):
    ic = real_im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    real_im.set_ic_at_index(ii, ic_bc)

# Detect Peaks (Biller-Biemann with points=3, scans=2)
peak_list = BillerBiemann(real_im, points=3, scans=2)

print "Number of peaks found in real data: ", len(peak_list)

######### Filter peaks###############
# Filter the peak list,
# first by removing all intensities in a peak less than a given relative
# threshold,
# then by removing all peaks that have less than a given number of ions above
# a given value

# Parameters
コード例 #10
0
"""proc.py
"""

import sys
sys.path.append("/x/PyMS/")

from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat

# Load the raw ANDI-MS data and pull out its total ion chromatogram (TIC).
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
tic = data.get_tic()

# tic1: TIC after noise smoothing; tic2: tic1 after baseline correction.
tic1 = savitzky_golay(tic)
tic2 = tophat(tic1, struct="1.5m")

# Write the raw, smoothed, and smoothed + baseline-corrected TICs, in that
# order, each with minutes=True.
for chromatogram, out_path in (
        (tic, "output/tic.dat"),
        (tic1, "output/tic_smooth.dat"),
        (tic2, "output/tic_smooth_bc.dat")):
    chromatogram.write(out_path, minutes=True)
コード例 #11
0
ファイル: proc.py プロジェクト: ma-bio21/pyms-test
"""proc.py
"""

import sys
sys.path.append("/x/PyMS/")

from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat

# Raw data file and the TIC (total ion chromatogram) extracted from it.
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
tic = data.get_tic()

# Noise smoothing first, then baseline correction of the smoothed TIC.
tic1 = savitzky_golay(tic)
tic2 = tophat(tic1, struct="1.5m")

# Save all three stages; each chromatogram is paired with its output file.
stages = (tic, tic1, tic2)
targets = ("output/tic.dat",
           "output/tic_smooth.dat",
           "output/tic_smooth_bc.dat")
for stage, target in zip(stages, targets):
    stage.write(target, minutes=True)

コード例 #12
0
ファイル: Function.py プロジェクト: DongElkan/pyms
def missing_peak_finder(sample, filename, points=13, null_ions=[73, 147],\
                            crop_ions=[50,540], threshold=1000, rt_window=1, filetype='cdf'):
    """
    @summary: Integrates raw data around missing peak locations
              to fill in NAs in the data matrix

    @param  sample: The sample object containing missing peaks
    @type sample: pyms.MissingPeak.Class.Sample

    @param  andi_file: Name of the raw data file
    @type andi_file: stringType

    @param  points: Peak finding - Peak if maxima over 'points' \
                    number of scans (Default 3) 
    @type points: intType

    @param  null_ions: Ions to be deleted in the matrix
    @type null_ions: listType

    @param crop_ions: Range of Ions to be considered
    @type crop_ions: listType 

    @param threshold: Minimum intensity of IonChromatogram allowable to fill\
                      missing peak
    @type threshold: intType

    @param  rt_window: Window in seconds around average RT to look for \
                       missing peak
    @type rt_window: floatType

    @author: Sean O'Callaghan
    """

    ### some error checks on null and crop ions

    ### a for root,files,dirs in os.path.walk(): loop
    print "Sample:", sample.get_name(), "File:", filename
    
    if filetype == 'cdf':
        data = ANDI_reader(filename)
    elif filetype == 'mzml':
        data = mzML_reader(filename)
    else:
        print "file type not valid"
    

    # build integer intensity matrix
    im = build_intensity_matrix_i(data)

    for null_ion in null_ions:
        im.null_mass(null_ion)

    im.crop_mass(crop_ions[0], crop_ions[1])

    # get the size of the intensity matrix
    n_scan, n_mz = im.get_size()

    # smooth data
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic, points)
        ic_smooth = savitzky_golay(ic1, points)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    for mp in sample.get_missing_peaks():

        mp_rt = mp.get_rt()
        common_ion = mp.get_ci()
        qual_ion_1 = float(mp.get_qual_ion1())
        qual_ion_2 = float(mp.get_qual_ion2())
        

        ci_ion_chrom = im.get_ic_at_mass(common_ion)
        print "ci = ",common_ion
        qi1_ion_chrom = im.get_ic_at_mass(qual_ion_1)
        print "qi1 = ", qual_ion_1
        qi2_ion_chrom = im.get_ic_at_mass(qual_ion_2)
        print "qi2 = ", qual_ion_2
        ######
        # Integrate the CI around that particular RT
        #######

        #Convert time to points
        # How long between scans?
        
        points_1 = ci_ion_chrom.get_index_at_time(float(mp_rt))
        points_2 = ci_ion_chrom.get_index_at_time(float(mp_rt)-rt_window)
        print "rt_window = ", points_1 - points_2

        rt_window_points = points_1 - points_2

        maxima_list = get_maxima_list_reduced(ci_ion_chrom, mp_rt, \
                                                  rt_window_points)

        large_peaks = []

        for rt, intens in maxima_list:
            if intens > threshold:
                q1_index = qi1_ion_chrom.get_index_at_time(rt)
                q2_index = qi2_ion_chrom.get_index_at_time(rt)

                q1_intensity = qi1_ion_chrom.get_intensity_at_index(q1_index)
                q2_intensity = qi2_ion_chrom.get_intensity_at_index(q2_index)

                if q1_intensity > threshold/2 and q2_intensity > threshold/2:
                    large_peaks.append([rt, intens])
                
        print('found %d peaks above threshold'%len(large_peaks))

        areas = []
        for peak in large_peaks:
            apex = ci_ion_chrom.get_index_at_time(peak[0])
            ia = ci_ion_chrom.get_intensity_array().tolist()
            area, left, fight, l_share, r_share = ion_area(ia, apex, 0)
            areas.append(area)
        ########################
        areas.sort()
        if len(areas)>0:
            biggest_area = areas[-1]
            mp.set_ci_area(biggest_area)
            print "found area:", biggest_area, "at rt:", mp_rt
        else:
            print "Missing peak at rt = ", mp_rt
            mp.set_ci_area('na')
コード例 #13
0
ファイル: proc.py プロジェクト: jontay81/pyms-test
    # Fragment of a larger definition whose header is not shown here;
    # 'base_path', 'expr_code', 'points', 'scans', 'r', 'n' and 't' are
    # defined outside this fragment.

    # define the names of the peak file and the corresponding ANDI-MS file
    andi_file = os.path.join(base_path, expr_code + ".cdf")

    data = ANDI_reader(andi_file)

    # build the integer intensity matrix
    im = build_intensity_matrix_i(data)

    # get the size of the intensity matrix
    n_scan, n_mz = im.get_size()

    # smooth data: two savitzky_golay passes followed by tophat, with the
    # result written back into the matrix
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic)
        ic_smooth = savitzky_golay(ic1)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    # do peak detection on pre-trimmed data

    # get the list of Peak objects
    pl = BillerBiemann(im, points, scans)

    # trim by relative intensity ('r' is passed as the second argument of
    # rel_threshold)
    apl = rel_threshold(pl, r)

    # trim by threshold ('n' and 't' are passed as the second and third
    # arguments of num_ions_threshold)
    peak_list = num_ions_threshold(apl, n, t)

    print "\t -> Number of Peaks found:", len(peak_list)