Ejemplo n.º 1
0
def detect_one_run(run, args):
    """
    Load a single GC-MS run and call peaks on it.

    @param run: File name of the run, joined onto args.indir
    @type run: stringType

    @param args: Options object. This function reads args.indir,
                 args.ftype ('CDF' or 'JDX'), args.trimstart and
                 args.trimend, and hands the whole object to call_peaks()
    @type args: any object exposing those attributes

    @return: (peak_list, run) on success, or None when the input file
             could not be loaded (the failure is reported on stdout)
    """
    infile = os.path.join(args.indir, run)
    print("processing GC-MS file: %s" % infile)

    # load the input GC-MS file
    try:
        if args.ftype == 'CDF':
            from pyms.GCMS.IO.ANDI.Function import ANDI_reader
            data = ANDI_reader(infile)
        elif args.ftype == 'JDX':
            # NOTE(review): relies on the `pyms` package name being bound at
            # module level elsewhere in this file -- confirm.
            data = pyms.GCMS.IO.JCAMP.Function.JCAMP_OpenChrom_reader(infile)
        else:
            raise ValueError('can only load ANDI (CDF) or JDX files!')
    except Exception as err:
        # Loading stays best-effort (one bad run is skipped), but
        # KeyboardInterrupt/SystemExit are no longer swallowed and the
        # underlying cause is shown instead of being discarded.
        print("Failure to load input file  %s (%s: %s)"
              % (infile, type(err).__name__, err))
        return None

    # trimstart/trimend are concatenated with "m", so they must be strings.
    # NOTE(review): the "m" suffix presumably means minutes -- confirm.
    data.trim(args.trimstart + "m", args.trimend + "m")
    # get TIC. Would prefer to get from smoothed IM but API is faulty!
    tic = data.get_tic()
    # integer mass
    im = build_intensity_matrix_i(data)

    # would be nice to do noise_mult*noise_level using the noise level AFTER
    # smoothing, but i can't seem to get the TIC for the smoothed IM.
    peak_list = call_peaks(im, tic, True, args)
    return peak_list, run
Ejemplo n.º 2
0
def detect_one_run(run, args):
    """
    Read one GC-MS run from args.indir and run peak calling on it.

    @param run: Name of the run file inside args.indir
    @type run: stringType

    @param args: Options object; args.indir, args.ftype ('CDF' or 'JDX'),
                 args.trimstart and args.trimend are read here, and the
                 object itself is forwarded to call_peaks()
    @type args: any object exposing those attributes

    @return: The tuple (peak_list, run), or None if loading the file
             failed (a message describing the failure is printed)
    """
    infile = os.path.join(args.indir, run)
    print("processing GC-MS file: %s" % infile)

    # load the input GC-MS file
    try:
        if args.ftype == 'CDF':
            from pyms.GCMS.IO.ANDI.Function import ANDI_reader
            data = ANDI_reader(infile)
        elif args.ftype == 'JDX':
            # NOTE(review): needs `pyms` to be importable by name at module
            # level; that import is not visible here -- confirm.
            data = pyms.GCMS.IO.JCAMP.Function.JCAMP_OpenChrom_reader(infile)
        else:
            raise ValueError('can only load ANDI (CDF) or JDX files!')
    except Exception as err:
        # Still best-effort: a run that cannot be loaded is skipped.
        # Catching Exception (not a bare except) lets Ctrl-C and
        # SystemExit propagate, and the real error is now reported.
        print("Failure to load input file  %s (%s: %s)"
              % (infile, type(err).__name__, err))
        return None

    # String concatenation: trimstart/trimend must already be strings.
    # NOTE(review): "m" is presumably the minutes suffix -- confirm.
    data.trim(args.trimstart + "m", args.trimend + "m")
    # get TIC. Would prefer to get from smoothed IM but API is faulty!
    tic = data.get_tic()
    # integer mass
    im = build_intensity_matrix_i(data)

    # would be nice to do noise_mult*noise_level using the noise level AFTER
    # smoothing, but i can't seem to get the TIC for the smoothed IM.
    peak_list = call_peaks(im, tic, True, args)
    return peak_list, run
Ejemplo n.º 3
0
def load_run(infile, ftype=None, trim_start="4.0m", trim_end="20.0m"):
    """
    Load a GC-MS run, trim it and build its integer intensity matrix.

    @param infile: Path of the GC-MS file to load
    @type infile: stringType

    @param ftype: 'CDF' or 'JDX'. When None (the default) the value is
                  taken from the module-level `args.ftype`, which is what
                  this function always did before the parameter existed
    @type ftype: stringType or None

    @param trim_start: First argument handed to data.trim()
                       (Default "4.0m")
    @type trim_start: stringType

    @param trim_end: Second argument handed to data.trim()
                     (Default "20.0m")
    @type trim_end: stringType

    @return: (intensity_matrix, tic), or None when the file could not be
             loaded (the failure is reported on stdout)
    """
    try:
        if ftype is None:
            # Backward-compatible fallback to the global options object.
            ftype = args.ftype
        if ftype == 'CDF':
            from pyms.GCMS.IO.ANDI.Function import ANDI_reader
            data = ANDI_reader(infile)
        elif ftype == 'JDX':
            # NOTE(review): relies on `pyms` being bound at module level
            # elsewhere in this file -- confirm.
            data = pyms.GCMS.IO.JCAMP.Function.JCAMP_OpenChrom_reader(infile)
        else:
            raise ValueError('can only load ANDI (CDF) or JDX files!')
    except Exception as err:
        # Best-effort load, but no longer a bare except: Ctrl-C and
        # SystemExit propagate, and the cause of the failure is printed.
        print("Failure to load input file  %s (%s: %s)"
              % (infile, type(err).__name__, err))
        return None

    data.trim(trim_start, trim_end)
    # get TIC. Would prefer to get from smoothed IM but API is faulty!
    tic = data.get_tic()
    # integer mass
    return build_intensity_matrix_i(data), tic
Ejemplo n.º 4
0
def load_run(infile, ftype=None, trim_start="4.0m", trim_end="20.0m"):
    """
    Read a GC-MS file, trim it, and return its intensity matrix and TIC.

    @param infile: Path of the GC-MS file to read
    @type infile: stringType

    @param ftype: File type, 'CDF' or 'JDX'. If left as None the
                  module-level `args.ftype` is used, matching the
                  behaviour before this parameter was added
    @type ftype: stringType or None

    @param trim_start: Lower bound passed to data.trim() (Default "4.0m")
    @type trim_start: stringType

    @param trim_end: Upper bound passed to data.trim() (Default "20.0m")
    @type trim_end: stringType

    @return: Tuple (intensity_matrix, tic); None if the file could not be
             loaded, in which case a message is printed
    """
    try:
        if ftype is None:
            # Keep the old behaviour: read the type from the global `args`.
            ftype = args.ftype
        if ftype == 'CDF':
            from pyms.GCMS.IO.ANDI.Function import ANDI_reader
            data = ANDI_reader(infile)
        elif ftype == 'JDX':
            # NOTE(review): the `pyms` name must be imported at module
            # level; that import is outside the visible code -- confirm.
            data = pyms.GCMS.IO.JCAMP.Function.JCAMP_OpenChrom_reader(infile)
        else:
            raise ValueError('can only load ANDI (CDF) or JDX files!')
    except Exception as err:
        # Narrowed from a bare except so that KeyboardInterrupt and
        # SystemExit are not swallowed; the error itself is reported.
        print("Failure to load input file  %s (%s: %s)"
              % (infile, type(err).__name__, err))
        return None

    data.trim(trim_start, trim_end)
    # get TIC. Would prefer to get from smoothed IM but API is faulty!
    tic = data.get_tic()
    # integer mass
    return build_intensity_matrix_i(data), tic
Ejemplo n.º 5
0
def missing_peak_finder(sample, andi_file, points=7, null_ions=(73, 207),
                        crop_ions=(45, 300), threshold=100000, rt_window=10):
    """
    @summary: Integrates raw data around missing peak locations
              to fill in NAs in the data matrix

    @param  sample: The sample object containing missing peaks
    @type sample: pyms.MissingPeak.Class.Sample

    @param  andi_file: Name of the raw data file
    @type andi_file: stringType

    @param  points: Second argument passed to savitzky_golay() for each of
                    the two smoothing passes (Default 7)
    @type points: intType

    @param  null_ions: Ions to be deleted in the matrix
    @type null_ions: sequence of masses (list or tuple)

    @param crop_ions: Range of Ions to be considered, as (low, high)
    @type crop_ions: two-element sequence (list or tuple)

    @param threshold: Minimum intensity of IonChromatogram allowable to fill
                      missing peak; both qualifier ions must exceed half
                      of this value
    @type threshold: intType

    @param  rt_window: Window in seconds around average RT to look for
                       missing peak
    @type rt_window: floatType

    @return: None. Each missing peak of `sample` is updated in place via
             set_ci_area(), with the largest area found or the string 'NA'

    @author: Sean O'Callaghan
    """

    ### some error checks on null and crop ions

    ### a for root,files,dirs in os.path.walk(): loop
    print("Sample: %s andi_file: %s" % (sample.get_name(), andi_file))

    data = ANDI_reader(andi_file)

    # build integer intensity matrix
    im = build_intensity_matrix_i(data)

    for null_ion in null_ions:
        im.null_mass(null_ion)

    im.crop_mass(crop_ions[0], crop_ions[1])

    # only the number of mass channels is needed below
    _, n_mz = im.get_size()

    # smooth every ion chromatogram twice, then apply tophat and store the
    # result back into the matrix
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic, points)
        ic_smooth = savitzky_golay(ic1, points)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    # qualifier ions only need to reach half of the common-ion threshold
    half_threshold = threshold / 2

    for mp in sample.get_missing_peaks():
        mp_rt = mp.get_rt()
        common_ion = mp.get_ci()
        qual_ion_1 = float(mp.get_qual_ion1())
        qual_ion_2 = float(mp.get_qual_ion2())

        ci_ion_chrom = im.get_ic_at_mass(common_ion)
        qi1_ion_chrom = im.get_ic_at_mass(qual_ion_1)
        qi2_ion_chrom = im.get_ic_at_mass(qual_ion_2)

        ######
        # Integrate the CI around that particular RT
        #######

        # Convert the time window into a number of scan points: the index
        # distance between the peak RT and (RT - rt_window).
        points_1 = ci_ion_chrom.get_index_at_time(float(mp_rt))
        points_2 = ci_ion_chrom.get_index_at_time(float(mp_rt) - rt_window)
        rt_window_points = points_1 - points_2

        maxima_list = get_maxima_list_reduced(ci_ion_chrom, mp_rt,
                                              rt_window_points)

        # keep maxima where the common ion is above threshold and both
        # qualifier ions are above half the threshold at the same time
        large_peaks = []
        for rt, intens in maxima_list:
            if intens > threshold:
                q1_index = qi1_ion_chrom.get_index_at_time(rt)
                q2_index = qi2_ion_chrom.get_index_at_time(rt)

                q1_intensity = qi1_ion_chrom.get_intensity_at_index(q1_index)
                q2_intensity = qi2_ion_chrom.get_intensity_at_index(q2_index)

                if q1_intensity > half_threshold and \
                        q2_intensity > half_threshold:
                    large_peaks.append([rt, intens])

        areas = []
        for peak in large_peaks:
            apex = ci_ion_chrom.get_index_at_time(peak[0])
            ia = ci_ion_chrom.get_intensity_array().tolist()
            # only the area is used; the bounds and shares are discarded
            area, _left, _right, _l_share, _r_share = ion_area(ia, apex, 0)
            areas.append(area)

        ########################
        if areas:
            mp.set_ci_area(max(areas))
        else:
            # no candidate passed the thresholds: leave the peak as missing
            mp.set_ci_area('NA')
Ejemplo n.º 6
0
from pyms.Deconvolution.BillerBiemann.Function import BillerBiemann, \
    rel_threshold, num_ions_threshold

from pyms.Experiment.Class import Experiment
from pyms.Experiment.IO import store_expr


# deconvolution and peak list filtering parameters
# NOTE(review): none of these five values is used in the visible part of the
# script; they are presumably consumed by the peak detection / filtering code
# that follows -- confirm.
points = 9; scans = 2; n = 3; t = 3000; r = 2;

# Hard-coded path of the raw data file to process.
andi_file = "/x/PyMS/data/a0806_077.cdf"

# NOTE(review): ANDI_reader, build_intensity_matrix_i, savitzky_golay and
# tophat are not imported in the visible lines; presumably imported earlier
# in the script -- confirm.
data = ANDI_reader(andi_file)

# integer mass
im = build_intensity_matrix_i(data)

# get the size of the intensity matrix
n_scan, n_mz = im.get_size()

# smooth data
# For every mass channel: apply savitzky_golay twice (default arguments),
# then tophat with struct="1.5m", and write the result back into the matrix.
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic1 = savitzky_golay(ic)
    ic_smooth = savitzky_golay(ic1)
    ic_base = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_base)

# do peak detection on pre-trimmed data

# get the list of Peak objects
Ejemplo n.º 7
0
from pyms.Baseline.TopHat import tophat
from pyms.Display.Class import Display
from pyms.Peak.Function import peak_sum_area
from pyms.Peak.IO import store_peaks
from pyms.Deconvolution.BillerBiemann.Function import BillerBiemann, \
    rel_threshold, num_ions_threshold
from pyms.Simulator.Function import gcms_sim, add_gaussc_noise

# read in raw data
# Hard-coded input path. NOTE(review): ANDI_reader, build_intensity_matrix_i
# and savitzky_golay are not imported in the visible lines; presumably
# imported earlier in the script -- confirm.
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)

# Restrict the data to the range 4101..4350.
# NOTE(review): integer arguments are presumably scan numbers rather than
# retention times -- confirm against the trim() documentation.
data.trim(4101, 4350)

# Build Intensity Matrix
real_im = build_intensity_matrix_i(data)

n_scan, n_mz = real_im.get_size()

# perform necessary pre filtering
# For every mass channel: one savitzky_golay pass, then tophat with
# struct="1.5m"; the filtered chromatogram replaces the original in the matrix.
for ii in range(n_mz):
    ic = real_im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    real_im.set_ic_at_index(ii, ic_bc)

# Detect Peaks
peak_list = BillerBiemann(real_im, points=3, scans=2)

# Report how many peaks BillerBiemann returned (Python 2 print statement).
print "Number of peaks found in real data: ", len(peak_list)
Ejemplo n.º 8
0
def missing_peak_finder(sample, filename, points=13, null_ions=(73, 147),
                        crop_ions=(50, 540), threshold=1000, rt_window=1,
                        filetype='cdf'):
    """
    @summary: Integrates raw data around missing peak locations
              to fill in NAs in the data matrix

    @param  sample: The sample object containing missing peaks
    @type sample: pyms.MissingPeak.Class.Sample

    @param  filename: Name of the raw data file
    @type filename: stringType

    @param  points: Second argument passed to savitzky_golay() for each of
                    the two smoothing passes (Default 13)
    @type points: intType

    @param  null_ions: Ions to be deleted in the matrix
    @type null_ions: sequence of masses (list or tuple)

    @param crop_ions: Range of Ions to be considered, as (low, high)
    @type crop_ions: two-element sequence (list or tuple)

    @param threshold: Minimum intensity of IonChromatogram allowable to fill
                      missing peak; both qualifier ions must exceed half
                      of this value
    @type threshold: intType

    @param  rt_window: Window in seconds around average RT to look for
                       missing peak
    @type rt_window: floatType

    @param  filetype: Format of the raw data file: 'cdf' (read with
                      ANDI_reader) or 'mzml' (read with mzML_reader)
    @type filetype: stringType

    @return: None. Each missing peak of `sample` is updated in place via
             set_ci_area(), with the largest area found or the string 'na'

    @raise ValueError: if filetype is neither 'cdf' nor 'mzml'

    @author: Sean O'Callaghan
    """

    ### some error checks on null and crop ions

    ### a for root,files,dirs in os.path.walk(): loop
    print("Sample: %s File: %s" % (sample.get_name(), filename))

    if filetype == 'cdf':
        data = ANDI_reader(filename)
    elif filetype == 'mzml':
        data = mzML_reader(filename)
    else:
        # Previously this only printed a message and then failed a few
        # lines later with an UnboundLocalError on `data`; fail here with
        # an error that names the actual problem.
        raise ValueError("file type not valid: %r (expected 'cdf' or 'mzml')"
                         % (filetype,))

    # build integer intensity matrix
    im = build_intensity_matrix_i(data)

    for null_ion in null_ions:
        im.null_mass(null_ion)

    im.crop_mass(crop_ions[0], crop_ions[1])

    # only the number of mass channels is needed below
    _, n_mz = im.get_size()

    # smooth every ion chromatogram twice, then apply tophat and store the
    # result back into the matrix
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic, points)
        ic_smooth = savitzky_golay(ic1, points)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    # qualifier ions only need to reach half of the common-ion threshold
    half_threshold = threshold / 2

    for mp in sample.get_missing_peaks():

        mp_rt = mp.get_rt()
        common_ion = mp.get_ci()
        qual_ion_1 = float(mp.get_qual_ion1())
        qual_ion_2 = float(mp.get_qual_ion2())

        ci_ion_chrom = im.get_ic_at_mass(common_ion)
        print("ci =  %s" % (common_ion,))
        qi1_ion_chrom = im.get_ic_at_mass(qual_ion_1)
        print("qi1 =  %s" % (qual_ion_1,))
        qi2_ion_chrom = im.get_ic_at_mass(qual_ion_2)
        print("qi2 =  %s" % (qual_ion_2,))

        ######
        # Integrate the CI around that particular RT
        #######

        # Convert the time window into a number of scan points: the index
        # distance between the peak RT and (RT - rt_window).
        points_1 = ci_ion_chrom.get_index_at_time(float(mp_rt))
        points_2 = ci_ion_chrom.get_index_at_time(float(mp_rt) - rt_window)
        rt_window_points = points_1 - points_2
        print("rt_window =  %s" % (rt_window_points,))

        maxima_list = get_maxima_list_reduced(ci_ion_chrom, mp_rt,
                                              rt_window_points)

        # keep maxima where the common ion is above threshold and both
        # qualifier ions are above half the threshold at the same time
        large_peaks = []
        for rt, intens in maxima_list:
            if intens > threshold:
                q1_index = qi1_ion_chrom.get_index_at_time(rt)
                q2_index = qi2_ion_chrom.get_index_at_time(rt)

                q1_intensity = qi1_ion_chrom.get_intensity_at_index(q1_index)
                q2_intensity = qi2_ion_chrom.get_intensity_at_index(q2_index)

                if q1_intensity > half_threshold and \
                        q2_intensity > half_threshold:
                    large_peaks.append([rt, intens])

        print('found %d peaks above threshold' % len(large_peaks))

        areas = []
        for peak in large_peaks:
            apex = ci_ion_chrom.get_index_at_time(peak[0])
            ia = ci_ion_chrom.get_intensity_array().tolist()
            # only the area is used; the bounds and shares are discarded
            area, _left, _right, _l_share, _r_share = ion_area(ia, apex, 0)
            areas.append(area)

        ########################
        if areas:
            biggest_area = max(areas)
            mp.set_ci_area(biggest_area)
            print("found area: %s at rt: %s" % (biggest_area, mp_rt))
        else:
            # no candidate passed the thresholds: leave the peak as missing
            print("Missing peak at rt =  %s" % (mp_rt,))
            mp.set_ci_area('na')