Ejemplo n.º 1
0
def detect_one_run(run, args):
    infile = os.path.join(args.indir, run)
    print "processing GC-MS file:", infile

    # sys.stdout("processing GCSM run:", run)
    # load the input GC-MS file
    try:
        if args.ftype == 'CDF':
            from pyms.GCMS.IO.ANDI.Function import ANDI_reader
            data = ANDI_reader(infile)
        elif args.ftype == 'JDX':
            #data = JCAMP_reader(in_file)
            data = pyms.GCMS.IO.JCAMP.Function.JCAMP_OpenChrom_reader(infile)
        else:
            raise ValueError('can only load ANDI (CDF) or JDX files!')
    except:
        print "Failure to load input file ", infile
    else:
        data.trim(args.trimstart + "m", args.trimend + "m")
        # get TIC. Would prefer to get from smoothed IM but API is faulty!
        tic = data.get_tic()
        # integer mass
        im = build_intensity_matrix_i(data)

        # would be nice to do noise_mult*noise_level using the noise level AFTER smoothing,
        # but i can't seem to get the TIC for the smoothed IM.
        peak_list = call_peaks(im, tic, True, args)
        return peak_list, run
Ejemplo n.º 2
0
def detect_one_run(run, args):
    infile = os.path.join(args.indir, run)
    print "processing GC-MS file:", infile

   # sys.stdout("processing GCSM run:", run)
    # load the input GC-MS file
    try:
        if args.ftype == 'CDF':
            from pyms.GCMS.IO.ANDI.Function import ANDI_reader
            data = ANDI_reader(infile)
        elif args.ftype == 'JDX':
            #data = JCAMP_reader(in_file)
            data = pyms.GCMS.IO.JCAMP.Function.JCAMP_OpenChrom_reader(infile)
        else:
            raise ValueError('can only load ANDI (CDF) or JDX files!')
    except:
        print "Failure to load input file ", infile
    else:
        data.trim(args.trimstart+"m",args.trimend+"m")
        # get TIC. Would prefer to get from smoothed IM but API is faulty!
        tic = data.get_tic()
        # integer mass
        im = build_intensity_matrix_i(data)

        # would be nice to do noise_mult*noise_level using the noise level AFTER smoothing,
        # but i can't seem to get the TIC for the smoothed IM.
        peak_list = call_peaks(im, tic, True, args)
        return peak_list, run
Ejemplo n.º 3
0
def matrix_from_cdf(cdffile, name):
    data = ANDI_reader(cdffile)
    print(name)
    data.info()
    tic = data.get_tic()
    noise_lvl = window_analyzer(tic)

    return build_intensity_matrix(data), noise_lvl
Ejemplo n.º 4
0
def matrix_from_cdf(cdffile):
    """
    Converrt ANDI files(.cdf) -> intensity matrices for processing & peak detection
    :param cdffile:
    :return: list of intensity matrices
    """
    data = ANDI_reader(cdffile)
    #data.info() # prints info about the data
    return build_intensity_matrix(data)
Ejemplo n.º 5
0
def matrix_from_cdf(cdffile, name):
    '''
    Intakes a .cdf file and produces an intensity matrix and a noise level .
    The noise level info is obtained by producing a tic and using the window_analyzer
    method to extract a noise approximation.

    @param cdffile: Absolutepath to a .cdf file to be processed
    @param name: file name associated with .cdf file
    @return: An intensity matrix and a corresponding noise level value
    '''
    data = ANDI_reader(cdffile)
    print(name)
    data.info()
    tic = data.get_tic()
    noise_lvl = window_analyzer(tic)
    print('nz=', noise_lvl)

    return build_intensity_matrix(data), noise_lvl
Ejemplo n.º 6
0
def load_run(infile):

    try:
        if args.ftype == 'CDF':
            from pyms.GCMS.IO.ANDI.Function import ANDI_reader
            data = ANDI_reader(infile)
        elif args.ftype == 'JDX':
            #data = JCAMP_reader(in_file)
            data = pyms.GCMS.IO.JCAMP.Function.JCAMP_OpenChrom_reader(infile)
        else:
            raise ValueError('can only load ANDI (CDF) or JDX files!')
    except:
        print "Failure to load input file ", infile
    else:
        data.trim("4.0m", "20.0m")
        # get TIC. Would prefer to get from smoothed IM but API is faulty!
        tic = data.get_tic()
        # integer mass
        return build_intensity_matrix_i(data), tic
Ejemplo n.º 7
0
def load_run(infile):

    try:
        if args.ftype == 'CDF':
            from pyms.GCMS.IO.ANDI.Function import ANDI_reader
            data = ANDI_reader(infile)
        elif args.ftype == 'JDX':
            #data = JCAMP_reader(in_file)
            data = pyms.GCMS.IO.JCAMP.Function.JCAMP_OpenChrom_reader(infile)
        else:
            raise ValueError('can only load ANDI (CDF) or JDX files!')
    except:
        print "Failure to load input file ", infile
    else:
        data.trim("4.0m", "20.0m")
        # get TIC. Would prefer to get from smoothed IM but API is faulty!
        tic = data.get_tic()
        # integer mass
        return build_intensity_matrix_i(data), tic
Ejemplo n.º 8
0
"""proc.py
"""

import sys
sys.path.append("/x/PyMS/")

from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat

# read the raw data
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)

# get the TIC
tic = data.get_tic()

# apply noise smoothing and baseline correction
tic1 = savitzky_golay(tic)
tic2 = tophat(tic1, struct="1.5m")

# save smoothed/baseline corrected TIC
tic.write("output/tic.dat", minutes=True)
tic1.write("output/tic_smooth.dat", minutes=True)
tic2.write("output/tic_smooth_bc.dat", minutes=True)
Ejemplo n.º 9
0
in an interactive session
"""

import sys
sys.path.append("/x/PyMS")

from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.GCMS.Function import build_intensity_matrix


from pyms.Display.Function import plot_ic

# read the raw data as a GCMS_data object
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)


# IntensityMatrix
# must build intensity matrix before accessing any intensity matrix methods.

# default, float masses with interval (bin interval) of one from min mass
print "default intensity matrix, bin interval = 1, boundary +/- 0.5"
im = build_intensity_matrix(data)


#
# IonChromatogram
#

# TIC from raw data
Ejemplo n.º 10
0
"""proc.py
"""

import sys
sys.path.append("/x/PyMS")

from pyms.GCMS.IO.ANDI.Function import ANDI_reader

# read the raw data
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"

data = ANDI_reader(andi_file)

# info about raw data
data.info()

# trim data between scans 1000 and 2000
data.trim(1000, 2000)

# info about trimmed raw data
data.info()

# reload
data = ANDI_reader(andi_file)

# trim data between retention times, 6.5 minutes to 21 minutes
data.trim("6.5m", "21m")

# info about trimmed raw data
data.info()
Ejemplo n.º 11
0
>>>matplotlib.matplotlib_fname()

in an interactive session
"""

import sys
sys.path.append("/x/PyMS")

from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.GCMS.Function import build_intensity_matrix

from pyms.Display.Function import plot_ic

# read the raw data as a GCMS_data object
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)


# IntensityMatrix
# must build intensity matrix before accessing any intensity matrix methods.

# default, float masses with interval (bin interval) of one from min mass
print "default intensity matrix, bin interval = 1, boundary +/- 0.5"
im = build_intensity_matrix(data)


#
# IonChromatogram
#

# get the ion chromatogram for some m/z channel (73)
Ejemplo n.º 12
0
"""proc.py
"""

import sys
sys.path.append("/x/PyMS")

from pyms.GCMS.IO.ANDI.Function import ANDI_reader

# read the raw data
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)

# raw data operations
print "minimum mass found in all data: ", data.get_min_mass()
print "maximum mass found in all data: ", data.get_max_mass()

# time
time = data.get_time_list()
print "number of retention times: ", len(time)
print "retention time of 1st scan: ", time[0], "sec"
print "index of 400sec in time_list: ", data.get_index_at_time(400.0)

# TIC
tic = data.get_tic()
print "number of scans in TIC: ", len(tic)
print "start time of TIC: ", tic.get_time_at_index(0), "sec"

# raw scans
scans = data.get_scan_list()

print "number of masses in 1st scan: ", len(scans[0])
Ejemplo n.º 13
0
def matrix_from_cdf(cdffile):
    data = ANDI_reader(cdffile)
    #data.info() # prints info about the data
    return build_intensity_matrix(data)
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.GCMS.Function import build_intensity_matrix_i
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat
from pyms.Peak.Class import Peak

from pyms.Display.Class import Display
 
from pyms.Deconvolution.BillerBiemann.Function import BillerBiemann, \
    rel_threshold, num_ions_threshold
    

 
 # read in raw data
andi_file = "/home/projects/PyMS_Projects/Metabolomic.Data/2010.01.28_DPI_dairy_waste_water/In/In_061108_Spring_1.CDF"
data = ANDI_reader(andi_file)

data.trim(6m, 21m)

# Build Intensity Matrix
im = build_intensity_matrix_i(data)


n_scan, n_mz = im.get_size()


 # perform necessary pre filtering
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
Ejemplo n.º 15
0
sys.path.append("/x/PyMS/")
from numpy import *

from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.GCMS.Function import build_intensity_matrix_i
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat

from pyms.Peak.IO import store_peaks

from pyms.Deconvolution.BillerBiemann.Function import BillerBiemann, \
    rel_threshold, num_ions_threshold

# read in raw data
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)

data.trim("500s", "2000s")
# Build Intensity Matrix
im = build_intensity_matrix_i(data)
n_scan, n_mz = im.get_size()

# perform necessary pre filtering
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_bc)

# Detect Peaks
peak_list = BillerBiemann(im, points=9, scans=2)
Ejemplo n.º 16
0
sys.path.append("/x/PyMS/")

from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.GCMS.Function import build_intensity_matrix_i
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat
from pyms.Display.Class import Display
from pyms.Peak.Function import peak_sum_area
from pyms.Peak.IO import store_peaks
from pyms.Deconvolution.BillerBiemann.Function import BillerBiemann, rel_threshold, num_ions_threshold
from pyms.Simulator.Function import gcms_sim, add_gaussv_noise


# read in raw data
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)

data.trim(4101, 4350)

# Build Intensity Matrix
real_im = build_intensity_matrix_i(data)

n_scan, n_mz = real_im.get_size()

# perform necessary pre filtering
for ii in range(n_mz):
    ic = real_im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    real_im.set_ic_at_index(ii, ic_bc)
Ejemplo n.º 17
0
sys.path.append("/x/PyMS/")

from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.GCMS.Function import build_intensity_matrix_i
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat
from pyms.Display.Class import Display
from pyms.Peak.Function import peak_sum_area
from pyms.Peak.IO import store_peaks
from pyms.Deconvolution.BillerBiemann.Function import BillerBiemann, \
    rel_threshold, num_ions_threshold
from pyms.Simulator.Function import gcms_sim, add_gaussc_noise

# read in raw data
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)

data.trim(4101, 4350)

# Build Intensity Matrix
real_im = build_intensity_matrix_i(data)

n_scan, n_mz = real_im.get_size()

# perform necessary pre filtering
for ii in range(n_mz):
    ic = real_im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    real_im.set_ic_at_index(ii, ic_bc)
Ejemplo n.º 18
0
"""proc.py
"""

import sys
sys.path.append("/x/PyMS")

from pyms.GCMS.IO.ANDI.Function import ANDI_reader

# read the raw data
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)

# print info
data.info()

# write data to output file. This will create
# two ascii data tables, data.I.csv and data.mz.csv
# with intensities and m/z values
data.write("output/data")

Ejemplo n.º 19
0
def missing_peak_finder(sample, andi_file, points=7, null_ions=[73, 207],\
                            crop_ions=[45,300], threshold=100000, rt_window=10):
    """
    @summary: Integrates raw data around missing peak locations
              to fill in NAs in the data matrix

    @param  sample: The sample object containing missing peaks
    @type sample: pyms.MissingPeak.Class.Sample

    @param  andi_file: Name of the raw data file
    @type andi_file: stringType

    @param  points: Peak finding - Peak if maxima over 'points' \
                    number of scans (Default 3)
    @type points: intType

    @param  null_ions: Ions to be deleted in the matrix
    @type null_ions: listType

    @param crop_ions: Range of Ions to be considered
    @type crop_ions: listType

    @param threshold: Minimum intensity of IonChromatogram allowable to fill\
                      missing peak
    @type threshold: intType

    @param  rt_window: Window in seconds around average RT to look for \
                       missing peak
    @type rt_window: floatType

    @author: Sean O'Callaghan
    """

    ### some error checks on null and crop ions

    ### a for root,files,dirs in os.path.walk(): loop
    print "Sample:", sample.get_name(), "andi_file:", andi_file

    data = ANDI_reader(andi_file)

    # build integer intensity matrix
    im = build_intensity_matrix_i(data)

    for null_ion in null_ions:
        im.null_mass(null_ion)

    im.crop_mass(crop_ions[0], crop_ions[1])

    # get the size of the intensity matrix
    n_scan, n_mz = im.get_size()

    # smooth data
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic, points)
        ic_smooth = savitzky_golay(ic1, points)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    for mp in sample.get_missing_peaks():
        #JT: Debug peak attributes
        #attrs = vars(mp)
        #print ', '.join("%s: %s" % item for item in attrs.items())
        mp_rt = mp.get_rt()
        #print(repr(mp_rt))
        common_ion = mp.get_ci()
        qual_ion_1 = float(mp.get_qual_ion1())
        qual_ion_2 = float(mp.get_qual_ion2())

        ci_ion_chrom = im.get_ic_at_mass(common_ion)
        #print "ci = ",common_ion
        qi1_ion_chrom = im.get_ic_at_mass(qual_ion_1)
        #print "qi1 = ", qual_ion_1
        qi2_ion_chrom = im.get_ic_at_mass(qual_ion_2)
        #print "qi2 = ", qual_ion_2
        ######
        # Integrate the CI around that particular RT
        #######

        #Convert time to points
        # How long between scans?

        points_1 = ci_ion_chrom.get_index_at_time(float(mp_rt))
        points_2 = ci_ion_chrom.get_index_at_time(float(mp_rt) - rt_window)
        #print "rt_window = ", points_1 - points_2

        rt_window_points = points_1 - points_2

        maxima_list = get_maxima_list_reduced(ci_ion_chrom, mp_rt, \
                                                  rt_window_points)

        large_peaks = []

        for rt, intens in maxima_list:
            if intens > threshold:
                q1_index = qi1_ion_chrom.get_index_at_time(rt)
                q2_index = qi2_ion_chrom.get_index_at_time(rt)

                q1_intensity = qi1_ion_chrom.get_intensity_at_index(q1_index)
                q2_intensity = qi2_ion_chrom.get_intensity_at_index(q2_index)

                if q1_intensity > threshold / 2 and q2_intensity > threshold / 2:
                    large_peaks.append([rt, intens])

        #print('found %d peaks above threshold'%len(large_peaks))

        areas = []
        for peak in large_peaks:
            apex = ci_ion_chrom.get_index_at_time(peak[0])
            ia = ci_ion_chrom.get_intensity_array().tolist()
            area, left, fight, l_share, r_share = ion_area(ia, apex, 0)
            areas.append(area)

        ########################
        areas.sort()
        if len(areas) > 0:
            biggest_area = areas[-1]
            mp.set_ci_area(biggest_area)
            #print "found area:", biggest_area, "at rt:", mp_rt
        else:
            #print "Missing peak at rt = ", mp_rt
            mp.set_ci_area('NA')
Ejemplo n.º 20
0
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.GCMS.Function import build_intensity_matrix_i
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat

from pyms.Peak.IO import store_peaks
 
from pyms.Deconvolution.BillerBiemann.Function import BillerBiemann, \
    rel_threshold, num_ions_threshold
    


 
 # read in raw data
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)

data.trim("500s", "2000s")
# Build Intensity Matrix
im = build_intensity_matrix_i(data)
n_scan, n_mz = im.get_size()


 # perform necessary pre filtering
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_bc)