def detect_one_run(run, args):
    """Load one GC-MS run, trim it and call peaks on its intensity matrix.

    :param run: file name of the run, joined onto ``args.indir``
    :param args: options object; this function reads ``indir``, ``ftype``
        (``'CDF'`` or ``'JDX'``), ``trimstart`` and ``trimend`` (strings, an
        ``"m"`` suffix is appended to each) and hands the whole object on to
        ``call_peaks``
    :return: ``(peak_list, run)`` on success; ``None`` when the input could
        not be loaded (a message is printed instead of raising). An
        unsupported ``args.ftype`` is reported the same way.
    """
    infile = os.path.join(args.indir, run)
    print("processing GC-MS file: %s" % (infile,))

    # Load the input GC-MS file.
    try:
        if args.ftype == 'CDF':
            from pyms.GCMS.IO.ANDI.Function import ANDI_reader
            data = ANDI_reader(infile)
        elif args.ftype == 'JDX':
            data = pyms.GCMS.IO.JCAMP.Function.JCAMP_OpenChrom_reader(infile)
        else:
            raise ValueError('can only load ANDI (CDF) or JDX files!')
    except Exception:
        # Best effort: one unreadable run must not abort the whole batch.
        # `except Exception` (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        print("Failure to load input file  %s" % (infile,))
        return None

    data.trim(args.trimstart + "m", args.trimend + "m")

    # The TIC is taken from the raw (unsmoothed) data: the original author
    # could not obtain a TIC from the smoothed intensity matrix.
    tic = data.get_tic()

    # Integer-mass intensity matrix.
    im = build_intensity_matrix_i(data)

    peak_list = call_peaks(im, tic, True, args)
    return peak_list, run
def detect_one_run(run, args):
    """Load one GC-MS run, trim it and call peaks on its intensity matrix.

    :param run: file name of the run, joined onto ``args.indir``
    :param args: options object; this function reads ``indir``, ``ftype``
        (``'CDF'`` or ``'JDX'``), ``trimstart`` and ``trimend`` (strings, an
        ``"m"`` suffix is appended to each) and hands the whole object on to
        ``call_peaks``
    :return: ``(peak_list, run)`` on success; ``None`` when the input could
        not be loaded (a message is printed instead of raising). An
        unsupported ``args.ftype`` is reported the same way.
    """
    infile = os.path.join(args.indir, run)
    print("processing GC-MS file: %s" % (infile,))

    # Load the input GC-MS file.
    try:
        if args.ftype == 'CDF':
            from pyms.GCMS.IO.ANDI.Function import ANDI_reader
            data = ANDI_reader(infile)
        elif args.ftype == 'JDX':
            data = pyms.GCMS.IO.JCAMP.Function.JCAMP_OpenChrom_reader(infile)
        else:
            raise ValueError('can only load ANDI (CDF) or JDX files!')
    except Exception:
        # Best effort: one unreadable run must not abort the whole batch.
        # `except Exception` (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        print("Failure to load input file  %s" % (infile,))
        return None

    data.trim(args.trimstart + "m", args.trimend + "m")

    # The TIC is taken from the raw (unsmoothed) data: the original author
    # could not obtain a TIC from the smoothed intensity matrix.
    tic = data.get_tic()

    # Integer-mass intensity matrix.
    im = build_intensity_matrix_i(data)

    peak_list = call_peaks(im, tic, True, args)
    return peak_list, run
def matrix_from_cdf(cdffile, name):
    """Load a .cdf run and return its intensity matrix plus a noise level.

    :param cdffile: path handed to ``ANDI_reader``
    :param name: label printed before the run's info summary
    :return: ``(intensity_matrix, noise_level)`` where the noise level is
        ``window_analyzer`` applied to the run's TIC
    """
    gcms_run = ANDI_reader(cdffile)
    print(name)
    gcms_run.info()

    # Noise is estimated from the TIC of the raw run.
    noise_estimate = window_analyzer(gcms_run.get_tic())
    intensity_matrix = build_intensity_matrix(gcms_run)
    return intensity_matrix, noise_estimate
def matrix_from_cdf(cdffile):
    """
    Convert an ANDI file (.cdf) into an intensity matrix for processing
    and peak detection.

    :param cdffile: path of the ANDI file handed to ``ANDI_reader``
    :return: whatever ``build_intensity_matrix`` produces for the loaded run
    """
    return build_intensity_matrix(ANDI_reader(cdffile))
def matrix_from_cdf(cdffile, name):
    '''
    Read a .cdf file and return its intensity matrix together with a noise
    level. The noise level comes from running window_analyzer over the
    run's TIC; it is also printed, prefixed with 'nz='.

    @param cdffile: Path to the .cdf file to be processed
    @param name: Label for the file, printed before the run info
    @return: An intensity matrix and the corresponding noise level value
    '''
    gcms_run = ANDI_reader(cdffile)
    print(name)
    gcms_run.info()

    noise_estimate = window_analyzer(gcms_run.get_tic())
    print('nz=', noise_estimate)

    intensity_matrix = build_intensity_matrix(gcms_run)
    return intensity_matrix, noise_estimate
def load_run(infile, trim_start="4.0m", trim_end="20.0m"):
    """Load a GC-MS file and return its integer-mass intensity matrix and TIC.

    The file type is taken from the module-level ``args.ftype`` (``'CDF'``
    or ``'JDX'``).

    :param infile: path of the file to load
    :param trim_start: lower bound passed to ``data.trim`` (default "4.0m",
        the value that used to be hard-coded)
    :param trim_end: upper bound passed to ``data.trim`` (default "20.0m")
    :return: ``(intensity_matrix, tic)`` on success; ``None`` when loading
        failed (a message is printed instead of raising). An unsupported
        ``args.ftype`` is reported the same way.
    """
    try:
        if args.ftype == 'CDF':
            from pyms.GCMS.IO.ANDI.Function import ANDI_reader
            data = ANDI_reader(infile)
        elif args.ftype == 'JDX':
            data = pyms.GCMS.IO.JCAMP.Function.JCAMP_OpenChrom_reader(infile)
        else:
            raise ValueError('can only load ANDI (CDF) or JDX files!')
    except Exception:
        # Best effort, as before, but no longer a bare except, so
        # KeyboardInterrupt / SystemExit are not swallowed.
        print("Failure to load input file  %s" % (infile,))
        return None

    data.trim(trim_start, trim_end)

    # The TIC comes from the raw data: the original author could not obtain
    # one from a smoothed intensity matrix.
    tic = data.get_tic()

    # Integer-mass intensity matrix.
    return build_intensity_matrix_i(data), tic
"""proc.py """ import sys sys.path.append("/x/PyMS/") from pyms.GCMS.IO.ANDI.Function import ANDI_reader from pyms.Noise.SavitzkyGolay import savitzky_golay from pyms.Baseline.TopHat import tophat # read the raw data andi_file = "/x/PyMS/data/gc01_0812_066.cdf" data = ANDI_reader(andi_file) # get the TIC tic = data.get_tic() # apply noise smoothing and baseline correction tic1 = savitzky_golay(tic) tic2 = tophat(tic1, struct="1.5m") # save smoothed/baseline corrected TIC tic.write("output/tic.dat", minutes=True) tic1.write("output/tic_smooth.dat", minutes=True) tic2.write("output/tic_smooth_bc.dat", minutes=True)
in an interactive session """ import sys sys.path.append("/x/PyMS") from pyms.GCMS.IO.ANDI.Function import ANDI_reader from pyms.GCMS.Function import build_intensity_matrix from pyms.Display.Function import plot_ic # read the raw data as a GCMS_data object andi_file = "/x/PyMS/data/gc01_0812_066.cdf" data = ANDI_reader(andi_file) # IntensityMatrix # must build intensity matrix before accessing any intensity matrix methods. # default, float masses with interval (bin interval) of one from min mass print "default intensity matrix, bin interval = 1, boundary +/- 0.5" im = build_intensity_matrix(data) # # IonChromatogram # # TIC from raw data
"""proc.py """ import sys sys.path.append("/x/PyMS") from pyms.GCMS.IO.ANDI.Function import ANDI_reader # read the raw data andi_file = "/x/PyMS/data/gc01_0812_066.cdf" data = ANDI_reader(andi_file) # info about raw data data.info() # trim data between scans 1000 and 2000 data.trim(1000, 2000) # info about trimmed raw data data.info() # reload data = ANDI_reader(andi_file) # trim data between retention times, 6.5 minutes to 21 minutes data.trim("6.5m", "21m") # info about trimmed raw data data.info()
>>>matplotlib.matplotlib_fname() in an interactive session """ import sys sys.path.append("/x/PyMS") from pyms.GCMS.IO.ANDI.Function import ANDI_reader from pyms.GCMS.Function import build_intensity_matrix from pyms.Display.Function import plot_ic # read the raw data as a GCMS_data object andi_file = "/x/PyMS/data/gc01_0812_066.cdf" data = ANDI_reader(andi_file) # IntensityMatrix # must build intensity matrix before accessing any intensity matrix methods. # default, float masses with interval (bin interval) of one from min mass print "default intensity matrix, bin interval = 1, boundary +/- 0.5" im = build_intensity_matrix(data) # # IonChromatogram # # get the ion chromatogram for some m/z channel (73)
"""proc.py """ import sys sys.path.append("/x/PyMS") from pyms.GCMS.IO.ANDI.Function import ANDI_reader # read the raw data andi_file = "/x/PyMS/data/gc01_0812_066.cdf" data = ANDI_reader(andi_file) # raw data operations print "minimum mass found in all data: ", data.get_min_mass() print "maximum mass found in all data: ", data.get_max_mass() # time time = data.get_time_list() print "number of retention times: ", len(time) print "retention time of 1st scan: ", time[0], "sec" print "index of 400sec in time_list: ", data.get_index_at_time(400.0) # TIC tic = data.get_tic() print "number of scans in TIC: ", len(tic) print "start time of TIC: ", tic.get_time_at_index(0), "sec" # raw scans scans = data.get_scan_list() print "number of masses in 1st scan: ", len(scans[0])
def matrix_from_cdf(cdffile):
    """Read the ANDI file at ``cdffile`` and return its intensity matrix.

    :param cdffile: path handed to ``ANDI_reader``
    :return: whatever ``build_intensity_matrix`` produces for the loaded run
    """
    return build_intensity_matrix(ANDI_reader(cdffile))
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.GCMS.Function import build_intensity_matrix_i
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat
from pyms.Peak.Class import Peak
from pyms.Display.Class import Display
from pyms.Deconvolution.BillerBiemann.Function import (
    BillerBiemann,
    rel_threshold,
    num_ions_threshold,
)

# read in raw data
andi_file = "/home/projects/PyMS_Projects/Metabolomic.Data/2010.01.28_DPI_dairy_waste_water/In/In_061108_Spring_1.CDF"
data = ANDI_reader(andi_file)

# Time bounds must be passed as strings; the bare `6m` / `21m` tokens used
# previously are not valid Python.
data.trim("6m", "21m")

# Build Intensity Matrix
im = build_intensity_matrix_i(data)
n_scan, n_mz = im.get_size()

# perform necessary pre filtering: smooth, then baseline-correct each ion
# chromatogram
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    # NOTE(review): ic_bc is not stored back into `im` in the visible part
    # of this loop; confirm the loop continues with
    # im.set_ic_at_index(ii, ic_bc).
sys.path.append("/x/PyMS/")

from numpy import *

from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.GCMS.Function import build_intensity_matrix_i
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat
from pyms.Peak.IO import store_peaks
from pyms.Deconvolution.BillerBiemann.Function import (
    BillerBiemann,
    rel_threshold,
    num_ions_threshold,
)

# Load the raw run and keep only the "500s" to "2000s" window.
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
data.trim("500s", "2000s")

# Integer-mass intensity matrix and its dimensions.
im = build_intensity_matrix_i(data)
n_scan, n_mz = im.get_size()

# Pre-filter every ion chromatogram in place: smooth, baseline-correct,
# then store the result back into the matrix.
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_bc)

# Peak detection on the filtered matrix.
peak_list = BillerBiemann(im, points=9, scans=2)
sys.path.append("/x/PyMS/")

from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.GCMS.Function import build_intensity_matrix_i
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat
from pyms.Display.Class import Display
from pyms.Peak.Function import peak_sum_area
from pyms.Peak.IO import store_peaks
from pyms.Deconvolution.BillerBiemann.Function import (
    BillerBiemann,
    rel_threshold,
    num_ions_threshold,
)
from pyms.Simulator.Function import gcms_sim, add_gaussv_noise

# Load the raw run and keep only scans 4101 to 4350.
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
data.trim(4101, 4350)

# Integer-mass intensity matrix of the real data, and its dimensions.
real_im = build_intensity_matrix_i(data)
n_scan, n_mz = real_im.get_size()

# Pre-filter every ion chromatogram in place: smooth, baseline-correct,
# then store the result back into the matrix.
for ii in range(n_mz):
    ic = real_im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    real_im.set_ic_at_index(ii, ic_bc)
sys.path.append("/x/PyMS/")

from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.GCMS.Function import build_intensity_matrix_i
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat
from pyms.Display.Class import Display
from pyms.Peak.Function import peak_sum_area
from pyms.Peak.IO import store_peaks
from pyms.Deconvolution.BillerBiemann.Function import (
    BillerBiemann,
    rel_threshold,
    num_ions_threshold,
)
from pyms.Simulator.Function import gcms_sim, add_gaussc_noise

# Load the raw run and keep only scans 4101 to 4350.
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
data.trim(4101, 4350)

# Integer-mass intensity matrix of the real data, and its dimensions.
real_im = build_intensity_matrix_i(data)
n_scan, n_mz = real_im.get_size()

# Pre-filter every ion chromatogram in place: smooth, baseline-correct,
# then store the result back into the matrix.
for ii in range(n_mz):
    ic = real_im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    real_im.set_ic_at_index(ii, ic_bc)
"""proc.py """ import sys sys.path.append("/x/PyMS") from pyms.GCMS.IO.ANDI.Function import ANDI_reader # read the raw data andi_file = "/x/PyMS/data/gc01_0812_066.cdf" data = ANDI_reader(andi_file) # print info data.info() # write data to output file. This will create # two ascii data tables, data.I.csv and data.mz.csv # with intensities and m/z values data.write("output/data")
def missing_peak_finder(sample, andi_file, points=7, null_ions=[73, 207],
                        crop_ions=[45, 300], threshold=100000, rt_window=10):
    """
    @summary: Integrates raw data around missing peak locations
        to fill in NAs in the data matrix

    @param sample: The sample object containing missing peaks
    @type sample: pyms.MissingPeak.Class.Sample

    @param andi_file: Name of the raw data file
    @type andi_file: stringType

    @param points: Passed as the second argument to both savitzky_golay
        smoothing passes (Default 7)
    @type points: intType

    @param null_ions: Ions to be deleted in the matrix
    @type null_ions: listType

    @param crop_ions: Range of Ions to be considered; only the first two
        entries are used
    @type crop_ions: listType

    @param threshold: Minimum intensity of IonChromatogram allowable to fill
        missing peak; the two qualifier ions must exceed half of it
    @type threshold: intType

    @param rt_window: Window in seconds around average RT to look for
        missing peak
    @type rt_window: floatType

    @return: None. Each missing peak is updated in place through
        set_ci_area, with the largest area found or the string 'NA'

    @author: Sean O'Callaghan
    """
    # The list defaults above are only read, never mutated, so sharing them
    # across calls is harmless.
    print("Sample: %s andi_file: %s" % (sample.get_name(), andi_file))

    data = ANDI_reader(andi_file)

    # build integer intensity matrix
    im = build_intensity_matrix_i(data)

    for null_ion in null_ions:
        im.null_mass(null_ion)

    im.crop_mass(crop_ions[0], crop_ions[1])

    # only the number of m/z channels is needed from the matrix size
    _, n_mz = im.get_size()

    # smooth data: two smoothing passes, then baseline correction
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic, points)
        ic_smooth = savitzky_golay(ic1, points)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    # Qualifier ions only need to reach half the common-ion threshold.
    half_threshold = threshold / 2

    for mp in sample.get_missing_peaks():
        mp_rt = mp.get_rt()
        common_ion = mp.get_ci()
        qual_ion_1 = float(mp.get_qual_ion1())
        qual_ion_2 = float(mp.get_qual_ion2())

        ci_ion_chrom = im.get_ic_at_mass(common_ion)
        qi1_ion_chrom = im.get_ic_at_mass(qual_ion_1)
        qi2_ion_chrom = im.get_ic_at_mass(qual_ion_2)

        # Convert the retention-time window from seconds into a number of
        # scan points on the common-ion chromatogram.
        points_1 = ci_ion_chrom.get_index_at_time(float(mp_rt))
        points_2 = ci_ion_chrom.get_index_at_time(float(mp_rt) - rt_window)
        rt_window_points = points_1 - points_2

        maxima_list = get_maxima_list_reduced(ci_ion_chrom, mp_rt,
                                              rt_window_points)

        # Keep maxima whose common ion exceeds the threshold and whose two
        # qualifier ions both exceed half of it at the same retention time.
        large_peak_rts = []
        for rt, intens in maxima_list:
            if intens > threshold:
                q1_index = qi1_ion_chrom.get_index_at_time(rt)
                q2_index = qi2_ion_chrom.get_index_at_time(rt)

                q1_intensity = qi1_ion_chrom.get_intensity_at_index(q1_index)
                q2_intensity = qi2_ion_chrom.get_intensity_at_index(q2_index)

                if q1_intensity > half_threshold and \
                        q2_intensity > half_threshold:
                    large_peak_rts.append(rt)

        # Integrate the common ion around each surviving apex.
        areas = []
        for peak_rt in large_peak_rts:
            apex = ci_ion_chrom.get_index_at_time(peak_rt)
            ia = ci_ion_chrom.get_intensity_array().tolist()
            # ion_area returns the area first; the remaining values
            # (boundaries and shares) are not needed here.
            areas.append(ion_area(ia, apex, 0)[0])

        if areas:
            mp.set_ci_area(max(areas))
        else:
            # no peak above threshold near this retention time
            mp.set_ci_area('NA')
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.GCMS.Function import build_intensity_matrix_i
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat
from pyms.Peak.IO import store_peaks
from pyms.Deconvolution.BillerBiemann.Function import (
    BillerBiemann,
    rel_threshold,
    num_ions_threshold,
)

# Load the raw run and keep only the "500s" to "2000s" window.
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
data.trim("500s", "2000s")

# Integer-mass intensity matrix and its dimensions.
im = build_intensity_matrix_i(data)
n_scan, n_mz = im.get_size()

# Pre-filter every ion chromatogram in place: smooth, baseline-correct,
# then store the result back into the matrix.
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_bc)