from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.TopHat import tophat

# In[2]:

# Resolve the working directory once and derive the other locations from it.
cwd = PathPlus('.').resolve()

# Change this if the data files are stored in a different location
data_directory = cwd.parent.parent / "datafiles"

output_directory = cwd / "output"

# In[3]:

# Read the JCAMP file, keep only the 500 s - 2000 s window, then take the
# TIC and build the intensity matrix from the trimmed data.
jcamp_file = data_directory / "gc01_0812_066.jdx"
data = JCAMP_reader(jcamp_file)
data.trim(begin="500s", end="2000s")
tic = data.tic
im = build_intensity_matrix(data)

# In[4]:

n_scan, n_mz = im.size

# Process every ion chromatogram in place: fetch it by index, smooth it with
# savitzky_golay(), pass the result through tophat() with a "1.5m"
# structuring element, and store it back at the same index.
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_bc)

# In[5]:
# Read the ANDI (.cdf) version of the data set.
andi_file = data_directory / "gc01_0812_066.cdf"
data1 = ANDI_reader(andi_file)
print(data1)

# In[3]:

# Read the JCAMP (.jdx) version of the same data set.
jcamp_file = data_directory / "gc01_0812_066.jdx"
data2 = JCAMP_reader(jcamp_file)
print(data2)

# To compare the two data sets, use the function |diff()|

# In[4]:

from pyms.GCMS.Function import diff

diff(data1, data2)

# If the data cannot be compared, for example because of a
# different number of scans, or an inconsistent number of m/z values
# between two scans, |diff()|
# will report the difference. For example:

# In[5]:

data2.trim(begin=1000, end=2000)

# In[6]:

diff(data1, data2)
# Read the JCAMP file and build the intensity matrix from the untrimmed data.
jcamp_file = data_directory / "gc01_0812_066.jdx"
data = JCAMP_reader(jcamp_file)
im = build_intensity_matrix(data)

# ## Retention time range
#
# A basic operation on the GC-MS data is to select a specific time range for
# processing. In PyMassSpec, any data outside the chosen time range is discarded.
# The |trim()| method operates on the raw data, so any subsequent processing only
# refers to the trimmed data.
#
# The data can be trimmed to specific scans:

# In[3]:

data.trim(begin=1000, end=2000)
data.info()

# or specific retention times (in ``seconds`` or ``minutes``):

# In[4]:

data.trim(begin="700s", end="15m")
data.info()

# ## Mass Spectrum range and entries
#
# An |IntensityMatrix| object has a set mass range and interval that is derived
# from the data at the time of building the intensity matrix. The range of mass
# values can be cropped. This is done, primarily, to ensure that the range of
# masses used is consistent when comparing samples.