from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.TopHat import tophat

# In[2]:

# Resolve the working directory once; the other locations are derived from it.
cwd = PathPlus('.').resolve()

# The data files are looked up in "datafiles", two levels above the working
# directory. Change this if the data files are stored in a different location
data_directory = cwd.parent.parent / "datafiles"

# Results are placed in an "output" folder inside the working directory.
output_directory = cwd / "output"

# In[3]:

# Read the GC-MS data from the JCAMP (.jdx) file in the data directory.
jcamp_file = data_directory / "gc01_0812_066.jdx"
data = JCAMP_reader(jcamp_file)
# Trim the data to the window between "500s" and "2000s"
# (string arguments with an "s" suffix are retention times in seconds).
data.trim("500s", "2000s")
# Take the `tic` attribute after trimming
# (presumably the total ion chromatogram -- TODO confirm).
tic = data.tic
# Build the intensity matrix from the trimmed data.
im = build_intensity_matrix(data)

# In[4]:

# im.size unpacks into two values; only the second (n_mz) drives the loop.
n_scan, n_mz = im.size

# Process every ion chromatogram of the intensity matrix in place:
# Savitzky-Golay smoothing first, then the top-hat step on the smoothed trace.
for mz_index in range(n_mz):
	smoothed = savitzky_golay(im.get_ic_at_index(mz_index))
	# struct="1.5m" is passed through unchanged
	# (presumably a 1.5 minute structural element -- TODO confirm).
	im.set_ic_at_index(mz_index, tophat(smoothed, struct="1.5m"))

# In[5]:
# Example no. 2
# 0
# Load the ANDI (.cdf) file and print the resulting object.
andi_file = data_directory / "gc01_0812_066.cdf"
data1 = ANDI_reader(andi_file)
print(data1)

# In[3]:

# Load the JCAMP (.jdx) file that shares the same file stem and print it too.
jcamp_file = data_directory / "gc01_0812_066.jdx"
data2 = JCAMP_reader(jcamp_file)
print(data2)

# To compare the two data sets, use the function |diff()|

# In[4]:

# Imported next to its first use rather than at the top of the file.
from pyms.GCMS.Function import diff

# Compare the data set read from the ANDI file with the one read from JCAMP.
diff(data1, data2)

# If the data cannot be compared, for example because of
# a different number of scans, or an inconsistent number of m/z values
# between two scans, |diff()|
# will report the difference. For example:

# In[5]:

# Trim only data2 (integer bounds select scans), leaving data1 untouched,
# so the two data sets should no longer have the same number of scans.
data2.trim(begin=1000, end=2000)

# In[6]:

# Run the comparison again on the now-mismatched data sets.
diff(data1, data2)
# Read the JCAMP (.jdx) file again and build an intensity matrix from it.
# NOTE(review): `im` is built before any trim() call that follows; whether it
# reflects later trimming of `data` cannot be told from here -- confirm.
jcamp_file = data_directory / "gc01_0812_066.jdx"
data = JCAMP_reader(jcamp_file)
im = build_intensity_matrix(data)

# ## Retention time range
#
# A basic operation on the GC-MS data is to select a specific time range for
# processing. In PyMassSpec, any data outside the chosen time range is discarded.
# The |trim()| method operates on the raw data, so any subsequent processing only
# refers to the trimmed data.
#
# The data can be trimmed to specific scans:

# In[3]:

# Integer arguments select scans: keep scans 1000 to 2000.
data.trim(1000, 2000)
# Call info() on the trimmed data (presumably prints a summary -- TODO confirm).
data.info()

# or specific retention times (in ``seconds`` or ``minutes``):

# In[4]:

# String arguments select retention times: "s" = seconds, "m" = minutes.
# NOTE: trim() discards data, so this acts on whatever `data` currently holds.
data.trim("700s", "15m")
# Call info() again on the result of the second trim.
data.info()

# ## Mass Spectrum range and entries
#
# An |IntensityMatrix| object has a set mass range and interval that are derived
# from the data at the time of building the intensity matrix. The range of mass
# values can be cropped. This is done, primarily, to ensure that the range of
# masses used is consistent when comparing samples.