def generate_chromatogram(n=5, twin=None):
    """
    Generates a trace with n gaussian peaks distributed through it.

    Parameters
    ----------
    n : int, optional
        Number of gaussian peaks to add.
    twin : tuple of (start, end), optional
        Time window covered by the trace; defaults to (0, 60).

    Returns
    -------
    Trace
        A 300-point trace labelled 'X' holding the summed peaks plus
        normally distributed noise (scale 0.01).
    """
    if twin is None:
        twin = (0, 60)
    t_start, t_end = twin[0], twin[1]
    t = np.linspace(t_start, t_end, 300)

    # Draw the peak centres inside the requested window. Scaling by the
    # window end alone would place peaks outside any window that does not
    # start at zero.
    peak_locs = t_start + (t_end - t_start) * np.random.random(n)
    # widths and heights are both drawn uniformly from [0.2, 1.0)
    peak_ws = 0.2 + 0.8 * np.random.random(n)
    peak_hs = 0.2 + 0.8 * np.random.random(n)

    y = np.zeros(len(t))
    for peak_loc, peak_w, peak_h in zip(peak_locs, peak_ws, peak_hs):
        y += gaussian(t, x=peak_loc, w=peak_w, h=peak_h)
    y += np.random.normal(scale=0.01, size=len(t))
    return Trace(y, t, ['X'])
def generate_gaussian():
    """
    Builds a noise-free trace holding a single gaussian.

    The time axis is 300 evenly spaced points from 0 to 60 and the
    gaussian is evaluated with x=30, w=2 and h=1.

    Returns
    -------
    Trace
        The gaussian values over the time axis, labelled 'X'.
    """
    times = np.linspace(0, 60, 300)
    signal = gaussian(times, x=30, w=2, h=1)
    return Trace(signal, times, ['X'])
def read_peaks(db, filename, ftype='isodat'):
    """
    Reads a peak table exported by AMDIS or Isodat and attaches the peaks
    to the matching files in a database.

    Parameters
    ----------
    db : object
        Must provide ``children_of_type('file')`` and work as a context
        manager. Each returned file needs a ``rawdata`` path string and a
        ``children`` list that the new peaks are appended to.
    filename : str
        Path of the exported peak table.
    ftype : {'isodat', 'amdis', None}, optional
        Format of the table. ``None`` sniffs the format from the first
        line of the file.

    Raises
    ------
    ValueError
        If ``ftype`` is not one of the supported formats, or if a column
        required by the chosen format is missing from the header row.
    """
    if ftype is None:
        # only Isodat exports carry the d13C column in their header
        with open(filename, 'r') as f:
            header = f.readline()
        if 'd 13C/12C[per mil]vs. VPDB' in header:
            ftype = 'isodat'
        else:
            ftype = 'amdis'

    # cvtr maps the info key stored on each peak to the (lowercased)
    # column name it is read from
    if ftype == 'amdis':
        delim = '\t'
        cvtr = {'name': 'name',
                'p-s-time': 'rt',
                'p-s-area': 'area'}
    elif ftype == 'isodat':
        delim = ','
        cvtr = {'name': 'peak nr.',
                'p-s-time': 'rt[s]',
                'p-s-area': 'area all[vs]',
                'p-s-width': 'width[s]',
                'p-s-d13c': 'd 13c/12c[per mil]vs. vpdb',
                'p-s-d18o': 'd 18o/16o[per mil]vs. vsmow'}
    else:
        raise ValueError('unknown peak file type: %r' % (ftype,))

    headers = None
    mapping = defaultdict(list)
    ref_pk_info = {}

    def get_val(fields, cols, key):
        return fields[cols.index(key)]

    with open(filename, 'r') as f:
        for line in f:
            if not line.strip():
                # blank lines carry no peak
                continue
            if headers is None or re.match('filename' + delim, line, re.I):
                # Split on the format's own delimiter and drop the line
                # ending so that the last column can be looked up too.
                headers = line.lower().rstrip('\r\n').split(delim)
                continue

            fields = line.rstrip('\r\n').split(delim)
            fn = get_val(fields, headers, 'filename')
            if ftype == 'amdis':
                # AMDIS has '.FIN' suffixes and other stuff, so
                # munge Filename to get it into right format
                cmp_lvl = 2
                fn = op.splitext('/'.join(fn.split('\\')[-cmp_lvl:]))[0]

            # find if filtered filename overlaps with anything in the db
            for dt in db.children_of_type('file'):
                if fn in '/'.join(dt.rawdata.split(op.sep)):
                    break
            else:
                # no file in the db matches this row
                continue

            # load all the predefined fields
            info = {k: get_val(fields, headers, col)
                    for k, col in cvtr.items()}

            # create peak shapes for plotting
            if ftype == 'isodat':
                # the source columns are in seconds; work in minutes
                rt = float(info['p-s-time']) / 60.
                width = float(info['p-s-width']) / 60.
                t = np.linspace(rt - width, rt + width)
                data = []
                for ion in ['44', '45', '46']:
                    area = float(get_val(fields, headers,
                                         'rarea ' + ion + '[mvs]')) / 60.
                    height = float(get_val(fields, headers,
                                           'ampl. ' + ion + '[mv]'))
                    # save the height at 44 into the info for linearity
                    if ion == '44':
                        info['p-s-ampl44'] = height
                    # 0.5 is an empirical factor to make things look better
                    data.append(gaussian(t, x=rt, w=0.5 * area / height,
                                         h=height))
                # save info if this is the main ref gas peak
                if info['name'].endswith('*'):
                    ref_pk_info[dt] = info
                ts = Chromatogram(np.array(data).T, t, [44, 45, 46])
            else:
                # AMDIS rows carry no peak shape; store a placeholder
                ts = Chromatogram(np.array([np.nan]), np.array([np.nan]),
                                  [''])
            mapping[dt].append(Peak(info, ts))

    # do drift correction
    if ftype == 'isodat':
        def errfunc(p, x, y):
            # residual of the linear model p[0] + p[1] * x against y
            return p[0] + p[1] * x - y

        for dt, pks in mapping.items():
            ref_info = ref_pk_info.get(dt)
            if ref_info is None:
                # no '*'-flagged reference peak for this file, so there is
                # nothing to correct against; keep the values as read
                continue
            hgt44 = ref_info['p-s-ampl44']
            d18o = float(ref_info['p-s-d18o'])
            d13c = float(ref_info['p-s-d13c'])

            # if the d18o and height are similar, it's a ref peak
            ref_pks = [pk for pk in pks
                       if abs(pk.info['p-s-ampl44'] - hgt44) < 10.
                       and abs(float(pk.info['p-s-d18o']) - d18o) < 2.]
            if not ref_pks:
                continue

            # get out the dd13C values and times for the ref gas peaks
            d13cs = [float(pk.info['p-s-d13c']) for pk in ref_pks]
            dd13cs = np.array(d13cs) - d13c
            rts = [float(pk.info['p-s-time']) for pk in ref_pks]

            # try to fit a linear model through all of them
            # NOTE(review): the intercept guess is an absolute d13C while
            # the fit targets offsets from the reference; it is also the
            # fallback used when the fit fails -- confirm this is intended.
            p0 = [d13cs[0], 0]
            try:
                p, succ = leastsq(errfunc, p0, args=(np.array(rts), dd13cs))
            except Exception:
                # e.g. too few reference peaks to fit two parameters
                p = p0

            # apply the linear model to get the dd13C linearity correction
            # for a given time and add it to the value of this peak
            for pk in pks:
                pk.info['p-s-d13c'] = str(-errfunc(
                    p, float(pk.info['p-s-time']),
                    float(pk.info['p-s-d13c'])))

    # save everything
    with db:
        for dt in mapping:
            dt.children += mapping[dt]
def read_peaks(db, filename, ftype='isodat'):
    """
    Reads a peak table exported by AMDIS or Isodat and attaches the peaks
    to the matching files in a database.

    Parameters
    ----------
    db : object
        Must provide ``children_of_type('file')`` and work as a context
        manager. Each returned file needs a ``rawdata`` path string and a
        ``children`` list that the new peaks are appended to.
    filename : str
        Path of the exported peak table.
    ftype : {'isodat', 'amdis', None}, optional
        Format of the table. ``None`` sniffs the format from the first
        line of the file.

    Raises
    ------
    ValueError
        If ``ftype`` is not one of the supported formats, or if a column
        required by the chosen format is missing from the header row.
    """
    if ftype is None:
        # only Isodat exports carry the d13C column in their header
        with open(filename, 'r') as f:
            header = f.readline()
        if 'd 13C/12C[per mil]vs. VPDB' in header:
            ftype = 'isodat'
        else:
            ftype = 'amdis'

    # cvtr maps the info key stored on each peak to the (lowercased)
    # column name it is read from
    if ftype == 'amdis':
        delim = '\t'
        cvtr = {'name': 'name',
                'p-s-time': 'rt',
                'p-s-area': 'area'}
    elif ftype == 'isodat':
        delim = ','
        cvtr = {'name': 'peak nr.',
                'p-s-time': 'rt[s]',
                'p-s-area': 'area all[vs]',
                'p-s-width': 'width[s]',
                'p-s-d13c': 'd 13c/12c[per mil]vs. vpdb',
                'p-s-d18o': 'd 18o/16o[per mil]vs. vsmow'}
    else:
        raise ValueError('unknown peak file type: %r' % (ftype,))

    headers = None
    mapping = defaultdict(list)
    ref_pk_info = {}

    def get_val(fields, cols, key):
        return fields[cols.index(key)]

    with open(filename, 'r') as f:
        for line in f:
            if not line.strip():
                # blank lines carry no peak
                continue
            if headers is None or re.match('filename' + delim, line, re.I):
                # Split on the format's own delimiter and drop the line
                # ending so that the last column can be looked up too.
                headers = line.lower().rstrip('\r\n').split(delim)
                continue

            fields = line.rstrip('\r\n').split(delim)
            fn = get_val(fields, headers, 'filename')
            if ftype == 'amdis':
                # AMDIS has '.FIN' suffixes and other stuff, so
                # munge Filename to get it into right format
                cmp_lvl = 2
                fn = op.splitext('/'.join(fn.split('\\')[-cmp_lvl:]))[0]

            # find if filtered filename overlaps with anything in the db
            for dt in db.children_of_type('file'):
                if fn in '/'.join(dt.rawdata.split(op.sep)):
                    break
            else:
                # no file in the db matches this row
                continue

            # load all the predefined fields
            info = {k: get_val(fields, headers, col)
                    for k, col in cvtr.items()}

            # create peak shapes for plotting
            if ftype == 'isodat':
                # the source columns are in seconds; work in minutes
                rt = float(info['p-s-time']) / 60.
                width = float(info['p-s-width']) / 60.
                t = np.linspace(rt - width, rt + width)
                data = []
                for ion in ['44', '45', '46']:
                    area = float(get_val(fields, headers,
                                         'rarea ' + ion + '[mvs]')) / 60.
                    height = float(get_val(fields, headers,
                                           'ampl. ' + ion + '[mv]'))
                    # save the height at 44 into the info for linearity
                    if ion == '44':
                        info['p-s-ampl44'] = height
                    # 0.5 is an empirical factor to make things look better
                    data.append(gaussian(t, x=rt, w=0.5 * area / height,
                                         h=height))
                # save info if this is the main ref gas peak
                if info['name'].endswith('*'):
                    ref_pk_info[dt] = info
                ts = Chromatogram(np.array(data).T, t, [44, 45, 46])
            else:
                # AMDIS rows carry no peak shape; store a placeholder
                ts = Chromatogram(np.array([np.nan]), np.array([np.nan]),
                                  [''])
            mapping[dt].append(Peak(info, ts))

    # do drift correction
    if ftype == 'isodat':
        def errfunc(p, x, y):
            # residual of the linear model p[0] + p[1] * x against y
            return p[0] + p[1] * x - y

        for dt, pks in mapping.items():
            ref_info = ref_pk_info.get(dt)
            if ref_info is None:
                # no '*'-flagged reference peak for this file, so there is
                # nothing to correct against; keep the values as read
                continue
            hgt44 = ref_info['p-s-ampl44']
            d18o = float(ref_info['p-s-d18o'])
            d13c = float(ref_info['p-s-d13c'])

            # if the d18o and height are similar, it's a ref peak
            ref_pks = [pk for pk in pks
                       if abs(pk.info['p-s-ampl44'] - hgt44) < 10.
                       and abs(float(pk.info['p-s-d18o']) - d18o) < 2.]
            if not ref_pks:
                continue

            # get out the dd13C values and times for the ref gas peaks
            d13cs = [float(pk.info['p-s-d13c']) for pk in ref_pks]
            dd13cs = np.array(d13cs) - d13c
            rts = [float(pk.info['p-s-time']) for pk in ref_pks]

            # try to fit a linear model through all of them
            # NOTE(review): the intercept guess is an absolute d13C while
            # the fit targets offsets from the reference; it is also the
            # fallback used when the fit fails -- confirm this is intended.
            p0 = [d13cs[0], 0]
            try:
                p, succ = leastsq(errfunc, p0, args=(np.array(rts), dd13cs))
            except Exception:
                # e.g. too few reference peaks to fit two parameters
                p = p0

            # apply the linear model to get the dd13C linearity correction
            # for a given time and add it to the value of this peak
            for pk in pks:
                pk.info['p-s-d13c'] = str(-errfunc(
                    p, float(pk.info['p-s-time']),
                    float(pk.info['p-s-d13c'])))

    # save everything
    with db:
        for dt in mapping:
            dt.children += mapping[dt]
def plot(self, style='heatmap', legend=False, cmap=None, ax=None):
    """
    Draws the AstonFrame onto a matplotlib axes.

    Parameters
    ----------
    style : {'heatmap', 'colors', ''}
        'heatmap' shows the values as an image with the columns sorted
        along the vertical axis. 'colors' converts every row into one
        colour and shows the rows as a strip. Any other value plots
        ``self.trace()`` as a line.
    legend : bool, optional
        Adds a colorbar; only used by the 'heatmap' style.
    cmap : matplotlib.colors.Colormap, optional
        Colormap of the heatmap. For the line style it is called as
        ``cmap(0, 1)`` to pick the line colour.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on; the current pyplot axes when omitted.
    """
    if ax is None:
        import matplotlib.pyplot as plt
        ax = plt.gca()

    if style == 'heatmap':
        ions = self.columns
        extent = (self.index[0], self.index[-1], min(ions), max(ions))
        column_order = np.argsort(self.columns)
        grid = self.values[:, column_order].transpose()
        if isinstance(self.values, scipy.sparse.spmatrix):
            grid = grid.toarray()
        image = ax.imshow(grid, origin='lower', aspect='auto',
                          extent=extent, cmap=cmap)
        if legend:
            ax.figure.colorbar(image)
        return

    if style == 'colors':
        # TODO: importing gaussian at the top leads to a whole
        # mess of dependency issues => fix somehow?
        from aston.peak.peak_models import gaussian
        from matplotlib.colors import ListedColormap

        # column labels parsed as numbers (treated as wavelengths below)
        wvs = np.genfromtxt(np.array(self.columns).astype(bytes))
        log_wvs = np.log(wvs)

        # gaussian fits to the XYZ response, one row per channel
        # http://www.ppsloan.org/publications/XYZJCGT.pdf
        vis_filt = np.zeros((3, len(wvs)))
        vis_filt[0] = 1.065 * gaussian(wvs, x=595.8, w=33.33) + \
            0.366 * gaussian(wvs, x=446.8, w=19.44)
        vis_filt[1] = 1.014 * gaussian(log_wvs, x=np.log(556.3), w=0.075)
        vis_filt[2] = 1.839 * gaussian(log_wvs, x=np.log(449.8), w=0.051)

        if isinstance(self.values, scipy.sparse.spmatrix):
            dense = self.values.toarray()
        else:
            dense = self.values.copy()
        xyz = np.dot(dense, vis_filt.T)

        # http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
        xyz_rgb = np.array([[3.2404542, -1.5371385, -0.4985314],
                            [-0.9692660, 1.8760108, 0.0415560],
                            [0.0556434, -0.2040259, 1.0572252]])
        rgb = np.dot(xyz_rgb, xyz.T).T

        # clamp negatives, scale by the maximum, then invert
        rgb[rgb < 0] = 0
        rgb /= np.max(rgb)
        rgb = 1 - np.abs(rgb)

        # one image cell per row, each mapped to that row's colour
        cmask = np.meshgrid(np.arange(rgb.shape[0]), 0)[0]
        ax.imshow(cmask, cmap=ListedColormap(rgb), aspect='auto',
                  extent=(self.index[0], self.index[-1], 0, 1))
        ax.yaxis.set_ticks([])
        return

    # anything else: draw the frame's trace as a single line
    color = 'k' if cmap is None else cmap(0, 1)
    self.trace().plot(color=color, ax=ax)
def plot(self, style='heatmap', legend=False, cmap=None, ax=None):
    """
    Draws the AstonFrame onto a matplotlib axes.

    Parameters
    ----------
    style : {'heatmap', 'colors', ''}
        'heatmap' shows the values as an image with the columns sorted
        along the vertical axis. 'colors' converts every row into one
        colour and shows the rows as a strip. Any other value plots
        ``self.trace()`` as a line.
    legend : bool, optional
        Adds a colorbar; only used by the 'heatmap' style.
    cmap : matplotlib.colors.Colormap, optional
        Colormap of the heatmap. For the line style it is called as
        ``cmap(0, 1)`` to pick the line colour.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on; the current pyplot axes when omitted.
    """
    if ax is None:
        import matplotlib.pyplot as plt
        ax = plt.gca()

    if style == 'heatmap':
        ions = self.columns
        extent = (self.index[0], self.index[-1], min(ions), max(ions))
        column_order = np.argsort(self.columns)
        grid = self.values[:, column_order].transpose()
        if isinstance(self.values, scipy.sparse.spmatrix):
            grid = grid.toarray()
        image = ax.imshow(grid, origin='lower', aspect='auto',
                          extent=extent, cmap=cmap)
        if legend:
            ax.figure.colorbar(image)
        return

    if style == 'colors':
        # TODO: importing gaussian at the top leads to a whole
        # mess of dependency issues => fix somehow?
        from aston.peak.peak_models import gaussian
        from matplotlib.colors import ListedColormap

        # column labels parsed as numbers (treated as wavelengths below)
        wvs = np.genfromtxt(np.array(self.columns).astype(bytes))
        log_wvs = np.log(wvs)

        # gaussian fits to the XYZ response, one row per channel
        # http://www.ppsloan.org/publications/XYZJCGT.pdf
        vis_filt = np.zeros((3, len(wvs)))
        vis_filt[0] = 1.065 * gaussian(wvs, x=595.8, w=33.33) + \
            0.366 * gaussian(wvs, x=446.8, w=19.44)
        vis_filt[1] = 1.014 * gaussian(log_wvs, x=np.log(556.3), w=0.075)
        vis_filt[2] = 1.839 * gaussian(log_wvs, x=np.log(449.8), w=0.051)

        if isinstance(self.values, scipy.sparse.spmatrix):
            dense = self.values.toarray()
        else:
            dense = self.values.copy()
        xyz = np.dot(dense, vis_filt.T)

        # http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
        xyz_rgb = np.array([[3.2404542, -1.5371385, -0.4985314],
                            [-0.9692660, 1.8760108, 0.0415560],
                            [0.0556434, -0.2040259, 1.0572252]])
        rgb = np.dot(xyz_rgb, xyz.T).T

        # clamp negatives, scale by the maximum, then invert
        rgb[rgb < 0] = 0
        rgb /= np.max(rgb)
        rgb = 1 - np.abs(rgb)

        # one image cell per row, each mapped to that row's colour
        cmask = np.meshgrid(np.arange(rgb.shape[0]), 0)[0]
        ax.imshow(cmask, cmap=ListedColormap(rgb), aspect='auto',
                  extent=(self.index[0], self.index[-1], 0, 1))
        ax.yaxis.set_ticks([])
        return

    # anything else: draw the frame's trace as a single line
    color = 'k' if cmap is None else cmap(0, 1)
    self.trace().plot(color=color, ax=ax)