def total_trace(self, twin=None):
    """
    Build the total ion chromatogram from the per-scan attributes.

    Every ``m:scan`` element found in ``self.filename`` contributes one
    point: its ``totIonCurrent`` attribute as the value and its
    ``retentionTime`` attribute (converted with ``t_to_min``) as the time.

    Parameters
    ----------
    twin : tuple of (start, end), optional
        Time window, in the units returned by ``t_to_min``. Scans outside
        the window are left out. ``None`` keeps every scan.

    Returns
    -------
    Trace
        Trace named ``'TIC'``.
    """
    if twin is None:
        twin = (-np.inf, np.inf)

    r = Et.parse(self.filename).getroot()
    tic, times = [], []
    for scan in r.findall('*//m:scan', namespaces=self.ns):
        current = scan.get('totIonCurrent')
        if current is None:
            # The attribute is absent on this scan, so there is nothing to
            # convert (float(None) would raise).
            # TODO: calculate the value from the scan data instead.
            continue
        t = t_to_min(scan.get('retentionTime'))
        # No early ``break``: scan order in the document is not assumed.
        if t < twin[0] or t > twin[1]:
            continue
        tic.append(float(current))
        times.append(t)
    return Trace(np.array(tic), np.array(times), name='TIC')
def total_trace(self, twin=None):
    """
    Read the total ion chromatogram from the file's chromatogram list.

    The chromatogram is the parent element of the cvParam with accession
    MS:1000235. Inside it, the parent of the MS:1000595 cvParam is decoded
    with ``self.read_binary`` as the index and the parent of the MS:1000515
    cvParam as the values. (In the PSI-MS vocabulary these accessions are
    presumably "total ion current chromatogram", "time array" and
    "intensity array" -- confirm against the CV.)

    ``twin`` is accepted for interface compatibility but is not used here.

    Returns a Trace named ``'tic'``, or None when no such chromatogram
    element is present.
    """
    namespaces = self.ns
    root = Et.parse(self.filename).getroot()

    chromatogram = root.find(
        './/m:cvParam[@accession="MS:1000235"]/..', namespaces=namespaces)
    if chromatogram is None:
        return None

    index_elem = chromatogram.find(
        './/m:cvParam[@accession="MS:1000595"]/..', namespaces=namespaces)
    index = self.read_binary(index_elem)

    value_elem = chromatogram.find(
        './/m:cvParam[@accession="MS:1000515"]/..', namespaces=namespaces)
    values = self.read_binary(value_elem)

    return Trace(values, index, name='tic')
def loads(ast_str):
    """
    Create a Trace from a suitably compressed string.

    After zlib decompression the payload is laid out as:

    * bytes 0-3: little-endian uint32, byte length of the name (``li``)
    * bytes 4-7: little-endian uint32, byte length of the time block (``lt``)
    * ``li`` bytes of UTF-8 encoded name
    * ``lt`` bytes of time values
    * the remaining bytes: data values

    Both numeric blocks are read with NumPy's default dtype (float64 in
    native byte order).

    Parameters
    ----------
    ast_str : bytes
        zlib-compressed payload.

    Returns
    -------
    Trace
    """
    data = zlib.decompress(ast_str)
    li, lt = struct.unpack('<LL', data[:8])
    name_end = 8 + li
    time_end = name_end + lt

    n = data[8:name_end].decode('utf-8')
    # np.fromstring's binary mode is deprecated; np.frombuffer is the
    # replacement. frombuffer returns a read-only view of the bytes, so
    # copy to keep the arrays writable like fromstring's result was.
    t = np.frombuffer(data[name_end:time_end]).copy()
    d = np.frombuffer(data[time_end:]).copy()
    return Trace(d, t, name=n)
def total_trace(self, twin=None):
    """
    Collect the total ion chromatogram from the per-scan records.

    Iterates ``self._scan_iter(['ScanTime', 'TIC'])`` and keeps the
    (time, TIC) pairs whose time lies inside ``twin``. Iteration stops at
    the first scan later than the end of the window.

    ``twin`` is a (start, end) pair; ``None`` means no limits.
    Returns a Trace named ``'TIC'``.
    """
    if twin is None:
        start, stop = -np.inf, np.inf
    else:
        start, stop = twin[0], twin[1]

    kept = []
    for scan_time, scan_tic in self._scan_iter(['ScanTime', 'TIC']):
        if scan_time < start:
            # before the window: skip
            continue
        if scan_time > stop:
            # past the window: nothing further is read
            break
        kept.append((scan_time, scan_tic))

    times = np.array([point[0] for point in kept])
    values = np.array([point[1] for point in kept])
    return Trace(values, times, name='TIC')
def total_trace(self, twin=None):
    """
    Sum the abundances of every scan into a total ion chromatogram.

    Each scan yielded by ``self.scans(twin)`` contributes one point: its
    time is ``float(scan.name)`` and its value is ``sum(scan.abn)``. Scans
    before the window are skipped and iteration stops at the first scan
    after it.

    ``twin`` is a (start, end) pair; ``None`` means no limits.
    Returns a Trace named ``'tic'``.
    """
    if twin is None:
        twin = (-np.inf, np.inf)
    first, last = twin[0], twin[1]

    scan_times = []
    totals = []
    for scan in self.scans(twin):
        scan_time = float(scan.name)
        if scan_time < first:
            continue
        if scan_time > last:
            break
        scan_times.append(scan_time)
        totals.append(sum(scan.abn))

    return Trace(totals, scan_times, name='tic')
def generate_chromatogram(n=5, twin=None):
    """
    Generates a trace with n gaussian peaks distributed through it.

    Parameters
    ----------
    n : int
        Number of peaks to place.
    twin : tuple of (start, end), optional
        Time window of the generated trace; defaults to ``(0, 60)``.

    Returns
    -------
    Trace
        300 evenly spaced points over ``twin``, with normally distributed
        noise (scale 0.01) added on top of the peaks.
    """
    if twin is None:
        twin = (0, 60)
    t_start, t_end = twin[0], twin[1]
    t = np.linspace(t_start, t_end, 300)

    # Draw peak positions across the whole window. Scaling by the window
    # end alone placed peaks in [0, end) even when the window starts later.
    peak_locs = t_start + (t_end - t_start) * np.random.random(n)
    # widths and heights are uniform in [0.2, 1.0)
    peak_ws = 0.2 + 0.8 * np.random.random(n)
    peak_hs = 0.2 + 0.8 * np.random.random(n)

    y = np.zeros(len(t))
    for peak_loc, peak_w, peak_h in zip(peak_locs, peak_ws, peak_hs):
        y += gaussian(t, x=peak_loc, w=peak_w, h=peak_h)
    y += np.random.normal(scale=0.01, size=len(t))
    return Trace(y, t, ['X'])
def total_trace(self, twin=None):
    """
    Read the signal stored in the binary file as a single trace.

    The start and end times are two big-endian float32 values at offset
    0x11A, each divided by 60000 (presumably milliseconds to minutes --
    confirm against the format). The signal is every little-endian float64
    from offset 0x1800 to the end of the file. Times are spaced evenly
    between start and end, one per data point.

    ``twin`` is accepted for interface compatibility but is not used here.

    Returns a Trace named ``'TIC'``.
    """
    # The context manager closes the handle even when a read fails.
    with open(self.filename, 'rb') as f:
        f.seek(0x11A)
        start_time = struct.unpack('>f', f.read(4))[0] / 60000.
        end_time = struct.unpack('>f', f.read(4))[0] / 60000.

        # TODO: figure out if a scaling factor exists and where?
        # FID signal seems like 10x higher than it should be?
        # (candidate: a big-endian double at offset 0x284)

        f.seek(0x1800)
        data = np.fromfile(f, '<f8')

    times = np.linspace(start_time, end_time, data.shape[0])
    return Trace(data, times, name='TIC')
def trace(self, name='', tol=0.5, twin=None):
    """
    Extract a single trace from the scans.

    Parameters
    ----------
    name : str or number
        ``'tic'``, ``'x'`` or ``''`` return ``self.total_trace(twin)``.
        Anything else is treated as the target x value; numeric strings
        such as ``'44'`` are accepted.
    tol : float
        Points whose x value is strictly less than ``tol`` away from the
        target are summed.
    twin : tuple of (start, end), optional
        Time window; ``None`` means no limits.

    Returns
    -------
    Trace
        One point per scan inside the window, named ``name``.
    """
    if twin is None:
        twin = (-np.inf, np.inf)
    if name in {'tic', 'x', ''}:
        return self.total_trace(twin)

    # A string name cannot be subtracted from the x values below, so
    # convert it once here. Fall back to the value as given so anything
    # that worked before still behaves the same.
    try:
        target = float(name)
    except (TypeError, ValueError):
        target = name

    times, y = [], []
    for s in self.scans(twin):
        t = float(s.name)
        if t < twin[0]:
            continue
        if t > twin[1]:
            break
        times.append(t)
        # TODO: this can be vectorized with numpy?
        y.append(sum(j for i, j in zip(s.x, s.abn)
                     if np.abs(i - target) < tol))
    return Trace(y, times, name=name)
def read_mh_trace(filename, trace_name):
    """
    Read one auxiliary trace from a trace-description file and its
    companion ``.cg`` data file.

    Parameters
    ----------
    filename : str
        Path of the description file. The data file path is built by
        replacing the last three characters of ``filename`` with ``'.cg'``.
    trace_name : str
        One of ``'pres'``, ``'flow'``, ``'slvb'`` or ``'temp'``.

    Returns
    -------
    Trace or None
        The matching trace, or None when the file holds no trace with the
        requested name.

    Raises
    ------
    KeyError
        If ``trace_name`` is not a known short name (raised while scanning
        the trace table).
    """
    ttab = {
        'pres': 'Pressure',
        'flow': 'Flow',
        'slvb': '%B',
        'temp': 'Temperature of Left Heat Exchanger',
    }

    # Both handles are closed on every exit path, including the early
    # return below and any read error.
    with open(filename, 'rb') as f, \
            open(filename[:-3] + '.cg', 'rb') as fdat:

        # convenience function for reading in data
        def rd(st):
            return struct.unpack(st, f.read(struct.calcsize(st)))

        f.seek(0x4c)
        num_traces = rd('<I')[0]
        for _ in range(num_traces):
            cloc = f.tell()
            f.seek(cloc + 2)
            sl = rd('<B')[0]
            # struct yields bytes; decode so the comparison with the str
            # table entry can succeed (bytes never compare equal to str).
            cur_trace_name = rd('<' + str(sl) + 's')[0].decode('latin-1')
            if ttab[trace_name] == cur_trace_name:
                f.seek(f.tell() + 4)
                foff = rd('<Q')[0]
                npts = rd('<I')[0] + 2  # +2 for the extra time info
                fdat.seek(foff)
                pts = struct.unpack('<' + npts * 'd', fdat.read(8 * npts))

                # TODO: pts[0] is not the true offset?
                t = pts[0] + pts[1] * np.arange(npts - 2)
                d = np.array(pts[2:])

                # get the units
                f.seek(f.tell() + 40)
                sl = rd('<B')[0]
                y_units = rd('<' + str(sl) + 's')[0].decode('latin-1')
                if y_units == 'bar':
                    d *= 0.1  # convert to MPa for metricness
                # TODO: ul/min to ml/min
                return Trace(d, t, name=trace_name)

            # each trace description occupies 87 bytes
            f.seek(cloc + 87)
    return None
def read_reg_file(f, foff=0x2D):
    """
    Given a file handle for an old-style Agilent *.REG file, this
    will parse that file into a dictionary of key/value pairs
    (including any tables that are in the *.REG file, which will
    be parsed into lists of lists).

    Parameters
    ----------
    f : binary file object
        Open handle; it is repositioned with ``seek`` and left open.
    foff : int
        Offset of the record count that precedes the record table.

    Returns
    -------
    dict
        Parsed properties. Empty when the byte at offset 0x19 is not
        ``b'A'``.

    The record payloads are walked twice: the first pass collects strings
    and numeric arrays into ``names`` keyed by an id, the second pass
    builds ``data`` and resolves references through ``names``.
    """
    # convenience function for reading in data
    def rd(st):
        return struct.unpack(st, f.read(struct.calcsize(st)))

    f.seek(0x19)
    if f.read(1) != b'A':
        # raise TypeError("Version of REG file is too new.")
        return {}

    f.seek(foff)
    nrecs = rd('<I')[0]  # TODO: should be '<H'
    # One 5-tuple per record. Below, r[1] is compared against record type
    # codes, r[2] is the number of payload bytes to read and r[4] is used
    # as the key into ``names``.
    rec_tab = [rd('<HHIII') for n in range(nrecs)]

    # ---- pass 1: collect everything other records can refer to ----
    names = {}
    f.seek(foff + 20 * nrecs + 4)
    for r in rec_tab:
        d = f.read(r[2])
        if r[1] == 1539:  # '0306'
            # this is part of the linked list too, but contains a
            # reference to a table
            cd = struct.unpack('<HIII21sI', d)
            names[cd[5]] = cd[4].decode('iso8859').strip('\x00')
        elif r[1] == 32769 or r[1] == 32771:  # b'0180' or b'0380'
            # string payload; the last byte is dropped
            names[r[4]] = d[:-1].decode('iso8859')
        elif r[1] == 32774:  # b'0680'
            # this is a string that is referenced elsewhere (in a table)
            names[r[4]] = d[2:-1].decode('iso8859')
        elif r[1] == 32770:  # b'0280'
            # this is just a flattened numeric array
            # (uint32 values after a 4-byte header)
            names[r[4]] = np.frombuffer(d, dtype=np.uint32, offset=4)

    # ---- pass 2: build the result, resolving ids through ``names`` ----
    data = {}
    # rewind to the first payload (``nrecs`` is still the record count here)
    f.seek(foff + 20 * nrecs + 4)
    for r in rec_tab:
        d = f.read(r[2])
        if r[1] == 1538:  # '0206'
            # this is part of a linked list
            # 43 bytes is exactly the size of the '<HIII21sd' layout
            if len(d) == 43:
                cd = struct.unpack('<HIII21sd', d)
                data[cd[4].decode('iso8859').strip('\x00')] = cd[5]
            else:
                pass
        elif r[1] == 1537:  # b'0106'
            # name of property
            n = d[14:30].split(b'\x00')[0].decode('iso8859')
            # with value from names
            data[n] = names.get(struct.unpack('<I', d[35:39])[0], '')
        elif r[1] == 1793:  # b'0107'
            # this is a table of values
            nrow = struct.unpack('<H', d[4:6])[0]
            ncol = struct.unpack('<H', d[16:18])[0]
            if ncol != 0:
                # 30-byte column descriptors start at byte 20
                cols = [
                    struct.unpack('<16sHHHHHI', d[20 + 30 * i:50 + 30 * i])
                    for i in range(ncol)
                ]
                colnames = [
                    c[0].split(b'\x00')[0].decode('iso8859') for c in cols
                ]
                # TODO: type 2 is not a constant size? 31, 17
                # column type code (c[3]) -> struct format character;
                # unknown types fall back to a c[2]-byte string
                rty2sty = {
                    1: 'H',
                    3: 'I',
                    4: 'f',
                    5: 'H',
                    7: 'H',
                    8: 'd',
                    11: 'H',
                    12: 'H',
                    13: 'I',
                    14: 'I',
                    16: 'H'
                }
                coltype = '<' + ''.join(
                    [rty2sty.get(c[3], str(c[2]) + 's') for c in cols])
                lencol = struct.calcsize(coltype)
                tab = []
                # Rows are sliced relative to the END of the payload.
                # i runs from nrow + 1 down to 2, so the final
                # ``lencol`` bytes of the payload are never read as a row.
                for i in reversed(range(2, nrow + 2)):
                    rawrow = struct.unpack(coltype,
                                           d[-i * lencol:(1 - i) * lencol])
                    row = []
                    for j, p in enumerate(rawrow):
                        if cols[j][3] == 3:
                            # type 3 cells hold an id; show the string it
                            # refers to, or the id itself when unknown
                            row.append(names.get(p, str(p)))
                        else:
                            row.append(p)
                    tab.append(row)
                data[names[r[4]]] = [colnames, tab]
        elif r[1] == 1281 or r[1] == 1283:  # b'0105' or b'0305'
            fm = '<HHBIIhIdII12shIddQQB8sII12shIddQQB8s'
            m = struct.unpack(fm, d)
            # NOTE: rebinds ``nrecs``; the record count is no longer
            # needed at this point, so this is harmless but easy to misread
            nrecs = m[4]  # number of points in table

            # x_units = names.get(m[8], '')
            # m[14] and m[25] scale the arrays looked up by m[9] / m[20];
            # a missing x array falls back to a plain index
            x_arr = m[14] * names.get(m[9], np.arange(nrecs - 1))
            y_arr = m[25] * names.get(m[20])
            y_units = names.get(m[19], '')
            if y_units == 'bar':
                y_arr *= 0.1  # convert to MPa
            # TODO: what to call this?
            data['Trace'] = Trace(y_arr, x_arr, name='')
        # Record types seen but not parsed (notes kept from earlier
        # experiments):
        #   1025 (b'0104'): lots of zeros, maybe one or two numbers; only
        #       found in REG entries that have long 0280 records
        #       (layout tried: '<HQQQIHHHHIIHB')
        #   512 (b'0002'): either points to two null pointers or two
        #       other pointers (indicates start of linked list?)
        #   769 / 772 (b'0103' / b'0403'): points to 2nd, 3rd & 4th
        #       records (two 0002 records and a 0180)
    return data
def molmz(df, noise=10000):
    """
    The mz of the molecular ion.

    For every row of ``df``, entries above ``noise`` are replaced by their
    column label and all others by zero; the row maximum of that product
    is the result. (Assumes the column labels are numeric m/z values --
    confirm against callers.)

    Returns a Trace named ``'molmz'`` indexed like ``df``.
    """
    above_noise = df.values > noise
    labelled = above_noise * df.columns
    highest = labelled.max(axis=1)
    return Trace(highest, df.index, name='molmz')
def ratio_series(ts, pks, r2, r1):
    """
    Evaluate the ratio function built from ``pks`` at every time in ``ts``.

    ``ratio_f(pks, r2, r1)`` supplies a callable, which is applied to
    ``ts.index``. The resulting Trace shares that index and is named
    ``'<r2>/<r1>'`` with one decimal place each.
    """
    simulate = ratio_f(pks, r2, r1)
    values = simulate(ts.index)
    times = ts.index
    label = '{:.1f}/{:.1f}'.format(r2, r1)
    return Trace(values, times, name=label)
def parse_ion_string(istr, analyses, twin=None):
    """
    Recursive string parser that handles "ion" strings.

    Parameters
    ----------
    istr : str
        Expression to evaluate: trace names, ``!``-prefixed constants
        (``!2.5``, ``!pi``, ``!e``), shortcuts from ``SHORTCUTS``,
        parentheses and the operators ``/ * + - ^``.
    analyses : iterable
        Passed through to ``trace_resolver`` for name lookup.
    twin : tuple of (start, end), optional
        Passed through to ``trace_resolver``.

    Returns
    -------
    Trace or float
        An empty ``Trace()`` for a blank expression (including one made up
        only of ``+`` signs); a float for a bare constant.

    Raises
    ------
    Exception
        If the expression contains operator characters but none of the
        operators splits it.
    """
    if istr.strip() == '':
        return Trace()

    # remove (unnecessary?) pluses from the front
    # TODO: plus should be abs?
    istr = istr.lstrip('+')
    if istr == '':
        # the expression was nothing but plus signs; indexing istr[0]
        # below would raise IndexError
        return Trace()

    # invert it if preceded by a minus sign
    # NOTE(review): this negates the whole remainder, so '-a+b' evaluates
    # as -(a+b) -- confirm whether that is intended.
    if istr[0] == '-':
        return -parse_ion_string(istr[1:], analyses, twin)

    # this is a function or parenthesized expression
    if is_parans_exp(istr):
        # NOTE: unbalanced parentheses (no ')' in istr) are not rejected
        fxn = istr.split('(')[0]
        args = istr[istr.find('(') + 1:istr.find(')')].split(',')
        if fxn == '':
            # strip out the parentheses and continue
            istr = args[0]
        else:
            ts = parse_ion_string(args[0], analyses, twin)
            # FIXME: the function itself is not applied yet, i.e. this
            # should become fxn_resolver(ts, fxn, *args[1:])
            return ts

    # all the complicated math is gone, so simple lookup
    if set(istr).intersection(set('+-/*()')) == set():
        if istr in SHORTCUTS:
            # allow some shortcuts to pull out common ions
            return parse_ion_string(SHORTCUTS[istr], analyses, twin)
        elif istr[0] == '!' and all(i in '0123456789.' for i in istr[1:]):
            # TODO: should this handle negative numbers?
            return float(istr[1:])
        elif istr == '!pi':
            return np.pi
        elif istr == '!e':
            return np.e
        else:
            return trace_resolver(istr, analyses, twin)

    # go through and handle operators
    # NOTE(review): the first operator that splits the string binds
    # loosest, so with this order 'a+b/c' evaluates as (a+b)/c. The order
    # is left unchanged because existing ion strings may rely on it --
    # confirm before switching to conventional precedence.
    for token in '/*+-^':
        ts = tokenize(istr, token)
        if len(ts) == 1:
            continue
        s = parse_ion_string(ts[0], analyses, twin)
        for t in ts[1:]:
            if token == '/':
                s /= parse_ion_string(t, analyses, twin)
            elif token == '*':
                s *= parse_ion_string(t, analyses, twin)
            elif token == '+':
                s += parse_ion_string(t, analyses, twin)
            elif token == '-':
                s -= parse_ion_string(t, analyses, twin)
            elif token == '^':
                s **= parse_ion_string(t, analyses, twin)
        return s
    raise Exception('Parser hit a point it shouldn\'t have!')
def trace_resolver(istr, analyses, twin=None):
    """
    Turn a single trace name into a Trace using the available analyses.

    Parameters
    ----------
    istr : str
        Trace name, possibly carrying a source tag that ``token_source``
        splits off. Special names: ``m_<mz>``, ``molmz``, ``basemz``.
        Anything else is handed to the data file's ``trace`` method,
        optionally with a tolerance written as ``start:end`` or
        ``value±tol``.
    analyses : iterable
        Objects with a comma-separated ``trace`` attribute (source names,
        optionally prefixed with ``#`` or ``*``) and a ``datafile``.
    twin : tuple of (start, end), optional
        Time window passed to the data file.

    Returns
    -------
    Trace
        An empty ``Trace()`` when no source or data file matches, or when
        the requested trace type is not implemented yet (``coda``,
        ``rnie``, ``wmsm``, ``r45std``, ``r46std``).
    """
    avail_sources = [
        i.lstrip('#*') for a in analyses for i in a.trace.split(',')
    ]
    istr, source = token_source(istr, avail_sources)
    if source is None:
        return Trace()

    # the first analysis that provides the requested source wins
    df = None
    for a in analyses:
        if source in [i.lstrip('#*') for i in a.trace.split(',')]:
            df = a.datafile
            break
    if df is None:
        # no data file to read from; every branch below would fail on it
        return Trace()

    if istr in {'coda', 'rnie', 'wmsm'}:
        # TODO: allow more complicated options to turn
        # Chromatograms into plotable Traces

        # coda
        # Windig W: The use of the Durbin-Watson criterion for
        # noise and background reduction of complex liquid
        # chromatography/mass spectrometry data and a new algorithm
        # to determine sample differences. Chemometrics and
        # Intelligent Laboratory Systems. 2005, 77:206-214.

        # rnie
        # Yunfei L, Qu H, and Cheng Y: A entropy-based method
        # for noise reduction of LC-MS data. Analytica Chimica
        # Acta 612.1 (2008)

        # wmsm
        # Fleming C et al. Windowed mass selection method:
        # a new data processing algorithm for LC-MS data.
        # Journal of Chromatography A 849.1 (1999) 71-85.

        # Not implemented: hand back an empty Trace (as for an unknown
        # source) so arithmetic in the caller does not operate on None.
        return Trace()
    elif istr.startswith('m_'):
        if istr == 'm_':
            m = 0.0
        else:
            m = float(istr.split('_')[1])
        return mzminus(df.data, m)
    elif istr == 'molmz':
        return molmz(df.data)
    elif istr == 'basemz':
        return basemz(df.data)
    elif istr in {'r45std', 'r46std'}:
        # TODO: calculate isotopic data -> needs integrated peak
        # references of associated peaks in order to make these
        # calculations. Sketch: take the peaks marked as isotope standards,
        # compute area(45 or 46) / area(44) for each, fit a straight line
        # through those ratios over time and evaluate it on the time axis.
        return Trace()
    else:
        # interpret tolerances
        if ':' in istr:
            bounds = istr.split(':')
            st = float(bounds[0])
            en = float(bounds[1])
            tol = 0.5 * (en - st)
            istr = 0.5 * (en + st)
        elif u'±' in istr:
            parts = istr.split(u'±')
            tol = float(parts[1])
            istr = float(parts[0])
        else:
            tol = 0.5

        return df.trace(istr, tol, twin=twin)
def generate_gaussian():
    """
    Generates a trace holding one gaussian peak.

    The time axis is 300 evenly spaced points from 0 to 60; the peak comes
    from ``gaussian`` called with ``x=30, w=2, h=1``.
    """
    times = np.linspace(0, 60, 300)
    signal = gaussian(times, x=30, w=2, h=1)
    return Trace(signal, times, ['X'])