예제 #1
    def total_trace(self, twin=None):
        """
        Build the total ion chromatogram from the per-scan attributes.

        Every ``m:scan`` element found under the document root contributes
        one point: its ``totIonCurrent`` attribute is the value and its
        ``retentionTime`` attribute, converted by ``t_to_min``, is the time.
        Scans that carry no ``totIonCurrent`` attribute are skipped.

        Parameters
        ----------
        twin : tuple, optional
            ``(start, end)`` time window, in the units returned by
            ``t_to_min``. Scans outside of it are dropped; ``None`` keeps
            every scan.

        Returns
        -------
        Trace
            Trace named 'TIC'.
        """
        # TODO: if no scan carries totIonCurrent, calculate it from the data
        if twin is None:
            twin = (-np.inf, np.inf)

        r = Et.parse(self.filename).getroot()
        d, t = [], []
        for scan in r.findall('*//m:scan', namespaces=self.ns):
            tic = scan.get('totIonCurrent')
            if tic is None:
                # float(None) would raise; skip scans without a stored TIC
                continue
            rt = t_to_min(scan.get('retentionTime'))
            if twin[0] <= rt <= twin[1]:
                d.append(float(tic))
                t.append(rt)
        return Trace(np.array(d), np.array(t), name='TIC')
예제 #2
    def total_trace(self, twin=None):
        """
        Read the stored total ion chromatogram out of the file.

        Looks for the parent element of the ``cvParam`` with accession
        MS:1000235 (the total ion current chromatogram in the PSI-MS
        vocabulary) and decodes its two binary arrays with
        ``self.read_binary``: MS:1000595 becomes the index and MS:1000515
        the values.

        Parameters
        ----------
        twin : tuple, optional
            Accepted for interface compatibility; not used here.

        Returns
        -------
        Trace or None
            Trace named 'tic', or ``None`` when the file holds no such
            chromatogram.
        """
        r = Et.parse(self.filename).getroot()

        # get it from the chromatogram list
        c = r.find('.//m:cvParam[@accession="MS:1000235"]/..',
                   namespaces=self.ns)
        if c is not None:
            q = './/m:cvParam[@accession="MS:1000595"]/..'
            index = self.read_binary(c.find(q, namespaces=self.ns))
            q = './/m:cvParam[@accession="MS:1000515"]/..'
            values = self.read_binary(c.find(q, namespaces=self.ns))
            return Trace(values, index, name='tic')
        # no stored chromatogram was found
        return None
예제 #3
def loads(ast_str):
    """
    Create a Trace from a suitably compressed string.

    After zlib decompression the payload is laid out as:

    * bytes 0-3: little-endian uint32, byte length of the name
    * bytes 4-7: little-endian uint32, byte length of the time array
    * the UTF-8 encoded name
    * the time array, then the data array filling the rest of the
      payload (both read with numpy's default float dtype)

    Parameters
    ----------
    ast_str : bytes
        zlib-compressed payload.

    Returns
    -------
    Trace
    """
    data = zlib.decompress(ast_str)
    li, lt = struct.unpack('<LL', data[:8])
    t_start = 8 + li
    d_start = t_start + lt
    n = data[8:t_start].decode('utf-8')
    # np.fromstring is deprecated for binary input; np.frombuffer reads
    # the same bytes. It returns a read-only view, so copy to keep the
    # arrays writable like before.
    t = np.frombuffer(data[t_start:d_start]).copy()
    d = np.frombuffer(data[d_start:]).copy()

    return Trace(d, t, name=n)
예제 #4
 def total_trace(self, twin=None):
     """
     Collect the total ion chromatogram from the per-scan records.

     Iterates ``self._scan_iter(['ScanTime', 'TIC'])`` and keeps the
     (time, TIC) pairs whose time lies inside ``twin``.

     Parameters
     ----------
     twin : tuple, optional
         ``(start, end)`` time window; ``None`` means no limits.

     Returns
     -------
     Trace
         Trace named 'TIC'.
     """
     if twin is None:
         twin = (-np.inf, np.inf)
     tme = []
     tic = []
     for t, z in self._scan_iter(['ScanTime', 'TIC']):
         if t < twin[0]:
             # before the window
             continue
         elif t > twin[1]:
             # past the window; assumes scans come in increasing time
             # order -- TODO confirm
             break
         tme.append(t)
         tic.append(z)
     return Trace(np.array(tic), np.array(tme), name='TIC')
예제 #5
파일: __init__.py 프로젝트: scholer/Aston
 def total_trace(self, twin=None):
     """
     Build the total ion chromatogram by summing each scan.

     Parameters
     ----------
     twin : tuple, optional
         ``(start, end)`` time window; ``None`` means no limits.

     Returns
     -------
     Trace
         Trace named 'tic' with one point per scan inside the window.
     """
     if twin is None:
         twin = (-np.inf, np.inf)
     times, y = [], []
     for s in self.scans(twin):
         # the scan's name holds its time
         t = float(s.name)
         if t < twin[0]:
             continue
         if t > twin[1]:
             # assumes scans come in increasing time order -- TODO confirm
             break
         times.append(t)
         # total signal of the scan: sum of its abundances
         y.append(sum(s.abn))
     return Trace(y, times, name='tic')
예제 #6
def generate_chromatogram(n=5, twin=None):
    """
    Generates a trace with n gaussian peaks distributed through it.

    Parameters
    ----------
    n : int
        Number of peaks to draw.
    twin : tuple, optional
        ``(start, end)`` of the time axis; defaults to ``(0, 60)``.

    Returns
    -------
    Trace
        300 evenly spaced points: the sum of the peaks plus normal noise
        with a scale of 0.01. Peak widths and heights are drawn
        uniformly from [0.2, 1.0).
    """
    if twin is None:
        twin = (0, 60)
    t = np.linspace(twin[0], twin[1], 300)
    # spread the peaks over the whole window; scaling by twin[1] alone
    # placed them in [0, twin[1]) even when the window started later
    peak_locs = twin[0] + (twin[1] - twin[0]) * np.random.random(n)
    peak_ws = 0.2 + 0.8 * np.random.random(n)
    peak_hs = 0.2 + 0.8 * np.random.random(n)

    y = np.zeros(len(t))
    for peak_loc, peak_w, peak_h in zip(peak_locs, peak_ws, peak_hs):
        y += gaussian(t, x=peak_loc, w=peak_w, h=peak_h)
    y += np.random.normal(scale=0.01, size=len(t))
    return Trace(y, t, ['X'])
예제 #7
    def total_trace(self, twin=None):
        """
        Read the signal stored in ``self.filename``.

        The file starts with two big-endian float32 values, the start and
        end time, each divided by 60000 here (presumably milliseconds to
        minutes -- TODO confirm). Everything after them is read as
        little-endian float64 samples, which are spread evenly between
        the two times.

        Parameters
        ----------
        twin : tuple, optional
            Accepted for interface compatibility; not used here.

        Returns
        -------
        Trace
            Trace named 'TIC'.
        """
        # the context manager closes the handle even if a read fails
        with open(self.filename, 'rb') as f:
            start_time = struct.unpack('>f', f.read(4))[0] / 60000.
            end_time = struct.unpack('>f', f.read(4))[0] / 60000.

            # TODO: figure out if this exists and where?
            # FID signal seems like 10x higher than it should be?
            # f.seek(0x284)
            # del_ab = 0.1  # struct.unpack('>d', f.read(8))[0]
            # data = []

            data = np.fromfile(f, '<f8')
        times = np.linspace(start_time, end_time, data.shape[0])
        return Trace(data, times, name='TIC')
예제 #8
파일: __init__.py 프로젝트: scholer/Aston
 def trace(self, name='', tol=0.5, twin=None):
     """
     Extract a single trace from the scans.

     Parameters
     ----------
     name : str or number
         'tic', 'x' or '' return ``self.total_trace(twin)``. Any other
         value is treated as a number: for each scan, the abundances
         whose ``x`` value lies within ``tol`` of it are summed.
     tol : float
         Half-width of the acceptance window around ``name``.
     twin : tuple, optional
         ``(start, end)`` time window; ``None`` means no limits.

     Returns
     -------
     Trace
         Trace carrying ``name`` as its name.
     """
     if twin is None:
         twin = (-np.inf, np.inf)
     if name in {'tic', 'x', ''}:
         return self.total_trace(twin)
     times, y = [], []
     for s in self.scans(twin):
         # the scan's name holds its time
         t = float(s.name)
         if t < twin[0]:
             continue
         if t > twin[1]:
             # assumes scans come in increasing time order -- TODO confirm
             break
         times.append(t)
         # TODO: this can be vectorized with numpy?
         y.append(sum(j for i, j in zip(s.x, s.abn)
                      if np.abs(i - name) < tol))
     return Trace(y, times, name=name)
예제 #9
def read_mh_trace(filename, trace_name):
    """
    Read one auxiliary trace from an index file and its '.cg' data file.

    ``filename`` is the index file; the data file has the same path with
    the last three characters replaced by '.cg'.

    Parameters
    ----------
    filename : str
        Path of the index file.
    trace_name : str
        One of 'pres', 'flow', 'slvb' or 'temp'.

    Returns
    -------
    Trace or None
        The matching trace, or ``None`` when the index lists no trace
        with the requested name.

    Raises
    ------
    KeyError
        If ``trace_name`` is not a known short name and the index holds
        at least one trace.
    """
    ttab = {
        'pres': 'Pressure',
        'flow': 'Flow',
        'slvb': '%B',
        'temp': 'Temperature of Left Heat Exchanger',
    }

    # both handles are closed on every exit path, including the early return
    with open(filename, 'rb') as f, \
            open(filename[:-3] + '.cg', 'rb') as fdat:

        # convenience function for reading in data
        def rd(st):
            return struct.unpack(st, f.read(struct.calcsize(st)))

        num_traces = rd('<I')[0]
        for _ in range(num_traces):
            cloc = f.tell()
            f.seek(cloc + 2)
            sl = rd('<B')[0]
            # struct returns bytes; decode so the comparison with the str
            # names in ttab can succeed on Python 3
            cur_trace_name = rd('<' + str(sl) + 's')[0].decode('latin-1')
            if ttab[trace_name] == cur_trace_name:
                f.seek(f.tell() + 4)
                # NOTE(review): this offset is read but never applied to
                # fdat, which is always read from its start -- confirm
                foff = rd('<Q')[0]
                npts = rd('<I')[0] + 2  # +2 for the extra time info
                pts = struct.unpack('<' + npts * 'd', fdat.read(8 * npts))
                # TODO: pts[0] is not the true offset?
                t = pts[0] + pts[1] * np.arange(npts - 2)
                d = np.array(pts[2:])
                # get the units
                f.seek(f.tell() + 40)
                sl = rd('<B')[0]
                y_units = rd('<' + str(sl) + 's')[0].decode('latin-1')
                if y_units == 'bar':
                    d *= 0.1  # convert to MPa for metricness
                elif y_units == '':
                    pass  # TODO: ul/min to ml/min
                return Trace(d, t, name=trace_name)

            # jump to the next index entry, 87 bytes after this one began
            f.seek(cloc + 87)
    return None
예제 #10
def read_reg_file(f, foff=0x2D):
    """
    Given a file handle for an old-style Agilent *.REG file, this
    will parse that file into a dictionary of key/value pairs
    (including any tables that are in the *.REG file, which will
    be parsed into lists of lists).

    The record payloads are walked twice: the first pass collects the
    strings and numeric arrays that other records refer to by id, the
    second pass builds the result from the records that use them.

    Parameters
    ----------
    f : binary file object
        Handle of the *.REG file.
    foff : int
        Offset of the record table; the record payloads are taken to
        start at ``foff + 20 * nrecs + 4``.

    Returns
    -------
    dict
        Property name to value. A table is stored as
        ``[column_names, rows]`` and a data record as a ``Trace`` under
        the key 'Trace'. Empty if the version byte is not ``b'A'``.

    NOTE(review): the version byte and the record count are read from
    the current position of ``f``; no seek is issued before them here --
    confirm that callers position the handle accordingly.
    """
    # convenience function for reading in data
    def rd(st):
        return struct.unpack(st, f.read(struct.calcsize(st)))

    if f.read(1) != b'A':
        # raise TypeError("Version of REG file is too new.")
        return {}

    nrecs = rd('<I')[0]  # TODO: should be '<H'
    rec_tab = [rd('<HHIII') for _ in range(nrecs)]

    # first pass: values that other records reference by id
    names = {}
    f.seek(foff + 20 * nrecs + 4)
    for r in rec_tab:
        d = f.read(r[2])
        if r[1] == 1539:  # '0306'
            # this is part of the linked list too, but contains a
            # reference to a table
            cd = struct.unpack('<HIII21sI', d)
            names[cd[5]] = cd[4].decode('iso8859').strip('\x00')
        elif r[1] == 32769 or r[1] == 32771:  # b'0180' or b'0380'
            names[r[4]] = d[:-1].decode('iso8859')
        elif r[1] == 32774:  # b'0680'
            # this is a string that is referenced elsewhere (in a table)
            names[r[4]] = d[2:-1].decode('iso8859')
        elif r[1] == 32770:  # b'0280'
            # this is just a flattened numeric array
            names[r[4]] = np.frombuffer(d, dtype=np.uint32, offset=4)

    # second pass: properties, tables and data records
    data = {}
    f.seek(foff + 20 * nrecs + 4)
    for r in rec_tab:
        d = f.read(r[2])
        if r[1] == 1538:  # '0206'
            # this is part of a linked list
            if len(d) == 43:
                cd = struct.unpack('<HIII21sd', d)
                data[cd[4].decode('iso8859').strip('\x00')] = cd[5]
        elif r[1] == 1537:  # b'0106'
            # name of property
            n = d[14:30].split(b'\x00')[0].decode('iso8859')
            # with value from names
            data[n] = names.get(struct.unpack('<I', d[35:39])[0], '')
        elif r[1] == 1793:  # b'0107'
            # this is a table of values
            nrow = struct.unpack('<H', d[4:6])[0]
            ncol = struct.unpack('<H', d[16:18])[0]
            if ncol != 0:
                # one 30-byte descriptor per column, starting at byte 20
                cols = [
                    struct.unpack('<16sHHHHHI', d[20 + 30 * i:50 + 30 * i])
                    for i in range(ncol)
                ]
                colnames = [
                    c[0].split(b'\x00')[0].decode('iso8859') for c in cols
                ]
                # TODO: type 2 is not a constant size? 31, 17
                rty2sty = {
                    1: 'H',
                    3: 'I',
                    4: 'f',
                    5: 'H',
                    7: 'H',
                    8: 'd',
                    11: 'H',
                    12: 'H',
                    13: 'I',
                    14: 'I',
                    16: 'H',
                }
                # column types missing from the map are read as raw
                # strings of c[2] bytes
                coltype = '<' + ''.join(
                    [rty2sty.get(c[3], str(c[2]) + 's') for c in cols])
                lencol = struct.calcsize(coltype)
                tab = []
                # rows are addressed backwards from the end of the record
                for i in reversed(range(2, nrow + 2)):
                    rawrow = struct.unpack(coltype,
                                           d[-i * lencol:(1 - i) * lencol])
                    row = []
                    for j, p in enumerate(rawrow):
                        if cols[j][3] == 3:
                            # type 3 cells hold the id of a string
                            row.append(names.get(p, str(p)))
                        elif isinstance(p, bytes):
                            row.append(p.decode('iso8859'))
                        else:
                            row.append(p)
                    tab.append(row)
                data[names[r[4]]] = [colnames, tab]
        elif r[1] == 1281 or r[1] == 1283:  # b'0105' or b'0305'
            fm = '<HHBIIhIdII12shIddQQB8sII12shIddQQB8s'
            m = struct.unpack(fm, d)
            npts = m[4]  # number of points in table

            # x_units = names.get(m[8], '')
            x_arr = m[14] * names.get(m[9], np.arange(npts - 1))
            y_arr = m[25] * names.get(m[20])
            y_units = names.get(m[19], '')
            if y_units == 'bar':
                y_arr *= 0.1  # convert to MPa
            # TODO: what to call this?
            data['Trace'] = Trace(y_arr, x_arr, name='')
        # elif r[1] == 1025:  # b'0104'
        #     # lots of zeros? maybe one or two numbers?
        #     # only found in REG entries that have long 0280 records
        #     fm = '<HQQQIHHHHIIHB'
        #     m = struct.unpack(fm, d)
        #     print(m)
        #     #print(r[1], len(d), binascii.hexlify(d))
        #     pass
        # elif r[1] == 512:  # b'0002'
        #     # either points to two null pointers or two other pointers
        #     # (indicates start of linked list?)
        #     print(r[1], len(d), binascii.hexlify(d))
        # elif r[1] == 769 or r[1] == 772:  # b'0103' or b'0403'
        #     # points to 2nd, 3rd & 4th records (two 0002 records and a 0180)
        #     b = binascii.hexlify
        #     print(b(d[10:14]), b(d[14:18]), b(d[18:22]))

    return data
예제 #11
def molmz(df, noise=10000):
    """
    The mz of the molecular ion.

    For every row of ``df`` this takes the largest column label whose
    value exceeds ``noise``.

    Parameters
    ----------
    df : DataFrame-like
        Needs ``values``, ``columns`` and ``index``; the column labels
        are presumably m/z values -- verify against callers.
    noise : number
        Values at or below this level are ignored.

    Returns
    -------
    Trace
        Trace named 'molmz' over ``df.index``.
    """
    d = ((df.values > noise) * df.columns).max(axis=1)
    return Trace(d, df.index, name='molmz')
예제 #12
파일: isotopes.py 프로젝트: scholer/Aston
def ratio_series(ts, pks, r2, r1):
    """
    Evaluate the ratio function of ``pks`` at the times of ``ts``.

    Parameters
    ----------
    ts : Trace
        Supplies the time points through ``ts.index``.
    pks
        Passed through to ``ratio_f``.
    r2, r1 : float
        Passed through to ``ratio_f``; also used, formatted to one
        decimal each, as the trace name 'r2/r1'.

    Returns
    -------
    Trace
        The simulated ratio sampled at ``ts.index``.
    """
    sim_y = ratio_f(pks, r2, r1)
    # pass the time axis along with the values, as the other Trace
    # constructions in this code base do
    # NOTE(review): the original call gave no index -- confirm
    return Trace(sim_y(ts.index), ts.index,
                 name='{:.1f}/{:.1f}'.format(r2, r1))
예제 #13
def parse_ion_string(istr, analyses, twin=None):
    """
    Recursive string parser that handles "ion" strings.

    Parameters
    ----------
    istr : str
        Expression to evaluate: trace names, shortcuts, '!'-prefixed
        numeric constants, parentheses and the operators ``/ * + - ^``.
    analyses
        Passed through to ``trace_resolver``.
    twin : tuple, optional
        Time window, passed through to ``trace_resolver``.

    Returns
    -------
    Trace or float
        An empty Trace for a blank expression, a float for a constant,
        otherwise whatever the trace lookup and arithmetic produce.

    Raises
    ------
    Exception
        If the expression matches none of the handled forms.
    """
    if istr.strip() == '':
        return Trace()

    # remove (unnessary?) pluses from the front
    # TODO: plus should be abs?
    istr = istr.lstrip('+')
    if istr == '':
        # nothing but plus signs; istr[0] below would raise IndexError
        return Trace()

    # invert it if preceded by a minus sign
    if istr[0] == '-':
        return -parse_ion_string(istr[1:], analyses, twin)

    # this is a function or paranthesized expression
    if is_parans_exp(istr):
        # TODO: unbalanced parantheses (no ')' in istr) are not reported
        fxn = istr.split('(')[0]
        args = istr[istr.find('(') + 1:istr.find(')')].split(',')
        if fxn == '':
            # strip out the parantheses and continue
            istr = args[0]
        else:
            ts = parse_ion_string(args[0], analyses, twin)
            # FIXME
            return ts
            # return fxn_resolver(ts, fxn, *args[1:])

    # all the complicated math is gone, so simple lookup
    # NOTE(review): '^' is not part of this operator set although it is
    # handled below -- confirm whether that is intended
    if set(istr).intersection(set('+-/*()')) == set():
        if istr in SHORTCUTS:
            # allow some shortcuts to pull out common ions
            return parse_ion_string(SHORTCUTS[istr], analyses, twin)
        elif istr[0] == '!' and all(i in '0123456789.' for i in istr[1:]):
            # TODO: should this handle negative numbers?
            return float(istr[1:])
        elif istr == '!pi':
            return np.pi
        elif istr == '!e':
            return np.e
        else:
            return trace_resolver(istr, analyses, twin)

    # go through and handle operators
    for token in '/*+-^':
        ts = tokenize(istr, token)
        if len(ts) == 1:
            # this operator does not split the expression
            continue
        s = parse_ion_string(ts[0], analyses, twin)
        for t in ts[1:]:
            operand = parse_ion_string(t, analyses, twin)
            if token == '/':
                s /= operand
            elif token == '*':
                s *= operand
            elif token == '+':
                s += operand
            elif token == '-':
                s -= operand
            elif token == '^':
                s **= operand
        return s
    raise Exception('Parser hit a point it shouldn\'t have!')
예제 #14
def trace_resolver(istr, analyses, twin=None):
    """
    Turn a single trace name into a Trace taken from one of the analyses.

    Parameters
    ----------
    istr : str
        Trace name, possibly carrying a source token that
        ``token_source`` splits off. Recognised names are 'm_<mz>',
        'molmz' and 'basemz'; anything else is handed to the data
        file's ``trace`` method, optionally as a range 'start:end' or
        as 'value±tolerance'.
    analyses : iterable
        Objects with a comma-separated ``trace`` string of source names
        and a ``datafile``.
    twin : tuple, optional
        Time window, passed on to the data file.

    Returns
    -------
    Trace or None
        An empty Trace when no source matches; ``None`` for the names
        that are not implemented yet ('coda', 'rnie', 'wmsm', 'r45std',
        'r46std').
    """
    avail_sources = [
        i.lstrip('#*') for a in analyses for i in a.trace.split(',')
    ]
    istr, source = token_source(istr, avail_sources)
    if source is None:
        return Trace()

    # use the data file of the first analysis that offers this source
    for a in analyses:
        if source in [i.lstrip('#*') for i in a.trace.split(',')]:
            df = a.datafile
            break
    else:
        df = None

    if istr in {'coda', 'rnie', 'wmsm'}:
        # TODO: allow more complicated options to turn
        # Chromatograms into plotable Traces

        # coda
        #  Windig W: The use of the Durbin-Watson criterion for
        #  noise and background reduction of complex liquid
        #  chromatography/mass spectrometry data and a new algorithm
        #  to determine sample differences. Chemometrics and
        #  Intelligent Laboratory Systems. 2005, 77:206-214.

        # rnie
        #  Yunfei L, Qu H, and Cheng Y: A entropy-based method
        #  for noise reduction of LC-MS data. Analytica Chimica
        #  Acta 612.1 (2008)

        # wmsm
        #  Fleming C et al. Windowed mass selection method:
        #  a new data processing algorithm for LC-MS data.
        #  Journal of Chromatography A 849.1 (1999) 71-85.
        return None
    elif istr.startswith('m_'):
        if istr == 'm_':
            m = 0.0
        else:
            m = float(istr.split('_')[1])
        return mzminus(df.data, m)
    elif istr == 'molmz':
        return molmz(df.data)
    elif istr == 'basemz':
        return basemz(df.data)
    elif istr in {'r45std', 'r46std'}:
        # TODO: calculate isotopic data -> needs integrated peak
        # references of associated peaks in order to make these calculations
        #  calculate isotopic reference for chromatogram
        # if name == 'r45std':
        #     topion = 45
        # else:
        #     topion = 46
        # std_specs = [o for o in \
        #   self.children_of_type('peak') \
        #   if o.info['p-type'] == 'Isotope Standard']
        # x = [float(o.info['p-s-time']) for o in std_specs]
        # y = [o.area(topion) / o.area(44) for o in std_specs \
        #      if o.area(44) != 0]
        # if len(x) == 0 or len(y) == 0:
        #     return self._const(0.0, twin)

        # p0 = [y[0], 0]
        # errfunc = lambda p, x, y: p[0] + p[1] * x - y
        # try:
        #     p, succ = leastsq(errfunc, p0, args=(np.array(x), \
        #                                          np.array(y)))
        # except:
        #     p = p0
        # sim_y = np.array(errfunc(p, t, np.zeros(len(t))))
        # return TimeSeries(sim_y, t, [name])
        return None
    else:
        # interpret tolerances
        if ':' in istr:
            # 'start:end' -> centre of the range with half its width
            st = float(istr.split(':')[0])
            en = float(istr.split(':')[1])
            tol = 0.5 * (en - st)
            istr = 0.5 * (en + st)
        elif u'±' in istr:
            tol = float(istr.split(u'±')[1])
            istr = float(istr.split(u'±')[0])
        else:
            tol = 0.5

        return df.trace(istr, tol, twin=twin)
예제 #15
def generate_gaussian():
    """
    Return a trace holding one gaussian peak.

    The time axis has 300 evenly spaced points from 0 to 60; the peak is
    ``gaussian`` evaluated with x=30, w=2 and h=1.
    """
    times = np.linspace(0, 60, 300)
    signal = gaussian(times, x=30, w=2, h=1)
    return Trace(signal, times, ['X'])