def info(self):
    """
    Return the parent's info mapping extended with values read from the file.

    Adds:
      * 'name'     -- the base name of ``self.filename`` without extension.
      * 'd18o_std' -- present only if the UTF-16-LE label 'd 18O/16O' is
                      found; the little-endian double located 68 bytes past
                      the label's offset, stored as a string.
      * 'd13c_std' -- same, for the label 'd 13C/12C'.

    The parent's ``info`` is read as an attribute (not called) and the
    mapping it returns is updated in place and returned.
    """
    meta = super(ThermoDXF, self).info
    # TODO: a disabled try/except that also stored the bare file name used
    # to live here; it was noted as crashing in python 3 (cause unclear).
    base_name = os.path.basename(self.filename)
    meta['name'] = os.path.splitext(base_name)[0]

    # (info key, label searched for in the file)
    standards = (
        ('d18o_std', 'd 18O/16O'),
        ('d13c_std', 'd 13C/12C'),
    )
    with open(self.filename, 'rb') as f:
        # locate every label first, then read the values, in that order
        located = [(key, find_offset(f, label.encode('utf_16_le')))
                   for key, label in standards]
        for key, offset in located:
            if offset is None:
                continue
            f.seek(offset + 68)
            (value,) = struct.unpack('<d', f.read(8))
            meta[key] = str(value)
    return meta
def data(self):
    """
    Read the raw trace records from the file and return a Chromatogram.

    The trace name stored after the b'CRawData' marker selects which ions
    the value columns correspond to; the records themselves follow the
    b'CEvalGCData' marker as (float32 index, len(ions) x float64 values).

    Returns:
        Chromatogram built from (values, index in minutes, ions), or None
        if either marker cannot be found in the file.
    """
    # ions measured for each known trace name
    # TODO: check the 'SO2,SO-SO2 Ext,SO' ions are in the right order
    known_ions = {
        'CO2': [44, 45, 46],
        'CO': [28, 29, 30],
        'SO2,SO-SO2 Ext,SO': [48, 49, 50, 64, 65, 66],
    }

    # the context manager closes the file even if parsing raises
    with open(self.filename, 'rb') as f:
        raw_off = find_offset(f, b'CRawData')
        if raw_off is None:
            return None
        f.seek(raw_off + 9)

        # one length byte (in characters), then a UTF-16-LE string
        strlen = 2 * struct.unpack('<B', f.read(1))[0]
        tname = f.read(strlen).decode('utf_16_le')
        # TODO: should save the tname somewhere for future reference
        ions = known_ions.get(tname, [1, 2, 3])

        eval_off = find_offset(f, b'CEvalGCData')
        if eval_off is None:
            return None
        f.seek(eval_off + 4)

        # bytes until the end converted to # of records
        nbytes = struct.unpack('<I', f.read(4))[0]
        nscans = int(nbytes / (4.0 + len(ions) * 8.0))

        dtype = np.dtype([('index', '<f4'), ('values', '<f8', len(ions))])
        data = np.fromfile(f, dtype=dtype, count=nscans)

    # convert time to minutes
    data['index'] /= 60.
    return Chromatogram(data['values'], data['index'], ions)
def data(self):
    """
    Read the scan records from the file and return a Chromatogram.

    Scans are read from the section located via the 'HapsScan' marker.
    Whenever a record's segment id changes, the next list of m/z values is
    pulled from ``self._ions`` and any new m/z is appended to the overall
    column list. Rows recorded before a later m/z was added are shorter and
    end up zero-padded on the right in the final array.

    Returns:
        Chromatogram built from (abundances, times in minutes, m/z list),
        or None if the 'HapsScan' marker is not found.
    """
    # TODO: handle skip mass ranges
    with open(self.filename, 'rb') as f:
        scan_off = find_offset(f, b'\xff' * 4 + b'HapsScan')
        if scan_off is None:
            return

        # the section length is stored 20 bytes ahead of the marker
        f.seek(scan_off - 20)
        section_end = scan_off + struct.unpack('<I', f.read(4))[0] + 55
        f.seek(scan_off + 56)

        scan_times = []
        rows = []
        all_mzs = []
        active_seg = None
        segments = self._ions(f)

        while f.tell() <= section_end:
            # record info looks like a standard format
            header = struct.unpack('<IiHHHH', f.read(16))
            t, recs, seg = header[1], header[3], header[5]

            if seg != active_seg:
                # switched segments: fetch that segment's m/z list
                try:
                    seg_mzs = next(segments)
                except StopIteration:
                    break
                all_mzs += set(seg_mzs).difference(all_mzs)
                columns = [all_mzs.index(mz) for mz in seg_mzs]
                active_seg = seg

            scan_times.append(t)

            # abundances for this scan, one float32 per record
            intensities = struct.unpack('<%df' % recs, f.read(4 * recs))

            # spread them over the columns known so far
            row = np.zeros(len(all_mzs))
            row[columns] = intensities
            rows.append(row)

        # convert the time from milliseconds to minutes
        minutes = np.array(scan_times, dtype=float) / 60000

        # rows may differ in length; left-align them in a zero-filled table
        table = np.zeros((len(minutes), len(all_mzs)))
        for i, row in enumerate(rows):
            table[i, :len(row)] = row
        return Chromatogram(table, minutes, all_mzs)
def _ions(self, f):
    """
    Generator yielding, one time segment at a time, the list of mzs
    measured during that segment.

    The generator shares the file object ``f`` with its caller: before
    each yield it restores the position the caller was at, and on
    resumption it records the caller's new position and jumps back to
    where it left off. After the last segment the caller's position is
    restored once more.
    """
    caller_pos = f.tell()
    marker = b'\xff' * 4
    search_off = find_offset(f, marker + b'HapsSearch')

    # actual end of prev section is 34 bytes before, but assume 1 rec
    f.seek(search_off - 62)
    # step backwards, 4 bytes at a time, to find the FFFFFFFF header
    while True:
        f.seek(f.tell() - 8)
        if f.read(4) == marker:
            break

    f.seek(f.tell() + 64)
    (segment_count,) = struct.unpack('<I', f.read(4))

    for _ in range(segment_count):
        # first 32 bytes are segment name, rest are something else?
        f.seek(f.tell() + 96)
        (ion_count,) = struct.unpack('<I', f.read(4))

        segment_mzs = []
        for _ in range(ion_count):
            # TODO: check that itype is actually a SIM/full scan switch
            fields = struct.unpack('<8I', f.read(32))
            low, high, itype = fields[0], fields[1], fields[6]
            if itype == 0:
                # SIM
                segment_mzs.append(low / 100.)
            else:
                # full scan
                # TODO: this might be a little hacky?
                # ideally we would need to know n for this, e.g.:
                # np.linspace(low / 100, high / 100, n).tolist()
                segment_mzs.extend(
                    np.arange(low / 100., high / 100. + 1, 1).tolist())

        # hand the file back at the caller's position, then pick up
        # again from our own position once the caller resumes us
        generator_pos = f.tell()
        f.seek(caller_pos)
        yield segment_mzs
        caller_pos = f.tell()
        f.seek(generator_pos)

    f.seek(caller_pos)
def _ions(self, f):
    """
    Generator yielding, one time segment at a time, the list of mzs
    measured during that segment.

    The generator shares the file object ``f`` with its caller: before
    each yield it restores the position the caller was at, and on
    resumption it records the caller's new position and jumps back to
    where it left off. After the last segment the caller's position is
    restored once more.
    """
    caller_pos = f.tell()
    marker = b'\xff' * 4
    search_off = find_offset(f, marker + b'HapsSearch')

    # actual end of prev section is 34 bytes before, but assume 1 rec
    f.seek(search_off - 62)
    # step backwards, 4 bytes at a time, to find the FFFFFFFF header
    while True:
        f.seek(f.tell() - 8)
        if f.read(4) == marker:
            break

    f.seek(f.tell() + 64)
    (segment_count,) = struct.unpack('<I', f.read(4))

    for _ in range(segment_count):
        # first 32 bytes are segment name, rest are something else?
        f.seek(f.tell() + 96)
        (ion_count,) = struct.unpack('<I', f.read(4))

        segment_mzs = []
        for _ in range(ion_count):
            # TODO: check that itype is actually a SIM/full scan switch
            fields = struct.unpack('<8I', f.read(32))
            low, high, itype = fields[0], fields[1], fields[6]
            if itype == 0:
                # SIM
                segment_mzs.append(low / 100.)
            else:
                # full scan
                # TODO: this might be a little hacky?
                # ideally we would need to know n for this, e.g.:
                # np.linspace(low / 100, high / 100, n).tolist()
                segment_mzs.extend(
                    np.arange(low / 100., high / 100. + 1, 1).tolist())

        # hand the file back at the caller's position, then pick up
        # again from our own position once the caller resumes us
        generator_pos = f.tell()
        f.seek(caller_pos)
        yield segment_mzs
        caller_pos = f.tell()
        f.seek(generator_pos)

    f.seek(caller_pos)