def test_pl_tags_checking_methods(self):
    """Exercise the tag / tag-type query interface of a PeakList_Tags fixture."""
    tags = self._createTags()

    # tag-type presence: two typed types plus the untyped (None) bucket
    self.assertTrue(tags.has_tag_type('typed_tag1') and tags.has_tag_type('typed_tag2'))
    self.assertTrue(tags.has_tag_type(None))
    self.assertFalse(tags.has_tag_type('not_exist'))

    # membership checks — by Tag object, bare value, and (value, type) pair
    self.assertTrue(Tag(2, 'typed_tag2') in tags)
    self.assertTrue(tags.has_tag(0) and tags.has_tag('str_tag') and tags.has_tag(u'ustr_tag'))
    self.assertTrue(tags.has_tag(1, 'typed_tag1') and tags.has_tag(2, 'typed_tag2'))
    self.assertTrue(tags.has_tag(Tag(1, 'typed_tag1')))

    # mismatched type / value combinations must not match
    self.assertFalse(tags.has_tag(0, 'typed_tag1'))
    self.assertFalse(tags.has_tag(1) or tags.has_tag(2))
    self.assertFalse(tags.has_tag('not_exist') or tags.has_tag(1, 'wrong_type'))

    # tag_of(): no argument yields all untyped values; a type yields its Tag
    self.assertTupleEqual(tags.tag_of(), (0, 'str_tag', u'ustr_tag'))
    self.assertTrue(tags.tag_of('typed_tag1').value == 1 and tags.tag_of('typed_tag2').value == 2)
    self.assertTrue(tags.tag_of('not_such_type') is None)
def test_pm_exports(self):
    """Check PeakMatrix export paths: masked peaklist extraction, merged
    peaklist conversion, and comprehensive string export.

    Fix: the original compared ``map(lambda x: x.ID, peaklists)`` with
    ``assertListEqual`` — under Python 3 ``map`` returns an iterator, not a
    list, so the assertion would fail on type; a list comprehension works on
    both Python 2 and 3.
    """
    pm = self._createPeakMatrix()
    pm.add_flag('even_flag', [0, 1] * 5)

    # extraction under a plate mask yields only the plate-2 peaklists
    with mask_peakmatrix(pm, plate=1):
        peaklists = pm.extract_peaklists()
    self.assertListEqual([x.ID for x in peaklists],
                         ['sample_2_1', 'sample_2_2', 'QC_2'])
    mzs = [
        [101.0, 501.0, 701.0, 901.0],
        [101.0, 301.0, 501.0, 701.0, 901.0],
        [101.0, 301.0, 701.0, 901.0],
    ]
    self.assertTrue(all(np.allclose(p.mz, m) for p, m in zip(peaklists, mzs)))

    # with the extra flag dropped, merging restores the full m/z set
    pm.drop_flag('even_flag')
    pkl = pm.to_peaklist('merged_pkl')
    self.assertTrue(np.allclose(
        pkl.mz,
        [1.0, 101.0, 301.0, 401.0, 501.0, 601.0, 701.0, 801.0, 901.0]))

    # smoke-test the comprehensive text export with RSD tag grouping
    pm.to_str(comprehensive=True,
              rsd_tags=(Tag('compound_1', 'treatment'),
                        Tag('compound_2', 'treatment'), 'qc'))
def test_tag_creation(self):
    """Tag objects should construct from a plain value, a (value, type)
    pair, and by copying another Tag.

    Fix: ``except Exception, e`` is Python-2-only syntax and inconsistent
    with the ``as e`` form used elsewhere in this file; ``as`` works on
    Python 2.6+ and Python 3.
    """
    try:
        tag1 = Tag('1')         # untyped tag from a string value
        tag2 = Tag(2, 'batch')  # typed tag
        tag3 = Tag(tag2)        # copy construction from another Tag
    except Exception as e:
        self.fail('create tag object failed: ' + str(e))
def test_tag_property(self):
    """Verify the value / ttype properties and the ``typed`` predicate."""
    tag = Tag('value', ttype='type')
    self.assertTrue(tag.typed)

    # reassigning value and clearing the type makes the tag untyped
    tag.value = 1
    tag.ttype = None
    self.assertTrue(tag.value == 1 and tag.ttype is None)
    self.assertFalse(tag.typed)

    # the string 'None' is a reserved type name and must be rejected
    self.assertRaises(KeyError, setattr, tag, 'ttype', 'None')
def test_tag_creation(self):
    """Tags build from plain values, (value, type) pairs, and other Tags."""
    try:
        plain = Tag('1')
        typed = Tag(2, 'batch')
        copied = Tag(typed)
    except Exception as e:
        self.fail('create tag object failed: ' + str(e))

    self.assertTrue(plain.value == '1' and plain.ttype is None)
    self.assertTrue(typed.value == 2 and typed.ttype == 'batch')
    # copy construction preserves both value and type
    self.assertTrue(copied.value == 2 and copied.ttype == 'batch')

    # the string 'None' is a reserved type name and must be rejected
    self.assertRaises(KeyError, lambda: Tag(9, ttype='None'))
class TagTestCase(unittest.TestCase):
    """Unit tests for the Tag class.

    Fix: ``except Exception, e`` is Python-2-only syntax and inconsistent
    with the ``as e`` form used elsewhere in this file; ``as`` works on
    Python 2.6+ and Python 3.
    """

    def test_tag_creation(self):
        """Tag should construct from a value, a (value, ttype) pair, or
        another Tag, and reject unhashable/reserved values and types."""
        try:
            tag1 = Tag('1')         # untyped tag from a string value
            tag2 = Tag(2, 'batch')  # typed tag
            tag3 = Tag(tag2)        # copy construction from another Tag
        except Exception as e:
            self.fail('create tag object failed: ' + str(e))

        self.assertTrue(tag1.value == '1' and tag1.ttype is None)
        self.assertTrue(tag2.value == 2 and tag2.ttype == 'batch')
        self.assertTrue(tag3.value == 2 and tag3.ttype == 'batch')

        # invalid constructions: tuple value, list type, reserved type name
        self.assertRaises(TypeError, lambda: Tag((3, 4, 5)))
        self.assertRaises(TypeError, lambda: Tag(6, ttype=[7, 8]))
        self.assertRaises(KeyError, lambda: Tag(9, ttype='None'))
def test_tag_magic(self):
    """Verify equality, containment, and string conversion dunders of Tag."""
    tag = Tag(1, ttype='type')

    # a typed tag equals only a matching typed tag, never its bare value
    self.assertEqual(tag, Tag(1, 'type'))
    self.assertNotEqual(tag, 1)

    # once untyped, the tag compares equal to its bare value (both directions)
    tag.ttype = None
    self.assertEqual(tag, 1)
    self.assertTrue(1 == tag)
    self.assertFalse(1 != tag)
    self.assertTrue(2 != tag)
    self.assertTrue(tag in (1, 2, 3))
    self.assertTrue(1 in (tag, 2, 3))

    # str(): bare value when untyped, 'type:value' when typed
    self.assertEqual(str(tag), '1')
    tag.ttype = 'type'
    self.assertEqual(str(tag), 'type:1')
def test_pl_tags_adding_methods(self):
    """add_tag must reject duplicates and extend tag_types / tag_values."""
    tags = self._createTags()

    # duplicated typed type raises KeyError; duplicated untyped value, ValueError
    self.assertRaises(KeyError, lambda: tags.add_tag(3, 'typed_tag1'))
    self.assertRaises(ValueError, lambda: tags.add_tag(0))
    self.assertRaises(ValueError, lambda: tags.add_tag('ustr_tag'))

    # valid additions: untyped value, (value, type) pair, and a Tag object
    tags.add_tag(1)
    tags.add_tag(1, 'typed_tag3')
    tags.add_tag(Tag('new_value', 'typed_tag4'))

    expected_types = {None, 'typed_tag1', 'typed_tag2', 'typed_tag3', 'typed_tag4'}
    expected_values = {0, 1, 2, 'new_value', 'str_tag', 'ustr_tag'}
    self.assertEqual(tags.tag_types, expected_types)
    self.assertEqual(tags.tag_values, expected_values)
def _loadpm():
    """Unpack PeakMatrix components from the enclosing pytables handle ``f``.

    Returns a (peaklist_ids, peaklist_tags, attribute_list, mask, flags)
    tuple read from the ``mz`` dataset's attributes and the sibling nodes.
    """
    mz_dset = f.root.mz
    if mz_dset.attrs.data_class != 'PeakMatrix':
        raise IOError('input database is not a valid PeakMatrix')

    attr_names = mz_dset.attrs.attributes
    peaklist_ids = mz_dset.attrs.peaklist_ids
    mask = mz_dset.attrs.mask

    # user attributes named 'peaklist_tags_<idx>', restored in numeric order
    tag_attrs = sorted(
        (x for x in mz_dset.attrs._f_list('user') if x.startswith('peaklist_tags_')),
        key=lambda x: int(x[14:]))
    peaklist_tags = []
    for stored in (mz_dset.attrs[x] for x in tag_attrs):
        # each entry is a (type, value) pair; 'None' marks an untyped tag
        pairs = [x.astype(str) for x in stored]
        peaklist_tags.append(PeakList_Tags(
            *[Tag(_eval(v), None if t == 'None' else t) for t, v in pairs]))

    flags = [(fname, mz_dset.attrs[fname]) for fname in mz_dset.attrs.flag_names]
    attr_list = [(a, f.root[a].read().astype(f.root[a].attrs.dtype))
                 for a in attr_names]
    return peaklist_ids, peaklist_tags, attr_list, mask, flags
def _old_loadpm():
    """Unpack PeakMatrix components from the enclosing legacy h5py handle ``f``.

    Returns a (peaklist_ids, peaklist_tags, attribute_list, mask, flags)
    tuple read from the ``mz`` dataset's attributes and sibling datasets.
    """
    mz_dset = f['mz']
    if _convByteStr(mz_dset.attrs.get('class', '')) != 'PeakMatrix':
        raise IOError('input database is not a valid PeakMatrix')

    attr_names = mz_dset.attrs['attributes'].astype(str)
    peaklist_ids = mz_dset.attrs['peaklist_ids'].astype(str)
    mask = mz_dset.attrs['mask']

    # attributes named 'peaklist_tags_<idx>', restored in numeric order
    tag_attrs = sorted(
        (x for x in mz_dset.attrs.keys() if x.startswith('peaklist_tags_')),
        key=lambda x: int(x[14:]))
    peaklist_tags = []
    for stored in (mz_dset.attrs[x] for x in tag_attrs):
        # each entry is a (type, value) pair; 'None' marks an untyped tag
        pairs = [x.astype(str) for x in stored]
        peaklist_tags.append(PeakList_Tags(
            *[Tag(_eval(v), None if t == 'None' else t) for t, v in pairs]))

    flags = [(fn, mz_dset.attrs[fn])
             for fn in mz_dset.attrs['flag_names'].astype(str)]
    attr_list = [(a, np.array(f[a]).astype(f[a].attrs['dtype']))
                 for a in attr_names]
    return peaklist_ids, peaklist_tags, attr_list, mask, flags
def load_peak_matrix_from_hdf5(filename):
    """
    Loads a peak matrix from a HDF5 file.

    :param filename: path to an existing HDF5 file
    :rtype: PeakMatrix object
    """
    # validate that the path exists and is a genuine HDF5 container
    if not os.path.isfile(filename):
        raise IOError('HDF5 database [%s] does not exist' % filename)
    if not h5py.is_hdf5(filename):
        raise IOError('input file [%s] is not a valid HDF5 database' % filename)
    f = h5py.File(filename, 'r')
    # the 'mz' dataset anchors all PeakMatrix metadata via its attributes
    if 'mz' not in f:
        raise IOError('input database missing crucial attribute [mz]')
    dset = f['mz']
    if dset.attrs.get('class', '') != 'PeakMatrix':
        raise IOError('input database is not a valid PeakMatrix')
    attl = dset.attrs['attributes']       # names of per-attribute datasets
    pids = dset.attrs['peaklist_ids']
    mask = dset.attrs['mask']
    # tag attributes are named 'peaklist_tags_<idx>'; restore in numeric order
    tatt = sorted(filter(lambda x: x.startswith('peaklist_tags_'), dset.attrs.keys()), key=lambda x: int(x[14:]))
    # each stored tag is a (type, value) pair; 'None' marks an untyped tag
    ptgs = [
        PeakList_Tags(
            *[Tag(_eval(v), None if t == 'None' else t) for t, v in tags])
        for tags in map(lambda x: dset.attrs[x], tatt)
    ]
    flgs = [(fn, dset.attrs[fn]) for fn in dset.attrs['flag_names']]
    # flag arrays may be stored packed: an unsigned array prefixed with
    # _BOOL_HEADERS holds packed booleans; a byte-string array ending in
    # '\xFF' holds packed metadata — unpack those, pass others through.
    # NOTE(review): the '\xFF' sentinel comparison presumably relies on
    # Python 2 str semantics — confirm before porting to Python 3.
    flgs = [(fn, _unpackBool(fv) if fv.dtype.kind == 'u' and np.all(fv[:len(_BOOL_HEADERS)] == _BOOL_HEADERS) else \
                 _unpackMeta(fv) if fv.dtype.kind == 'S' and fv[-1] == '\xFF' else fv) for fn,fv in flgs]
    # per-attribute datasets are cast back to their recorded dtypes
    alst = [(attr, np.array(f[attr]).astype(f[attr].attrs['dtype'])) for attr in attl]
    pm = PeakMatrix(pids, ptgs, alst)
    pm.mask = mask
    # flags are re-added over the full (unflagged) matrix
    for fn, fv in flgs:
        pm.add_flag(fn, fv, flagged_only=False)
    return pm
def _createPeaklists():
    """Build six random PeakList fixtures — two plates, each with two tagged
    samples and one QC — carrying 'snr', 'quad_flag', and 'lab' attributes."""
    _mzs = lambda: sorted(np.random.uniform(100, 1200, size=100))
    _ints = lambda: np.abs(np.random.normal(100, 10, size=100))

    ids = ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2')
    pkls = [PeakList(i, _mzs(), _ints(), mz_range=(100, 1200)) for i in ids]

    # one tag set per peaklist, in the same order as ids
    tag_sets = (
        ('sample', Tag('compound_1', 'treatment'), Tag('1hr', 'time_point'), Tag(1, 'plate')),
        ('sample', Tag('compound_1', 'treatment'), Tag('6hr', 'time_point'), Tag(1, 'plate')),
        ('qc', Tag(1, 'plate')),
        ('sample', Tag('compound_2', 'treatment'), Tag('1hr', 'time_point'), Tag(2, 'plate')),
        ('sample', Tag('compound_2', 'treatment'), Tag('6hr', 'time_point'), Tag(2, 'plate')),
        ('qc', Tag(2, 'plate')),
    )
    for pkl, tset in zip(pkls, tag_sets):
        for t in tset:
            pkl.tags.add_tag(t)

    # shared attributes: random snr, a 3-of-4 quality flag, and an
    # unflagged letter label
    for pkl in pkls:
        pkl.add_attribute('snr', np.random.uniform(300, 400, size=100))
    for pkl in pkls:
        pkl.add_attribute('quad_flag', [0, 1, 1, 1] * 25, is_flag=True)
    for pkl in pkls:
        pkl.add_attribute('lab', [chr(i % 26 + 97) for i in range(100)], flagged_only=False)
    return pkls
def test_peak_matrix_portal(self):
    """Round-trip an aligned PeakMatrix through the txt portal and verify
    shapes, flags, intensities, and peaklist tags survive intact."""
    _mzs = lambda: sorted(np.random.uniform(100, 1200, size=100))
    _ints = lambda: np.abs(np.random.normal(100, 10, size=100))

    ids = ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2')
    pkls = [PeakList(i, _mzs(), _ints()) for i in ids]

    # one tag set per peaklist, in the same order as ids
    tag_sets = (
        ('sample', Tag('compound_1', 'treatment'), Tag('1hr', 'time_point'), Tag(1, 'plate')),
        ('sample', Tag('compound_1', 'treatment'), Tag('6hr', 'time_point'), Tag(1, 'plate')),
        ('qc', Tag(1, 'plate')),
        ('sample', Tag('compound_2', 'treatment'), Tag('1hr', 'time_point'), Tag(2, 'plate')),
        ('sample', Tag('compound_2', 'treatment'), Tag('6hr', 'time_point'), Tag(2, 'plate')),
        ('qc', Tag(2, 'plate')),
    )
    for pkl, tset in zip(pkls, tag_sets):
        for t in tset:
            pkl.tags.add_tag(t)

    pm = align_peaks(pkls, ppm=2e+4, block_size=10, ncpus=2)
    # alternating flag patterns trimmed to the matrix width
    pm.add_flag('odd_flag', ([0, 1] * int(pm.shape[1] / 2 + 1))[:pm.shape[1]])
    pm.add_flag('qua_flag', ([0, 0, 1, 1] * int(pm.shape[1] / 4 + 1))[:pm.shape[1]])

    save_peak_matrix_as_txt(pm, '.test_peak_matrix.txt',
                            samples_in_rows=True, comprehensive=True,
                            rsd_tags=('qc', Tag('compound_1', 'treatment'),
                                      Tag('compound_2', 'treatment')))
    npm = load_peak_matrix_from_txt('.test_peak_matrix.txt',
                                    samples_in_rows=True, comprehensive='auto')

    self.assertEqual(pm.shape, npm.shape)
    self.assertEqual(pm.full_shape, npm.full_shape)
    self.assertTrue(np.all(pm.flags == npm.flags))
    self.assertTrue(np.all(pm.flag_names == npm.flag_names))
    self.assertTrue(np.allclose(pm.intensity_matrix, npm.intensity_matrix))
    self.assertEqual(pm.peaklist_tag_types, npm.peaklist_tag_types)
    self.assertEqual(pm.peaklist_tag_values, npm.peaklist_tag_values)
def _createTags():
    """Build the shared PeakList_Tags fixture: three untyped tags plus two
    typed tags ('typed_tag1' via a Tag object, 'typed_tag2' via keyword)."""
    untyped = (0, 'str_tag', 'ustr_tag')
    return PeakList_Tags(*(untyped + (Tag(1, 'typed_tag1'),)), typed_tag2=2)