def testAggregateOperation():
    """Check group-wise aggregation on an empty and on a small table."""
    # Aggregating an empty int column must give an empty result table.
    empty = ms.toTable("a", [], type_=int)
    empty_result = empty.aggregate(empty.a.mean, "an", groupBy="a")
    assert len(empty_result) == 0
    assert empty_result.getColTypes() == [int, object], empty_result.getColTypes()

    table = ms.toTable("a", [1, 2, 2, 3, 3, 3, 3])
    table.addColumn("b", [None, None, 2, 0, 3, 4, 9])
    table._print()

    # Every step adds one aggregated column. The expression is built from
    # the current table, because the result of aggregate() replaces it.
    aggregations = [
        ("sum", lambda tab: tab.b.sum),
        ("hasNone", lambda tab: tab.b.hasNone),
        ("countNone", lambda tab: tab.b.countNone),
        ("count", lambda tab: tab.b.count),
        ("countNone2", lambda tab: tab.b.count - tab.b.countNotNone),
        ("var", lambda tab: tab.b.std * tab.b.std),
        ("mean", lambda tab: tab.b.mean),
    ]
    for new_name, build_expression in aggregations:
        table = table.aggregate(build_expression(table), new_name, groupBy="a")

    table._print(w=8)
    print(table.sum.values)

    # Expected values per row; rows of the same group "a" share one value.
    assert table.sum.values == [None, 2, 2, 16, 16, 16, 16], table.sum.values
    assert table.var.values == [None, 0, 0, 10.5, 10.5, 10.5, 10.5]
    assert table.mean.values == [None, 2, 2, 4, 4, 4, 4]
    assert table.hasNone.values == [1, 1, 1, 0, 0, 0, 0]
    assert table.countNone.values == [1, 1, 1, 0, 0, 0, 0]
    assert table.countNone.values == table.countNone2.values
    assert table.count.values == [1, 2, 2, 4, 4, 4, 4]
def testNumpyTypeCoercion():
    """Check that numpy scalars end up in columns typed as plain int / float."""
    import numpy as np

    single = ms.toTable("a", [np.int32(1)])
    single.info()
    assert single.getColTypes() == [int], single.getColTypes()

    table = ms.toTable("a", [None, np.int32(1)])
    table.info()
    assert table.getColTypes() == [int], table.getColTypes()

    table.addColumn("b", np.int32(1))
    assert table.getColTypes() == [int, int], table.getColTypes()

    # Column "b" is replaced alternately by a scalar and by a list with a
    # None entry; the second item is the type expected for "b" afterwards.
    replacements = [
        ([None, np.int32(1)], int),
        (np.int64(1), int),
        ([None, np.int64(1)], int),
        (np.float32(1.0), float),
        ([None, np.float32(1.0)], float),
        (np.float64(2.0), float),
        ([None, np.float64(2.0)], float),
    ]
    for new_values, expected_type in replacements:
        table.replaceColumn("b", new_values)
        assert table.getColTypes() == [int, expected_type], table.getColTypes()
def testUniqeNotNone():
    """Check uniqueNotNone() and several calls that are expected to fail.

    NOTE(review): ExceptionTester is defined elsewhere; presumably it
    asserts that its body raises - confirm.
    """
    # A single distinct non-None value is returned in both cases.
    with_none = ms.toTable("a", [1, 1, None])
    assert with_none.a.uniqueNotNone() == 1

    table = ms.toTable("a", [1, 1, 1])
    assert table.a.uniqueNotNone() == 1

    # column holding only None
    table.addColumn("b", None)
    table._print()
    with ExceptionTester():
        table.b.uniqueNotNone()

    # columns with more than one distinct non-None value
    table.addColumn("c", [None, 1, 2])
    with ExceptionTester():
        table.c.uniqueNotNone()

    table.addColumn("d", [1, 2, 2])
    with ExceptionTester():
        table.d.uniqueNotNone()

    # adding a column under the already used name "d"
    with ExceptionTester():
        table.addColumn("d", [2, 3, 4])
    with ExceptionTester():
        table.addConstantColumn("d", 3)

    # aggregating with an expression that belongs to another table
    other = ms.toTable("x", [])
    with ExceptionTester():
        table.aggregate(other.x.mean, "neu")
def testUniqeNotNone():
    """Exercise uniqueNotNone() on valid columns and on failing inputs.

    NOTE(review): ExceptionTester comes from outside this block;
    presumably it checks that the wrapped statement raises - confirm.
    """
    # valid inputs: exactly one distinct value besides None
    for values in ([1, 1, None], [1, 1, 1]):
        tab = ms.toTable("a", values)
        assert tab.a.uniqueNotNone() == 1

    # 'tab' is now the table built from [1, 1, 1]
    tab.addColumn("b", None)
    tab._print()
    with ExceptionTester():
        tab.b.uniqueNotNone()

    tab.addColumn("c", [None, 1, 2])
    with ExceptionTester():
        tab.c.uniqueNotNone()

    tab.addColumn("d", [1, 2, 2])
    with ExceptionTester():
        tab.d.uniqueNotNone()

    # column name "d" is taken already
    with ExceptionTester():
        tab.addColumn("d", [2, 3, 4])
    with ExceptionTester():
        tab.addConstantColumn("d", 3)

    # expression built from a different table
    foreign = ms.toTable("x", [])
    with ExceptionTester():
        tab.aggregate(foreign.x.mean, "neu")
def testSpecialFormats():
    """Check the default formatters chosen for mass and retention time names."""
    values = [1.0, 2, None]

    # column names which are formatted with five decimal digits
    for mass_name in ["mz", "mzmin", "mzmax", "mw", "m0"]:
        formatter = ms.toTable(mass_name, values).colFormatters[0]
        assert formatter(1) == "1.00000", formatter(1)

    # column names which are formatted in minutes: 120 is shown as "2.00m"
    for time_name in ["rt", "rtmin", "rtmax"]:
        formatter = ms.toTable(time_name, values).colFormatters[0]
        assert formatter(120) == "2.00m"
def test_small():
    """Run ms.matchMetlin on small m0 tables and check the size of the result."""
    # two masses, one adduct
    two_masses = ms.toTable("m0", [195.0877, 194.07904])
    hits = ms.matchMetlin(two_masses, "m0", ["M"], 30)
    assert len(hits) == 23
    distinct_formulas = set(hits.formula__0.values)
    assert len(distinct_formulas) == 5

    # one mass, two adducts: the same counts are expected
    one_mass = ms.toTable("m0", [195.0877])
    hits = ms.matchMetlin(one_mass, "m0", ["M", "M+H"], 30)
    assert len(hits) == 23
    distinct_formulas = set(hits.formula__0.values)
    assert len(distinct_formulas) == 5
def test_all_comps():
    """Compare join results computed before and after calling b.sortBy("b").

    NOTE(review): the expression passed to _test is only used for the
    printed joins; the compared rows always come from "<=" / ">=" joins.
    """
    a = ms.toTable("a", [3, 2, 1])
    b = ms.toTable("b", [1, 2, 3])  # must be sorted for tests below !

    def _test(e, a=a, b=b):
        def joined_rows():
            # the same four joins are evaluated before and after sorting
            return (
                a.join(b, a.a <= b.b).rows,
                a.join(b, b.b >= a.a).rows,
                b.join(a, a.a <= b.b).rows,
                b.join(a, b.b >= a.a).rows,
            )

        # first pass prints a.join(b, e), second pass prints b.join(a, e)
        for left, right in ((a, b), (b, a)):
            left.join(right, e).print_()
            t1, t2, t3, t4 = joined_rows()

            b.sortBy("b")
            left.join(right, e).print_()
            s1, s2, s3, s4 = joined_rows()

            # mirrored expressions must agree ...
            assert t1 == t2
            assert t3 == t4
            # ... and sorting must not change any result
            assert t1 == s1
            assert t2 == s2
            assert t3 == s3
            assert t4 == s4

    _test(a.a <= b.b)
    _test(a.a < b.b)
    _test(a.a >= b.b)
    _test(a.a > b.b)
    _test(a.a == b.b)
    _test(a.a != b.b)
def test_all_comps():
    """Check that joins give the same rows before and after b.sortBy("b").

    NOTE(review): argument e of _test only selects which join is printed;
    the asserted joins are always built from "<=" and ">=".
    """
    a = ms.toTable("a", [3, 2, 1])
    b = ms.toTable("b", [1, 2, 3])  # must be sorted for tests below !

    def _test(e, a=a, b=b):
        def snapshot():
            # rows of the four joins under comparison, in a fixed order
            a_b_le = a.join(b, a.a <= b.b).rows
            a_b_ge = a.join(b, b.b >= a.a).rows
            b_a_le = b.join(a, a.a <= b.b).rows
            b_a_ge = b.join(a, b.b >= a.a).rows
            return [a_b_le, a_b_ge, b_a_le, b_a_ge]

        def run_pass(outer, inner):
            outer.join(inner, e).print_()
            before = snapshot()
            b.sortBy("b")
            outer.join(inner, e).print_()
            after = snapshot()

            # "x <= y" and the mirrored "y >= x" must give the same rows
            assert before[0] == before[1]
            assert before[2] == before[3]
            # sorting b must leave every join result unchanged
            for rows_before, rows_after in zip(before, after):
                assert rows_before == rows_after

        run_pass(a, b)
        run_pass(b, a)

    _test(a.a <= b.b)
    _test(a.a < b.b)
    _test(a.a >= b.b)
    _test(a.a > b.b)
    _test(a.a == b.b)
    _test(a.a != b.b)
def testUniqueValue():
    """Check uniqueValue() on ints, on rounded floats and on equal dicts."""
    ints = ms.toTable("a", [1, 1, 1])
    assert ints.a.uniqueValue() == 1

    # the three floats agree when compared up to one digit
    floats = ms.toTable("a", [1.2, 1.21, 1.19])
    assert floats.a.uniqueValue(up_to_digits=1) == 1.2

    # two distinct but equal dict objects; the result is only printed
    first = {"b": 3}
    second = {"b": 3}
    dicts = ms.toTable("a", [first, second])
    print(dicts.a.uniqueValue())
def test_small():
    """Check result sizes of ms.matchMetlin for two small input tables."""
    # (m0 values, adducts); both inputs are expected to give the same counts
    cases = [
        ([195.0877, 194.07904], ["M"]),
        ([195.0877], ["M", "M+H"]),
    ]
    for m0_values, adducts in cases:
        table = ms.toTable("m0", m0_values)
        matched = ms.matchMetlin(table, "m0", adducts, 30)
        assert len(matched) == 23
        assert len(set(matched.formula__0.values)) == 5
def test_numpy_comparison():
    """Check filter expressions on a column whose single cell is a numpy array."""
    vector = np.array((1, 2, 3))
    t = ms.toTable("a", [vector])

    def n_rows(expression):
        # number of rows left after filtering with the expression
        return len(t.filter(expression))

    # column compared with itself
    assert n_rows(t.a == t.a) == len(t)
    assert n_rows(t.a <= t.a) == len(t)
    assert n_rows(t.a >= t.a) == len(t)
    assert n_rows(t.a != t.a) == 0
    assert n_rows(t.a < t.a) == 0
    assert n_rows(t.a > t.a) == 0

    # column compared with a scalar
    assert n_rows(t.a == 3) == 0
    assert n_rows(t.a <= 3) == 1
    assert n_rows(t.a >= 1) == 1
    assert n_rows(t.a != 3) == 0
    assert n_rows(t.a < 4) == 1
    assert n_rows(t.a > 0) == 1
def testCompress():
    """Check that compressPeakMaps lets equal peak maps share one object."""
    empty = ms.toTable("a", [])
    from libms.DataStructures.Table import compressPeakMaps
    from libms.DataStructures.MSTypes import PeakMap, Spectrum
    import numpy

    # must work on a table without any rows
    compressPeakMaps(empty)

    # two separate peak maps built from identical data
    first_spec = Spectrum(numpy.arange(12).reshape(-1, 2), 1.0, 1, "+")
    first_map = PeakMap([first_spec])
    second_spec = Spectrum(numpy.arange(12).reshape(-1, 2), 1.0, 1, "+")
    second_map = PeakMap([second_spec])

    table = ms.toTable("pm", [first_map, second_map])
    ids_before = set(id(pm) for pm in table.pm.values)
    assert len(ids_before) == 2

    compressPeakMaps(table)
    ids_after = set(id(pm) for pm in table.pm.values)
    assert len(ids_after) == 1
def testSplitBy():
    """Check that splitBy followed by mergeTables restores the column values."""
    table = ms.toTable("a", [1, 1, 3, 4])
    table.addColumn("b", [1, 1, 3, 3])
    table.addColumn("c", [1, 2, 1, 4])
    table._print()

    # split by one column: "a" has three distinct values
    parts = table.splitBy("a")
    assert len(parts) == 3
    merged = ms.mergeTables(parts)
    assert len(merged) == len(table)
    parts[0]._print()
    assert merged.a.values == table.a.values
    assert merged.b.values == table.b.values
    assert merged.c.values == table.c.values
    # check if input tables are not altered
    for part in parts:
        assert part.getColNames() == ["a", "b", "c"]

    # split by two columns: four distinct (a, c) pairs
    parts = table.splitBy("a", "c")
    assert len(parts) == 4
    merged = ms.mergeTables(parts)
    assert merged.a.values == table.a.values
    assert merged.b.values == table.b.values
    assert merged.c.values == table.c.values
    # check if input tables are not altered
    for part in parts:
        assert part.getColNames() == ["a", "b", "c"]
def test_large():
    """Run ms.matchMetlin on 500 m0 values and check the minimal hit count.

    The start time recorded by the former version was never read, so that
    assignment and the local import of ``time`` are dropped.
    """
    mz_values = [185.0877 + i for i in range(500)]
    t = ms.toTable("m0", mz_values)
    tn = ms.matchMetlin(t, "m0", ["M"], 30)
    # lower bound only; the actual count is reported on failure
    assert len(tn) >= 2709, len(tn)
def testApplyUfun():
    """Check that applying numpy.log to a column gives a float column."""
    import numpy

    table = ms.toTable("a", [None, 2.0, 3])
    print(numpy.log)
    logarithms = table.a.apply(numpy.log)
    table.addColumn("log", logarithms)
    assert table.getColTypes() == [float, float], table.getColTypes()
def testRenamePostfixes():
    """Check renaming of the postfix which a self-join appends to column names."""
    base = ms.toTable("a", [1, 2])
    base.addColumn("b", base.a + 1)

    # the self-join marks the columns of the right hand table with "__0"
    joined = base.join(base)
    assert joined.getColNames() == ["a", "b", "a__0", "b__0"], joined.getColNames()

    joined.renamePostfixes(__0="_new")
    assert joined.getColNames() == ["a", "b", "a_new", "b_new"], joined.getColNames()
def test_numpy_comparison():
    """Filter a one-row table with a numpy array cell by various comparisons."""
    array_value = np.array((1, 2, 3))
    t = ms.toTable("a", [array_value])

    # Each entry builds its expression only when it is checked and names
    # the expected number of remaining rows.
    cases = [
        # column against itself
        (lambda: t.a == t.a, len(t)),
        (lambda: t.a <= t.a, len(t)),
        (lambda: t.a >= t.a, len(t)),
        (lambda: t.a != t.a, 0),
        (lambda: t.a < t.a, 0),
        (lambda: t.a > t.a, 0),
        # column against a scalar
        (lambda: t.a == 3, 0),
        (lambda: t.a <= 3, 1),
        (lambda: t.a >= 1, 1),
        (lambda: t.a != 3, 0),
        (lambda: t.a < 4, 1),
        (lambda: t.a > 0, 1),
    ]
    for build_expression, expected_rows in cases:
        filtered = t.filter(build_expression())
        assert len(filtered) == expected_rows
def split_srm_peakmap_by_precursors_to_tables(peakmap, delta_mz):
    """
    Splits a srm/mrm peakmap into one table per entry of
    peakmap.splitLevelN(2) and returns these tables as a list.

    Every table has one row per distinct fragment m/z value and the columns

       - precursor (mz value, the same in all rows of a table),
       - fragment  (mz value, in ascending order)
       - peakmap   (the MS2 peakmap which belongs to the precursor)
       - mzmin, mzmax (calculated as fragment +/- delta_mz).
    """
    tables = []
    for precursor_mz, level2_map in peakmap.splitLevelN(2):
        # distinct fragment m/z values of the level 2 peaks, ascending
        fragment_mzs = sorted(set(mz for mz, _ in level2_map.msNPeaks(2)))

        # one row per fragment, all rows share precursor and peakmap
        transitions = ms.toTable("precursor", [precursor_mz] * len(fragment_mzs))
        transitions.addColumn("fragment", fragment_mzs)
        transitions.addColumn("peakmap", level2_map)
        transitions.addColumn("mzmin", transitions.fragment - delta_mz)
        transitions.addColumn("mzmax", transitions.fragment + delta_mz)
        tables.append(transitions)
    return tables
def testCompress():
    """Check compressPeakMaps: equal peak maps become one shared object."""
    t = ms.toTable("a", [])
    from libms.DataStructures.Table import compressPeakMaps
    from libms.DataStructures.MSTypes import PeakMap, Spectrum
    import numpy

    def distinct_objects(values):
        # number of different object identities among the values
        return len(set(map(id, values)))

    # a table without rows must be accepted
    compressPeakMaps(t)

    # build two peak maps with the same content, one after the other
    spectrum = Spectrum(numpy.arange(12).reshape(-1, 2), 1.0, 1, "+")
    pm = PeakMap([spectrum])
    spectrum = Spectrum(numpy.arange(12).reshape(-1, 2), 1.0, 1, "+")
    pm2 = PeakMap([spectrum])

    t = ms.toTable("pm", [pm, pm2])
    assert distinct_objects(t.pm.values) == 2
    compressPeakMaps(t)
    assert distinct_objects(t.pm.values) == 1
def testFullJoin():
    """Check that a join without an expression pairs every row with every row."""
    table = ms.toTable("a", [None, 2, 3])
    product = table.join(table)
    product.print_()

    # 3 x 3 rows: left values in blocks of three, right values cycling
    assert len(product) == 9
    assert product.a.values == [None, None, None, 2, 2, 2, 3, 3, 3]
    assert product.a__0.values == table.a.values * 3
def testSplitBy():
    """Split a table by one and by two columns and merge the parts again."""
    t = ms.toTable("a", [1, 1, 3, 4])
    t.addColumn("b", [1, 1, 3, 3])
    t.addColumn("c", [1, 2, 1, 4])
    t._print()

    def assert_same_values(merged):
        # merged parts must reproduce the values of all three columns
        assert merged.a.values == t.a.values
        assert merged.b.values == t.b.values
        assert merged.c.values == t.c.values

    def assert_unaltered(tables):
        # check if input tables are not altered
        for table in tables:
            assert table.getColNames() == ["a", "b", "c"]

    # split by a single column
    subts = t.splitBy("a")
    assert len(subts) == 3
    res = ms.mergeTables(subts)
    assert len(res) == len(t)
    subts[0]._print()
    assert_same_values(res)
    assert_unaltered(subts)

    # split by a pair of columns
    subts = t.splitBy("a", "c")
    assert len(subts) == 4
    res = ms.mergeTables(subts)
    assert_same_values(res)
    assert_unaltered(subts)
def testUniqeRows():
    """Check that uniqueRows() removes duplicated rows and keeps both columns."""
    table = ms.toTable("a", [1, 1, 2, 2, 3, 3])
    table.addColumn("b", [1, 1, 1, 2, 3, 3])

    # rows (1, 1) and (3, 3) appear twice in the input
    unique = table.uniqueRows()
    assert unique.a.values == [1, 2, 2, 3]
    assert unique.b.values == [1, 1, 2, 3]
    assert len(unique.getColNames()) == 2
    unique.info()
def testNonBoolean():
    """Check that applying ``not`` to a column expression raises.

    Raises:
        Exception: if ``not t.a`` is evaluated without an error.
    """
    t = ms.toTable("a", [])
    try:
        not t.a
    except Exception:
        # Expected outcome. "except Exception" instead of a bare "except"
        # keeps KeyboardInterrupt and SystemExit from being swallowed.
        pass
    else:
        raise Exception("'not' applied to a column expression did not raise")
def testApply():
    """Check that a column created with apply() can be used for splitBy."""
    table = ms.toTable("a", [0.01, 0.1, 0.1, 0.015, 0.2, 1.0])
    bins = table.a.apply(lambda v: int(v * 100))
    table.addColumn("a_bin", bins)

    # this returned numpy-ints due to a fault in addColumn and so
    # we got 6 tables instead of 4:
    parts = table.splitBy("a_bin")
    assert len(parts) == 4
def testApply():
    """Bin float values with apply() and split the table by the bin column."""

    def to_bin(value):
        # truncated hundredfold of the value
        return int(value * 100)

    t = ms.toTable("a", [0.01, 0.1, 0.1, 0.015, 0.2, 1.0])
    t.addColumn("a_bin", t.a.apply(to_bin))

    # Regression check: addColumn once stored numpy-ints here, which gave
    # 6 tables instead of 4.
    n_tables = len(t.splitBy("a_bin"))
    assert n_tables == 4
def test_getters_and_setters():
    """Check reading and changing type and format of a column."""
    table = ms.toTable("a", [1, 2, 3])

    # settings of a freshly created int column
    assert table.getColType("a") == int
    assert table.getColFormat("a") == "%d"

    # both settings can be overwritten and read back
    table.setColType("a", float)
    assert table.getColType("a") == float

    table.setColFormat("a", "%.3f")
    assert table.getColFormat("a") == "%.3f"
def testWithEmtpyTablesAndTestColnameGeneration():
    """Check joins with empty tables and the column names of the results."""
    e = ms.toTable("x", [])
    f = ms.toTable("y", [])
    g = ms.toTable("z", [1])

    def check(result, n_rows, col_names):
        # row count and generated column names of a join result
        assert len(result) == n_rows
        assert result.getColNames() == col_names, result.getColNames()

    assert len(e.filter(e.x == 0)) == 0

    # inner joins: empty as soon as one side is empty
    check(e.join(f, f.y == e.x), 0, ["x", "y__0"])
    check(e.join(f, e.x == f.y), 0, ["x", "y__0"])
    check(e.join(g, g.z == e.x), 0, ["x", "z__0"])
    check(e.join(g, e.x == g.z), 0, ["x", "z__0"])
    check(g.join(e, e.x == g.z), 0, ["z", "x__0"])
    check(g.join(e, g.z == e.x), 0, ["z", "x__0"])

    # left joins: the rows of the left hand table are kept
    check(e.leftJoin(f, f.y == e.x), 0, ["x", "y__0"])
    check(e.leftJoin(g, g.z == e.x), 0, ["x", "z__0"])

    kept = g.leftJoin(e, e.x == g.z)
    check(kept, 1, ["z", "x__0"])
    assert kept.rows[0] == [1, None]
    kept.print_()
    f.print_()

    # a further left join continues the postfix numbering with "__1"
    chained = kept.leftJoin(f, f.y == kept.x__0)
    assert chained.getColNames() == ["z", "x__0", "y__1"], chained.getColNames()
    assert len(chained) == 1
def testMerge():
    """Check mergeTables for two tables with partly different columns."""
    first = ms.toTable("a", [1])
    first.addColumn("b", [2])
    first.addColumn("c", [3])

    second = ms.toTable("a", [1, 2])
    second.addColumn("c", [1, 3])
    second.addColumn("d", [1, 4])

    merged = ms.mergeTables([first, second])

    # rows of 'first' come first; columns missing in a table are None
    assert merged.a.values == [1, 1, 2]
    assert merged.b.values == [2, None, None]
    assert merged.c.values == [3, 1, 3]
    assert merged.d.values == [None, 1, 4]

    # check if input tables are not altered
    assert first.getColNames() == ["a", "b", "c"]
    assert second.getColNames() == ["a", "c", "d"]
def testMerge():
    """Merge two tables which share only some columns and check the result."""
    t1 = ms.toTable("a", [1])
    t1.addColumn("b", [2])
    t1.addColumn("c", [3])

    t2 = ms.toTable("a", [1, 2])
    t2.addColumn("c", [1, 3])
    t2.addColumn("d", [1, 4])

    tn = ms.mergeTables([t1, t2])

    # expected values per column of the merged table
    expected = [
        (tn.a, [1, 1, 2]),
        (tn.b, [2, None, None]),
        (tn.c, [3, 1, 3]),
        (tn.d, [None, 1, 4]),
    ]
    for column, values in expected:
        assert column.values == values

    # check if input tables are not altered
    assert t1.getColNames() == ["a", "b", "c"]
    assert t2.getColNames() == ["a", "c", "d"]
def testWithEmtpyTablesAndTestColnameGeneration():
    """Join empty and non-empty tables; check sizes and generated names."""
    e = ms.toTable("x", [])
    f = ms.toTable("y", [])
    g = ms.toTable("z", [1])

    assert len(e.filter(e.x == 0)) == 0

    # (deferred join, expected column names); every result must be empty
    empty_results = [
        (lambda: e.join(f, f.y == e.x), ["x", "y__0"]),
        (lambda: e.join(f, e.x == f.y), ["x", "y__0"]),
        (lambda: e.join(g, g.z == e.x), ["x", "z__0"]),
        (lambda: e.join(g, e.x == g.z), ["x", "z__0"]),
        (lambda: g.join(e, e.x == g.z), ["z", "x__0"]),
        (lambda: g.join(e, g.z == e.x), ["z", "x__0"]),
        (lambda: e.leftJoin(f, f.y == e.x), ["x", "y__0"]),
        (lambda: e.leftJoin(g, g.z == e.x), ["x", "z__0"]),
    ]
    for run_join, expected_names in empty_results:
        t1 = run_join()
        assert len(t1) == 0
        assert t1.getColNames() == expected_names, t1.getColNames()

    # left join with an empty right hand side keeps the single row of g
    t1 = g.leftJoin(e, e.x == g.z)
    assert len(t1) == 1
    assert t1.getColNames() == ["z", "x__0"], t1.getColNames()
    assert t1.rows[0] == [1, None]
    t1.print_()
    f.print_()

    # joining the result again gives the next postfix "__1"
    t2 = t1.leftJoin(f, f.y == t1.x__0)
    assert t2.getColNames() == ["z", "x__0", "y__1"], t2.getColNames()
    assert len(t2) == 1
def buildTable():
    """Return a three-row table with mz, rt and peakmap columns.

    The last row holds None in every column.
    """
    table = ms.toTable("mz", [1.0, 2.0, None])
    # mz window: 0.025 to each side
    table.addColumn("mzmin", table.mz - 0.025)
    table.addColumn("mzmax", table.mz + 0.025)

    table.addColumn("rt", [10.0, 20.0, None])
    # rt window: 1.0 below and 5.0 above
    table.addColumn("rtmin", table.rt - 1.0)
    table.addColumn("rtmax", table.rt + 5.0)

    # a tuple stands in for a peak map in the second row
    table.addColumn("peakmap", [None, (1, 2), None])
    return table
def buildTable():
    """Build the fixture table with columns mz, mzmin, mzmax, rt, rtmin,
    rtmax and peakmap (three rows, the last one consisting of None values).
    """
    fixture = ms.toTable("mz", [1.0, 2.0, None])

    lower_mz = fixture.mz - 0.025
    fixture.addColumn("mzmin", lower_mz)
    upper_mz = fixture.mz + 0.025
    fixture.addColumn("mzmax", upper_mz)

    fixture.addColumn("rt", [10.0, 20.0, None])
    lower_rt = fixture.rt - 1.0
    fixture.addColumn("rtmin", lower_rt)
    upper_rt = fixture.rt + 5.0
    fixture.addColumn("rtmax", upper_rt)

    # only the second row carries a (dummy) peakmap value
    fixture.addColumn("peakmap", [None, (1, 2), None])
    return fixture
def testIfNotNoneElse():
    """Check ifNotNoneElse() with a constant and with an expression."""
    table = ms.toTable("a", [None, 2, 3])
    table.print_()

    # constant replacement for the None entry
    table.addColumn("b", table.a.ifNotNoneElse(3))
    table.print_()

    # replacement taken row-wise from another expression
    table.addColumn("c", table.a.ifNotNoneElse(table.b + 1))
    table.print_()

    assert table.b.values == [3, 2, 3]
    assert table.c.values == [4, 2, 3]
def testApply():
    """Check apply() on columns with and without None values."""
    numbers = ms.toTable("a", [None, 2, 3])
    squares = numbers.a * numbers.a
    numbers.addColumn("id", squares.apply(lambda v: int(v**0.5)))
    # the None entry stays None
    assert numbers.id.values == [None, 2, 3]

    formulas = ms.toTable("mf", ["Na", "H2O", None])

    # apply with Nones in cols
    masses = formulas.mf.apply(mass.of)
    formulas.addColumn("m0", masses)
    formulas.addColumn("m0s", formulas.m0.apply(str))
    formulas.print_()
    assert formulas.getColTypes() == [str, float, str], formulas.getColTypes()

    # apply without None values:
    formulas = formulas.filter(formulas.m0.isNotNone())
    assert len(formulas) == 2
    formulas.addColumn("m02", formulas.mf.apply(mass.of))
    formulas.addColumn("m0s2", formulas.m0.apply(str))
    assert formulas.getColTypes() == [str, float, str, float, str]
def testJoinNameGeneration():
    """Check the numbered postfixes which repeated joins give to column names."""
    table = ms.toTable("a", [])
    single = table.copy()

    # each join with the one-column table adds the next free postfix
    table = table.join(single, False)
    assert table.getColNames() == ["a", "a__0"]

    table = table.join(single, False)
    assert table.getColNames() == ["a", "a__0", "a__1"]

    # self-join of three columns: three further postfixes
    table = table.join(table.copy(), False)
    assert table.getColNames() == ["a", "a__0", "a__1", "a__2", "a__3", "a__4"]

    # without the column "a" the names are a__0 .. a__9 after a self-join
    table.dropColumns("a")
    table = table.join(table.copy(), False)
    expected_names = ["a__%d" % number for number in range(10)]
    assert table.getColNames() == expected_names
def testInplaceColumnmodification():
    """Check the augmented assignments and modify() on a column."""
    table = ms.toTable("a", [1, 2, 3, 4])

    # add and multiply ...
    table.a += 1
    assert table.a.values == [2, 3, 4, 5]
    table.a *= 2
    assert table.a.values == [4, 6, 8, 10]

    # ... then undo both steps in reverse order
    table.a /= 2
    assert table.a.values == [2, 3, 4, 5]
    table.a -= 1
    assert table.a.values == [1, 2, 3, 4]

    # modify() replaces each value by the result of the function
    table.a.modify(lambda v: 0)
    assert table.a.values == [0, 0, 0, 0]
def buildTable2():
    """Return the same fixture as built from mz/rt/peakmap columns, but
    with the postfix "__1" appended to every column name.
    """
    table = ms.toTable("mz", [1.0, 2.0, None])
    table.addColumn("mzmin", table.mz - 0.025)
    table.addColumn("mzmax", table.mz + 0.025)
    table.addColumn("rt", [10.0, 20.0, None])
    table.addColumn("rtmin", table.rt - 1.0)
    table.addColumn("rtmax", table.rt + 5.0)
    table.addColumn("peakmap", [None, (1, 2), None])

    # map every column name to its postfixed counterpart
    new_names = {
        "mz": "mz__1",
        "mzmin": "mzmin__1",
        "mzmax": "mzmax__1",
        "rt": "rt__1",
        "rtmin": "rtmin__1",
        "rtmax": "rtmax__1",
        "peakmap": "peakmap__1",
    }
    table._renameColumnsUnchecked(**new_names)
    return table
def testOnColumns():
    """Check p values of ms.oneWayAnova and ms.kruskalWallis on columns."""
    table = ms.toTable("factor", [1, 2, 1, 2, 1, 1, 2])
    table.addColumn("dependent", table.factor * 1.1)

    # dependent values are an exact multiple of the factor
    F, p_anova = ms.oneWayAnova(table.factor, table.factor * 1.1)
    assert p_anova < 1e-12, p_anova

    H, p_kruskal = ms.kruskalWallis(table.factor, table.factor * 1.1)
    assert abs(p_kruskal - 0.014305) / 0.014305 < 1e-4

    # dependent values scatter slightly around the factor
    table.addColumn("dependent2", [1.01, 2.01, 1.02, 2.02, .99, 0.98, 1.98])

    F, p_anova = ms.oneWayAnova(table.factor, table.dependent2)
    assert abs(p_anova - 1.3e-8) / 1.3e-8 < 0.01

    H, p_kruskal = ms.kruskalWallis(table.factor, table.dependent2)
    assert abs(p_kruskal - 0.033894) / 0.033894 < 1e-4
def testOnColumns():
    """Compare p values of both analysis functions with reference values."""

    def relative_error(value, reference):
        # deviation from the reference value, relative to the reference
        return abs(value - reference) / reference

    t = ms.toTable("factor", [1, 2, 1, 2, 1, 1, 2])
    t.addColumn("dependent", t.factor * 1.1)

    # case 1: measurements proportional to the group number
    F, p = ms.oneWayAnova(t.factor, t.factor * 1.1)
    assert p < 1e-12, p
    H, p = ms.kruskalWallis(t.factor, t.factor * 1.1)
    assert relative_error(p, 0.014305) < 1e-4

    # case 2: measurements with small deviations
    t.addColumn("dependent2", [1.01, 2.01, 1.02, 2.02, .99, 0.98, 1.98])
    F, p = ms.oneWayAnova(t.factor, t.dependent2)
    assert relative_error(p, 1.3e-8) < 0.01
    H, p = ms.kruskalWallis(t.factor, t.dependent2)
    assert relative_error(p, 0.033894) < 1e-4
def testBools():
    """Check aggregates on a bool column and type coercion in thenElse()."""
    table = ms.toTable("bool", [True, False, True, None])

    # aggregates of the bool column
    assert table.bool.sum() == 2
    assert table.bool.max() == True, table.bool.max()
    assert table.bool.min() == False, table.bool.min()

    table.addColumn("int", [1, 2, 3, 4])
    table.addColumn("float", [1.0, 2, 3, 4])

    # test coercion (bool, int) to int:
    table.addColumn("int_bool", (table.bool).thenElse(table.bool, table.int))
    assert table.int_bool.values == [1, 2, 1, None]

    # (int, float) branches
    table.addColumn("int_float", (table.bool).thenElse(table.int, table.float))
    assert table.int_float.values == [1.0, 2.0, 3.0, None], table.int_float.values

    # (bool, float) branches
    table.addColumn("bool_float", (table.bool).thenElse(table.bool, table.float))
    assert table.bool_float.values == [1.0, 2.0, 1.0, None]
def split_srm_peakmap_to_tables(peakmap, n_digits=2):
    """
    Processes a srm/mrm peakmap and returns a list of tables, one for each
    entry of peakmap.splitLevelN(2, n_digits). Each table is passed through
    ms.integrate with the "emg_exact" integrator.

    n_digits is the precision of the precursor m/z values.

    No peak detector such as centWave is involved: every distinct fragment
    m/z value gets a window of half the resolution given by n_digits to
    each side, combined with the full rt range of its MS2 peakmap. This
    avoids parameter tuning of a peak detector and keeps all peaks,
    whatever their quality is.
    """
    # half resolution according to n_digits:
    delta_mz = 0.5 * (0.1)**n_digits

    tables = []
    for precursor_mz, level2_map in peakmap.splitLevelN(2, n_digits):
        # distinct m/z values in the level 2 map, ascending:
        fragment_mzs = sorted(set(mz for mz, _ in level2_map.msNPeaks(2)))

        # one row per fragment ion
        table = ms.toTable("precursor", [precursor_mz] * len(fragment_mzs))
        table.addColumn("fragment_ion", fragment_mzs)

        # No specific peak detection: the full time range of the map is
        # used as rt window for the later integration.
        rt_low, rt_high = level2_map.rtRange()
        table.addColumn("rtmin", rt_low)
        table.addColumn("rtmax", rt_high)
        table.addColumn("mzmin", table.fragment_ion - delta_mz)
        table.addColumn("mzmax", table.fragment_ion + delta_mz)
        table.addColumn("peakmap", level2_map)

        # rtmin/rtmax, mzmin/mzmax and peakmap columns now exist. These
        # are mandatory for fitting and integrating peaks with ms.integrate:
        tables.append(ms.integrate(table, "emg_exact"))
    return tables
def testLogics():
    """Check the logical operators ~, |, & and ^ on bool columns."""
    table = ms.toTable("a", [True, False])
    table.addColumn("nota", ~table.a)
    table.addColumn("true", table.a | True)
    table.addColumn("false", table.a & False)
    assert table.getColTypes() == [bool] * 4

    # number of rows for which the combined expressions hold
    assert len(table.filter(table.a & table.nota)) == 0
    assert len(table.filter(table.a | table.true)) == 2
    assert len(table.filter(table.a ^ table.nota)) == 2
    assert len(table.filter(table.a ^ table.a)) == 0

    # values of the first row, accessed by column name
    first_row = table.getValues(table.rows[0])
    assert first_row.a == True
    assert first_row.nota == False
    assert first_row.true == True
    assert first_row.false == False
def testRename():
    """Check renameColumns() for rejected and for accepted arguments.

    NOTE(review): ExceptionTester is defined elsewhere; presumably it
    asserts that its body raises - confirm.
    """
    table = ms.toTable("a", [1, 1, 3, 4])
    table.addColumn("b", [1, 1, 3, 3])
    table.addColumn("c", [1, 2, 1, 4])

    # there is no column "d"
    with ExceptionTester():
        table.renameColumns({"d": "e"})
    # new name "b" is in use already
    with ExceptionTester():
        table.renameColumns(a="b")
    # two columns get the same new name
    with ExceptionTester():
        table.renameColumns(a="x", b="x")
    # column "a" is renamed twice
    with ExceptionTester():
        table.renameColumns({"a": "f"}, a="d")

    # dicts and keyword arguments can be combined
    table.renameColumns({"a": "x"}, {"c": "z"}, b="y")
    assert tuple(table.getColNames()) == ("x", "y", "z")
matched=iso.leftJoin(common, iso.mass.approxEqual(common.m0, 1*MMU)) matched._print()""") print """" Statistical Analysis ==================== The framework provides two methods for comparing two datasets by analysis of variance: classical *one way ANOVA* and non parametric *Kruskal Wallis* analysis. These methods work on tables (is anybody surprised ?) like this:: """ t = ms.toTable("group", [ 1,1,1,1,1,2,2,2,2,2,2]) t.addColumn("measurement", [ 1.0, 0.9, 1.2, 1.4, 2.1, 1.0, 2.2, 2.3, 1.9, 2.8, 2.3]) t.sortBy("measurement") run("""t._print()""") print """ ``ms.oneWayAnova`` returns the correspoding *F* and *p* value, ``ms.kruskalWallis`` the *H* and *p* value:: """ run("""F, p = ms.oneWayAnova(t.group, t.measurement) print p""") run("""H, p = ms.kruskalWallis(t.group, t.measurement) print p""")
def testUpdateColumn():
    """Check updateColumn() for an existing and for a new column name."""
    table = ms.toTable("a", [1, 2])

    # existing column: values are replaced
    table.updateColumn("a", table.a + 1)
    assert table.a.values == [2, 3]

    # unknown column: it is created
    table.updateColumn("b", table.a + 1)
    assert table.b.values == [3, 4]
def testSlicing():
    """Check access to table rows by index and by slice."""
    table = ms.toTable("a", [1, 2, 3])

    first_row = table[0]
    assert first_row.a.values == [1]

    head = table[:1]
    assert head.a.values == [1]

    tail = table[1:]
    assert tail.a.values == [2, 3]

    everything = table[:]
    assert everything.a.values == [1, 2, 3]
def testConstantColumn():
    """Check that addConstantColumn() stores one object in all rows."""
    table = ms.toTable("a", [1, 2, 3])
    table.addConstantColumn("b", dict())

    # all cells must refer to the same dict object
    identities = set(map(id, table.b.values))
    assert len(identities) == 1
def testSomeExpressions():
    """Check containsElement() on a column of molecular formulas."""
    formulas = ms.toTable("mf", ["Ag", "P", "Pb", "P3Pb", "PbP"])

    # "P" is counted for "P", "P3Pb" and "PbP", but not for "Pb"
    with_p = formulas.filter(formulas.mf.containsElement("P"))
    assert len(with_p) == 3

    # "Pb" is counted for "Pb", "P3Pb" and "PbP"
    with_pb = formulas.filter(formulas.mf.containsElement("Pb"))
    assert len(with_pb) == 3
def testWithNoneValues():
    """Check filters and joins on int, float and str columns holding a None.

    Every table consists of the two values None and "two" (as int, float
    or str). All checks are run on the table as created, after sorting it
    ascending and after sorting it descending.

    Changes compared to the former copy-and-paste version:

    - The left joins following the ">=" and ">" inner joins were checked
      with "<=" and "<" once more, so left joins with ">=" and ">" were
      never exercised. Each operator is now used for both join kinds.
    - The repeated groups of assertions are driven by tables of operators
      and report the failing operator.
    - Debug printing is restricted to the unsorted tables.
    """

    # (symbol, comparison, expected row count of the inner self-join).
    # A left self-join is always expected to have 2 rows, one per left row.
    join_cases = [
        ("==", lambda x, y: x == y, 1),
        ("<=", lambda x, y: x <= y, 1),
        ("<", lambda x, y: x < y, 0),
        ("!=", lambda x, y: x != y, 0),
        (">=", lambda x, y: x >= y, 1),
        (">", lambda x, y: x > y, 0),
    ]

    # (symbol, comparison, expected row count) for filters written as
    # "constant <op> column", with a constant above the non-None value.
    reversed_filter_cases = [
        (">", lambda c, col: c > col, 1),
        (">=", lambda c, col: c >= col, 1),
        ("==", lambda c, col: c == col, 0),
        ("<", lambda c, col: c < col, 0),
        ("<=", lambda c, col: c <= col, 0),
        ("!=", lambda c, col: c != col, 1),
    ]

    def in_all_orders(table, check):
        # run check() unsorted, sorted ascending and sorted descending
        check()
        table.sortBy("a", ascending=True)
        check()
        table.sortBy("a", ascending=False)
        check()

    def check_self_joins(table, other):
        # inner and left joins of table with its copy for every operator
        def check():
            for symbol, compare, n_inner in join_cases:
                inner = table.join(other, compare(table.a, other.a))
                assert len(inner) == n_inner, (symbol, len(inner))
                left = table.leftJoin(other, compare(table.a, other.a))
                assert len(left) == 2, (symbol, len(left))

        in_all_orders(table, check)

    def check_reversed_filters(table, constants):
        # filters with the constant on the left hand side of the operator
        def check():
            for constant in constants:
                for symbol, compare, n_rows in reversed_filter_cases:
                    found = table.filter(compare(constant, table.a))
                    assert len(found) == n_rows, (constant, symbol, len(found))

        in_all_orders(table, check)

    # simple int compare ###################################
    t = ms.toTable("a", [None, 2])
    t.print_()
    assert len(t.filter(t.a < 3)) == 1
    t2 = t.copy()
    t.leftJoin(t2, t.a == t2.a).print_()
    check_self_joins(t, t2)

    # simple float compare ##################################
    t = ms.toTable("a", [None, 2.0])
    t.print_()
    assert len(t.filter(t.a < 3)) == 1
    t2 = t.copy()
    t2.print_()
    t.leftJoin(t2, t.a < t2.a).print_()
    t.join(t2, t.a != t2.a).print_()
    check_self_joins(t, t2)

    # simple str compare ###################################
    t = ms.toTable("a", [None, "2"])
    t.filter(t.a < "3").print_()
    assert len(t.filter(t.a < "3")) == 1
    t2 = t.copy()
    t.join(t2, t.a != t2.a).print_()
    check_self_joins(t, t2)

    # float, int and str compare reversed ###################
    reversed_inputs = [
        ([None, 2.0], (3.0, 3)),
        ([None, 2], (3, 3.0)),
        ([None, "2"], ("3",)),
    ]
    for values, constants in reversed_inputs:
        t = ms.toTable("a", values)
        t.print_()
        check_reversed_filters(t, constants)

    ##########################################################
    t = ms.toTable("i", [1, 2, None])
    assert len(t.filter(t.i.isNone())) == 1
    assert len(t.filter(t.i.isNotNone())) == 2

    # adding and replacing columns must keep the column order
    t.addColumn("b", [2, 3, None])
    assert t.getColNames() == ["i", "b"]
    t.replaceColumn("b", t.b + 1)
    assert t.getColNames() == ["i", "b"]

    t.addRow([None, None])
    t.addRow([3, None])
    t.addRow([3, 3.0])
    assert t.b.values == [3, 4, None, None, None, 3]

    # check order
    t.replaceColumn("i", t.i)
    assert t.getColNames() == ["i", "b"]

    # join with an empty table, and max() of an empty column
    s = ms.toTable("b", [])
    x = t.join(s, t.b == s.b)
    assert len(x) == 0
    assert s.b.max() is None
def testOnTables():
    """Check ms.oneWayAnovaOnTables and ms.kruskalWallisOnTables on two
    lists of tables with the columns "compound" and "area".
    """

    def relative_error(value, reference):
        # deviation from the reference value, relative to the reference
        return abs(value - reference) / reference

    # --- first set: five tables, one area value of compound A is None ---
    current = ms.toTable("compound", ["A", "B"])
    current.addColumn("area", [1.0, 2.0])
    set_one = [current]

    current = current.copy()
    current.area += 0.01
    set_one.append(current)

    for areas in ([None, 4.2], [1.3, 4.7], [2.3, 8.7]):
        current = current.copy()
        current.replaceColumn("area", areas)
        set_one.append(current)

    # --- second set: six tables, each a modified copy of its predecessor ---
    set_two = []
    second_areas = [
        [2.2, 7.7],
        [2.2, 7.7],
        [2.2, 7.7],
        [2.6, 7.6],
        [2.2, 7.7],
        [2.9, 7.6],
    ]
    for areas in second_areas:
        current = current.copy()
        current.replaceColumn("area", areas)
        set_two.append(current)

    # --- ANOVA ---
    result = ms.oneWayAnovaOnTables(set_one, set_two, idColumn="compound",
                                    valueColumn="area")
    assert result.id.values == ["A", "B"]
    assert result.n1.values == [4, 5]
    assert result.n2.values == [6, 6]
    assert relative_error(result.p_value.values[0], 9.11e-3) < 1e-2
    assert relative_error(result.p_value.values[1], 1.44e-2) < 1e-2
    assert result.title == "ANOVA ANALYSIS"

    # --- Kruskal Wallis ---
    result = ms.kruskalWallisOnTables(set_one, set_two, idColumn="compound",
                                      valueColumn="area")
    result.print_()
    assert result.id.values == ["A", "B"]
    assert result.n1.values == [4, 5]
    assert result.n2.values == [6, 6]
    assert relative_error(result.p_value.values[0], 7.84e-2) < 1e-2
    assert relative_error(result.p_value.values[1], 9.18e-2) < 1e-2
    assert result.title == "KRUSKAL WALLIS ANALYSIS"