def testDynamicColumnAttributes():
    # Column attribute access (t.a, t.b, ...) must track renames and drops.
    t = Table(["a", "b", "c"], [str, int, int], ["%s", "%d", "%d"], [])
    t.a
    t.b
    t.c
    assert len(t.a.values) == 0
    assert len(t.b.values) == 0
    assert len(t.c.values) == 0

    t.renameColumns(dict(a="aa"))
    assert "a" not in t.getColNames()
    assert "aa" in t.getColNames()
    t.aa
    try:
        t.a
    except Exception:
        pass
    else:
        # bug fix: the failure used to be raised *inside* the try block,
        # where the bare except swallowed it, so the check never fired
        assert False, "t.a should be deleted"

    # the column attribute must survive a pickle round trip
    col = pickle.loads(pickle.dumps(t.aa))
    assert len(col.values) == 0

    t.dropColumns("aa")
    assert "aa" not in t.getColNames()
    try:
        t.aa
    except Exception:
        pass
    else:
        assert False, "t.aa should be deleted"
def testToOpenMSFeatureMap():
    # converting a table with mz/rt columns to an OpenMS feature map
    table = Table("mz rt".split(), [float, float], 2 * ["%.6f"])
    feature_map = toOpenMSFeatureMap(table)
    assert feature_map.size() == 0

    table.addRow([1.0, 2.0])
    feature_map = toOpenMSFeatureMap(table)
    assert feature_map.size() == 1

    feature = feature_map[0]
    # exact float comparison is ok here: no digits after the decimal point
    assert feature.getMZ() == 1.0
    assert feature.getRT() == 2.0
def setupTable(): names = "int long float str object array".split() types = [ int, long, float, str, object, np.ndarray, ] formats = ["%3d", "%d", "%.3f", "%s", "%r", "'array(%r)' % o.shape"] row1 = [1, 12323L, 1.0, "hi", {1: 1}, np.array((1, 2, 3))] row2 = [2, 22323L, 2.0, "hi2", [ 2, 3, ], np.array(((2, 3, 4), (1, 2, 3)))] row3 = [ 3, 32323L, 3.0, "hi3", (3, ), np.array(((3, 3, 4, 5), (1, 2, 3, 4))) ] rows = [row1, row2, row3] t = Table(names, types, formats, rows, "testtabelle", meta=dict(why=42)) t = t.extractColumns("int", "float", "str") t.addEnumeration() t._name = "t" t._print() return t
def testDynamicColumnAttributes():
    # Column attribute access (t.a, t.b, ...) must track renames and drops.
    t = Table(["a", "b", "c"], [str, int, int], ["%s", "%d", "%d"], [])
    t.a
    t.b
    t.c
    assert len(t.a.values) == 0
    assert len(t.b.values) == 0
    assert len(t.c.values) == 0

    t.renameColumns(dict(a="aa"))
    assert "a" not in t.getColNames()
    assert "aa" in t.getColNames()
    t.aa
    try:
        t.a
    except Exception:
        pass
    else:
        # bug fix: the failure used to be raised *inside* the try block,
        # where the bare except swallowed it, so the check never fired
        assert False, "t.a should be deleted"

    # the column attribute must survive a pickle round trip
    col = pickle.loads(pickle.dumps(t.aa))
    assert len(col.values) == 0

    t.dropColumns("aa")
    assert "aa" not in t.getColNames()
    try:
        t.aa
    except Exception:
        pass
    else:
        assert False, "t.aa should be deleted"
def testDoubleColumnames(): ex = None try: colnames = ["col0", "col0", "col1", "col1", "col2"] Table(colnames, [] * 5, [] * 5) except Exception, e: ex = e.message
def testSupportedPostfixes():
    # checks Table.supportedPostfixes for several column name layouts
    names = ("mz mzmin mzmax mz0 mzmin0 mzmax0 mz1 mzmax1 mzmin__0 "
             "mzmax__0 mz__0 mzmax3 mz4 mzmin4").split()
    table = Table._create(names, [float] * len(names), [])
    # a single needed column matches every postfix present
    assert len(table.supportedPostfixes(["mz"])) == len(names)
    # only postfixes for which *all* needed columns exist survive
    assert table.supportedPostfixes(["mz", "mzmin"]) == ["", "0", "4", "__0"]
    assert table.supportedPostfixes(["mz", "mzmin", "mzmax"]) == ["", "0", "__0"]
def testRunnerTable(): with ExceptionTester(): Table(["a"], [np.float32], ["%f"], [[32.0]]) #build table names = "int long float str object array".split() types = [ int, long, float, str, object, np.ndarray, ] formats = ["%3d", "%d", "%.3f", "%s", "%r", "'array(%r)' % (o.shape,)"] row1 = [1, 12323L, 1.0, "hi", {1: 1}, np.array((1, 2, 3))] row2 = [2, 22323L, 2.0, "hi2", [ 2, 3, ], np.array(((2, 3, 4), (1, 2, 3)))] row3 = [ 3, 32323L, 3.0, "hi3", (3, ), np.array(((3, 3, 4, 5), (1, 2, 3, 4))) ] rows = [row1, row2, row3] t = Table(names, types, formats, rows, "testtabelle", meta=dict(why=42)) run(t, names, [row1, row2, row3]) # test pickle dat = pickle.dumps(t) t = pickle.loads(dat) run(t, names, [row1, row2, row3]) ms.storeTable(t, u"temp_output/test.table") try: ms.storeTable(t, "temp_output/test.table") assert False, "no exception thrown althoug file should exist!" except: pass ms.storeTable(t, "temp_output/test.table", True) t = ms.loadTable("temp_output/test.table") run(t, names, [row1, row2, row3])
def isotopeDistributionTable(formula, R=None, fullC13=False, minp=0.01, **kw):
    """
    generates Table for most common isotopes of molecule with given mass
    *formula*.

    If the resolution *R* is given, the measurement device is simulated, and
    overlapping peaks may merge.

    *fullC13=True* assumes that only C13 carbon is present in formula.

    Further you can give a threshold *minp* for considering only isotope
    peaks with an abundance above the value. Standard is *minp=0.01*.

    If you have special elementary isotope abundances which differ from the
    natural abundances, you can tell that like
    ``ms.isotopeDistributionTable("S4C4", C=dict(C13=0.5, C12=0.5))``

    Examples:

    .. pycon::

       import ms !onlyoutput
       # natural abundances:
       tab = ms.isotopeDistributionTable("C3H7NO2")
       tab.abundance /= tab.abundance.sum()
       tab.print_()
       # artifical abundances:
       tab = ms.isotopeDistributionTable("C3H7NO2", C=dict(C13=0.5, C12=0.5))
       tab.abundance /= tab.abundance.sum()
       tab.print_()
    """
    from libms.DataStructures.Table import Table

    generator = _setupIsotopeDistributionGenerator(formula, R, fullC13,
                                                   minp, **kw)
    result = Table(["mf", "mass", "abundance"],
                   [str, float, float],
                   ["%s", "%.6f", "%.3f"], [])
    # addRow with immediateExec=False defers recomputation until the
    # final resetInternals() call below
    for mass, abundance in generator.getCentroids():
        result.addRow([formula, mass, abundance], False)
    result.resetInternals()
    return result
def testSomePredicates(): #build table names = "int long float str object array".split() types = [ int, long, float, str, object, np.ndarray, ] formats = ["%3d", "%d", "%.3f", "%s", "%r", "'array%r' % (o.shape,)"] row1 = [1, 12323L, 1.0, "hi", {1: 1}, np.array((1, 2, 3))] row2 = [2, 22323L, 2.0, "hi2", [ 2, 3, ], np.array(((2, 3, 4), (1, 2, 3)))] row3 = [ 3, 32323L, 3.0, "hi3", (3, ), np.array(((3, 3, 4, 5), (1, 2, 3, 4))) ] rows = [row1, row2, row3] t = Table(names, types, formats, rows, "testtabelle", meta=dict(why=42)) tn = t.filter((t.int + t.float).inRange(-1, 2)) assert len(tn) == 1 assert tn.getValue(tn.rows[0], "int") == 1 tn = t.filter((t.float + t.int).inRange(-1, 2)) assert len(tn) == 1 assert tn.getValue(tn.rows[0], "int") == 1 tn = t.filter(t.float.approxEqual(1.0, t.int / 10)) tn._print() assert len(tn) == 1, len(tn) assert tn.getValue(tn.rows[0], "int") == 1
def testSomePredicates(): #build table names="int long float str object array".split() types = [int, long, float, str, object, np.ndarray,] formats = [ "%3d", "%d", "%.3f", "%s", "%r", "'array%r' % (o.shape,)" ] row1 = [ 1, 12323L, 1.0, "hi", { 1: 1 }, np.array((1,2,3)) ] row2 = [ 2, 22323L, 2.0, "hi2", [2,3,], np.array(((2,3,4),(1,2,3))) ] row3 = [ 3, 32323L, 3.0, "hi3", (3,) , np.array(((3,3,4,5),(1,2,3,4))) ] rows = [row1, row2, row3] t=Table(names, types, formats, rows, "testtabelle", meta=dict(why=42)) tn = t.filter((t.int+t.float).inRange(-1, 2)) assert len(tn) == 1 assert tn.getValue(tn.rows[0], "int") == 1 tn = t.filter((t.float+t.int).inRange(-1, 2)) assert len(tn) == 1 assert tn.getValue(tn.rows[0], "int") == 1 tn = t.filter(t.float.approxEqual(1.0, t.int/10)) tn._print() assert len(tn) == 1, len(tn) assert tn.getValue(tn.rows[0], "int") == 1
def test_removePostfixes():
    t = Table._create(["abb__0", "bcb__0"], [str] * 2, ["%s"] * 2)
    assert t.getColNames() == ["abb__0", "bcb__0"]

    # without arguments the default "__<n>" postfix is stripped
    t.removePostfixes()
    assert t.getColNames() == ["abb", "bcb"]

    # explicitly given postfixes are stripped as well
    t.removePostfixes("bb", "cb")
    assert t.getColNames() == ["a", "b"]

    # stripping the remaining names entirely must raise
    try:
        t.print_()
        t.removePostfixes("a", "b")
        t.print_()
    except:
        pass
    else:
        assert False, "expected exception"
def setupTable(): names="int long float str object array".split() types = [int, long, float, str, object, np.ndarray,] formats = [ "%3d", "%d", "%.3f", "%s", "%r", "'array(%r)' % o.shape" ] row1 = [ 1, 12323L, 1.0, "hi", { 1: 1 }, np.array((1,2,3)) ] row2 = [ 2, 22323L, 2.0, "hi2", [2,3,], np.array(((2,3,4),(1,2,3))) ] row3 = [ 3, 32323L, 3.0, "hi3", (3,) , np.array(((3,3,4,5),(1,2,3,4))) ] rows = [row1, row2, row3] t=Table(names, types, formats, rows, "testtabelle", meta=dict(why=42)) t = t.extractColumns("int", "float", "str") t.addEnumeration() t._name = "t" t._print() return t
def loadCSV(path=None, sep=";", keepNone=False, **specialFormats):
    """Reads a csv file into a Table.

    If *path* is None a file selection dialog is opened instead.  Column
    types and formats are guessed from the parsed values; entries in
    *specialFormats* override the guessed format per column name.
    Returns None if no file was chosen.
    """
    # local import in order to keep namespaces clean
    import ms
    import csv, os.path, sys, re
    from libms.DataStructures.Table import (Table, common_type_for,
                                            bestConvert, guessFormatFor)

    if isinstance(path, unicode):
        path = path.encode(sys.getfilesystemencoding())
    elif path is None:
        path = ms.askForSingleFile(extensions=["csv"])
        if path is None:
            return None

    with open(path, "r") as fp:
        reader = csv.reader(fp, delimiter=sep)
        # reduce multiple spaces to single underscore in the header
        colNames = [re.sub(" +", "_", n.strip()) for n in reader.next()]
        if keepNone:
            conv = bestConvert
        else:
            # map the literal string "None" to the None value
            conv = lambda v: None if v == "None" else bestConvert(v)
        rows = [[conv(c.strip()) for c in row] for row in reader]

    # transpose rows to columns so a common type can be guessed per column
    columns = [[row[i] for row in rows] for i in range(len(colNames))]
    types = [common_type_for(col) for col in columns]

    formats = dict([(name, guessFormatFor(name, type_))
                    for (name, type_) in zip(colNames, types)])
    formats.update(specialFormats)
    formats = [formats[n] for n in colNames]

    title = os.path.basename(path)
    meta = dict(loaded_from=os.path.abspath(path))
    return Table._create(colNames, types, formats, rows, title, meta)
def loadTable(path=None):
    """
    load pickled table

    If *path* is missing, a dialog for file selection is opened
    instead.  Returns None if no file was chosen.
    """
    # local import in order to keep namespaces clean
    import ms
    import sys
    from libms.DataStructures.Table import Table, compressPeakMaps

    if isinstance(path, unicode):
        path = path.encode(sys.getfilesystemencoding())
    elif path is None:
        path = ms.askForSingleFile(extensions=["table"])
        if path is None:
            return None

    result = Table.load(path)
    # share identical peak maps to reduce memory usage
    compressPeakMaps(result)
    return result
def _build_starttable(tables, force_merge): colname_orders = [] for table in tables: colname_orders.append(table._colNames) colum_names = _topo_sort_with_in_order(colname_orders) if colum_names is None: raise Exception("could not combine all column names to a "\ "consistent order. you have to provide a reference table") types = dict() for table in tables: for name in table._colNames: type_ = table.getType(name) if types.get(name, type_) != type_: if not force_merge: raise Exception("type conflictfor column %s" % name) print "type conflict:",name, types.get(name, type_), type_ types[name] = type_ formats = dict() for table in tables: for name in table._colNames: format_ = table.getFormat(name) if formats.get(name, format_) != format_: if not force_merge: raise Exception("format conflict for column %s" % name) print "format conflict:", name, formats.get(name, format_), format_ formats[name] = format_ final_types = [types.get(n) for n in colum_names] final_formats = [formats.get(n) for n in colum_names] prototype = Table._create(colum_names, final_types, final_formats) return prototype, colum_names
def testIfThenElse(): t = Table(["a", "b", "c"], [str, int, int], ["%s", "%d", "%d"],[]) t.rows.append(["0", 1, 2]) t.rows.append([None, 2, 1]) t._print() t.addColumn("x", (t.a.isNotNone()).thenElse(t.b, t.c)) assert t.getColNames()==["a", "b", "c", "x"] print t._print() t.addColumn("y", (t.a.isNotNone()).thenElse("ok", "not ok")) t._print() assert t.y.values == ["ok", "not ok"]
def toTable(colName, iterable, fmt="", type_=None, title="", meta=None):
    """Convenience wrapper: builds a one-column table named *colName*
    from *iterable* by delegating to Table.toTable with the same
    arguments."""
    return Table.toTable(colName, iterable, fmt, type_, title, meta)
def formulaTable(min_mass, max_mass,
                 C=(0, None), H=(0, None), N=(0, None),
                 O=(0, None), P=(0, None), S=(0, None),
                 prune=True):
    """
    This is a reduced Python version of HR2 formula generator,
    see http://fiehnlab.ucdavis.edu/projects/Seven_Golden_Rules/Software/

    This function generates a table containing molecular formulas
    consisting of elements C, H, N, O, P and S having a mass in range
    [**min_mass**, **max_mass**].  For each element one can provide a
    given count or an inclusive range of atom counts considered in this
    process.

    If **prune** is *True*, mass ratio rules and valence bond checks are
    used to avoid unrealistic compounds in the table, else all formulas
    explaining the given mass range are generated.

    Putting some restrictions on atomcounts, eg **C=(0, 100)**, can
    speed up the process tremendously.
    """
    import mass
    import math
    import collections
    from libms.DataStructures.Table import Table

    def _bounds(spec):
        # an element spec is either a fixed count or an inclusive range
        if isinstance(spec, collections.Sequence):
            return spec
        return spec, spec

    def _format_formula(c, h, n, o, p, s):
        # "." separates the element groups so counts of 0 and 1 can be
        # stripped with simple substring replacements
        mf = "C%d.H%d.N%d.O%d.P%d.S%d." % (c, h, n, o, p, s)
        for sym in "CHNOPS":
            mf = mf.replace(sym + "0.", ".")        # drop absent elements
        for sym in "CHNOPS":
            mf = mf.replace(sym + "1.", sym + ".")  # omit count of one
        return mf.replace(".", "")

    cmin, cmax = _bounds(C)
    hmin, hmax = _bounds(H)
    nmin, nmax = _bounds(N)
    omin, omax = _bounds(O)
    pmin, pmax = _bounds(P)
    smin, smax = _bounds(S)

    # a missing upper bound means "as many atoms as fit into max_mass"
    cmax = math.ceil(max_mass / mass.C) if cmax is None else cmax
    hmax = math.ceil(max_mass / mass.H) if hmax is None else hmax
    nmax = math.ceil(max_mass / mass.N) if nmax is None else nmax
    omax = math.ceil(max_mass / mass.O) if omax is None else omax
    pmax = math.ceil(max_mass / mass.P) if pmax is None else pmax
    smax = math.ceil(max_mass / mass.S) if smax is None else smax

    # upper bounds for x/C ratios used when pruning:
    hcmax = 6  # 3
    ncmax = 4  # 2
    ocmax = 3  # 1.2
    pcmax = 6  # 0.32
    scmax = 2  # 0.65

    # valence values for bound checks:
    valh = -1
    valc = +2
    valn = 1
    valo = 0
    valp = 3
    vals = 4

    int_range = lambda a, b: xrange(int(a), int(b))
    rows = []
    # nested search over element counts; each level tightens the
    # remaining mass budget and (when pruning) the x/C ratio bound
    for c in int_range(cmin, cmax + 1):
        resmc_max = max_mass - c * mass.C
        s1 = min(smax, math.floor(resmc_max / mass.S))
        if prune:
            s1 = min(s1, scmax * c)
        for s in int_range(smin, s1 + 1):
            resms_max = resmc_max - s * mass.S
            p1 = min(pmax, math.floor(resms_max / mass.P))
            if prune:
                p1 = min(p1, pcmax * c)
            for p in int_range(pmin, p1 + 1):
                resmp_max = resms_max - p * mass.P
                o1 = min(omax, math.floor(resmp_max / mass.O))
                if prune:
                    o1 = min(o1, ocmax * c)
                for o in int_range(omin, o1 + 1):
                    resmo_max = resmp_max - o * mass.O
                    n1 = min(nmax, math.floor(resmo_max / mass.N))
                    if prune:
                        n1 = min(n1, ncmax * c)
                    for n in int_range(nmin, n1 + 1):
                        resmn_max = resmo_max - n * mass.N
                        h1 = min(hmax, math.floor(resmn_max / mass.H))
                        if prune:
                            h1 = min(h1, hcmax * c)
                        for h in int_range(hmin, h1 + 1):
                            resmh_max = resmn_max - h * mass.H
                            if 0 <= resmh_max <= max_mass - min_mass:
                                bond = (2.0 + c * valc + n * valn + o * valo
                                        + p * valp + s * vals + h * valh) / 2.0
                                # pruning rejects negative or half-integer
                                # bond counts
                                if not prune or (bond >= 0
                                                 and bond % 1 != 0.5):
                                    mf = _format_formula(c, h, n, o, p, s)
                                    rows.append([mf, max_mass - resmh_max])
    return Table(["mf", "m0"], [str, float], ["%s", "%.5f"], rows)
def testIllegalRows(): try: t = Table(["a", "b"], [float, float], ["%f", "%f"], [(1, 2)]) except Exception, e: assert "not all rows are lists" in str(e), str(e)
def testDetectionOfUnallowdColumnNames(): ex = None try: Table(["__init__"], [int], ["%d"]) except Exception, e: ex = e.message
def testIfThenElse(): t = Table(["a", "b", "c"], [str, int, int], ["%s", "%d", "%d"], []) t.rows.append(["0", 1, 2]) t.rows.append([None, 2, 1]) t._print() t.addColumn("x", (t.a.isNotNone()).thenElse(t.b, t.c)) assert t.getColNames() == ["a", "b", "c", "x"] print t._print() t.addColumn("y", (t.a.isNotNone()).thenElse("ok", "not ok")) t._print() assert t.y.values == ["ok", "not ok"]