コード例 #1
0
def test_transpose():
    """Check that transposing a bundle of bundles swaps the two key levels.

    Three category bundles share the keys x, y and z; cat_c additionally
    carries a key "other".  After transpose() the outer keys are expected to
    be x, y and z only ("other" must not appear), each holding one entry per
    category, and the stored arrays must be the very same objects as before.
    """
    cat_a = d.bundle(x=n.array([1, 2, 3]),
                     y=n.array([4, 5, 6]),
                     z=n.array([7, 8, 9]))
    cat_b = d.bundle(x=n.array([1, 2, 3]),
                     y=n.array([4, 5, 6]),
                     z=n.array([7, 8, 9]))
    cat_c = d.bundle(x=n.array([1, 2, 3]),
                     y=n.array([4, 5, 6]),
                     z=n.array([7, 8, 9]),
                     other=n.array([10, 11, 12]))

    b = d.bundle(cat_a=cat_a, cat_b=cat_b, cat_c=cat_c)

    # nested attribute access reaches the original arrays
    assert all(b.cat_a.x == [1, 2, 3])
    assert all(b.cat_b.y == [4, 5, 6])
    assert all(b.cat_c.y == [4, 5, 6])

    assert list(b.cat_a.keys()) == ["x", "y", "z"]
    assert list(b.cat_b.keys()) == ["x", "y", "z"]
    assert list(b.cat_c.keys()) == ["other", "x", "y", "z"]

    bt = b.transpose()

    assert list(bt.x.keys()) == ["cat_a", "cat_b", "cat_c"]
    assert list(bt.y.keys()) == ["cat_a", "cat_b", "cat_c"]
    assert list(bt.z.keys()) == ["cat_a", "cat_b", "cat_c"]
    # "other" exists in cat_c only and is expected to be dropped
    assert "other" not in list(bt.keys())

    # the transposed bundle must reference the same array objects, not copies
    assert bt.x.cat_a is b.cat_a.x
    assert bt.y.cat_a is b.cat_a.y
    assert bt.z.cat_a is b.cat_a.z
コード例 #2
0
def test():
    """Interactive demo: run a VisualCutter on two random samples.

    Builds a "sig" and a "bg" sample with the variables var1 and var2,
    transposes them into the varname -> category layout, and starts a
    VisualCutter whose callbacks draw a var1 histogram of the selected events.
    """
    # sample sizes must be integers; numpy rejects float sizes such as 1e5
    nsig = 100000
    nbg = 10000

    sig = d.bundle(var1=n.random.normal(2, 2, nsig), var2=n.random.normal(1, 1, nsig))
    bg = d.bundle(var1=n.random.normal(0, 1, nbg), var2=n.random.normal(-1, 2, nbg))
    vars = d.bundle(sig=sig, bg=bg).transpose()
    weights = d.bundle(sig=n.ones(nsig), bg=n.ones(nbg))

    hist1d = d.bundleize(d.factory.hist1d)
    d.visual()

    def draw_var1(vc, vars, weights, mask):
        # shared by both callbacks: histogram var1 of the masked events into
        # the cutter's figure, signal in red and background in black
        p.figure(vc.myfig.number)
        h1 = hist1d(vars.var1[mask], n.linspace(-20, 20, 101), weights[mask])
        c = d.bundle(sig="r", bg="k")
        h1.line(c=c)

    def initfunc(vc, vars, weights, mask):
        # first call: create the figure, then draw into it
        vc.myfig = p.figure()
        draw_var1(vc, vars, weights, mask)

    def updatefunc(vc, vars, weights, mask):
        # later calls: wipe the figure, redraw and refresh the canvas
        vc.myfig.clear()
        draw_var1(vc, vars, weights, mask)
        vc.myfig.canvas.draw()

    def anyfunc(*args):
        # debugging aid, not wired to the cutter below
        print(args)

    c = VisualCutter(vars, weights, "(vars.var1 > %(var1)s) & (vars.var2 < %(var2)s)", initfunc, updatefunc)
    c.run()
コード例 #3
0
ファイル: objbundle_test.py プロジェクト: IceCube-SPNO/dashi
def test_transpose():
    """Verify that transpose() exchanges the outer and inner keys of a nested bundle.

    cat_a and cat_b hold x, y, z; cat_c holds x, y, z and "other".  The
    transposed bundle is expected to expose x, y and z (but not "other"),
    each with the entries cat_a, cat_b, cat_c, without copying the arrays.
    """
    cat_a = d.bundle(x=n.array([1,2,3]), y=n.array([4,5,6]), z=n.array([7,8,9]))
    cat_b = d.bundle(x=n.array([1,2,3]), y=n.array([4,5,6]), z=n.array([7,8,9]))
    cat_c = d.bundle(x=n.array([1,2,3]), y=n.array([4,5,6]), z=n.array([7,8,9]), other=n.array([10,11,12]))

    b = d.bundle(cat_a=cat_a, cat_b=cat_b, cat_c=cat_c)

    assert all(b.cat_a.x == [1,2,3])
    assert all(b.cat_b.y == [4,5,6])
    assert all(b.cat_c.y == [4,5,6])

    assert list(b.cat_a.keys()) == ["x","y","z"]
    assert list(b.cat_b.keys()) == ["x","y","z"]
    assert list(b.cat_c.keys()) == ["other", "x","y","z"]

    bt = b.transpose()

    assert list(bt.x.keys()) == ["cat_a", "cat_b", "cat_c"]
    assert list(bt.y.keys()) == ["cat_a", "cat_b", "cat_c"]
    assert list(bt.z.keys()) == ["cat_a", "cat_b", "cat_c"]
    # the key that only one category provides must not survive the transpose
    assert "other" not in list(bt.keys())

    # identity, not equality: the arrays must be shared with the source bundle
    assert bt.x.cat_a is b.cat_a.x
    assert bt.y.cat_a is b.cat_a.y
    assert bt.z.cat_a is b.cat_a.z
コード例 #4
0
 def updatefunc(vc,vars,weights,mask):
     """Redraw the var1 histogram of the masked events in the cutter's figure."""
     figure = vc.myfig
     figure.clear()
     p.figure(figure.number)
     edges = n.linspace(-20,20,101)
     histogram = hist1d(vars.var1[mask], edges, weights[mask])
     # one line colour per category: "sig" in red, "bg" in black
     histogram.line(c=d.bundle(sig="r", bg="k"))
     figure.canvas.draw()
コード例 #5
0
    def __init__(self, varbundle, weights, cutstring, initfunc, updatefunc, basemask=None):
        """Store the inputs and derive a value range for every cut placeholder.

        varbundle: varnames -> categories -> numpy arrays
        weights:   object offering keys() and get(key); used here only to
                   size the default mask
        cutstring: cut expression containing %(name)s style placeholders
        initfunc, updatefunc: callbacks, stored unchanged
        basemask:  optional mask; when None an all-True boolean mask with one
                   array per key of weights is built

        For each placeholder that names a key of varbundle, self.ranges holds
        the (nanmin, nanmax) over all categories; for any other placeholder a
        message is printed and the range is set to None.
        """
        self.vars = varbundle
        self.weights = weights

        self.varnames = self.vars.keys()
        self.catnames = self.vars.transpose().keys()

        self.initfunc = initfunc
        self.updatefunc = updatefunc

        self.cutstring = cutstring

        if basemask is None:
            # default: select everything, one boolean array per weights key
            self.basemask = d.bundle(**{k: n.ones(len(weights.get(k)), dtype=bool) for k in weights.keys()})
        else:
            self.basemask = basemask

        self.ranges = dict()

        # raw string avoids invalid-escape warnings; matches %(name)<conversion char>
        placeholders = re.findall(r"%\((\w+)\)\w", cutstring)
        for ph in placeholders:
            if ph not in self.varnames:
                # single parenthesised argument: identical output on Python 2 and 3
                print("Couldn't identify key %s. Set range in self.ranges manually!" % ph)
                self.ranges[ph] = None
            else:
                # per-category extrema first, then the extremum across categories
                mi = n.nanmin( self.vars.get(ph).map(n.nanmin).values())
                ma = n.nanmax( self.vars.get(ph).map(n.nanmax).values())
                self.ranges[ph] = (mi,ma)
コード例 #6
0
def test_bundle_creation():
    """A bundle built from keyword arguments exposes them as keys and attributes."""
    expected = {"x": 3, "y": 1, "z": 2}
    bundle = d.bundle(y=1, z=2, x=3)

    # construction order is y, z, x; the keys are expected in alphabetical order
    assert list(bundle.keys()) == ["x", "y", "z"]
    for key in ("x", "y", "z"):
        assert getattr(bundle, key) == expected[key]

    assert bundle._b_type == int
    assert isinstance(bundle, d.objbundle.object_bundle)
コード例 #7
0
ファイル: objbundle_test.py プロジェクト: IceCube-SPNO/dashi
def test_bundle_creation():
    """Create a three-member int bundle and check its keys, values and type."""
    created = d.bundle(y=1, z=2, x=3)
    names = list(created.keys())

    # keys are expected alphabetically although they were passed as y, z, x
    assert names == ["x", "y", "z"]
    for name, value in zip(("x", "y", "z"), (3, 1, 2)):
        assert getattr(created, name) == value

    assert created._b_type == int
    assert isinstance(created, d.objbundle.object_bundle)
コード例 #8
0
ファイル: hub.py プロジェクト: tianluyuan/dashi
        def get_one_variable(self,varname,current,total, unpack_recarrays=False):
            """Helper that retrieves a single variable from every dataset.

            varname:          name looked up in self.vars; if it is not found
                              there (or has no vardef) it is handed to the
                              dataset's _ds_get directly
            current, total:   only used for the progress message
            unpack_recarrays: if true, arrays with named columns are split
                              into one entry per column, keyed "<dataset>_<column>"

            Returns d.bundle(**arrays) with one entry per dataset (or per
            dataset column), or None if no dataset yielded an array.
            Datasets whose lookup raised ValueError get a zero-length array.
            """
            # Python 2 print statement; the trailing comma keeps the cursor on
            # the same line so the "| done ..." message below continues it
            print "  %3d/%d reading variable %s" % (current,total,varname),
            start2 = time.time()
            arrays = {}
            missing_datasets = []
            for name,dataset in self.datasets.iteritems():
                tmp = None
                try:
                    # prefer the registered variable definition, if there is one
                    if varname in self.vars and (self.vars[varname].vardef is not None):
                        v = self.vars[varname]
                        tmp = dataset._ds_get(v.vardef)
                        if v.transform is not None:
                            tmp = v.transform(tmp)
                    else:
                        tmp = dataset._ds_get(varname)
                except ValueError:
                    # remember the dataset; it is filled with an empty array below
                    missing_datasets.append(name)

                # tmp is now pointing either to None, a 1d array or a recarray with named columns
                if tmp is not None:
                    # unpack the different columns of the recarray into 1d arrays in different
                    # slots of the resulting bundle
                    if unpack_recarrays: 
                        if tmp.dtype.names is None:
                            arrays[name] = tmp
                        else:
                            for column in tmp.dtype.names:
                                arrays[name+"_"+column] = tmp[column]

                    # just store the array
                    else:
                        arrays[name] = tmp

            # nothing could be read from any dataset
            if len(arrays) == 0:
                print "| done after %d seconds" % (time.time() - start2)
                return None
            
            # add empty arrays where necessary
            # rationale: empty arrays are easier to handle than bundles with missing keys
            # TODO: maybe make this configurable
            if len(missing_datasets) > 0:  
                # dtype is taken from the first collected array; indexing
                # values() relies on it returning a list (Python 2)
                dtype = arrays.values()[0].dtype
                for name in missing_datasets:
                    arrays[name] = n.zeros(0, dtype=dtype)
                print "| filling empty keys",
            print "| done after %d seconds" % (time.time() - start2)
            sys.stdout.flush()

            return d.bundle(**arrays)
コード例 #9
0
ファイル: hub.py プロジェクト: iamankit1995/dashi
        def get_one_variable(self, varname, current, total, unpack_recarrays=False):
            """Helper that retrieves a single variable from every dataset.

            varname:          name looked up in self.vars; if it is not found
                              there (or has no vardef) it is handed to the
                              dataset's _ds_get directly
            current, total:   only used for the progress message
            unpack_recarrays: if true, arrays with named columns are split
                              into one entry per column, keyed "<dataset>_<column>"

            Returns d.bundle(**arrays) with one entry per dataset (or per
            dataset column), or None if no dataset yielded an array.
            Datasets whose lookup raised ValueError get a zero-length array.
            """
            # Python 2 print statement; the trailing comma keeps the cursor on
            # the same line so the "| done ..." message below continues it
            print "  %3d/%d reading variable %s" % (current, total, varname),
            start2 = time.time()
            arrays = {}
            missing_datasets = []
            for name, dataset in self.datasets.iteritems():
                tmp = None
                try:
                    # prefer the registered variable definition, if there is one
                    if varname in self.vars and (self.vars[varname].vardef is not None):
                        v = self.vars[varname]
                        tmp = dataset._ds_get(v.vardef)
                        if v.transform is not None:
                            tmp = v.transform(tmp)
                    else:
                        tmp = dataset._ds_get(varname)
                except ValueError:
                    # remember the dataset; it is filled with an empty array below
                    missing_datasets.append(name)

                # tmp is now pointing either to None, a 1d array or a recarray with named columns
                if tmp is not None:
                    # unpack the different columns of the recarray into 1d arrays in different
                    # slots of the resulting bundle
                    if unpack_recarrays:
                        if tmp.dtype.names is None:
                            arrays[name] = tmp
                        else:
                            for column in tmp.dtype.names:
                                arrays[name + "_" + column] = tmp[column]

                    # just store the array
                    else:
                        arrays[name] = tmp

            # nothing could be read from any dataset
            if len(arrays) == 0:
                print "| done after %d seconds" % (time.time() - start2)
                return None

            # add empty arrays where necessary
            # rationale: empty arrays are easier to handle than bundles with missing keys
            # TODO: maybe make this configurable
            if len(missing_datasets) > 0:
                # dtype is taken from the first collected array; indexing
                # values() relies on it returning a list (Python 2)
                dtype = arrays.values()[0].dtype
                for name in missing_datasets:
                    arrays[name] = n.zeros(0, dtype=dtype)
                print "| filling empty keys",
            print "| done after %d seconds" % (time.time() - start2)
            sys.stdout.flush()

            return d.bundle(**arrays)
コード例 #10
0
ファイル: objbundle_test.py プロジェクト: IceCube-SPNO/dashi
def test_int_bundle():
    """Comparison and addition on a bundle of ints must match the plain int results."""
    values = {"x": 5, "y": -2}
    bundle = d.bundle(x=values["x"], y=values["y"])

    # "> 0" on the bundle is expected to give one boolean per member
    positive = bundle > 0
    assert positive.x == (values["x"] > 0)
    assert positive.y == (values["y"] > 0)

    # "+ 2" on the bundle is expected to shift every member
    shifted = bundle + 2
    assert shifted.x == (values["x"] + 2)
    assert shifted.y == (values["y"] + 2)
コード例 #11
0
def test_int_bundle():
    """Operators applied to an int bundle are compared against the same operators on the ints."""
    first, second = 5, -2
    pair = d.bundle(x=first, y=second)

    # member-wise comparison
    is_positive = pair > 0
    for name, plain in (("x", first), ("y", second)):
        assert getattr(is_positive, name) == (plain > 0)

    # member-wise addition
    plus_two = pair + 2
    for name, plain in (("x", first), ("y", second)):
        assert getattr(plus_two, name) == (plain + 2)
コード例 #12
0
def test_ndarray_bundle():
    """Check that masking, arithmetic, attributes and methods act per member array.

    Two random arrays of different length are bundled; boolean-mask indexing,
    adding a scalar, reading .shape and calling .sum() on the bundle are each
    compared with the same operation on the individual arrays.
    """
    # sizes must be integers; numpy rejects float sizes such as 1e3
    x = n.random.normal(2, 2, 1000)
    y = n.random.normal(-2, 2, 2000)

    x_cut = x[x > 0]
    y_cut = y[y > 0]

    b = d.bundle(x=x, y=y)
    b_cut = b[b > 0]
    assert (b_cut.x == x_cut).all()
    assert (b_cut.y == y_cut).all()

    b_add = b + 2
    assert (b_add.x == (x + 2)).all()
    assert (b_add.y == (y + 2)).all()

    # attribute access on the bundle yields a bundle of the members' attributes
    shapes = b_cut.shape
    assert shapes.x == x_cut.shape
    assert shapes.y == y_cut.shape

    # method calls on the bundle yield a bundle of the members' results
    sums = b_cut.sum()
    assert sums.x == x_cut.sum()
    assert sums.y == y_cut.sum()
コード例 #13
0
ファイル: objbundle_test.py プロジェクト: IceCube-SPNO/dashi
def test_diversify():
    """Check diversify(): each key is fanned out to several new keys.

    Without copy the new keys must reference the original arrays; with
    copy=True they must be distinct objects holding equal values.
    """
    # sizes must be integers; numpy rejects float sizes such as 1e3
    x = n.random.normal(2,2,1000)
    y = n.random.normal(-2,2,2000)

    b = d.bundle(x=x,y=y)

    b2 = b.diversify({ "x" : ["x1", "x2"], "y" : ["y1", "y2"]})

    assert list(b2.keys()) == ["x1", "x2", "y1", "y2"]
    # default: no copy, the very same array objects are shared
    assert b2.x1 is x
    assert b2.x2 is x
    assert b2.y1 is y
    assert b2.y2 is y

    b3 = b.diversify({ "x" : ["x1", "x2"], "y" : ["y1", "y2"]}, copy=True)
    # copy=True: new objects ...
    assert b3.x1 is not x
    assert b3.x2 is not x
    assert b3.y1 is not y
    assert b3.y2 is not y
    # ... with unchanged contents
    assert (b3.x1 == x).all()
    assert (b3.x2 == x).all()
    assert (b3.y1 == y).all()
    assert (b3.y2 == y).all()
コード例 #14
0
ファイル: objbundle_test.py プロジェクト: IceCube-SPNO/dashi
def test_ndarray_bundle():
    """Compare bundle-level operations with the same operations on each array.

    Covers boolean-mask indexing, scalar addition, the .shape attribute and
    the .sum() method for a bundle of two random arrays.
    """
    # integer sizes: numpy does not accept floats like 1e3 as a sample size
    x = n.random.normal(2,2,1000)
    y = n.random.normal(-2,2,2000)

    x_cut = x[x>0]
    y_cut = y[y>0]

    b = d.bundle(x=x,y=y)
    b_cut = b[b>0]
    assert (b_cut.x == x_cut).all()
    assert (b_cut.y == y_cut).all()

    b_add = b + 2
    assert (b_add.x == (x+2)).all()
    assert (b_add.y == (y+2)).all()

    # .shape on the bundle is expected to give the shape of each member
    shapes = b_cut.shape
    assert shapes.x == x_cut.shape
    assert shapes.y == y_cut.shape

    # .sum() on the bundle is expected to give the sum of each member
    sums = b_cut.sum()
    assert sums.x == x_cut.sum()
    assert sums.y == y_cut.sum()
コード例 #15
0
def test_diversify():
    """Verify that diversify() maps every key onto a list of new keys.

    The plain call must share the original arrays between the new keys, the
    call with copy=True must produce separate objects with equal contents.
    """
    # integer sizes: numpy does not accept floats like 1e3 as a sample size
    x = n.random.normal(2, 2, 1000)
    y = n.random.normal(-2, 2, 2000)

    b = d.bundle(x=x, y=y)

    b2 = b.diversify({"x": ["x1", "x2"], "y": ["y1", "y2"]})

    assert list(b2.keys()) == ["x1", "x2", "y1", "y2"]
    # shared objects when no copy is requested
    assert b2.x1 is x
    assert b2.x2 is x
    assert b2.y1 is y
    assert b2.y2 is y

    b3 = b.diversify({"x": ["x1", "x2"], "y": ["y1", "y2"]}, copy=True)
    # distinct objects when a copy is requested ...
    assert b3.x1 is not x
    assert b3.x2 is not x
    assert b3.y1 is not y
    assert b3.y2 is not y
    # ... but element-wise equal to the originals
    assert (b3.x1 == x).all()
    assert (b3.x2 == x).all()
    assert (b3.y1 == y).all()
    assert (b3.y2 == y).all()
コード例 #16
0
 def initfunc(vc,vars,weights,mask):
     """Create the cutter's figure and draw the var1 histogram of the masked events."""
     vc.myfig = p.figure()
     p.figure(vc.myfig.number)
     edges = n.linspace(-20,20,101)
     histogram = hist1d(vars.var1[mask], edges, weights[mask])
     # one line colour per category: "sig" in red, "bg" in black
     histogram.line(c=d.bundle(sig="r", bg="k"))
コード例 #17
0
ファイル: hub.py プロジェクト: iamankit1995/dashi
    def get(self, vars, unpack_recarrays=False):
        """
            Read one or several variables from all datasets.

            vars is either a string or a list of strings
            with variable names; anything else raises ValueError.

            unpack_recarrays is passed on to the per-variable helper, which
            then splits arrays with named columns into one entry per column.

            returns either a ndarray_bundle (string argument) or a
            ndarray_bundle_bundle (list argument), or None if nothing
            could be read
        """
        start = time.time()

        def get_one_variable(self, varname, current, total, unpack_recarrays=False):
            """Helper that retrieves a single variable from every dataset.

            Returns d.bundle(**arrays), or None if no dataset yielded an
            array; current and total are only used for the progress message.
            """
            # Python 2 print statement; the trailing comma keeps the cursor on
            # the same line so the "| done ..." message below continues it
            print "  %3d/%d reading variable %s" % (current, total, varname),
            start2 = time.time()
            arrays = {}
            missing_datasets = []
            for name, dataset in self.datasets.iteritems():
                tmp = None
                try:
                    # prefer the registered variable definition, if there is one
                    if varname in self.vars and (self.vars[varname].vardef is not None):
                        v = self.vars[varname]
                        tmp = dataset._ds_get(v.vardef)
                        if v.transform is not None:
                            tmp = v.transform(tmp)
                    else:
                        tmp = dataset._ds_get(varname)
                except ValueError:
                    # remember the dataset; it is filled with an empty array below
                    missing_datasets.append(name)

                # tmp is now pointing either to None, a 1d array or a recarray with named columns
                if tmp is not None:
                    # unpack the different columns of the recarray into 1d arrays in different
                    # slots of the resulting bundle
                    if unpack_recarrays:
                        if tmp.dtype.names is None:
                            arrays[name] = tmp
                        else:
                            for column in tmp.dtype.names:
                                arrays[name + "_" + column] = tmp[column]

                    # just store the array
                    else:
                        arrays[name] = tmp

            # nothing could be read from any dataset
            if len(arrays) == 0:
                print "| done after %d seconds" % (time.time() - start2)
                return None

            # add empty arrays where necessary
            # rationale: empty arrays are easier to handle than bundles with missing keys
            # TODO: maybe make this configurable
            if len(missing_datasets) > 0:
                # dtype is taken from the first collected array; indexing
                # values() relies on it returning a list (Python 2)
                dtype = arrays.values()[0].dtype
                for name in missing_datasets:
                    arrays[name] = n.zeros(0, dtype=dtype)
                print "| filling empty keys",
            print "| done after %d seconds" % (time.time() - start2)
            sys.stdout.flush()

            return d.bundle(**arrays)

        # single variable name: return the helper's result unchanged
        if isinstance(vars, str):
            tmp = get_one_variable(self, vars, 1, 1, unpack_recarrays)
            print "total time:", time.time() - start
            return tmp
        # list of names: one bundle per variable, collected by variable name
        elif isinstance(vars, list) and all([isinstance(i, str) for i in vars]):
            bundles = dict(
                [
                    (varname, get_one_variable(self, varname, i + 1, len(vars), unpack_recarrays))
                    for i, varname in enumerate(vars)
                ]
            )
            # drop the variables for which nothing could be read
            bundles = dict([(i, j) for i, j in bundles.iteritems() if j is not None])
            if len(bundles) == 0:
                print "total time:", time.time() - start
                return None
            else:
                tmp = d.bundle(**bundles)
                print "total time:", time.time() - start
                return tmp
        else:
            raise ValueError("vars must be either a string or a list of strings")
コード例 #18
0
ファイル: hub.py プロジェクト: tianluyuan/dashi
    def get(self, vars, unpack_recarrays=False):
        """
            Read one or several variables from all datasets.

            vars is either a string or a list of strings
            with variable names; anything else raises ValueError.

            unpack_recarrays is passed on to the per-variable helper, which
            then splits arrays with named columns into one entry per column.

            returns either a ndarray_bundle (string argument) or a
            ndarray_bundle_bundle (list argument), or None if nothing
            could be read
        """
        start = time.time()

        def get_one_variable(self,varname,current,total, unpack_recarrays=False):
            """Helper that retrieves a single variable from every dataset.

            Returns d.bundle(**arrays), or None if no dataset yielded an
            array; current and total are only used for the progress message.
            """
            # Python 2 print statement; the trailing comma keeps the cursor on
            # the same line so the "| done ..." message below continues it
            print "  %3d/%d reading variable %s" % (current,total,varname),
            start2 = time.time()
            arrays = {}
            missing_datasets = []
            for name,dataset in self.datasets.iteritems():
                tmp = None
                try:
                    # prefer the registered variable definition, if there is one
                    if varname in self.vars and (self.vars[varname].vardef is not None):
                        v = self.vars[varname]
                        tmp = dataset._ds_get(v.vardef)
                        if v.transform is not None:
                            tmp = v.transform(tmp)
                    else:
                        tmp = dataset._ds_get(varname)
                except ValueError:
                    # remember the dataset; it is filled with an empty array below
                    missing_datasets.append(name)

                # tmp is now pointing either to None, a 1d array or a recarray with named columns
                if tmp is not None:
                    # unpack the different columns of the recarray into 1d arrays in different
                    # slots of the resulting bundle
                    if unpack_recarrays: 
                        if tmp.dtype.names is None:
                            arrays[name] = tmp
                        else:
                            for column in tmp.dtype.names:
                                arrays[name+"_"+column] = tmp[column]

                    # just store the array
                    else:
                        arrays[name] = tmp

            # nothing could be read from any dataset
            if len(arrays) == 0:
                print "| done after %d seconds" % (time.time() - start2)
                return None
            
            # add empty arrays where necessary
            # rationale: empty arrays are easier to handle than bundles with missing keys
            # TODO: maybe make this configurable
            if len(missing_datasets) > 0:  
                # dtype is taken from the first collected array; indexing
                # values() relies on it returning a list (Python 2)
                dtype = arrays.values()[0].dtype
                for name in missing_datasets:
                    arrays[name] = n.zeros(0, dtype=dtype)
                print "| filling empty keys",
            print "| done after %d seconds" % (time.time() - start2)
            sys.stdout.flush()

            return d.bundle(**arrays)


        # single variable name: return the helper's result unchanged
        if isinstance(vars, str):
            tmp = get_one_variable(self, vars, 1,1, unpack_recarrays)
            print "total time:", time.time()-start
            return tmp
        # list of names: one bundle per variable, collected by variable name
        elif isinstance(vars, list) and all([isinstance(i, str) for i in vars]):
            bundles = dict( [ (varname, get_one_variable(self, varname,i+1,len(vars),unpack_recarrays)) 
                              for i,varname in enumerate(vars)] )
            # drop the variables for which nothing could be read
            bundles = dict( [ (i,j) for i,j in bundles.iteritems() if j is not None ] )
            if len(bundles) == 0:
                print "total time:", time.time()-start
                return None
            else:
                tmp =  d.bundle(**bundles)
                print "total time:", time.time()-start
                return tmp
        else:
            raise ValueError("vars must be either a string or a list of strings")
コード例 #19
0
import numpy as n
import dashi as d

# Minimal demo: the same method call on two plain arrays and on a bundle of them.
x = n.array( [1,2,3,4, 5] )
y = n.array( [6,7,8,9,10] )

# wrap both arrays in one bundle under the keys "x" and "y"
bundle = d.bundle( x=x, y=y )

# sums of the individual arrays; the results are not stored here
x.sum()
y.sum()

# the same call on the bundle
# NOTE(review): presumably forwarded to every member array, giving one sum per key - confirm against dashi
bundle.sum()