예제 #1
0
def get_labels_barray(selected_slice=None):
    """Open the training label column and optionally slice it.

    The column is opened with ``blz.open`` from ``tools.TRAINING_BLZ_PATH``
    joined with ``TRAINING_COLUMN_NAMES[1]``.

    Parameters:
        selected_slice: ``None`` (default) to get the opened object itself,
            or a ``slice`` that is applied to the opened object.

    Returns:
        The object returned by ``blz.open`` when ``selected_slice`` is
        ``None``; otherwise the result of indexing it with the slice.

    Raises:
        AssertionError: if ``selected_slice`` is neither ``None`` nor a
            ``slice``.
    """
    # Identity test instead of ``== None``: equality can be overridden by the
    # argument's type, identity cannot.
    if selected_slice is not None:
        assert isinstance(selected_slice, slice)

    labels = blz.open(os.path.join(tools.TRAINING_BLZ_PATH, TRAINING_COLUMN_NAMES[1]))
    if selected_slice is None:
        return labels
    return labels[selected_slice]
예제 #2
0
def modelPredictor(modelsPath_modelIndex_dataPath_colNames_tuple):
    """
    Predict with one pickled model over on-disk columns and store the result.

    Input: A tuple, with following four attributes (with order):
            modelsPath: string, the path to the trained models. (pickle file)
            modelIndex: integer, the index of the model to predict.
            dataPath: string, the path to the data.
            colNames: a list of strings, column names of the data. The first
                      name is the Id column, the remaining ones are fed to the
                      model. It should be like ["Id", "V1", ...]
    Output: None. A btable with the columns "Id" and "Predict" is created with
            its rootdir under dataPath.

    Notes:
    modelPredictor will create following directories for you if they do not exist.
            1. Model_No{modelIndex}_predicted_array: it will be under the dataPath.
    An existing directory of that name is removed first.
    """
    import shutil

    # Number of rows passed to model.predict() per call.
    divideN = 300000
    modelsPath, modelIndex, dataPath, colNames = modelsPath_modelIndex_dataPath_colNames_tuple

    # NOTE(review): pickle.load can execute arbitrary code; modelsPath must
    # point to a trusted file.
    with open(modelsPath, "rb") as rf:
        models = pickle.load(rf)
    model = models[modelIndex]
    del models

    # Read in data with btable.
    Id = blz.open(os.path.join(dataPath, colNames[0]))
    totalN = len(Id)
    # Chunk boundaries: every multiple of divideN below totalN, then totalN.
    # A stepped range avoids "/" inside range(), which is only an integer
    # division on Python 2.
    nodes_list = list(range(0, totalN, divideN)) + [totalN]
    nodes_pair_list = list(zip(nodes_list[:-1], nodes_list[1:]))

    # Prediction.
    y_predict = np.zeros(totalN)
    print("[Model No.{modelIndex}] Prediction process begins.".format(modelIndex = modelIndex))
    for begin, end in nodes_pair_list:
        print("[Model No.{modelIndex}] Processing {begin} ~ {end} observations.".format(modelIndex=modelIndex, begin = begin + 1, end = end))
        columns = [blz.open(os.path.join(dataPath, colname))[begin:end] for colname in colNames[1:]]
        X = np.column_stack(columns)
        y_predict[begin:end] = model.predict(X)

    columns = [Id, blz.barray(y_predict)]
    data_rootdir = os.path.join(dataPath, "Model_No{modelIndex}_predicted_array".format(modelIndex = modelIndex))
    # os.listdir() yields bare names, so testing the joined path against it
    # never matched; check the path itself instead.
    if os.path.isdir(data_rootdir):
        print("Removing Old result_table directory for new btable.")
        # rmtree instead of a shell-built "rm -rf": no quoting problems with
        # unusual paths and no dependency on a POSIX shell.
        shutil.rmtree(data_rootdir)
    blz.btable(columns = columns, names = ["Id", "Predict"], rootdir = data_rootdir)
    print("The result_table btable rootdir is under {path}".format(path=data_rootdir))
예제 #3
0
def drop(persist):
    """Delete the storage that *persist* refers to.

    *persist* is first normalised with ``_persist_convert``. For the
    ``'blz'`` format the data set is opened and its directory tree removed;
    for ``'csv'``, ``'json'`` and ``'hdf5'`` the file at ``path`` is
    unlinked. Any other format is left untouched.

    Raises ``Exception`` when a ``RuntimeError`` occurs while handling a
    ``'blz'`` data set.
    """
    target = _persist_convert(persist)

    if target.format == 'blz':
        from shutil import rmtree
        try:
            # Opening first lets BLZ report a missing data set.
            blz.open(rootdir=target.path)
            rmtree(target.path)
        except RuntimeError:
            # Maybe BLZ should throw other exceptions for this!
            raise Exception("No data set at uri '%s'" % target.uri)
        return

    if target.format in ('csv', 'json', 'hdf5'):
        import os
        os.unlink(target.path)
예제 #4
0
def drop(persist):
    """Delete the storage that *persist* refers to.

    *persist* is first normalised with ``_persist_convert``. For the
    ``'blz'`` format the data set is opened and its directory tree removed;
    for ``'csv'``, ``'json'`` and ``'hdf5'`` the file at ``path`` is
    unlinked. Any other format is left untouched.

    Raises ``Exception`` when a ``RuntimeError`` occurs while handling a
    ``'blz'`` data set.
    """
    target = _persist_convert(persist)

    if target.format == 'blz':
        from shutil import rmtree
        try:
            # Opening first lets BLZ report a missing data set.
            blz.open(rootdir=target.path)
            rmtree(target.path)
        except RuntimeError:
            # Maybe BLZ should throw other exceptions for this!
            raise Exception("No data set at uri '%s'" % target.uri)
        return

    if target.format in ('csv', 'json', 'hdf5'):
        import os
        os.unlink(target.path)
예제 #5
0
 def test00b(self):
     """Testing `barray` reshape (large shape)

     A 16000-element range is reshaped to (20, 20, 40) with NumPy and with
     blz, and the two results are compared.
     """
     shape = (20, 20, 40)
     expected = np.arange(16000).reshape(shape)
     actual = blz.arange(16000, rootdir=self.rootdir).reshape(shape)
     if self.open:
         # Compare against the object obtained by re-opening rootdir.
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected, actual, "Arrays are not equal")
예제 #6
0
 def test01b(self):
     """Testing `zeros` constructor (II)

     Builds two zero-filled elements of dtype '(2,4)i4' with NumPy and with
     blz, and compares them.
     """
     dtype = '(2,4)i4'
     expected = np.zeros(2, dtype=dtype)
     actual = blz.zeros(2, dtype=dtype, rootdir=self.rootdir)
     if self.open:
         # Compare against the object obtained by re-opening rootdir.
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected, actual, "Arrays are not equal")
예제 #7
0
 def test02(self):
     """Testing `ones` constructor

     Builds a (2, 2) array of dtype '(4,)i4' filled with ones with NumPy and
     with blz, and compares them.
     """
     shape, dtype = (2, 2), '(4,)i4'
     expected = np.ones(shape, dtype=dtype)
     actual = blz.ones(shape, dtype=dtype, rootdir=self.rootdir)
     if self.open:
         # Compare against the object obtained by re-opening rootdir.
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected, actual, "Arrays are not equal")
예제 #8
0
 def test03b(self):
     """Testing `fill` constructor (array default)

     The fill value handed to blz is the list [3, 3, 3, 3]; the reference
     is a NumPy array of ones multiplied by 3.
     """
     shape, dtype = (2, 2), '(4,)i4'
     expected = np.ones(shape, dtype=dtype) * 3
     actual = blz.fill(shape, [3, 3, 3, 3], dtype=dtype, rootdir=self.rootdir)
     if self.open:
         # Compare against the object obtained by re-opening rootdir.
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected, actual, "Arrays are not equal")
예제 #9
0
 def test00a(self):
     """Testing `barray` reshape

     A 16-element range is reshaped to (2, 2, 4) with NumPy and with blz,
     and the two results are compared.
     """
     shape = (2, 2, 4)
     expected = np.arange(16).reshape(shape)
     actual = blz.arange(16, rootdir=self.rootdir).reshape(shape)
     if self.open:
         # Compare against the object obtained by re-opening rootdir.
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected, actual, "Arrays are not equal")
예제 #10
0
 def test00a(self):
     """Testing `barray` reshape

     A 16-element range is reshaped to (2, 2, 4) with NumPy and with blz,
     and the two results are compared.
     """
     shape = (2, 2, 4)
     expected = np.arange(16).reshape(shape)
     actual = blz.arange(16, rootdir=self.rootdir).reshape(shape)
     if self.open:
         # Compare against the object obtained by re-opening rootdir.
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected, actual, "Arrays are not equal")
예제 #11
0
 def test00b(self):
     """Testing `barray` reshape (large shape)

     A 16000-element range is reshaped to (20, 20, 40) with NumPy and with
     blz, and the two results are compared.
     """
     shape = (20, 20, 40)
     expected = np.arange(16000).reshape(shape)
     actual = blz.arange(16000, rootdir=self.rootdir).reshape(shape)
     if self.open:
         # Compare against the object obtained by re-opening rootdir.
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected, actual, "Arrays are not equal")
예제 #12
0
 def test01b(self):
     """Testing `zeros` constructor (II)

     Builds two zero-filled elements of dtype '(2,4)i4' with NumPy and with
     blz, and compares them.
     """
     dtype = '(2,4)i4'
     expected = np.zeros(2, dtype=dtype)
     actual = blz.zeros(2, dtype=dtype, rootdir=self.rootdir)
     if self.open:
         # Compare against the object obtained by re-opening rootdir.
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected, actual, "Arrays are not equal")
예제 #13
0
 def test02(self):
     """Testing `ones` constructor

     Builds a (2, 2) array of dtype '(4,)i4' filled with ones with NumPy and
     with blz, and compares them.
     """
     shape, dtype = (2, 2), '(4,)i4'
     expected = np.ones(shape, dtype=dtype)
     actual = blz.ones(shape, dtype=dtype, rootdir=self.rootdir)
     if self.open:
         # Compare against the object obtained by re-opening rootdir.
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected, actual, "Arrays are not equal")
예제 #14
0
 def test03a(self):
     """Testing `fill` constructor (scalar default)

     The fill value handed to blz is the scalar 3; the reference is a NumPy
     array of ones multiplied by 3.
     """
     shape, dtype = (2, 200), '(4,)i4'
     expected = np.ones(shape, dtype=dtype) * 3
     actual = blz.fill(shape, 3, dtype=dtype, rootdir=self.rootdir)
     if self.open:
         # Compare against the object obtained by re-opening rootdir.
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected, actual, "Arrays are not equal")
예제 #15
0
    def testImplicitDtype(self):
        """Testing barray construction keeping dimensions (implicit dtype)

        A 6x6 identity matrix is wrapped in a barray without passing a
        dtype, and the barray is compared with the source matrix.
        """
        identity = np.eye(6)  # 2d
        wrapped = blz.barray(identity, rootdir=self.rootdir)
        if self.open:
            # Compare against the object obtained by re-opening rootdir.
            wrapped = blz.open(rootdir=self.rootdir)

        # array equality implies having the same shape
        assert_array_equal(identity, wrapped, "Arrays are not equal")
예제 #16
0
 def test00b(self):
     """Testing `__getitem()__` method with only a start (slice)

     Indexes a NumPy array and the equivalent blz array with ``slice(1)``
     and checks that shape and contents agree.
     """
     a = np.ones((27,2700), dtype="i4")*3
     b = blz.fill((27,2700), 3, dtype="i4", rootdir=self.rootdir)
     if self.open:
         # Index the object obtained by re-opening rootdir instead.
         b = blz.open(rootdir=self.rootdir)
     sl = slice(1)
     # assertEqual instead of the deprecated assert_ alias: it reports both
     # shapes on failure and still exists on current Python versions.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
예제 #17
0
    def testImplicitDtype(self):
        """Testing barray construction keeping dimensions (implicit dtype)

        A 6x6 identity matrix is wrapped in a barray without passing a
        dtype, and the barray is compared with the source matrix.
        """
        identity = np.eye(6)  # 2d
        wrapped = blz.barray(identity, rootdir=self.rootdir)
        if self.open:
            # Compare against the object obtained by re-opening rootdir.
            wrapped = blz.open(rootdir=self.rootdir)

        # array equality implies having the same shape
        assert_array_equal(identity, wrapped, "Arrays are not equal")
예제 #18
0
 def test00b(self):
     """Testing `__getitem()__` method with only a start (slice)

     Indexes a NumPy array and the equivalent blz array with ``slice(1)``
     and checks that shape and contents agree.
     """
     a = np.ones((27, 2700), dtype="i4") * 3
     b = blz.fill((27, 2700), 3, dtype="i4", rootdir=self.rootdir)
     if self.open:
         # Index the object obtained by re-opening rootdir instead.
         b = blz.open(rootdir=self.rootdir)
     sl = slice(1)
     # assertEqual instead of the deprecated assert_ alias: it reports both
     # shapes on failure and still exists on current Python versions.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
예제 #19
0
    def testExplicitDtype(self):
        """Testing barray construction keeping dimensions (explicit dtype)

        A 6x6 float64 identity matrix is wrapped in a barray with the same
        dtype passed explicitly, and the barray is compared with the source.
        """
        float_type = np.dtype(np.float64)
        identity = np.eye(6, dtype=float_type)
        wrapped = blz.barray(identity, dtype=float_type, rootdir=self.rootdir)
        if self.open:
            # Compare against the object obtained by re-opening rootdir.
            wrapped = blz.open(rootdir=self.rootdir)

        # array equality implies having the same shape
        assert_array_equal(identity, wrapped, "Arrays are not equal")
예제 #20
0
 def test02(self):
     """Testing `__getitem()__` method with a start, stop, step

     Indexes a NumPy array and the equivalent blz array with
     ``slice(1, 9, 2)`` and checks that shape and contents agree.
     """
     a = np.ones((10,2), dtype="i4")*3
     b = blz.fill((10,2), 3, dtype="i4", rootdir=self.rootdir)
     if self.open:
         # Index the object obtained by re-opening rootdir instead.
         b = blz.open(rootdir=self.rootdir)
     sl = slice(1,9,2)
     # assertEqual instead of the deprecated assert_ alias: it reports both
     # shapes on failure and still exists on current Python versions.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
예제 #21
0
 def test03c(self):
     """Testing `__getitem()__` method with several slices (III)

     Indexes a 4-d NumPy array and a barray built from it with a tuple of
     three slices and checks that shape and contents agree.
     """
     a = np.arange(120*1000).reshape((5*1000,4,3,2))
     b = blz.barray(a, rootdir=self.rootdir)
     if self.open:
         # Index the object obtained by re-opening rootdir instead.
         b = blz.open(rootdir=self.rootdir)
     sl = (slice(None,None,3), slice(1,3,2), slice(1,4,2))
     # assertEqual instead of the deprecated assert_ alias: it reports both
     # shapes on failure and still exists on current Python versions.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
예제 #22
0
 def test00a(self):
     """Testing `__getitem()__` method with only a start (scalar)

     Indexes a NumPy array and the equivalent blz array with the integer 1
     and checks that shape and contents agree.
     """
     a = np.ones((2, 3), dtype="i4") * 3
     b = blz.fill((2, 3), 3, dtype="i4", rootdir=self.rootdir)
     if self.open:
         # Index the object obtained by re-opening rootdir instead.
         b = blz.open(rootdir=self.rootdir)
     sl = 1
     # assertEqual instead of the deprecated assert_ alias: it reports both
     # shapes on failure and still exists on current Python versions.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
예제 #23
0
    def testExplicitDtype(self):
        """Testing barray construction keeping dimensions (explicit dtype)

        A 6x6 float64 identity matrix is wrapped in a barray with the same
        dtype passed explicitly, and the barray is compared with the source.
        """
        float_type = np.dtype(np.float64)
        identity = np.eye(6, dtype=float_type)
        wrapped = blz.barray(identity, dtype=float_type, rootdir=self.rootdir)
        if self.open:
            # Compare against the object obtained by re-opening rootdir.
            wrapped = blz.open(rootdir=self.rootdir)

        # array equality implies having the same shape
        assert_array_equal(identity, wrapped, "Arrays are not equal")
예제 #24
0
 def test04c(self):
     """Testing `__getitem()__` method with shape reduction (III)

     Indexes a 3-d NumPy array and a barray built from it with the tuple
     ``(1, slice(1, 4, 2), 2)`` and checks that shape and contents agree.
     """
     a = np.arange(6000).reshape((50, 40, 3))
     b = blz.barray(a, rootdir=self.rootdir)
     if self.open:
         # Index the object obtained by re-opening rootdir instead.
         b = blz.open(rootdir=self.rootdir)
     sl = (1, slice(1, 4, 2), 2)
     # assertEqual instead of the deprecated assert_ alias: it reports both
     # shapes on failure and still exists on current Python versions.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
예제 #25
0
 def test03c(self):
     """Testing `__getitem()__` method with several slices (III)

     Indexes a 4-d NumPy array and a barray built from it with a tuple of
     three slices and checks that shape and contents agree.
     """
     a = np.arange(120 * 1000).reshape((5 * 1000, 4, 3, 2))
     b = blz.barray(a, rootdir=self.rootdir)
     if self.open:
         # Index the object obtained by re-opening rootdir instead.
         b = blz.open(rootdir=self.rootdir)
     sl = (slice(None, None, 3), slice(1, 3, 2), slice(1, 4, 2))
     # assertEqual instead of the deprecated assert_ alias: it reports both
     # shapes on failure and still exists on current Python versions.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
예제 #26
0
 def test02(self):
     """Testing `__getitem()__` method with a start, stop, step

     Indexes a NumPy array and the equivalent blz array with
     ``slice(1, 9, 2)`` and checks that shape and contents agree.
     """
     a = np.ones((10, 2), dtype="i4") * 3
     b = blz.fill((10, 2), 3, dtype="i4", rootdir=self.rootdir)
     if self.open:
         # Index the object obtained by re-opening rootdir instead.
         b = blz.open(rootdir=self.rootdir)
     sl = slice(1, 9, 2)
     # assertEqual instead of the deprecated assert_ alias: it reports both
     # shapes on failure and still exists on current Python versions.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
예제 #27
0
 def test00a(self):
     """Testing `__getitem()__` method with only a start (scalar)

     Indexes a NumPy array and the equivalent blz array with the integer 1
     and checks that shape and contents agree.
     """
     a = np.ones((2,3), dtype="i4")*3
     b = blz.fill((2,3), 3, dtype="i4", rootdir=self.rootdir)
     if self.open:
         # Index the object obtained by re-opening rootdir instead.
         b = blz.open(rootdir=self.rootdir)
     sl = 1
     # assertEqual instead of the deprecated assert_ alias: it reports both
     # shapes on failure and still exists on current Python versions.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
예제 #28
0
 def test04c(self):
     """Testing `__getitem()__` method with shape reduction (III)

     Indexes a 3-d NumPy array and a barray built from it with the tuple
     ``(1, slice(1, 4, 2), 2)`` and checks that shape and contents agree.
     """
     a = np.arange(6000).reshape((50,40,3))
     b = blz.barray(a, rootdir=self.rootdir)
     if self.open:
         # Index the object obtained by re-opening rootdir instead.
         b = blz.open(rootdir=self.rootdir)
     sl = (1,slice(1,4,2),2)
     # assertEqual instead of the deprecated assert_ alias: it reports both
     # shapes on failure and still exists on current Python versions.
     self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
     assert_array_equal(a[sl], b[sl], "Arrays are not equal")
예제 #29
0
 def test04(self):
     """Testing `fill` constructor with open and resize (array default)

     A (2, 200) blz array filled with [3, 3, 3, 3] gets one more row
     appended and is compared with a (3, 200) NumPy reference.
     """
     dtype = '(4,)i4'
     expected = np.ones((3, 200), dtype=dtype) * 3
     actual = blz.fill((2, 200), [3, 3, 3, 3], dtype=dtype, rootdir=self.rootdir)
     if self.open:
         # Append to the object obtained by re-opening rootdir instead.
         actual = blz.open(rootdir=self.rootdir)
     extra_row = np.ones((1, 200), dtype=dtype) * 3
     actual.append(extra_row)
     assert_array_equal(expected, actual, "Arrays are not equal")
예제 #30
0
 def load_prediction_blz(self, datatype="training", valuetype="decision_function", limit_instances=1000000):
     """Open a stored prediction with ``blz.open`` if its path exists.

     The path is ``valuetype`` joined onto
     ``self.all_training_prediction_path`` (for ``datatype == "training"``)
     or ``self.all_testing_prediction_path`` (for ``"testing"``).

     ``limit_instances`` is accepted but not used by this method.

     Returns the opened object, or ``None`` when the path does not exist.
     Raises ``AssertionError`` for any other ``datatype``.
     """
     assert datatype in ["training","testing"]

     # Only the attribute that matches datatype is read.
     base_dir = (self.all_training_prediction_path
                 if datatype == "training"
                 else self.all_testing_prediction_path)
     loading_blz_path = os.path.join(base_dir, valuetype)

     if not os.path.exists(loading_blz_path):
         return None
     return blz.open(loading_blz_path)
예제 #31
0
 def test00b(self):
     """Testing `__setitem()__` method with only a start (vector)

     Assigns ``range(300)`` through ``slice(1)`` to a NumPy array and to
     the equivalent blz array, then compares the assigned region.
     """
     shape = (200, 300)
     expected = np.ones(shape, dtype="i4") * 3
     actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir)
     sl = slice(1)
     expected[sl, :] = range(300)
     actual[sl] = range(300)
     if self.open:
         # Flush, then read back through a freshly opened object.
         actual.flush()
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
 def __init__(self, path=None, mode='r', **kwargs):
     """Store the arguments and set up ``self.blzarr``.

     * If ``path`` is already a ``blz.barray`` or ``blz.btable``, it is
       used directly and ``self.path`` becomes its ``rootdir``.
     * Otherwise, unless ``mode`` is ``'w'``, ``blz.open`` is called with
       ``rootdir=path``, ``mode`` and the extra keyword arguments.
     * With ``mode == 'w'`` ``self.blzarr`` starts out as ``None``.
     """
     self.path, self.mode, self.kwargs = path, mode, kwargs

     if isinstance(path, (blz.barray, blz.btable)):
         self.blzarr = path
         self.path = path.rootdir
         return

     if mode != 'w':
         self.blzarr = blz.open(rootdir=path, mode=mode, **kwargs)
         return

     # This will be set in the constructor later on
     self.blzarr = None
예제 #33
0
 def test02b(self):
     """Testing `__setitem()__` method with start,stop,step (scalar)

     Assigns ``range(2)`` through ``slice(1, 8, 3)`` to a NumPy array and
     to the equivalent blz array, then compares the assigned region.
     """
     shape = (10, 2)
     expected = np.ones(shape, dtype="i4") * 3
     actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir)
     sl = slice(1, 8, 3)
     expected[sl, :] = range(2)
     actual[sl] = range(2)
     if self.open:
         # Flush, then read back through a freshly opened object.
         actual.flush()
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
예제 #34
0
 def test04(self):
     """Testing `fill` constructor with open and resize (array default)

     A (2, 200) blz array filled with [3, 3, 3, 3] gets one more row
     appended and is compared with a (3, 200) NumPy reference.
     """
     dtype = '(4,)i4'
     expected = np.ones((3, 200), dtype=dtype) * 3
     actual = blz.fill((2, 200), [3, 3, 3, 3], dtype=dtype, rootdir=self.rootdir)
     if self.open:
         # Append to the object obtained by re-opening rootdir instead.
         actual = blz.open(rootdir=self.rootdir)
     extra_row = np.ones((1, 200), dtype=dtype) * 3
     actual.append(extra_row)
     assert_array_equal(expected, actual, "Arrays are not equal")
예제 #35
0
 def test02b(self):
     """Testing `__setitem()__` method with start,stop,step (scalar)

     Assigns ``range(2)`` through ``slice(1, 8, 3)`` to a NumPy array and
     to the equivalent blz array, then compares the assigned region.
     """
     shape = (10, 2)
     expected = np.ones(shape, dtype="i4") * 3
     actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir)
     sl = slice(1, 8, 3)
     expected[sl, :] = range(2)
     actual[sl] = range(2)
     if self.open:
         # Flush, then read back through a freshly opened object.
         actual.flush()
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
예제 #36
0
 def test05(self):
     """Testing `fill` constructor with open and resize (nchunks>1)

     A (2, 2000) blz array filled with [3, 3, 3, 3] gets one more row
     appended and is compared with a (3, 2000) NumPy reference.
     """
     dtype = '(4,)i4'
     expected = np.ones((3, 2000), dtype=dtype) * 3
     actual = blz.fill((2, 2000), [3, 3, 3, 3], dtype=dtype, rootdir=self.rootdir)
     if self.open:
         # Append to the object obtained by re-opening rootdir instead.
         actual = blz.open(rootdir=self.rootdir)
     extra_row = np.ones((1, 2000), dtype=dtype) * 3
     actual.append(extra_row)
     # We need to use the actual[:] here to overcome a problem with the
     # assert_array_equal() function
     assert_array_equal(expected, actual[:], "Arrays are not equal")
예제 #37
0
 def test00b(self):
     """Testing `__setitem()__` method with only a start (vector)

     Assigns ``range(300)`` through ``slice(1)`` to a NumPy array and to
     the equivalent blz array, then compares the assigned region.
     """
     shape = (200, 300)
     expected = np.ones(shape, dtype="i4") * 3
     actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir)
     sl = slice(1)
     expected[sl, :] = range(300)
     actual[sl] = range(300)
     if self.open:
         # Flush, then read back through a freshly opened object.
         actual.flush()
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
예제 #38
0
 def __init__(self, path=None, mode='r', **kwargs):
     """Store the arguments and set up ``self.blzarr``.

     * If ``path`` is already a ``blz.barray`` or ``blz.btable``, it is
       used directly and ``self.path`` becomes its ``rootdir``.
     * Otherwise, unless ``mode`` is ``'w'``, ``blz.open`` is called with
       ``rootdir=path``, ``mode`` and the extra keyword arguments.
     * With ``mode == 'w'`` ``self.blzarr`` starts out as ``None``.
     """
     self.path, self.mode, self.kwargs = path, mode, kwargs

     if isinstance(path, (blz.barray, blz.btable)):
         self.blzarr = path
         self.path = path.rootdir
         return

     if mode != 'w':
         self.blzarr = blz.open(rootdir=path, mode=mode, **kwargs)
         return

     # This will be set in the constructor later on
     self.blzarr = None
예제 #39
0
 def test01a(self):
     """Testing `__setitem()__` method with start,stop (scalar)

     Assigns 0 through ``slice(100, 400)`` to a NumPy array and to the
     equivalent blz array, then compares the assigned region.
     """
     shape = (500, 200)
     expected = np.ones(shape, dtype="i4") * 3
     actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir,
                       bparams=blz.bparams())
     sl = slice(100, 400)
     expected[sl, :] = 0
     actual[sl] = 0
     if self.open:
         # Flush, then read back through a freshly opened object.
         actual.flush()
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
예제 #40
0
 def test03d(self):
     """Testing `__setitem()__` method with several slices (IV)

     Assigns 2 through a tuple of four slices to a 4-d NumPy array and to
     a barray built from it, then compares the complete arrays.
     """
     expected = np.arange(120).reshape((5, 4, 3, 2))
     # The barray is built before the reference is modified.
     actual = blz.barray(expected, rootdir=self.rootdir)
     sl = (slice(1, 3), slice(1, 3, 1), slice(1, None, 2), slice(1))
     expected[sl] = 2
     actual[sl] = 2
     if self.open:
         # Flush, then read back through a freshly opened object.
         actual.flush()
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected[:], actual[:], "Arrays are not equal")
예제 #41
0
 def test03d(self):
     """Testing `__setitem()__` method with several slices (IV)

     Assigns 2 through a tuple of four slices to a 4-d NumPy array and to
     a barray built from it, then compares the complete arrays.
     """
     expected = np.arange(120).reshape((5, 4, 3, 2))
     # The barray is built before the reference is modified.
     actual = blz.barray(expected, rootdir=self.rootdir)
     sl = (slice(1, 3), slice(1, 3, 1), slice(1, None, 2), slice(1))
     expected[sl] = 2
     actual[sl] = 2
     if self.open:
         # Flush, then read back through a freshly opened object.
         actual.flush()
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected[:], actual[:], "Arrays are not equal")
예제 #42
0
 def test04c(self):
     """Testing `__setitem()__` method with shape reduction (III)

     Assigns 2 through ``(1, 2, slice(None, None, None))`` to a 3-d NumPy
     array and to a barray built from it, then compares that region.
     """
     expected = np.arange(24).reshape((4, 3, 2))
     # The barray is built before the reference is modified.
     actual = blz.barray(expected, rootdir=self.rootdir)
     sl = (1, 2, slice(None, None, None))
     expected[sl] = 2
     actual[sl] = 2
     if self.open:
         # Flush, then read back through a freshly opened object.
         actual.flush()
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
예제 #43
0
 def test04c(self):
     """Testing `__setitem()__` method with shape reduction (III)

     Assigns 2 through ``(1, 2, slice(None, None, None))`` to a 3-d NumPy
     array and to a barray built from it, then compares that region.
     """
     expected = np.arange(24).reshape((4, 3, 2))
     # The barray is built before the reference is modified.
     actual = blz.barray(expected, rootdir=self.rootdir)
     sl = (1, 2, slice(None, None, None))
     expected[sl] = 2
     actual[sl] = 2
     if self.open:
         # Flush, then read back through a freshly opened object.
         actual.flush()
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
예제 #44
0
 def test05(self):
     """Testing `fill` constructor with open and resize (nchunks>1)

     A (2, 2000) blz array filled with [3, 3, 3, 3] gets one more row
     appended and is compared with a (3, 2000) NumPy reference.
     """
     dtype = '(4,)i4'
     expected = np.ones((3, 2000), dtype=dtype) * 3
     actual = blz.fill((2, 2000), [3, 3, 3, 3], dtype=dtype, rootdir=self.rootdir)
     if self.open:
         # Append to the object obtained by re-opening rootdir instead.
         actual = blz.open(rootdir=self.rootdir)
     extra_row = np.ones((1, 2000), dtype=dtype) * 3
     actual.append(extra_row)
     # We need to use the actual[:] here to overcome a problem with the
     # assert_array_equal() function
     assert_array_equal(expected, actual[:], "Arrays are not equal")
예제 #45
0
 def fit_model(self, append_self=True, feature_columns=TRAINING_COLUMN_NAMES[2:], label_columns=TRAINING_COLUMN_NAMES[1]):
     """Load the training columns, build the model and fit it.

     Parameters:
         append_self: when true, the fitted model is also stored on
             ``self.model``.
         feature_columns: names of the feature columns; they are used in
             the order in which they appear in ``TRAINING_COLUMN_NAMES``.
         label_columns: a single column name, or an iterable of names.
             The first of them becomes ``y``; any further ones end up as
             the leading columns of ``X``.

     Rows are restricted to ``self.training_data_slices`` when that
     instance attribute exists; otherwise all rows are used. The model is
     built from ``self.model_type`` with ``self.all_model_parameters``,
     else ``self.model_parameters``, else no arguments.
     ``self.save_model_info()`` is called before fitting.

     Returns:
         Whatever ``model.fit(X, y)`` returns.
     """
     def _log(message):
         # Parenthesised single-argument print behaves like the print
         # statement on Python 2.
         print(PRINT_MESSAGE_FORMAT.format(model_id=self.model_id,message=message))

     _log("[fit_model] get features ... ")
     self.feature_columns = [one_col for one_col in TRAINING_COLUMN_NAMES if one_col in feature_columns]

     _log("[fit_model] get labels ... ")
     if isinstance(label_columns,(str,unicode)):
         self.label_columns = [label_columns]
     else:
         # Previously a non-string argument left self.label_columns unset
         # (AttributeError below) or silently reused a stale value.
         self.label_columns = list(label_columns)

     read_columns = self.label_columns + self.feature_columns

     _log("[fit_model] build features blz ... ")
     training_cols_blz = [blz.open(os.path.join(tools.TRAINING_BLZ_PATH,col_name))
                          for col_name in read_columns]

     _log("[fit_model] get training_data_slices from blz ... ")
     if "training_data_slices" in self.__dict__:
         row_selector = self.training_data_slices
     else:
         row_selector = slice(0, None)
     # One row per column after indexing; transpose to one row per sample.
     sample_data_arr = np.c_[[col[row_selector] for col in training_cols_blz]].T

     X = sample_data_arr[:,1:]
     y = sample_data_arr[:,0]

     _log("[fit_model] init model ... ")
     if "all_model_parameters" in self.__dict__:
         model = self.model_type(**self.all_model_parameters)
     elif "model_parameters" in self.__dict__:
         model = self.model_type(**self.model_parameters)
     else:
         model = self.model_type()

     _log("[fit_model] save model_info ... ")
     self.save_model_info()

     _log("[fit_model] fit model and return ... ")
     if append_self:
         self.model = model.fit(X,y)
         return self.model
     else:
         return model.fit(X,y)
예제 #46
0
 def create_kaggle_submit_csv(self, submit_format="%d,%.6f"):
     """Write the testing ``predict_proba`` predictions to a CSV file.

     Column 1 of the stored ``predict_proba`` prediction is paired with the
     Id column opened from ``tools.TESTING_BLZ_PATH``. Every row is
     rendered with ``submit_format`` below a header made of the table's
     column names. The file is written to ``SUBMITS_PATH`` and named
     ``<model_id>_<YYYYmmddHHMMSS>.csv``.

     Raises ``AssertionError`` when ``"predict_proba"`` is not listed in
     ``self.list_all_predictions["testing"]``.
     """
     assert "predict_proba" in self.list_all_predictions["testing"]

     probabilities = self.load_prediction_blz(datatype="testing", valuetype="predict_proba")[:,1]
     ids_barray = blz.open(os.path.join(tools.TESTING_BLZ_PATH,TESTING_COLUMN_NAMES[0]))
     bt = blz.btable(columns=[ids_barray,probabilities], names=["Id","Predicted"])

     body_lines = [submit_format % tuple(row) for row in bt.iter()]
     header_line = ",".join(bt.names)
     all_results_string = "\n".join([header_line] + body_lines)

     timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
     submit_filepath = os.path.join(SUBMITS_PATH, "%s_%s.csv" % (self.model_id, timestamp))

     with open(submit_filepath,"w") as wf:
         wf.write(all_results_string)
예제 #47
0
파일: test_attrs.py 프로젝트: marascio/blz
    def test00b(self):
        """Accessing attributes in a opened barray.

        Three attributes are stored, the object is re-opened from
        ``self.rootdir`` when one is set, and the attributes are read back.
        """
        stored = [('attr1', 'val1'), ('attr2', 'val2'), ('attr3', 'val3')]

        cn = self.getobject()
        # Some attrs
        for key, value in stored:
            cn.attrs[key] = value
        # Re-open the barray
        if self.rootdir:
            cn = blz.open(rootdir=self.rootdir)
        # assertEqual instead of the deprecated assert_ alias: it reports
        # both operands on failure.
        for key, value in stored:
            self.assertEqual(cn.attrs[key], value)
        self.assertEqual(len(cn.attrs), 3)
예제 #48
0
파일: test_attrs.py 프로젝트: pombreda/blz
    def test00b(self):
        """Accessing attributes in a opened barray.

        Three attributes are stored, the object is re-opened from
        ``self.rootdir`` when one is set, and the attributes are read back.
        """
        stored = [('attr1', 'val1'), ('attr2', 'val2'), ('attr3', 'val3')]

        cn = self.getobject()
        # Some attrs
        for key, value in stored:
            cn.attrs[key] = value
        # Re-open the barray
        if self.rootdir:
            cn = blz.open(rootdir=self.rootdir)
        # assertEqual instead of the deprecated assert_ alias: it reports
        # both operands on failure.
        for key, value in stored:
            self.assertEqual(cn.attrs[key], value)
        self.assertEqual(len(cn.attrs), 3)
예제 #49
0
파일: test_attrs.py 프로젝트: marascio/blz
    def test01c(self):
        """Appending attributes in a opened barray.

        One attribute is stored, the object is re-opened from
        ``self.rootdir`` when one is set, two more attributes are added,
        and all three are read back.
        """

        cn = self.getobject()
        # Some attrs
        cn.attrs['attr1'] = 'val1'
        # Reopen
        if self.rootdir:
            cn = blz.open(rootdir=self.rootdir)
        # Append attrs
        cn.attrs['attr2'] = 'val2'
        cn.attrs['attr3'] = 'val3'
        # assertEqual instead of the deprecated assert_ alias: it reports
        # both operands on failure.
        self.assertEqual(cn.attrs['attr1'], 'val1')
        self.assertEqual(cn.attrs['attr2'], 'val2')
        self.assertEqual(cn.attrs['attr3'], 'val3')
        self.assertEqual(len(cn.attrs), 3)
예제 #50
0
파일: test_attrs.py 프로젝트: pombreda/blz
    def test01c(self):
        """Appending attributes in a opened barray.

        One attribute is stored, the object is re-opened from
        ``self.rootdir`` when one is set, two more attributes are added,
        and all three are read back.
        """

        cn = self.getobject()
        # Some attrs
        cn.attrs['attr1'] = 'val1'
        # Reopen
        if self.rootdir:
            cn = blz.open(rootdir=self.rootdir)
        # Append attrs
        cn.attrs['attr2'] = 'val2'
        cn.attrs['attr3'] = 'val3'
        # assertEqual instead of the deprecated assert_ alias: it reports
        # both operands on failure.
        self.assertEqual(cn.attrs['attr1'], 'val1')
        self.assertEqual(cn.attrs['attr2'], 'val2')
        self.assertEqual(cn.attrs['attr3'], 'val3')
        self.assertEqual(len(cn.attrs), 3)
예제 #51
0
 def test01a(self):
     """Testing `__setitem()__` method with start,stop (scalar)

     Assigns 0 through ``slice(100, 400)`` to a NumPy array and to the
     equivalent blz array, then compares the assigned region.
     """
     shape = (500, 200)
     expected = np.ones(shape, dtype="i4") * 3
     actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir,
                       bparams=blz.bparams())
     sl = slice(100, 400)
     expected[sl, :] = 0
     actual[sl] = 0
     if self.open:
         # Flush, then read back through a freshly opened object.
         actual.flush()
         actual = blz.open(rootdir=self.rootdir)
     assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
예제 #52
0
파일: test_attrs.py 프로젝트: pombreda/blz
    def test01b(self):
        """Removing attributes in a opened barray.

        Three attributes are stored, the object is re-opened from
        ``self.rootdir`` when one is set, one attribute is deleted, and the
        remaining state is checked.
        """

        cn = self.getobject()
        # Some attrs
        cn.attrs['attr1'] = 'val1'
        cn.attrs['attr2'] = 'val2'
        cn.attrs['attr3'] = 'val3'
        # Reopen
        if self.rootdir:
            cn = blz.open(rootdir=self.rootdir)
        # Remove one of them
        del cn.attrs['attr2']
        # assertEqual instead of the deprecated assert_ alias: it reports
        # both operands on failure.
        self.assertEqual(cn.attrs['attr1'], 'val1')
        self.assertEqual(cn.attrs['attr3'], 'val3')
        self.assertRaises(KeyError, cn.attrs.__getitem__, 'attr2')
        self.assertEqual(len(cn.attrs), 2)
예제 #53
0
파일: test_attrs.py 프로젝트: marascio/blz
    def test01b(self):
        """Removing attributes in a opened barray.

        Three attributes are stored, the object is re-opened from
        ``self.rootdir`` when one is set, one attribute is deleted, and the
        remaining state is checked.
        """

        cn = self.getobject()
        # Some attrs
        cn.attrs['attr1'] = 'val1'
        cn.attrs['attr2'] = 'val2'
        cn.attrs['attr3'] = 'val3'
        # Reopen
        if self.rootdir:
            cn = blz.open(rootdir=self.rootdir)
        # Remove one of them
        del cn.attrs['attr2']
        # assertEqual instead of the deprecated assert_ alias: it reports
        # both operands on failure.
        self.assertEqual(cn.attrs['attr1'], 'val1')
        self.assertEqual(cn.attrs['attr3'], 'val3')
        self.assertRaises(KeyError, cn.attrs.__getitem__, 'attr2')
        self.assertEqual(len(cn.attrs), 2)
예제 #54
0
    def compute_training_data_logloss(self, return_type="value",
                                      use_prediction="predict_proba",
                                      prob_func=lambda xx:0.5 + 0.5*erf(xx/(2**0.5)),
                                      output_vector_type = "numpy",
                                      sample_slice  = slice(0,None,None)
                                      ):
        """Compute the log loss of a stored training prediction.

        Parameters:
            return_type: ``"value"`` to return the mean log loss,
                ``"vector"`` to return the per-sample vector.
            use_prediction: name of the stored prediction to load; it must
                be listed in ``self.list_all_predictions["training"]``.
            prob_func: applied to the selected prediction values when
                ``use_prediction`` is not ``"predict_proba"``; the default
                is ``0.5 + 0.5 * erf(x / sqrt(2))``.
            output_vector_type: ``"blz"`` or ``"numpy"``; passed on to
                ``blz_llvec_fun``.
            sample_slice: row selection applied to both the prediction and
                the labels.

        For ``"predict_proba"`` column 1 of the stored prediction is used
        as the probability.

        Raises ``AssertionError`` when one of the three argument checks
        fails.
        """
        assert return_type in ["value", "vector"]
        assert output_vector_type in ["blz","numpy"]
        assert use_prediction in self.list_all_predictions["training"]

        print(PRINT_MESSAGE_FORMAT.format(model_id=self.model_id,message="[compute_training_data_logloss] conpute logloss with %s " % use_prediction))

        stored_prediction = self.load_prediction_blz(datatype="training", valuetype=use_prediction)

        if use_prediction != "predict_proba":
            prediction_prob = prob_func(stored_prediction[sample_slice])
        else:
            prediction_prob = stored_prediction[sample_slice,1]

        labels_path = os.path.join(tools.TRAINING_BLZ_PATH,TRAINING_COLUMN_NAMES[1])
        exact_ans = blz.open(labels_path)[sample_slice]

        logloss_vector = blz_llvec_fun(exact_ans,prediction_prob,output_vector_type)
        # Mean over the first axis of the per-sample vector.
        logloss_value = logloss_vector.sum()/logloss_vector.shape[0]
        print(PRINT_MESSAGE_FORMAT.format(model_id=self.model_id,message="[compute_training_data_logloss] logloss_value = %s " % logloss_value))

        if return_type != "value":
            return logloss_vector
        return logloss_value