def get_labels_barray(selected_slice=None):
    """Open the on-disk label column and optionally slice it.

    The column is opened with ``blz.open`` from the directory
    ``TRAINING_COLUMN_NAMES[1]`` under ``tools.TRAINING_BLZ_PATH``.

    Parameters:
        selected_slice: ``None`` to get the opened object itself, or a
            ``slice`` to get only that part of it.

    Returns:
        The object returned by ``blz.open`` when ``selected_slice`` is
        ``None``; otherwise the result of indexing it with ``selected_slice``.

    Raises:
        AssertionError: if ``selected_slice`` is neither ``None`` nor a
            ``slice`` (checked before anything is opened).
    """
    # Identity test: ``== None`` would dispatch to the argument's own __eq__.
    if selected_slice is not None:
        assert isinstance(selected_slice, slice)
    labels = blz.open(
        os.path.join(tools.TRAINING_BLZ_PATH, TRAINING_COLUMN_NAMES[1]))
    if selected_slice is None:
        return labels
    return labels[selected_slice]
def modelPredictor(modelsPath_modelIndex_dataPath_colNames_tuple):
    """Predict with one pickled model over on-disk columns and store the result.

    Input:
        A tuple with the following four items (in this order):
        modelsPath: string, the path to the trained models (pickle file).
        modelIndex: the index of the model to predict with; it is used to
            index the unpickled object.
        dataPath: string, the directory holding one BLZ array per column.
        colNames: a list of strings. ``colNames[0]`` is the Id column and
            ``colNames[1:]`` are the feature columns, e.g. ["Id", "V1", ...].

    Output:
        None. A btable with the columns "Id" and "Predict" is created with
        its ``rootdir`` set to
        ``<dataPath>/Model_No{modelIndex}_predicted_array``.

    Notes:
        An existing entry at that result path is removed first.
        Features are read and predicted in chunks of ``divideN`` rows.
    """
    # Local import, like the other helpers in this file that need shutil.
    import shutil

    # Number of rows read and predicted per chunk.
    divideN = 300000
    modelsPath, modelIndex, dataPath, colNames = \
        modelsPath_modelIndex_dataPath_colNames_tuple

    # NOTE(security): pickle.load executes code embedded in the file; only
    # point this at model files from a trusted source.
    with open(modelsPath, "rb") as rf:
        models = pickle.load(rf)
    model = models[modelIndex]
    del models

    Id = blz.open(os.path.join(dataPath, colNames[0]))
    totalN = len(Id)

    # Chunk boundaries 0, divideN, 2 * divideN, ..., totalN.  This yields the
    # same pairs whether or not totalN is a multiple of divideN.
    nodes_list = list(range(0, totalN, divideN)) + [totalN]
    nodes_pair_list = list(zip(nodes_list[:-1], nodes_list[1:]))

    # Prediction.
    y_predict = np.zeros(totalN)
    print("[Model No.{modelIndex}] Prediction process begins.".format(modelIndex=modelIndex))

    # Open every feature column once instead of once per chunk.
    feature_columns = [blz.open(os.path.join(dataPath, colname))
                       for colname in colNames[1:]]

    for begin, end in nodes_pair_list:
        print("[Model No.{modelIndex}] Processing {begin} ~ {end} observations.".format(
            modelIndex=modelIndex, begin=begin + 1, end=end))
        X = np.column_stack([column[begin:end] for column in feature_columns])
        y_predict[begin:end] = model.predict(X)

    data_rootdir = os.path.join(
        dataPath,
        "Model_No{modelIndex}_predicted_array".format(modelIndex=modelIndex))

    # os.listdir() yields bare entry names, so looking the joined path up in
    # it never matched; test the path itself.  Removal is done in-process
    # rather than by building an "rm -rf" shell command from the path.
    if os.path.lexists(data_rootdir):
        print("Removing Old result_table directory for new btable.")
        if os.path.isdir(data_rootdir) and not os.path.islink(data_rootdir):
            shutil.rmtree(data_rootdir)
        else:
            os.remove(data_rootdir)

    blz.btable(columns=[Id, blz.barray(y_predict)],
               names=["Id", "Predict"],
               rootdir=data_rootdir)
    print("The result_table btable rootdir is under {path}".format(path=data_rootdir))
def drop(persist):
    """Delete the persistent storage that `persist` refers to.

    `persist` is first passed through `_persist_convert`.  For the 'blz'
    format the store is opened with `blz.open` and its directory tree is then
    removed; for 'csv', 'json' and 'hdf5' the path is unlinked.  Any other
    format is left untouched.

    Raises `Exception` when the BLZ step raises `RuntimeError`.
    """
    persist = _persist_convert(persist)
    storage_format = persist.format

    if storage_format == 'blz':
        try:
            blz.open(rootdir=persist.path)
            from shutil import rmtree
            rmtree(persist.path)
        except RuntimeError:
            # Maybe BLZ should throw other exceptions for this!
            raise Exception("No data set at uri '%s'" % persist.uri)
        return

    if storage_format in ('csv', 'json', 'hdf5'):
        import os
        os.unlink(persist.path)
def test00b(self):
    """Reshape a 16000-element `barray` to (20, 20, 40) and compare with NumPy.

    When ``self.open`` is set, the array compared is the one opened from
    ``self.rootdir``.
    """
    count = 16000
    shape = (20, 20, 40)
    actual = blz.arange(count, rootdir=self.rootdir).reshape(shape)
    if self.open:
        actual = blz.open(rootdir=self.rootdir)
    expected = np.arange(count).reshape(shape)
    assert_array_equal(expected, actual, "Arrays are not equal")
def test01b(self):
    """Compare the `zeros` constructor with NumPy for a '(2,4)i4' dtype (II).

    When ``self.open`` is set, the array compared is the one opened from
    ``self.rootdir``.
    """
    dtype = '(2,4)i4'
    actual = blz.zeros(2, dtype=dtype, rootdir=self.rootdir)
    if self.open:
        actual = blz.open(rootdir=self.rootdir)
    expected = np.zeros(2, dtype=dtype)
    assert_array_equal(expected, actual, "Arrays are not equal")
def test02(self):
    """Compare the `ones` constructor with NumPy for a (2, 2) '(4,)i4' array.

    When ``self.open`` is set, the array compared is the one opened from
    ``self.rootdir``.
    """
    shape = (2, 2)
    dtype = '(4,)i4'
    actual = blz.ones(shape, dtype=dtype, rootdir=self.rootdir)
    if self.open:
        actual = blz.open(rootdir=self.rootdir)
    expected = np.ones(shape, dtype=dtype)
    assert_array_equal(expected, actual, "Arrays are not equal")
def test03b(self):
    """Compare the `fill` constructor with NumPy when the default is an array.

    The fill value is the list ``[3, 3, 3, 3]``; the reference is an array of
    ones multiplied by 3.  When ``self.open`` is set, the array compared is
    the one opened from ``self.rootdir``.
    """
    shape = (2, 2)
    dtype = '(4,)i4'
    actual = blz.fill(shape, [3, 3, 3, 3], dtype=dtype, rootdir=self.rootdir)
    if self.open:
        actual = blz.open(rootdir=self.rootdir)
    expected = np.ones(shape, dtype=dtype) * 3
    assert_array_equal(expected, actual, "Arrays are not equal")
def test00a(self):
    """Reshape a 16-element `barray` to (2, 2, 4) and compare with NumPy.

    When ``self.open`` is set, the array compared is the one opened from
    ``self.rootdir``.
    """
    count = 16
    shape = (2, 2, 4)
    stored = blz.arange(count, rootdir=self.rootdir).reshape(shape)
    if self.open:
        stored = blz.open(rootdir=self.rootdir)
    reference = np.arange(count).reshape(shape)
    assert_array_equal(reference, stored, "Arrays are not equal")
def test00a(self):
    """Check `barray` reshape to (2, 2, 4) against the NumPy equivalent.

    When ``self.open`` is set, the array compared is the one opened from
    ``self.rootdir``.
    """
    n_items, new_shape = 16, (2, 2, 4)
    expected = np.arange(n_items).reshape(new_shape)
    actual = blz.arange(n_items, rootdir=self.rootdir).reshape(new_shape)
    if self.open:
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected, actual, "Arrays are not equal")
def test00b(self):
    """Check `barray` reshape to the larger shape (20, 20, 40) against NumPy.

    When ``self.open`` is set, the array compared is the one opened from
    ``self.rootdir``.
    """
    n_items, new_shape = 16000, (20, 20, 40)
    expected = np.arange(n_items).reshape(new_shape)
    actual = blz.arange(n_items, rootdir=self.rootdir).reshape(new_shape)
    if self.open:
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected, actual, "Arrays are not equal")
def test02(self):
    """Check the `ones` constructor against ``np.ones`` ((2, 2), '(4,)i4').

    When ``self.open`` is set, the array compared is the one opened from
    ``self.rootdir``.
    """
    ctor_args = dict(dtype='(4,)i4')
    reference = np.ones((2, 2), **ctor_args)
    stored = blz.ones((2, 2), rootdir=self.rootdir, **ctor_args)
    if self.open:
        stored = blz.open(rootdir=self.rootdir)
    assert_array_equal(reference, stored, "Arrays are not equal")
def test03a(self):
    """Compare the `fill` constructor with NumPy when the default is a scalar.

    The fill value is the scalar 3; the reference is an array of ones
    multiplied by 3.  When ``self.open`` is set, the array compared is the one
    opened from ``self.rootdir``.
    """
    shape = (2, 200)
    dtype = '(4,)i4'
    actual = blz.fill(shape, 3, dtype=dtype, rootdir=self.rootdir)
    if self.open:
        actual = blz.open(rootdir=self.rootdir)
    expected = np.ones(shape, dtype=dtype) * 3
    assert_array_equal(expected, actual, "Arrays are not equal")
def testImplicitDtype(self):
    """Build a barray from a 2-d NumPy array without passing a dtype.

    When ``self.open`` is set, the array compared is the one opened from
    ``self.rootdir``.
    """
    source = np.eye(6)  # 2d
    result = blz.barray(source, rootdir=self.rootdir)
    if self.open:
        result = blz.open(rootdir=self.rootdir)
    # array equality implies having the same shape
    assert_array_equal(source, result, "Arrays are not equal")
def test00b(self):
    """Testing `__getitem()__` method with a stop-only slice.

    ``slice(1)`` is ``slice(None, 1, None)``, i.e. ``[:1]``: the single
    argument is the stop, not the start.  The selection taken from the barray
    must have the same shape and contents as the one taken from NumPy.  When
    ``self.open`` is set, the barray is the one opened from ``self.rootdir``.
    """
    a = np.ones((27,2700), dtype="i4")*3
    b = blz.fill((27,2700), 3, dtype="i4", rootdir=self.rootdir)
    if self.open:
        b = blz.open(rootdir=self.rootdir)
    sl = slice(1)
    expected, result = a[sl], b[sl]
    # assertEqual replaces assert_, a deprecated unittest alias that was
    # removed in Python 3.12.
    self.assertEqual(expected.shape, result.shape, "Shape is not equal")
    assert_array_equal(expected, result, "Arrays are not equal")
def test00b(self):
    """Testing `__getitem()__` method with a stop-only slice.

    ``slice(1)`` is ``slice(None, 1, None)``, i.e. ``[:1]``: the single
    argument is the stop, not the start.  The selection taken from the barray
    must have the same shape and contents as the one taken from NumPy.  When
    ``self.open`` is set, the barray is the one opened from ``self.rootdir``.
    """
    a = np.ones((27, 2700), dtype="i4") * 3
    b = blz.fill((27, 2700), 3, dtype="i4", rootdir=self.rootdir)
    if self.open:
        b = blz.open(rootdir=self.rootdir)
    sl = slice(1)
    expected, result = a[sl], b[sl]
    # assertEqual replaces assert_, a deprecated unittest alias that was
    # removed in Python 3.12.
    self.assertEqual(expected.shape, result.shape, "Shape is not equal")
    assert_array_equal(expected, result, "Arrays are not equal")
def testExplicitDtype(self):
    """Build a barray from a 2-d NumPy array while passing the dtype.

    The same float64 dtype is given to ``np.eye`` and to ``blz.barray``.
    When ``self.open`` is set, the array compared is the one opened from
    ``self.rootdir``.
    """
    float_type = np.dtype(np.float64)
    source = np.eye(6, dtype=float_type)
    result = blz.barray(source, dtype=float_type, rootdir=self.rootdir)
    if self.open:
        result = blz.open(rootdir=self.rootdir)
    # array equality implies having the same shape
    assert_array_equal(source, result, "Arrays are not equal")
def test02(self):
    """Testing `__getitem()__` method with a start, stop, step.

    The slice ``1:9:2`` is applied to a (10, 2) array filled with 3; shape and
    contents of the barray selection must match NumPy's.  When ``self.open``
    is set, the barray is the one opened from ``self.rootdir``.
    """
    a = np.ones((10,2), dtype="i4")*3
    b = blz.fill((10,2), 3, dtype="i4", rootdir=self.rootdir)
    if self.open:
        b = blz.open(rootdir=self.rootdir)
    sl = slice(1,9,2)
    expected, result = a[sl], b[sl]
    # assertEqual replaces assert_, a deprecated unittest alias that was
    # removed in Python 3.12.
    self.assertEqual(expected.shape, result.shape, "Shape is not equal")
    assert_array_equal(expected, result, "Arrays are not equal")
def test03c(self):
    """Testing `__getitem()__` method with several slices (III).

    A tuple of three slices is applied to a (5000, 4, 3, 2) array; shape and
    contents of the barray selection must match NumPy's.  When ``self.open``
    is set, the barray is the one opened from ``self.rootdir``.
    """
    a = np.arange(120*1000).reshape((5*1000,4,3,2))
    b = blz.barray(a, rootdir=self.rootdir)
    if self.open:
        b = blz.open(rootdir=self.rootdir)
    sl = (slice(None,None,3), slice(1,3,2), slice(1,4,2))
    expected, result = a[sl], b[sl]
    # assertEqual replaces assert_, a deprecated unittest alias that was
    # removed in Python 3.12.
    self.assertEqual(expected.shape, result.shape, "Shape is not equal")
    assert_array_equal(expected, result, "Arrays are not equal")
def test00a(self):
    """Testing `__getitem()__` method with a single integer index.

    Index ``1`` is applied to a (2, 3) array filled with 3; shape and contents
    of the barray selection must match NumPy's.  When ``self.open`` is set,
    the barray is the one opened from ``self.rootdir``.
    """
    a = np.ones((2, 3), dtype="i4") * 3
    b = blz.fill((2, 3), 3, dtype="i4", rootdir=self.rootdir)
    if self.open:
        b = blz.open(rootdir=self.rootdir)
    sl = 1
    expected, result = a[sl], b[sl]
    # assertEqual replaces assert_, a deprecated unittest alias that was
    # removed in Python 3.12.
    self.assertEqual(expected.shape, result.shape, "Shape is not equal")
    assert_array_equal(expected, result, "Arrays are not equal")
def test04c(self):
    """Testing `__getitem()__` method with shape reduction (III).

    The index ``(1, 1:4:2, 2)`` mixes two integers with a slice on a
    (50, 40, 3) array; shape and contents of the barray selection must match
    NumPy's.  When ``self.open`` is set, the barray is the one opened from
    ``self.rootdir``.
    """
    a = np.arange(6000).reshape((50, 40, 3))
    b = blz.barray(a, rootdir=self.rootdir)
    if self.open:
        b = blz.open(rootdir=self.rootdir)
    sl = (1, slice(1, 4, 2), 2)
    expected, result = a[sl], b[sl]
    # assertEqual replaces assert_, a deprecated unittest alias that was
    # removed in Python 3.12.
    self.assertEqual(expected.shape, result.shape, "Shape is not equal")
    assert_array_equal(expected, result, "Arrays are not equal")
def test03c(self):
    """Testing `__getitem()__` method with several slices (III).

    A tuple of three slices is applied to a (5000, 4, 3, 2) array; shape and
    contents of the barray selection must match NumPy's.  When ``self.open``
    is set, the barray is the one opened from ``self.rootdir``.
    """
    a = np.arange(120 * 1000).reshape((5 * 1000, 4, 3, 2))
    b = blz.barray(a, rootdir=self.rootdir)
    if self.open:
        b = blz.open(rootdir=self.rootdir)
    sl = (slice(None, None, 3), slice(1, 3, 2), slice(1, 4, 2))
    expected, result = a[sl], b[sl]
    # assertEqual replaces assert_, a deprecated unittest alias that was
    # removed in Python 3.12.
    self.assertEqual(expected.shape, result.shape, "Shape is not equal")
    assert_array_equal(expected, result, "Arrays are not equal")
def test02(self):
    """Testing `__getitem()__` method with a start, stop, step.

    The slice ``1:9:2`` is applied to a (10, 2) array filled with 3; shape and
    contents of the barray selection must match NumPy's.  When ``self.open``
    is set, the barray is the one opened from ``self.rootdir``.
    """
    a = np.ones((10, 2), dtype="i4") * 3
    b = blz.fill((10, 2), 3, dtype="i4", rootdir=self.rootdir)
    if self.open:
        b = blz.open(rootdir=self.rootdir)
    sl = slice(1, 9, 2)
    expected, result = a[sl], b[sl]
    # assertEqual replaces assert_, a deprecated unittest alias that was
    # removed in Python 3.12.
    self.assertEqual(expected.shape, result.shape, "Shape is not equal")
    assert_array_equal(expected, result, "Arrays are not equal")
def test00a(self):
    """Testing `__getitem()__` method with a single integer index.

    Index ``1`` is applied to a (2, 3) array filled with 3; shape and contents
    of the barray selection must match NumPy's.  When ``self.open`` is set,
    the barray is the one opened from ``self.rootdir``.
    """
    a = np.ones((2,3), dtype="i4")*3
    b = blz.fill((2,3), 3, dtype="i4", rootdir=self.rootdir)
    if self.open:
        b = blz.open(rootdir=self.rootdir)
    sl = 1
    expected, result = a[sl], b[sl]
    # assertEqual replaces assert_, a deprecated unittest alias that was
    # removed in Python 3.12.
    self.assertEqual(expected.shape, result.shape, "Shape is not equal")
    assert_array_equal(expected, result, "Arrays are not equal")
def test04c(self):
    """Testing `__getitem()__` method with shape reduction (III).

    The index ``(1, 1:4:2, 2)`` mixes two integers with a slice on a
    (50, 40, 3) array; shape and contents of the barray selection must match
    NumPy's.  When ``self.open`` is set, the barray is the one opened from
    ``self.rootdir``.
    """
    a = np.arange(6000).reshape((50,40,3))
    b = blz.barray(a, rootdir=self.rootdir)
    if self.open:
        b = blz.open(rootdir=self.rootdir)
    sl = (1,slice(1,4,2),2)
    expected, result = a[sl], b[sl]
    # assertEqual replaces assert_, a deprecated unittest alias that was
    # removed in Python 3.12.
    self.assertEqual(expected.shape, result.shape, "Shape is not equal")
    assert_array_equal(expected, result, "Arrays are not equal")
def test04(self):
    """Fill a (2, 200) barray, optionally reopen it, then append one more row.

    The result must equal a (3, 200) NumPy array of the same '(4,)i4' dtype
    filled with 3.  The append happens whether or not the array was reopened
    from ``self.rootdir``.
    """
    dtype = '(4,)i4'
    grown = blz.fill((2, 200), [3, 3, 3, 3], dtype=dtype, rootdir=self.rootdir)
    if self.open:
        grown = blz.open(rootdir=self.rootdir)
    extra_row = np.ones((1, 200), dtype=dtype) * 3
    grown.append(extra_row)
    expected = np.ones((3, 200), dtype=dtype) * 3
    assert_array_equal(expected, grown, "Arrays are not equal")
def load_prediction_blz(self, datatype="training", valuetype="decision_function", limit_instances=1000000):
    """Open the stored predictions of one kind, if they exist on disk.

    Parameters:
        datatype: "training" or "testing"; selects
            ``self.all_training_prediction_path`` or
            ``self.all_testing_prediction_path`` as the base directory.
        valuetype: name of the sub-directory under that base directory.
        limit_instances: accepted but not used by this method.

    Returns:
        The object returned by ``blz.open`` for the joined path, or ``None``
        when that path does not exist.

    Raises:
        AssertionError: if ``datatype`` is not one of the two allowed values.
    """
    assert datatype in ["training", "testing"]
    base_path = (self.all_training_prediction_path
                 if datatype == "training"
                 else self.all_testing_prediction_path)
    loading_blz_path = os.path.join(base_path, valuetype)
    if not os.path.exists(loading_blz_path):
        return None
    return blz.open(loading_blz_path)
def test00b(self):
    """Assign a vector through ``slice(1)`` and compare with NumPy.

    The 300 values are written into the selection on both arrays.  When
    ``self.open`` is set, the barray is flushed and then opened again from
    ``self.rootdir`` before the comparison.
    """
    shape = (200, 300)
    sl = slice(1)
    expected = np.ones(shape, dtype="i4") * 3
    actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir)
    expected[sl, :] = range(300)
    actual[sl] = range(300)
    if self.open:
        actual.flush()
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
def __init__(self, path=None, mode='r', **kwargs):
    """Record the storage location and, unless writing, open the BLZ object.

    Parameters:
        path: either a ``blz.barray``/``blz.btable`` instance, which is used
            directly (``self.path`` then becomes its ``rootdir``), or a value
            passed to ``blz.open`` as ``rootdir``.
        mode: open mode; with ``'w'`` nothing is opened here.
        **kwargs: kept on ``self.kwargs`` and forwarded to ``blz.open``.
    """
    self.mode = mode
    self.kwargs = kwargs

    if isinstance(path, (blz.barray, blz.btable)):
        # An already-built BLZ object was handed in.
        self.blzarr = path
        self.path = path.rootdir
        return

    self.path = path
    if mode == 'w':
        # This will be set in the constructor later on
        self.blzarr = None
    else:
        self.blzarr = blz.open(rootdir=path, mode=mode, **kwargs)
def test02b(self):
    """Assign through the slice ``1:8:3`` and compare with NumPy.

    ``range(2)`` is written into the selection on both arrays.  When
    ``self.open`` is set, the barray is flushed and then opened again from
    ``self.rootdir`` before the comparison.
    """
    shape = (10, 2)
    sl = slice(1, 8, 3)
    expected = np.ones(shape, dtype="i4") * 3
    actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir)
    expected[sl, :] = range(2)
    actual[sl] = range(2)
    if self.open:
        actual.flush()
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
def test04(self):
    """Check `fill` followed by an optional reopen and an append of one row.

    A (2, 200) barray grows by a (1, 200) block and must then equal a
    (3, 200) NumPy array filled with 3.  The append happens whether or not
    the array was reopened from ``self.rootdir``.
    """
    item_type = '(4,)i4'
    reference = np.ones((3, 200), dtype=item_type) * 3
    stored = blz.fill((2, 200), [3, 3, 3, 3], dtype=item_type,
                      rootdir=self.rootdir)
    if self.open:
        stored = blz.open(rootdir=self.rootdir)
    stored.append(np.ones((1, 200), dtype=item_type) * 3)
    assert_array_equal(reference, stored, "Arrays are not equal")
def test02b(self):
    """Check `__setitem__` with a start, stop, step slice against NumPy.

    ``range(2)`` is written through ``1:8:3`` on both arrays.  When
    ``self.open`` is set, the barray is flushed and then opened again from
    ``self.rootdir`` before the comparison.
    """
    dims = (10, 2)
    picked = slice(1, 8, 3)
    reference = np.ones(dims, dtype="i4") * 3
    stored = blz.fill(dims, 3, dtype="i4", rootdir=self.rootdir)
    reference[picked, :] = range(2)
    stored[picked] = range(2)
    if self.open:
        stored.flush()
        stored = blz.open(rootdir=self.rootdir)
    assert_array_equal(reference[picked], stored[picked],
                       "Arrays are not equal")
def test05(self):
    """Fill a (2, 2000) barray, optionally reopen it, then append one more row.

    The result must equal a (3, 2000) NumPy array of the same '(4,)i4' dtype
    filled with 3.  The append happens whether or not the array was reopened
    from ``self.rootdir``.
    """
    dtype = '(4,)i4'
    grown = blz.fill((2, 2000), [3, 3, 3, 3], dtype=dtype, rootdir=self.rootdir)
    if self.open:
        grown = blz.open(rootdir=self.rootdir)
    extra_row = np.ones((1, 2000), dtype=dtype) * 3
    grown.append(extra_row)
    expected = np.ones((3, 2000), dtype=dtype) * 3
    # We need to use the b[:] form here to overcome a problem with the
    # assert_array_equal() function
    assert_array_equal(expected, grown[:], "Arrays are not equal")
def test00b(self):
    """Check `__setitem__` with ``slice(1)`` and a vector value against NumPy.

    ``range(300)`` is written into the selection on both arrays.  When
    ``self.open`` is set, the barray is flushed and then opened again from
    ``self.rootdir`` before the comparison.
    """
    dims = (200, 300)
    picked = slice(1)
    reference = np.ones(dims, dtype="i4") * 3
    stored = blz.fill(dims, 3, dtype="i4", rootdir=self.rootdir)
    reference[picked, :] = range(300)
    stored[picked] = range(300)
    if self.open:
        stored.flush()
        stored = blz.open(rootdir=self.rootdir)
    assert_array_equal(reference[picked], stored[picked],
                       "Arrays are not equal")
def test01a(self):
    """Check `__setitem__` with a start:stop slice and a scalar value.

    Rows ``100:400`` are set to 0 on both arrays; the barray is created with
    a default ``blz.bparams()``.  When ``self.open`` is set, the barray is
    flushed and then opened again from ``self.rootdir`` before the comparison.
    """
    dims = (500, 200)
    picked = slice(100, 400)
    reference = np.ones(dims, dtype="i4") * 3
    stored = blz.fill(dims, 3, dtype="i4", rootdir=self.rootdir,
                      bparams=blz.bparams())
    reference[picked, :] = 0
    stored[picked] = 0
    if self.open:
        stored.flush()
        stored = blz.open(rootdir=self.rootdir)
    assert_array_equal(reference[picked], stored[picked],
                       "Arrays are not equal")
def test03d(self):
    """Check `__setitem__` with a tuple of four slices (IV) against NumPy.

    The barray is built from the NumPy array before either is modified; the
    value 2 is then written through the same index on both and the full
    arrays are compared.  When ``self.open`` is set, the barray is flushed and
    then opened again from ``self.rootdir`` before the comparison.
    """
    reference = np.arange(120).reshape((5, 4, 3, 2))
    stored = blz.barray(reference, rootdir=self.rootdir)
    picked = (slice(1, 3), slice(1, 3, 1), slice(1, None, 2), slice(1))
    reference[picked] = 2
    stored[picked] = 2
    if self.open:
        stored.flush()
        stored = blz.open(rootdir=self.rootdir)
    assert_array_equal(reference[:], stored[:], "Arrays are not equal")
def test03d(self):
    """Write through several slices at once (IV) and compare whole arrays.

    The barray is created from the NumPy array first; afterwards 2 is
    assigned through the same four-slice index on both.  When ``self.open``
    is set, the barray is flushed and then opened again from ``self.rootdir``
    before the comparison.
    """
    expected = np.arange(120).reshape((5, 4, 3, 2))
    actual = blz.barray(expected, rootdir=self.rootdir)
    index = (slice(1, 3), slice(1, 3, 1), slice(1, None, 2), slice(1))
    expected[index] = 2
    actual[index] = 2
    if self.open:
        actual.flush()
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected[:], actual[:], "Arrays are not equal")
def test04c(self):
    """Check `__setitem__` with an index that reduces the shape (III).

    The index ``(1, 2, :)`` is set to 2 on both arrays; the barray is built
    from the NumPy array before either is modified.  When ``self.open`` is
    set, the barray is flushed and then opened again from ``self.rootdir``
    before the comparison.
    """
    expected = np.arange(24).reshape((4, 3, 2))
    actual = blz.barray(expected, rootdir=self.rootdir)
    index = (1, 2, slice(None, None, None))
    expected[index] = 2
    actual[index] = 2
    if self.open:
        actual.flush()
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected[index], actual[index], "Arrays are not equal")
def test04c(self):
    """Assign through two integers plus a full slice (III) and compare.

    The barray is created from the NumPy array first; afterwards 2 is
    assigned through ``(1, 2, :)`` on both.  When ``self.open`` is set, the
    barray is flushed and then opened again from ``self.rootdir`` before the
    comparison.
    """
    reference = np.arange(24).reshape((4, 3, 2))
    stored = blz.barray(reference, rootdir=self.rootdir)
    picked = (1, 2, slice(None, None, None))
    reference[picked] = 2
    stored[picked] = 2
    if self.open:
        stored.flush()
        stored = blz.open(rootdir=self.rootdir)
    assert_array_equal(reference[picked], stored[picked],
                       "Arrays are not equal")
def test05(self):
    """Check `fill` followed by an optional reopen and an append (nchunks>1).

    A (2, 2000) barray grows by a (1, 2000) block and must then equal a
    (3, 2000) NumPy array filled with 3.  The append happens whether or not
    the array was reopened from ``self.rootdir``.
    """
    item_type = '(4,)i4'
    reference = np.ones((3, 2000), dtype=item_type) * 3
    stored = blz.fill((2, 2000), [3, 3, 3, 3], dtype=item_type,
                      rootdir=self.rootdir)
    if self.open:
        stored = blz.open(rootdir=self.rootdir)
    stored.append(np.ones((1, 2000), dtype=item_type) * 3)
    # We need to use the b[:] form here to overcome a problem with the
    # assert_array_equal() function
    assert_array_equal(reference, stored[:], "Arrays are not equal")
def fit_model(self, append_self=True, feature_columns=TRAINING_COLUMN_NAMES[2:], label_columns=TRAINING_COLUMN_NAMES[1]):
    """Read the training columns from BLZ storage and fit a new model.

    Parameters:
        append_self: when true, the fitted model is also stored on
            ``self.model``.
        feature_columns: names of the feature columns; they are used in the
            order in which they appear in ``TRAINING_COLUMN_NAMES``.
        label_columns: a single column name, or an iterable of column names.

    Returns:
        The result of ``model.fit(X, y)``.

    Side effects:
        Sets ``self.feature_columns`` and ``self.label_columns``, calls
        ``self.save_model_info()`` and prints progress messages.

    Notes:
        Rows are restricted to ``self.training_data_slices`` when that
        instance attribute exists; otherwise all rows are read.  The model is
        built from ``self.model_type`` with ``self.all_model_parameters`` if
        present, else ``self.model_parameters`` if present, else no arguments.
    """
    def report(message):
        print(PRINT_MESSAGE_FORMAT.format(model_id=self.model_id, message=message))

    report("[fit_model] get features ... ")
    self.feature_columns = [one_col for one_col in TRAINING_COLUMN_NAMES if one_col in feature_columns]

    report("[fit_model] get labels ... ")
    if isinstance(label_columns, (str, unicode)):
        self.label_columns = [label_columns]
    else:
        # Previously a non-string argument left self.label_columns unset (or
        # stale from an earlier call), so the next line failed or used the
        # wrong labels.
        self.label_columns = list(label_columns)
    n_labels = len(self.label_columns)
    read_columns = self.label_columns + self.feature_columns

    report("[fit_model] build features blz ... ")
    training_cols_blz = [blz.open(os.path.join(tools.TRAINING_BLZ_PATH, name))
                         for name in read_columns]

    report("[fit_model] get training_data_slices from blz ... ")
    if "training_data_slices" in self.__dict__:
        rows = self.training_data_slices
    else:
        rows = slice(0, None)
    # One row per observation: label column(s) first, then the features.
    sample_data_arr = np.c_[[column[rows] for column in training_cols_blz]].T
    X = sample_data_arr[:, n_labels:]
    if n_labels == 1:
        y = sample_data_arr[:, 0]
    else:
        y = sample_data_arr[:, :n_labels]

    report("[fit_model] init model ... ")
    if "all_model_parameters" in self.__dict__:
        model = self.model_type(**self.all_model_parameters)
    elif "model_parameters" in self.__dict__:
        model = self.model_type(**self.model_parameters)
    else:
        model = self.model_type()

    report("[fit_model] save model_info ... ")
    self.save_model_info()

    report("[fit_model] fit model and return ... ")
    fitted = model.fit(X, y)
    if append_self:
        self.model = fitted
    return fitted
def create_kaggle_submit_csv(self, submit_format="%d,%.6f"):
    """Write the testing "predict_proba" predictions to a submission CSV.

    The file gets a header line made of the btable column names ("Id" and
    "Predicted") followed by one line per row, each formatted with
    ``submit_format``.  It is written under ``SUBMITS_PATH`` and named after
    ``self.model_id`` and the current timestamp.

    Raises:
        AssertionError: if "predict_proba" is not listed in
            ``self.list_all_predictions["testing"]``.
    """
    assert "predict_proba" in self.list_all_predictions["testing"]

    proba_blz = self.load_prediction_blz(datatype="testing", valuetype="predict_proba")
    # Column 1 of the stored predictions is the one that gets submitted.
    prediction_prob = proba_blz[:, 1]
    ids_path = os.path.join(tools.TESTING_BLZ_PATH, TESTING_COLUMN_NAMES[0])
    ids_barray = blz.open(ids_path)
    bt = blz.btable(columns=[ids_barray, prediction_prob], names=["Id", "Predicted"])

    body_lines = []
    for row in bt.iter():
        body_lines.append(submit_format % tuple(row))
    header_line = ",".join(bt.names)
    all_results_string = "\n".join([header_line] + body_lines)

    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    submit_filepath = os.path.join(SUBMITS_PATH, "%s_%s.csv" % (self.model_id, timestamp))
    with open(submit_filepath, "w") as wf:
        wf.write(all_results_string)
def test00b(self): """Accessing attributes in a opened barray.""" cn = self.getobject() # Some attrs cn.attrs['attr1'] = 'val1' cn.attrs['attr2'] = 'val2' cn.attrs['attr3'] = 'val3' # Re-open the barray if self.rootdir: cn = blz.open(rootdir=self.rootdir) self.assert_(cn.attrs['attr1'] == 'val1') self.assert_(cn.attrs['attr2'] == 'val2') self.assert_(cn.attrs['attr3'] == 'val3') self.assert_(len(cn.attrs) == 3)
def test01c(self): """Appending attributes in a opened barray.""" cn = self.getobject() # Some attrs cn.attrs['attr1'] = 'val1' # Reopen if self.rootdir: cn = blz.open(rootdir=self.rootdir) # Append attrs cn.attrs['attr2'] = 'val2' cn.attrs['attr3'] = 'val3' self.assert_(cn.attrs['attr1'] == 'val1') self.assert_(cn.attrs['attr2'] == 'val2') self.assert_(cn.attrs['attr3'] == 'val3') self.assert_(len(cn.attrs) == 3)
def test01a(self):
    """Set rows ``100:400`` to a scalar and compare the selection with NumPy.

    The barray is created with a default ``blz.bparams()``.  When
    ``self.open`` is set, the barray is flushed and then opened again from
    ``self.rootdir`` before the comparison.
    """
    shape = (500, 200)
    sl = slice(100, 400)
    expected = np.ones(shape, dtype="i4") * 3
    actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir,
                      bparams=blz.bparams())
    expected[sl, :] = 0
    actual[sl] = 0
    if self.open:
        actual.flush()
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
def test01b(self): """Removing attributes in a opened barray.""" cn = self.getobject() # Some attrs cn.attrs['attr1'] = 'val1' cn.attrs['attr2'] = 'val2' cn.attrs['attr3'] = 'val3' # Reopen if self.rootdir: cn = blz.open(rootdir=self.rootdir) # Remove one of them del cn.attrs['attr2'] self.assert_(cn.attrs['attr1'] == 'val1') self.assert_(cn.attrs['attr3'] == 'val3') self.assertRaises(KeyError, cn.attrs.__getitem__, 'attr2') self.assert_(len(cn.attrs) == 2)
def compute_training_data_logloss(self,
                                  return_type="value",
                                  use_prediction="predict_proba",
                                  prob_func=lambda xx:0.5 + 0.5*erf(xx/(2**0.5)),
                                  output_vector_type = "numpy",
                                  sample_slice = slice(0,None,None)
                                  ):
    """Compute the log loss of stored training predictions against the labels.

    Parameters:
        return_type: "value" to get the averaged loss, "vector" to get the
            per-row vector returned by ``blz_llvec_fun``.
        use_prediction: which stored prediction to load; must be listed in
            ``self.list_all_predictions["training"]``.  For "predict_proba"
            column 1 of the stored array is used directly; for anything else
            the sliced values are passed through ``prob_func``.
        prob_func: callable applied to non-probability predictions.  The
            default is ``0.5 + 0.5 * erf(x / sqrt(2))``.
        output_vector_type: "blz" or "numpy"; forwarded to ``blz_llvec_fun``.
        sample_slice: row selection applied to predictions and labels alike.

    Returns:
        ``logloss_vector.sum() / logloss_vector.shape[0]`` when
        ``return_type`` is "value", otherwise the vector itself.

    Raises:
        AssertionError: if one of the three option checks fails.
    """
    assert return_type in ["value", "vector"]
    assert output_vector_type in ["blz", "numpy"]
    assert use_prediction in self.list_all_predictions["training"]

    def report(message):
        print(PRINT_MESSAGE_FORMAT.format(model_id=self.model_id, message=message))

    report("[compute_training_data_logloss] conpute logloss with %s " % use_prediction)
    use_prediction_blz = self.load_prediction_blz(datatype="training", valuetype=use_prediction)

    if use_prediction != "predict_proba":
        prediction_prob = prob_func(use_prediction_blz[sample_slice])
    else:
        prediction_prob = use_prediction_blz[sample_slice, 1]

    labels_path = os.path.join(tools.TRAINING_BLZ_PATH, TRAINING_COLUMN_NAMES[1])
    exact_ans = blz.open(labels_path)[sample_slice]

    logloss_vector = blz_llvec_fun(exact_ans, prediction_prob, output_vector_type)
    logloss_value = logloss_vector.sum() / logloss_vector.shape[0]
    report("[compute_training_data_logloss] logloss_value = %s " % logloss_value)

    if return_type != "value":
        return logloss_vector
    return logloss_value