def _train(self, df_train, df_test, score):
    """Fit ``self.confer.classifier`` on class-balanced train and test frames.

    Each frame is sorted by ``["sym", "date"]`` and split into two classes on
    the label column: rows strictly below 0.5 and rows strictly above 0.5.
    Rows whose label is exactly 0.5 (or NaN) belong to neither class and are
    dropped. The larger class is then downsampled, without replacement, to
    the size of the smaller one so both classes contribute equally.

    Args:
        df_train: DataFrame with ``sym``, ``date`` and the label column.
        df_test: DataFrame with the same columns, balanced the same way.
        score: Object whose ``get_name()`` returns the label column name.
    """
    label = score.get_name()

    def balance(df):
        """Return both classes of ``df`` concatenated with equal row counts."""
        df = df.sort_values(["sym", "date"])
        low = df[df[label] < 0.5]
        high = df[df[label] > 0.5]
        # Downsample whichever class is larger. Sampling ``high`` down to
        # ``len(low)`` unconditionally raises ValueError when ``high`` is the
        # smaller class, because sampling without replacement cannot return
        # more rows than exist.
        if len(high) >= len(low):
            high = high.sample(n=len(low))
        else:
            low = low.sample(n=len(high))
        assert len(low) == len(high)
        return pd.concat([low, high], axis=0)

    # Training rows are shuffled with the global RNG (not reproducible).
    df_train = balance(df_train).sample(frac=1.0)

    # One sort gives both ends of the training date range for the log line.
    dates = df_train["date"].sort_values().values
    print("train start : %s train end: %s total:%d"
          % (dates[0], dates[-1], len(df_train)))
    npTrainFeat, npTrainLabel = base.extract_feat_label(df_train, label)

    # The test shuffle uses a fixed seed so the row order handed to the
    # classifier is the same permutation on every run.
    df_test = balance(df_test).sample(frac=1.0, random_state=1253)

    # NOTE(review): the extracted test arrays are not used by the fit() call
    # below. The call is kept in case extract_feat_label has side effects on
    # df_test; drop it once that is confirmed not to be the case.
    base.extract_feat_label(df_test, label)

    self.confer.classifier.fit(npTrainFeat, npTrainLabel, df_test, label)
def on_epoch_end(self, epoch, logs=None):
    """Print LONG and SHORT accuracy reports every ``interval * 5`` epochs.

    Each report has three parts:

    * ``TEST``: ``self.cal_accuracy`` on the test arrays, which returns the
      thresholds together with their scores.
    * ``VALD``: ``self.cal_accuracy2`` on the validation arrays, evaluated at
      the thresholds returned for the test arrays.
    * One line per frame in ``self.df_years``: the first frame gets fresh
      thresholds from ``cal_accuracy``; every later frame is evaluated with
      ``cal_accuracy2`` at those first-frame thresholds.

    The SHORT report repeats the same steps with ``is_short=True`` passed to
    both accuracy methods.

    Args:
        epoch: Index of the epoch that just finished.
        logs: Unused. Defaults to ``None`` rather than a shared mutable dict.
    """
    if epoch % (self.interval * 5) != 0:
        return

    def show(scores, thresholds):
        """Print one ``score(threshold)`` pair per threshold on the current line."""
        for i, threshold in enumerate(thresholds):
            print("score: %.3f(%.3f)" % (scores[i], threshold), end=" ")

    def report(title, **mode):
        """Print one full report; ``mode`` is forwarded to the accuracy methods."""
        print(title)
        thresholds, scores = self.cal_accuracy(
            self.npFeatTest, self.npLabelTest, **mode)
        print("TEST: ", end='')
        show(scores, thresholds)
        print()

        print("VALD: ", end='')
        scores = self.cal_accuracy2(
            self.npFeatVal, self.npLabelVal, thresholds, **mode)
        show(scores, thresholds)
        print()
        print()

        for position, df_year in enumerate(self.df_years):
            npFeat, npLabel = base.extract_feat_label(
                df_year, self.score, drop=True)
            year = df_year['yyyy'].unique()[0]
            print("%s: " % year, end='')
            if position == 0:
                # The first frame sets the thresholds used for the rest.
                thresholds, scores = self.cal_accuracy(npFeat, npLabel, **mode)
            else:
                scores = self.cal_accuracy2(npFeat, npLabel, thresholds, **mode)
            show(scores, thresholds)
            print()

    print("")
    # LONG passes no is_short argument at all, so the accuracy methods' own
    # default applies.
    report("LONG...")
    report("SHORT...", is_short=True)
def __init__(self, cls, validation_data=(), interval=10):
    """Split the evaluation frame into test/validation halves and cache arrays.

    The frame is sorted by ``date`` ascending. The earlier half (rounded
    down) becomes ``self.df_test`` and the remainder ``self.df_valid``.
    Per-year frames from ``base.split_by_year`` and the feature/label arrays
    for both halves are computed once here.

    Args:
        cls: Stored unchanged as ``self.cls``.
        validation_data: A two-item ``(dataframe, score)`` pair. The empty
            default cannot be unpacked into two names and raises
            ``ValueError``, so the argument is effectively required.
            ``score`` is passed through to ``base.extract_feat_label``
            (presumably the label column name; verify against the caller).
        interval: Stored as ``self.interval``.
    """
    # NOTE(review): super(Callback, self) starts the lookup *after* Callback
    # in the MRO. If this class is a subclass of Callback rather than
    # Callback itself, Callback.__init__ is skipped. Confirm this is intended.
    super(Callback, self).__init__()
    self.interval = interval
    self.cls = cls
    self.df_test_valid, self.score = validation_data
    self.df_test_valid = self.df_test_valid.sort_values("date", ascending=True)

    # Split by position, not by index label. Dropping the first half's labels
    # from the full frame also removes later rows that share a label, which
    # leaves the two halves short of the full frame when the index has
    # duplicates. A positional split is exhaustive by construction.
    half = len(self.df_test_valid) // 2
    self.df_test = self.df_test_valid.iloc[:half]
    # copy() keeps df_valid independent of df_test_valid, as the frame
    # returned by drop() used to be.
    self.df_valid = self.df_test_valid.iloc[half:].copy()

    self.df_years = list(base.split_by_year(self.df_test_valid))
    self.npFeatTest, self.npLabelTest = base.extract_feat_label(
        self.df_test, self.score, drop=True)
    self.npFeatVal, self.npLabelVal = base.extract_feat_label(
        self.df_valid, self.score, drop=True)
def on_epoch_end(self, epoch, logs=None):
    """Do nothing at the end of an epoch; periodic reporting is disabled.

    This hook previously began with an unconditional ``return`` followed by
    LONG/SHORT accuracy reporting code that could never execute. The
    unreachable code has been removed so the no-op is explicit.

    Args:
        epoch: Index of the epoch that just finished (unused).
        logs: Unused. Defaults to ``None`` rather than a shared mutable dict.
    """
    return