Beispiel #1
0
    def _train(self, df_train, df_test, score):
        """Fit the configured classifier on class-balanced train/test frames.

        Each frame is ordered by ``sym`` and ``date`` and split on the
        ``score.get_name()`` column into a "low" group (value < 0.5) and a
        "high" group (value > 0.5).  Rows whose value is exactly 0.5 belong
        to neither group and are dropped.  The larger group is randomly
        downsampled to the size of the smaller one, both groups are
        concatenated and the result is shuffled.

        Args:
            df_train: DataFrame with ``sym``, ``date`` and the score column.
            df_test: DataFrame with the same columns; after balancing it is
                passed to the classifier's ``fit`` together with the score
                column name.
            score: Object whose ``get_name()`` returns the label column name.

        Raises:
            IndexError: If the balanced training frame is empty (one of the
                two groups has no rows), because the date range cannot be
                printed.
        """
        label = score.get_name()

        def balance(df, shuffle_seed=None):
            """Return ``df`` with equal-sized low/high groups, shuffled."""
            df = df.sort_values(["sym", "date"])
            low = df[df[label] < 0.5]
            high = df[df[label] > 0.5]
            # Downsample whichever group is larger.  Sampling only ``high``
            # would raise ValueError whenever it is the smaller group
            # (sampling without replacement cannot exceed the population).
            if len(low) > len(high):
                low = low.sample(n=len(high))
            else:
                high = high.sample(n=len(low))
            balanced = pd.concat([low, high], axis=0)
            # random_state=None keeps pandas' default (unseeded) shuffle.
            return balanced.sample(frac=1.0, random_state=shuffle_seed)

        df_train = balance(df_train)

        # Sort the dates once to report the covered range.
        dates = df_train["date"].sort_values().values
        print("train start : %s train end: %s total:%d" % (dates[0], dates[-1], len(df_train)))
        npTrainFeat, npTrainLabel = base.extract_feat_label(df_train, label)

        # The test frame is shuffled with a fixed seed.
        df_test = balance(df_test, shuffle_seed=1253)
        # NOTE(review): the arrays extracted from df_test are not passed to
        # fit() below; the call is kept in case extract_feat_label validates
        # or otherwise touches the frame -- confirm and remove if it does not.
        base.extract_feat_label(df_test, label)
        self.confer.classifier.fit(npTrainFeat, npTrainLabel, df_test, label)
Beispiel #2
0
    def on_epoch_end(self, epoch, logs=None):
        """Print LONG and SHORT accuracy reports on reporting epochs.

        A reporting epoch is one where ``epoch % (self.interval * 5) == 0``;
        on any other epoch the method returns without printing.

        For each direction the report is built as follows:

        * ``self.cal_accuracy`` on the test arrays yields thresholds and
          scores, printed on the ``TEST:`` line.
        * Those thresholds are passed to ``self.cal_accuracy2`` on the
          validation arrays, printed on the ``VALD:`` line.
        * For every frame in ``self.df_years``: the first frame recomputes
          thresholds with ``cal_accuracy``; every later frame reuses them
          through ``cal_accuracy2``.  Each line is labelled with the first
          unique value of the frame's ``yyyy`` column.

        The SHORT report passes ``is_short=True`` to both accuracy helpers;
        the LONG report passes no ``is_short`` argument at all.

        Args:
            epoch: Index of the epoch that just finished.
            logs: Unused.  Defaults to ``None`` rather than a shared mutable
                dict.
        """
        if epoch % (self.interval * 5) != 0:
            return

        def show(scores, thresholds):
            """Print one ``score(threshold)`` cell per threshold, no newline."""
            for i in range(len(thresholds)):
                print("score: %.3f(%.3f)" % (scores[i], thresholds[i]), end=" ")

        def report(title, **direction):
            """Print the full report for one direction.

            ``direction`` is forwarded verbatim to the accuracy helpers, so
            the LONG report relies on their own ``is_short`` default.
            """
            print(title)
            thresholds, scores = self.cal_accuracy(self.npFeatTest, self.npLabelTest, **direction)
            print("TEST: ", end='')
            show(scores, thresholds)
            print()
            print("VALD: ", end='')
            scores = self.cal_accuracy2(self.npFeatVal, self.npLabelVal, thresholds, **direction)
            show(scores, thresholds)
            print()
            print()

            for pos, df_year in enumerate(self.df_years):
                npFeat, npLabel = base.extract_feat_label(df_year, self.score, drop=True)
                year = df_year['yyyy'].unique()[0]
                print("%s: " % year, end='')
                if pos == 0:
                    # The first frame fixes the thresholds used by the rest.
                    thresholds, scores = self.cal_accuracy(npFeat, npLabel, **direction)
                else:
                    scores = self.cal_accuracy2(npFeat, npLabel, thresholds, **direction)
                show(scores, thresholds)
                print()

        print("")
        report("LONG...")
        report("SHORT...", is_short=True)
Beispiel #3
0
    def __init__(self, cls, validation_data=(), interval=10):
        """Store the evaluation data and pre-extract the reporting arrays.

        The evaluation frame is sorted by ``date`` and split by position:
        the earlier half becomes ``self.df_test`` and the remainder becomes
        ``self.df_valid``.  The whole sorted frame is also split with
        ``base.split_by_year`` into ``self.df_years``.

        Args:
            cls: Stored unchanged on ``self.cls``.
            validation_data: ``(frame, score)`` pair, where ``frame`` is a
                DataFrame with a ``date`` column and ``score`` is forwarded
                to ``base.extract_feat_label``.  Must be supplied: the
                default ``()`` cannot be unpacked into two values.
            interval: Stored on ``self.interval``.

        Raises:
            ValueError: If ``validation_data`` does not hold exactly two
                items (including when the default is used).
        """
        # NOTE(review): super(Callback, self) starts the MRO lookup *after*
        # Callback.  If this class derives from Callback, Callback.__init__
        # is skipped -- confirm whether super().__init__() was intended.
        super(Callback, self).__init__()
        self.interval = interval
        self.cls = cls
        frame, self.score = validation_data
        self.df_test_valid = frame.sort_values("date", ascending=True)

        # Split by position, not by index label: dropping the test labels
        # would also remove validation rows that share a label when the
        # index contains duplicates.
        half = len(self.df_test_valid) // 2
        self.df_test = self.df_test_valid.head(half)
        self.df_valid = self.df_test_valid.iloc[half:].copy()

        self.df_years = list(base.split_by_year(self.df_test_valid))

        self.npFeatTest, self.npLabelTest = base.extract_feat_label(self.df_test, self.score, drop=True)
        self.npFeatVal, self.npLabelVal = base.extract_feat_label(self.df_valid, self.score, drop=True)
Beispiel #4
0
    def __init__(self, cls, validation_data=(), interval=10):
        """Store the evaluation data and pre-extract the reporting arrays.

        The evaluation frame is sorted by ``date`` and split by position:
        the earlier half becomes ``self.df_test`` and the remainder becomes
        ``self.df_valid``.  The whole sorted frame is also split with
        ``base.split_by_year`` into ``self.df_years``.

        Args:
            cls: Stored unchanged on ``self.cls``.
            validation_data: ``(frame, score)`` pair, where ``frame`` is a
                DataFrame with a ``date`` column and ``score`` is forwarded
                to ``base.extract_feat_label``.  Must be supplied: the
                default ``()`` cannot be unpacked into two values.
            interval: Stored on ``self.interval``.

        Raises:
            ValueError: If ``validation_data`` does not hold exactly two
                items (including when the default is used).
        """
        # NOTE(review): super(Callback, self) starts the MRO lookup *after*
        # Callback.  If this class derives from Callback, Callback.__init__
        # is skipped -- confirm whether super().__init__() was intended.
        super(Callback, self).__init__()
        self.interval = interval
        self.cls = cls
        frame, self.score = validation_data
        self.df_test_valid = frame.sort_values("date", ascending=True)

        # Split by position, not by index label: dropping the test labels
        # would also remove validation rows that share a label when the
        # index contains duplicates.
        half = len(self.df_test_valid) // 2
        self.df_test = self.df_test_valid.head(half)
        self.df_valid = self.df_test_valid.iloc[half:].copy()

        self.df_years = list(base.split_by_year(self.df_test_valid))

        self.npFeatTest, self.npLabelTest = base.extract_feat_label(self.df_test, self.score, drop=True)
        self.npFeatVal, self.npLabelVal = base.extract_feat_label(self.df_valid, self.score, drop=True)
Beispiel #5
0
    def on_epoch_end(self, epoch, logs=None):
        """Optionally print LONG and SHORT accuracy reports.

        Reporting is disabled by default: unless the instance has a truthy
        ``report_enabled`` attribute the method returns immediately, which
        is what the previous hard-coded early ``return`` did.  Setting
        ``self.report_enabled = True`` makes the report code reachable
        without editing this method.

        When enabled, a report is printed on epochs where
        ``epoch % (self.interval * 5) == 0``.  For each direction:

        * ``self.cal_accuracy`` on the test arrays yields thresholds and
          scores, printed on the ``TEST:`` line.
        * Those thresholds are passed to ``self.cal_accuracy2`` on the
          validation arrays, printed on the ``VALD:`` line.
        * For every frame in ``self.df_years``: the first frame recomputes
          thresholds with ``cal_accuracy``; every later frame reuses them
          through ``cal_accuracy2``.  Each line is labelled with the first
          unique value of the frame's ``yyyy`` column.

        The SHORT report passes ``is_short=True`` to both accuracy helpers;
        the LONG report passes no ``is_short`` argument at all.

        Args:
            epoch: Index of the epoch that just finished.
            logs: Unused.  Defaults to ``None`` rather than a shared mutable
                dict.
        """
        # Opt-in switch; absent attribute means "disabled".
        if not getattr(self, "report_enabled", False):
            return
        if epoch % (self.interval * 5) != 0:
            return

        def show(scores, thresholds):
            """Print one ``score(threshold)`` cell per threshold, no newline."""
            for i in range(len(thresholds)):
                print("score: %.3f(%.3f)" % (scores[i], thresholds[i]), end=" ")

        def report(title, **direction):
            """Print the full report for one direction.

            ``direction`` is forwarded verbatim to the accuracy helpers, so
            the LONG report relies on their own ``is_short`` default.
            """
            print(title)
            thresholds, scores = self.cal_accuracy(self.npFeatTest, self.npLabelTest, **direction)
            print("TEST: ", end='')
            show(scores, thresholds)
            print()
            print("VALD: ", end='')
            scores = self.cal_accuracy2(self.npFeatVal, self.npLabelVal, thresholds, **direction)
            show(scores, thresholds)
            print()
            print()

            for pos, df_year in enumerate(self.df_years):
                npFeat, npLabel = base.extract_feat_label(df_year, self.score, drop=True)
                year = df_year['yyyy'].unique()[0]
                print("%s: " % year, end='')
                if pos == 0:
                    # The first frame fixes the thresholds used by the rest.
                    thresholds, scores = self.cal_accuracy(npFeat, npLabel, **direction)
                else:
                    scores = self.cal_accuracy2(npFeat, npLabel, thresholds, **direction)
                show(scores, thresholds)
                print()

        print("")
        report("LONG...")
        report("SHORT...", is_short=True)