コード例 #1
0
    def regressor_dev_test(self, dev_set, dev_value_set, dev_date_set, dev_stock_id_set, save_clsfy_path="mlp_trade_regressor", is_cv=False, include_top_list = None):
        """Score the in-memory regressor ``self.mlp_regressor`` on a supplied dev set.

        Args:
            dev_set: Feature rows handed to ``self.mlp_regressor.predict``.
            dev_value_set: Actual target values, aligned with ``dev_set``.
            dev_date_set: Date of each sample; forwarded to
                ``get_avg_price_change``.
            dev_stock_id_set: Stock id of each sample; forwarded to
                ``get_avg_price_change``.
            save_clsfy_path: Not used by this method.
            is_cv: When true, the summary print-out is suppressed.
            include_top_list: Forwarded to ``get_avg_price_change``; a falsy
                value is replaced by ``[1]``.

        Returns:
            A tuple ``(rmse, avg_price_change, polar_percent)`` where ``rmse``
            is the result of ``calculate_rmse``, ``avg_price_change`` is the
            first entry of the tuple returned by ``get_avg_price_change`` and
            ``polar_percent`` is the fraction of samples whose prediction and
            actual value have a non-negative product.
        """
        top_list = include_top_list or [1]

        predictions = np.array(self.mlp_regressor.predict(dev_set))
        actuals = np.array(dev_value_set)
        rmse = calculate_rmse(actuals, predictions)

        avg_price_change_tuple, _var_tuple, _std_tuple = get_avg_price_change(
            predictions,
            actuals,
            dev_date_set,
            dev_stock_id_set,
            include_top_list=top_list,
        )

        # A sample counts as "same polarity" when prediction * actual >= 0,
        # so a zero on either side is treated as a match.
        same_sign_total = sum(
            1 for predicted, actual in zip(predictions, actuals) if predicted * actual >= 0
        )
        polar_percent = same_sign_total / len(predictions)

        if not is_cv:
            rule = "----------------------------------------------------------------------------------------"
            print(rule)
            print("actual_value_list, ", actuals)
            print("pred_value_list, ", predictions)
            print("polarity: {}".format(polar_percent))
            print("mrse: {}".format(rmse))
            print("avg_price_change: {}".format(avg_price_change_tuple))
            print(rule)

        return rmse, avg_price_change_tuple[0], polar_percent
コード例 #2
0
    def regressor_dev(self, save_clsfy_path="mlp_trade_regressor", is_cv=False, include_top_list = None):
        """Evaluate a pickled regressor on the instance's dev set, grouped by date.

        The model is unpickled from ``save_clsfy_path`` and asked to predict
        ``self.dev_set``. Predictions and ``self.dev_value_set`` are turned
        into 'pos' / 'neg' labels (``>= 0`` is 'pos'), grouped by the matching
        entry of ``self.dev_date_set`` (the original comments treat each date
        as one week), and per-date F1, accuracy and RMSE are averaged.

        Args:
            save_clsfy_path: Path of the pickled regressor to load.
            is_cv: When true, the summary print-out is suppressed.
            include_top_list: Forwarded to ``get_avg_price_change``; a falsy
                value is replaced by ``[1]``.

        Returns:
            A tuple ``(week_average_rmse, avg_price_change_tuple,
            week_average_accuracy, week_average_f1)``; the three averages are
            ``np.average`` over the per-date values, and
            ``avg_price_change_tuple`` is the first item returned by
            ``get_avg_price_change``.
        """
        if not include_top_list:
            include_top_list = [1]

        # NOTE(security): unpickling executes code stored in the file, so
        # ``save_clsfy_path`` must only ever point at a trusted model file.
        # The ``with`` block guarantees the handle is closed after loading.
        with open(save_clsfy_path, "rb") as model_file:
            mlp_regressor = pickle.load(model_file)

        pred_value_list = np.array(mlp_regressor.predict(self.dev_set))
        actual_value_list = np.array(self.dev_value_set)
        date_list = self.dev_date_set
        stock_id_list = self.dev_stock_id_set
        avg_price_change_tuple, _var_tuple, _std_tuple = get_avg_price_change(
            pred_value_list,
            actual_value_list,
            date_list,
            stock_id_list,
            include_top_list=include_top_list,
        )

        # Group labels (classification view) and raw values (regression view)
        # by the date of each sample.
        pred_label_dict_by_week = collections.defaultdict(list)
        golden_label_dict_by_week = collections.defaultdict(list)
        pred_value_dict_by_week = collections.defaultdict(list)
        golden_value_dict_by_week = collections.defaultdict(list)

        # Index explicitly (instead of zip) so that a dev set whose dates or
        # values are shorter than the predictions still fails loudly.
        for i, predict_value in enumerate(pred_value_list):
            date = date_list[i]
            golden_value = actual_value_list[i]
            pred_label_dict_by_week[date].append('pos' if predict_value >= 0 else 'neg')
            golden_label_dict_by_week[date].append('pos' if golden_value >= 0 else 'neg')
            pred_value_dict_by_week[date].append(predict_value)
            golden_value_dict_by_week[date].append(golden_value)

        week_average_f1_list = []
        week_average_accuracy_list = []
        week_rmse_list = []

        # Compute F1, accuracy and RMSE separately for every date group.
        for date, week_pred_labels in pred_label_dict_by_week.items():
            week_golden_labels = golden_label_dict_by_week[date]

            # Only the second item returned by compute_average_f1 is used.
            _, average_f1 = compute_average_f1(week_pred_labels, week_golden_labels)
            week_average_f1_list.append(average_f1)

            correct = sum(
                1
                for pred_label, golden_label in zip(week_pred_labels, week_golden_labels)
                if pred_label == golden_label
            )
            week_average_accuracy_list.append(correct / len(week_golden_labels))

            rmse = calculate_rmse(golden_value_dict_by_week[date], pred_value_dict_by_week[date])
            week_rmse_list.append(rmse)

        week_average_f1 = np.average(week_average_f1_list)
        week_average_accuracy = np.average(week_average_accuracy_list)
        week_average_rmse = np.average(week_rmse_list)

        if not is_cv:
            print("----------------------------------------------------------------------------------------")
            print("actual_value_list, ", actual_value_list)
            print("pred_value_list, ", pred_value_list)
            print("week_average_accuracy: {}".format(week_average_accuracy))
            print("week_average_f1: {}".format(week_average_f1))
            print("week_average_rmse: {}".format(week_average_rmse))
            print("week_average_price_change: {}".format(avg_price_change_tuple))
            print("----------------------------------------------------------------------------------------")

        return week_average_rmse, avg_price_change_tuple, week_average_accuracy, week_average_f1
コード例 #3
0
    def baseline_reg_dev(self, target_folder):
        """Evaluate a baseline that predicts from the ``percent_change_price`` feature.

        The column position of ``percent_change_price`` is looked up in the
        first line of the first file returned by ``os.listdir(target_folder)``;
        every second comma-separated field of that line is taken as a feature
        name. For each sample in ``self.dev_set`` the value at that position,
        multiplied by 0.01, is used as the predicted value. Predictions are
        then scored against ``self.dev_value_set`` grouped by the matching
        entry of ``self.dev_date_set`` (the original comments treat each date
        as one week).

        Args:
            target_folder: Directory holding the feature files whose header
                line names the features.

        Returns:
            A tuple ``(week_average_rmse, avg_price_change_tuple,
            week_average_accuracy, week_average_f1,
            date_actual_avg_priceChange_list)``. The three averages are
            ``np.average`` over the per-date values; the remaining two items
            come from ``get_avg_price_change`` and ``get_chosen_stock_return``.

        Raises:
            IndexError: If ``target_folder`` contains no entries.
            ValueError: If ``percent_change_price`` is not among the header
                fields (which includes an empty first file).
        """
        # NOTE(review): os.listdir returns entries in arbitrary order, so this
        # presumably relies on every file sharing the same header -- confirm.
        file1 = os.listdir(target_folder)[0]
        file1_path = os.path.join(target_folder, file1)
        with open(file1_path, 'r') as f:
            # Only the header line is needed; avoid reading the whole file.
            feature_list = f.readline().strip().split(',')[::2]

        key_index = feature_list.index('percent_change_price')
        print ("key_index: ", key_index)

        # The feature is scaled by 0.01 before being used as the prediction.
        pred_value_list = [float(dev_sample[key_index]) * 0.01 for dev_sample in self.dev_set]
        actual_value_list = np.array(self.dev_value_set)

        date_list = self.dev_date_set
        stock_id_list = self.dev_stock_id_set
        include_top_list = [1]

        avg_price_change_tuple, _var_tuple, _std_tuple = get_avg_price_change(
            pred_value_list,
            actual_value_list,
            date_list,
            stock_id_list,
            include_top_list=include_top_list,
        )

        date_actual_avg_priceChange_list = get_chosen_stock_return(
            pred_value_list,
            actual_value_list,
            date_list,
            stock_id_list,
            include_top_list=None,
        )

        # Group labels (classification view) and raw values (regression view)
        # by the date of each sample.
        pred_label_dict_by_week = collections.defaultdict(list)
        golden_label_dict_by_week = collections.defaultdict(list)
        pred_value_dict_by_week = collections.defaultdict(list)
        golden_value_dict_by_week = collections.defaultdict(list)

        # Index explicitly (instead of zip) so that dates or values shorter
        # than the predictions still fail loudly.
        for i, predict_value in enumerate(pred_value_list):
            date = date_list[i]
            golden_value = actual_value_list[i]
            pred_label_dict_by_week[date].append('pos' if predict_value >= 0 else 'neg')
            golden_label_dict_by_week[date].append('pos' if golden_value >= 0 else 'neg')
            pred_value_dict_by_week[date].append(predict_value)
            golden_value_dict_by_week[date].append(golden_value)

        week_average_f1_list = []
        week_average_accuracy_list = []
        week_rmse_list = []

        # Compute F1, accuracy and RMSE separately for every date group.
        for date, week_pred_labels in pred_label_dict_by_week.items():
            week_golden_labels = golden_label_dict_by_week[date]

            # Only the second item returned by compute_average_f1 is used.
            _, average_f1 = compute_average_f1(week_pred_labels, week_golden_labels)
            week_average_f1_list.append(average_f1)

            correct = sum(
                1
                for pred_label, golden_label in zip(week_pred_labels, week_golden_labels)
                if pred_label == golden_label
            )
            week_average_accuracy_list.append(correct / len(week_golden_labels))

            rmse = calculate_rmse(golden_value_dict_by_week[date], pred_value_dict_by_week[date])
            week_rmse_list.append(rmse)

        week_average_f1 = np.average(week_average_f1_list)
        week_average_accuracy = np.average(week_average_accuracy_list)
        week_average_rmse = np.average(week_rmse_list)

        return week_average_rmse, avg_price_change_tuple, week_average_accuracy, week_average_f1, \
               date_actual_avg_priceChange_list
コード例 #4
0
        pred_value_list, actual_value_list, date_list, stock_id_list = rnn1.reg_dev_for_moving_window_test(
        )
        #
        #print ("date_list: ", date_list)
        pred_label_list_temp = [
            'pos' if x >= 0 else 'neg' for x in pred_value_list
        ]
        actual_label_list_temp = [
            'pos' if x >= 0 else 'neg' for x in actual_value_list
        ]
        pred_label_list.extend(pred_label_list_temp)
        actual_label_list.extend(actual_label_list_temp)
        data_list_for_classification.extend(date_list)
        #
        rmse = calculate_rmse(actual_value_list, pred_value_list)
        avg_price_change_tuple, var_tuple, std_tuple = get_avg_price_change(
            pred_value_list, actual_value_list, date_list, stock_id_list)

        chosen_stock_return_list_temp = get_chosen_stock_return(
            pred_value_list, actual_value_list, date_list, stock_id_list)
        avg_price_change_1 = avg_price_change_tuple[
            0]  # Strategy: choose the top 1 stock each week
        var_1 = var_tuple[0]
        std_1 = std_tuple[0]
        avg_price_change_list.append(avg_price_change_1)
        print("chosen_stock_return_list_temp: ", chosen_stock_return_list_temp)
        var_list.append(var_1)
        std_list.append(std_1)
        chosen_stock_return_list.extend(chosen_stock_return_list_temp)
        rmse_list.append(rmse)

        # date set