Example #1
    df_end_date = temp_raw_df.Date[lastRow-1]
    
    feat_df = dSet.set_date_range(temp_raw_df, df_start_date, df_end_date)
    # Resolve any NA's for now (forward fill; fillna(method='ffill') is
    # deprecated in newer pandas)
    feat_df.ffill(inplace=True)
    
    #set beLong level
    beLongThreshold = 0.000
    feat_df = ct.setTarget(temp_raw_df, "Long", beLongThreshold)

    # Adding features with new day
    input_dict = sysUtil.get_dict(system_directory, 'input_dict')
    feat_df = featureGen.generate_features(feat_df, input_dict)
    feat_df = transf.normalizer(feat_df, 'Volume', 50)
    
    col_vals = [k for k,v in feature_dict.items() if v == 'Drop']
    to_drop = ['Open','High','Low', 'gainAhead', 'Close', 'Volume', 'AdjClose', 'beLong']
    for x in to_drop:
        col_vals.append(x)
    model_data = dSet.drop_columns(feat_df, col_vals)
    
    # Retrieve model
    best_model_name = "SVM"
    best_model_segment = "segment-0"
    #best_model_name = system_dict["best_model"]
    file_title = "fit-model-" + best_model_name + "-IS-" + system_name + "-" + best_model_segment +".sav"
    file_name = os.path.join(system_directory, file_title)
    model = pickle.load(open(file_name, 'rb'))
    
    # get last row of data
    lastRow = model_data.shape[0]
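
A minimal follow-up sketch (not part of the original example) of how the pickled model might score the most recent bar; it assumes model_data holds exactly the feature columns the model was trained on.

# Hedged sketch: predict the beLong signal for the latest available row.
latest_features = model_data.iloc[[lastRow - 1]]           # 1-row DataFrame
predicted_beLong = int(model.predict(latest_features.values)[0])
print("Predicted beLong for", df_end_date, ":", predicted_beLong)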
Example #2
    def model_and_test(self, dX, dy, model, model_results, tscv, info_dict,
                       evData):
        accuracy_scores_is = []
        accuracy_scores_oos = []
        precision_scores_is = []
        precision_scores_oos = []
        recall_scores_is = []
        recall_scores_oos = []
        f1_scores_is = []
        f1_scores_oos = []
        hit_rate_is = []
        hit_rate_oos = []

        #  Initialize the confusion matrix
        cm_sum_is = np.zeros((2, 2))
        cm_sum_oos = np.zeros((2, 2))
        #  For each entry in the set of splits, fit and predict

        for train_index, test_index in tscv.split(dX, dy):
            print("TRAIN:", train_index, "TEST:", test_index)
            X_train, X_test = dX[train_index], dX[test_index]
            y_train, y_test = dy[train_index], dy[test_index]
            #   print("TRAIN:", train_index, "TEST:", test_index)
            #   fit the model to the in-sample data
            model.fit(X_train, y_train)

            #  test the in-sample fit
            y_pred_is = model.predict(X_train)
            #print("%s: %0.3f" % ("Hit rate (IS)  ", model.score(X_train, y_train)))
            cm_is = confusion_matrix(y_train, y_pred_is)
            cm_sum_is = cm_sum_is + cm_is
            accuracy_scores_is.append(accuracy_score(y_train, y_pred_is))
            precision_scores_is.append(precision_score(y_train, y_pred_is))
            recall_scores_is.append(recall_score(y_train, y_pred_is))
            f1_scores_is.append(f1_score(y_train, y_pred_is))

            #  test the out-of-sample data
            y_pred_oos = model.predict(X_test)
            #print("%s: %0.3f" % ("Hit rate (OOS) ", model.score(X_test, y_test)))
            cm_oos = confusion_matrix(y_test, y_pred_oos)
            #print(model.score(X_test, y_test))
            cm_sum_oos = cm_sum_oos + cm_oos
            accuracy_scores_oos.append(accuracy_score(y_test, y_pred_oos))
            precision_scores_oos.append(precision_score(y_test, y_pred_oos))
            recall_scores_oos.append(recall_score(y_test, y_pred_oos))
            f1_scores_oos.append(f1_score(y_test, y_pred_oos))

        is_ev = self.print_expected_value(cm_sum_is, "In Sample", evData)
        oos_ev = self.print_expected_value(cm_sum_oos, "Out of Sample", evData)

        is_cm_results = self.conf_matrix_results(cm_sum_is, printResults=False)
        #print(is_cm_results)
        oos_cm_results = self.conf_matrix_results(cm_sum_oos,
                                                  printResults=False)
        #print(oos_cm_results)

        col_save = [k for k, v in feature_dict.items() if v == 'Keep']
        feature_count = len(col_save)

        model_results.append({
            'Issue': info_dict['issue'],
            'StartDate': info_dict['modelStartDate'].strftime("%Y-%m-%d"),
            'EndDate': info_dict['modelEndDate'].strftime("%Y-%m-%d"),
            'Model': info_dict['modelname'],
            'Rows': info_dict['nrows'],
            'beLongCount': str(np.count_nonzero(dy == 1)),
            'Features': col_save,
            'FeatureCount': feature_count,
            'Train-Accuracy': np.mean(accuracy_scores_is),
            'Train-Precision': np.mean(precision_scores_is),
            'Train-RMC': is_cm_results["rmc"],
            'Train-RF': is_cm_results["rf"],
            'Train-NPV': is_cm_results["npv"],
            'Train-MCC': is_cm_results["mcc"],
            'Train-Recall': np.mean(recall_scores_is),
            'Train-F1': np.mean(f1_scores_is),
            'Train-EV': is_ev * 100,
            'Test-Accuracy': np.mean(accuracy_scores_oos),
            'Test-Precision': np.mean(precision_scores_oos),
            'Test-RMC': oos_cm_results["rmc"],
            'Test-RF': oos_cm_results["rf"],
            'Test-NPV': oos_cm_results["npv"],
            'Test-MCC': oos_cm_results["mcc"],
            'Test-Recall': np.mean(recall_scores_oos),
            'Test-F1': np.mean(f1_scores_oos),
            'Test-EV': oos_ev * 100
        })

        return model_results, model
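
A hedged usage sketch for model_and_test (the modelUtil object, evData, and the dX/dy arrays are illustrative assumptions drawn from the surrounding snippets, not part of the source; the method itself also relies on numpy and the sklearn.metrics scorers being imported in its module):

import datetime as dt
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
from sklearn.svm import SVC

# dX, dy: numpy arrays of features and beLong labels, as the method expects
tscv = TimeSeriesSplit(n_splits=4)      # walk-forward train/test splits
model = SVC()                           # any estimator exposing fit/predict
model_results = []                      # accumulates one metrics dict per run
info_dict = {'issue': 'SPY',            # illustrative values only
             'modelStartDate': dt.date(2018, 1, 2),
             'modelEndDate': dt.date(2018, 12, 31),
             'modelname': 'SVM',
             'nrows': dX.shape[0]}
model_results, fit_model = modelUtil.model_and_test(dX, dy, model, model_results,
                                                    tscv, info_dict, evData)
results_df = pd.DataFrame(model_results)  # one row of IS/OOS metrics per run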
Example #3
                                            oos_months, segments)
    dataLoadStartDate = isOosDates[0]
    is_start_date = isOosDates[1]
    oos_start_date = isOosDates[2]
    is_months = isOosDates[3]
    is_end_date = isOosDates[4]
    oos_end_date = isOosDates[5]

    modelStartDate = is_start_date
    modelEndDate = modelStartDate + relativedelta(months=is_months)
    oosModelStartDate = oos_start_date
    oosModelEndDate = oosModelStartDate + relativedelta(months=oos_months)

    # Correlation study
    corrData = dataSet[modelStartDate:oosModelEndDate].copy()
    col_vals = [k for k, v in feature_dict.items() if v == 'Drop']
    to_drop = ['Open', 'High', 'Low', 'gainAhead', 'Close', 'beLong', 'Volume']
    for x in to_drop:
        col_vals.append(x)
    corrData = dSet.drop_columns(corrData, col_vals)

    plotIt.correlation_matrix(corrData)

    # Create correlation matrix
    corr_matrix = corrData.corr()
    # Select upper triangle of correlation matrix
    upper = corr_matrix.where(
        np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
    # Find index of feature columns with correlation greater than 0.85
    to_drop = [column for column in upper.columns if any(upper[column] > 0.85)]
    print(to_drop)
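
A short hedged follow-up (not in the source) showing one way the highly correlated columns found above could be routed back through the existing drop mechanism:

# Flag the correlated features so later passes treat them as 'Drop',
# then remove them from the working correlation set.
for col in to_drop:
    feature_dict[col] = 'Drop'
corrData = dSet.drop_columns(corrData, to_drop)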
Example #4
    #                  'f36':
    #                  {'fname' : 'MFI',
    #                   'params' : [4]
    #                   }
    #                 }
    #
    df2 = featureGen.generate_features(df, input_dict)

    #print(df2.tail(10))

    # Plot price and indicators
    startDate = "2018-11-01"
    endDate = "2019-02-01"

    plotDataSet = df2[startDate:endDate]
    plot_dict = {}
    plot_dict['Issue'] = issue
    plot_dict['Plot_Vars'] = [
        k for k, v in feature_dict.items() if v == 'Keep'
    ]
    #plot_dict['Plot_Vars'] = list(feature_dict.keys())
    plot_dict['Volume'] = 'Yes'
    plotIt.price_Ind_Vol_Plot(plot_dict, plotDataSet)
#
#    # Drop columns where value is 'Drop'
#    col_vals=['Open', 'High', 'Close', 'Low']
#    corrDataSet = dSet.drop_columns(plotDataSet, col_vals)
#    col_vals = [k for k,v in feature_dict.items() if v == 'Drop']
#    corrDataSet = dSet.drop_columns(corrDataSet, col_vals)
#    plotIt.correlation_matrix(corrDataSet)
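
For reference, a hedged sketch of the input_dict layout implied by the commented fragment at the top of this example (the feature keys, indicator names, and parameter values are illustrative assumptions; the accepted 'fname' values depend on featureGen):

input_dict = {'f1': {'fname': 'RSI', 'params': [14]},  # assumed indicator/period
              'f2': {'fname': 'MFI', 'params': [4]}    # taken from the fragment above
              }
df2 = featureGen.generate_features(df, input_dict)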