예제 #1
0
    oos_end_date = isOosDates[5]

    # In-sample window: begins at is_start_date and spans is_months months.
    modelStartDate = is_start_date
    modelEndDate = modelStartDate + relativedelta(months=is_months)
    # Out-of-sample window: begins at oos_start_date and spans oos_months
    # months.
    oosModelStartDate = oos_start_date
    oosModelEndDate = oosModelStartDate + relativedelta(months=oos_months)

    # Correlation study over the slice running from the in-sample start
    # through the out-of-sample end. Work on a copy so dataSet is untouched.
    corrData = dataSet[modelStartDate:oosModelEndDate].copy()
    # Leave out every feature already flagged 'Drop', plus the raw
    # price/volume columns and the gainAhead/beLong columns, so that only
    # the remaining columns are correlated against each other.
    col_vals = [k for k, v in feature_dict.items() if v == 'Drop']
    col_vals.extend(
        ['Open', 'High', 'Low', 'gainAhead', 'Close', 'beLong', 'Volume'])
    corrData = dSet.drop_columns(corrData, col_vals)

    plotIt.correlation_matrix(corrData)

    # Create correlation matrix
    corr_matrix = corrData.corr()
    # Keep only the strict upper triangle (k=1 excludes the diagonal) so each
    # pair of columns is examined once; everything else becomes NaN.
    # The builtin bool is used because the np.bool alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    upper = corr_matrix.where(
        np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
    # Collect columns whose correlation with any earlier column exceeds 0.85.
    # NOTE(review): only positive correlations are tested; strongly negative
    # pairs are not flagged -- confirm this is intended.
    to_drop = [column for column in upper.columns
               if (upper[column] > 0.85).any()]
    print(to_drop)
    # Flag the highly correlated columns so later steps treat them as dropped.
    for x in to_drop:
        feature_dict[x] = 'Drop'

    # initialize dataframes for trade analysis
    tradesDataFull = pd.DataFrame()
    valDataFull = pd.DataFrame()
예제 #2
0
 # Save the dataset used for analysis as a pickle in system_directory.
 # TODO: factor this save step into a function.
 print("====Saving dataSet====\n")
 file_title = "raw-features-" + system_name + ".pkl"
 file_name = os.path.join(system_directory, file_title)
 dataSet2.to_pickle(file_name)

 # Examine correlations of features
 # Start from the columns already flagged 'Drop' in feature_dict ...
 col_vals = [k for k, v in feature_dict.items() if v == 'Drop']
 # ... and also leave out OHLC, etc., for a cleaner correlation analysis.
 col_vals.extend(
     ['Open', 'High', 'Low', 'gainAhead', 'Close', 'beLong', 'AdjClose'])
 mmData = dSet.drop_columns(dataSet2, col_vals)
 plotIt.correlation_matrix(mmData)

 # Examine and flag features with correlation value > 0.7
 # NOTE(review): the earlier comments here said 0.85 while the code uses 0.7;
 # confirm which threshold is intended.
 # Create correlation matrix
 corr_matrix = mmData.corr()
 # Keep only the strict upper triangle (k=1 excludes the diagonal) so each
 # pair of columns is examined once; everything else becomes NaN.
 # The builtin bool is used because the np.bool alias was deprecated in
 # NumPy 1.20 and removed in NumPy 1.24.
 upper = corr_matrix.where(
     np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

 # Collect columns whose correlation with any earlier column exceeds 0.7.
 # NOTE(review): only positive correlations are tested; strongly negative
 # pairs are not flagged -- confirm this is intended.
 to_drop = [column for column in upper.columns
            if (upper[column] > 0.7).any()]
 print('Column(s) to drop: %s' % to_drop)

 # Mark every flagged column as 'Drop' in the feature dict; the loop is a
 # no-op when nothing was flagged, so no emptiness check is needed.
 for x in to_drop:
     feature_dict[x] = 'Drop'