Example No. 1
def runFeatures_KNN(i, numberOfFeature, X_train, Y_train, X_test, Y_test):
    print("run feature method")
    print("Number of features Selected KNN : ", numberOfFeature)
    KNN = KNeighborsClassifier(n_neighbors=8, p=3)
    sfs1 = sfs(KNN,
               k_features=numberOfFeature,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=0,  # cv=0 disables cross-validation: scored on the training data
               n_jobs=-1)
    sfs1.fit(X_train, Y_train)
    cols = sfs1.k_feature_idx_
    print('The indices of best features KNN are: ', cols, '\n')
    str1 = ','.join(str(e) for e in cols)

    X_train = sfs1.transform(X_train)
    X_test = sfs1.transform(X_test)

    filename = 'saved_models/SFS_KNN.pkl'
    with open(filename, 'wb') as f:
        pickle.dump(sfs1, f)
    train_acc = KNN_evaluation_procedure(KNN, X_train, Y_train, X_test, Y_test)
    X_valid, Y_valid = SFS_validate.read_features(i)
    with open(filename, 'rb') as f:
        sfs1 = pickle.load(f)
    X_valid = sfs1.transform(X_valid)
    val_acc = SFS_validate.validation_procedure_KNN(X_valid, Y_valid)
    return str1, train_acc, val_acc
Example No. 2
def do_feature_selection(model, trainAndValidation, trainAndValidation_y, minFeatures, maxFeatures, mainFeatures,
                         fixed_features, focal_class):
    validation_indices = trainAndValidation[trainAndValidation.set_annotation == 'validation'].index
    validSet = PredefinedHoldoutSplit(validation_indices)
    trainAndValidation_y = np.where(trainAndValidation_y == focal_class, 1, 0)
    X, y = trainAndValidation[mainFeatures], trainAndValidation_y
    results = {}
    for totFeatures in range(minFeatures, maxFeatures):
        # Build step forward feature selection
        curSFS = sfs(model, k_features=totFeatures, forward=True,
                     # verbose: 0 = no output, 1 = number of features in the
                     # current set, 2 = detailed logging with timestamp and CV
                     # scores at each step
                     verbose=0,
                     scoring=auc_scorer,  # 'roc_auc',
                     cv=validSet, n_jobs=1,
                     fixed_features=fixed_features)
        curSFS = curSFS.fit(X, y)
        feat_cols = list(curSFS.k_feature_idx_)
        sel = [mainFeatures[i] for i in feat_cols]
        key = ",".join(sorted(sel))
        if key in results:
            print("warning: duplicate feature subset:", key)
        results[key] = curSFS.k_score_
    return results
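# Usage sketch for the function above (argument values are illustrative and the
# variables from the example are assumed to be in scope): the returned dict maps
# a comma-joined feature-name key to its holdout score, so the best subset is
# the key with the maximum value.
results = do_feature_selection(model, trainAndValidation, trainAndValidation_y,
                               minFeatures=5, maxFeatures=15,
                               mainFeatures=mainFeatures,
                               fixed_features=None, focal_class=1)
best_key = max(results, key=results.get)
print(best_key, results[best_key])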
Example No. 3
def forward_feature_selection(x_data, y_data, n_select):
    print("Applying forward feature selection to numerical data")
    print(
        f"cat variables before forward feature selection {x_data.select_dtypes(include='object').shape}"
    )
    print(
        f"numeric variables before forward feature selection {x_data.select_dtypes(include='number').shape}"
    )
    num_cols = x_data.select_dtypes(include='number').columns
    temp = x_data[num_cols]
    sfsf = sfs(RandomForestRegressor(n_jobs=5),
               k_features=n_select,
               forward=True,
               floating=False,
               verbose=2,
               cv=3,
               scoring='r2')
    sfsf.fit(temp, y_data)
    idx = sfsf.k_feature_idx_
    idx = list(idx)
    cols_to_keep = num_cols[idx]

    cols_to_drop = [x for x in num_cols if x not in cols_to_keep]
    x_data.drop(labels=cols_to_drop, axis=1, inplace=True)

    print(
        f"cat variables after forward feature selection {x_data.select_dtypes(include='object').columns}"
    )
    print(
        f"numeric variables after forward feature selection {x_data.select_dtypes(include='number').columns}"
    )
    return x_data
Example No. 4
def runFeatures_SVM(i, numberOfFeature, X_train, Y_train, X_test, Y_test):
    print("Number of features Selected SVM : ", numberOfFeature)
    SVC1 = SVC(kernel='linear', probability=True, random_state=0)
    sfs1 = sfs(SVC1,
               k_features=numberOfFeature,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=0,
               n_jobs=-1)
    sfs1.fit(X_train, Y_train)
    cols = sfs1.k_feature_idx_
    print('The indices of best features SVM are: ', cols, '\n')
    str1 = ','.join(str(e) for e in cols)

    X_train = sfs1.transform(X_train)
    X_test = sfs1.transform(X_test)

    filename = 'saved_models/SFS_SVM.pkl'
    with open(filename, 'wb') as f:
        pickle.dump(sfs1, f)
    train_acc = SVM_evaluation_procedure(SVC1, X_train, Y_train, X_test,
                                         Y_test)
    X_valid, Y_valid = SFS_validate.read_features(i)
    with open(filename, 'rb') as f:
        sfs1 = pickle.load(f)
    X_valid = sfs1.transform(X_valid)
    val_acc = SFS_validate.validation_procedure_SVM(X_valid, Y_valid)
    return str1, train_acc, val_acc
Example No. 5
def runFeatures_LR(i, numberOfFeature, X_train, Y_train, X_test, Y_test):
    print("Number of features Selected LR: ", numberOfFeature)
    LR = LogisticRegression(penalty='l1', solver='liblinear', tol=0.1,
                            random_state=12)  # liblinear supports the L1 penalty
    sfs1 = sfs(LR,
               k_features=numberOfFeature,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=0,
               n_jobs=-1)
    sfs1.fit(X_train, Y_train)
    cols = sfs1.k_feature_idx_
    print('The indices of best features LR are: ', cols, '\n')
    str1 = ','.join(str(e) for e in cols)

    X_train = sfs1.transform(X_train)
    X_test = sfs1.transform(X_test)

    filename = 'saved_models/SFS_LR.pkl'
    with open(filename, 'wb') as f:
        pickle.dump(sfs1, f)
    train_acc = LR_evaluation_procedure(LR, X_train, Y_train, X_test, Y_test)
    X_valid, Y_valid = SFS_validate.read_features(i)
    with open(filename, 'rb') as f:
        sfs1 = pickle.load(f)
    X_valid = sfs1.transform(X_valid)
    val_acc = SFS_validate.validation_procedure_LR(X_valid, Y_valid)
    return str1, train_acc, val_acc
Example No. 6
def runFeatures_RF(i, numberOfFeature, X_train, Y_train, X_test, Y_test):
    print("Number of features Selected RF : ", numberOfFeature)
    RF = RandomForestClassifier(n_estimators=100,
                                random_state=1,
                                max_features='log2')
    sfs1 = sfs(RF,
               k_features=numberOfFeature,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=0,
               n_jobs=-1)
    sfs1.fit(X_train, Y_train)
    cols = sfs1.k_feature_idx_
    print('The indices of best features RF are: ', cols, '\n')
    str1 = ','.join(str(e) for e in cols)

    X_train = sfs1.transform(X_train)
    X_test = sfs1.transform(X_test)

    filename = 'saved_models/SFS_RF.pkl'
    with open(filename, 'wb') as f:
        pickle.dump(sfs1, f)
    train_acc = RF_evaluation_procedure(RF, X_train, Y_train, X_test, Y_test)
    X_valid, Y_valid = SFS_validate.read_features(i)
    with open(filename, 'rb') as f:
        sfs1 = pickle.load(f)
    X_valid = sfs1.transform(X_valid)
    val_acc = SFS_validate.validation_procedure_RF(X_valid, Y_valid)
    print("val acc runFeatures_RF", val_acc)
    return str1, train_acc, val_acc
Example No. 7
def do_sfs(x_tr, y_tr):
    # n_features is assumed to be defined at module scope
    sfs_kern = sfs(svm.SVC(kernel='rbf'),
                   k_features=n_features,
                   forward=True,
                   floating=True,
                   verbose=2,
                   scoring='accuracy',
                   cv=5)
    sfs_kern.fit(x_tr, y_tr)
    return sfs_kern
Example No. 8
 def feature_selection(self, X, y):
     lda = LinearDiscriminantAnalysis(solver='lsqr')
     X = self.pretreat(X)
     sfs1 = sfs(lda, k_features=self.max_steps, forward=self.forw, floating=self.flot,
                verbose=0, scoring=self.score, cv=self.cvl)
     sfs1 = sfs1.fit(X, y)
     a = list(sfs1.k_feature_names_)
     return a
Example No. 9
def selectFeatures(algorithm, X_train, y_train, numberOfFeatures, isForward):
    selector = sfs(algorithm,
                   k_features=(1, numberOfFeatures),
                   forward=isForward,
                   floating=False,
                   verbose=0,
                   scoring='accuracy',
                   cv=None,
                   n_jobs=-1)
    selector.fit(X_train.values, y_train)
    return list(selector.k_feature_idx_)
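# Note: passing k_features as a (min, max) tuple makes mlxtend keep the subset
# that scored best over every size in the range. A hedged usage sketch (the
# classifier choice is illustrative; X_train is assumed to be a DataFrame):
best_idx = selectFeatures(KNeighborsClassifier(), X_train, y_train, 10, True)
X_selected = X_train.values[:, best_idx]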
Example No. 10
def getBestFeaturesForQDA(trainingData):
    x = trainingData.iloc[:, 0:11]
    y = trainingData.iloc[:, 11]
    bestFeatures = sfs(
        da.QuadraticDiscriminantAnalysis(),
        k_features="best",
        forward=False,
        floating=False,
        verbose=False,
        scoring='r2',
    ).fit(x, y)
    return bestFeatures.k_feature_names_, bestFeatures.k_feature_idx_
Example No. 11
def getBestFeaturesForHigherOrderTerms(trainingData, num_features):
    x = trainingData.loc[:, trainingData.columns != 'label']
    y = trainingData.loc[:, 'label']
    bestFeatures = sfs(
        da.QuadraticDiscriminantAnalysis(),
        k_features=num_features,
        forward=True,
        floating=False,
        verbose=2,
        scoring='r2',
    ).fit(x, y)
    return bestFeatures.k_feature_names_
Example No. 12
def getBestFeaturesForHigherOrderTerms(clf,
                                       trainingData,
                                       num_features,
                                       scoringString='r2'):
    x = trainingData.loc[:, trainingData.columns != 'label']
    y = trainingData.loc[:, 'label']
    bestFeatures = sfs(clf,
                       k_features=num_features,
                       forward=True,
                       floating=False,
                       verbose=2,
                       scoring=scoringString,
                       n_jobs=5).fit(x, y)
    return bestFeatures.k_feature_names_
Example No. 13
def forward_step_feature_selection(x_train_1, y_train_1):
    # Build RF regressor to use in feature selection
    clf = RandomForestRegressor(n_estimators=100, n_jobs=-1)

    # Build step forward feature selection; 'r2' scoring, since accuracy is
    # undefined for regressors
    sfs1 = sfs(clf,
               k_features=10,
               forward=True,
               floating=False,
               verbose=2,
               scoring='r2',
               cv=5)
    # Perform SFS
    sfs1 = sfs1.fit(x_train_1, y_train_1)
    return sfs1
Example No. 14
def fwrd_selection(scaled_X, Y):
    # Build a logistic regression classifier to use in feature selection
    clf = LogisticRegression()

    sfs1 = sfs(clf,
               k_features='best',
               forward=True,
               floating=False,
               verbose=0,
               scoring='accuracy',
               cv=5)
    sfs1 = sfs1.fit(scaled_X, Y)
    feat_cols = list(sfs1.k_feature_idx_)
    fs_vars = [scaled_X.columns[i] for i in feat_cols]
    return fs_vars
Example No. 15
 def feature_selection(self, X, y):
     mlr = LinearRegression()
     X = self.pretreat(X)
     sfs1 = sfs(mlr,
                k_features=self.max_steps,
                forward=self.forw,
                floating=self.flot,
                verbose=0,
                scoring=self.score,
                cv=self.cvl)
     sfs1 = sfs1.fit(X, y)
     a = list(sfs1.k_feature_names_)
     return a
Example No. 16
def wrapper_forward_selection(X, y, top_feat, model):
    model_forward = sfs(model,
                        k_features=top_feat,
                        forward=True,
                        floating=False,
                        verbose=0,
                        cv=5,
                        n_jobs=-1,
                        scoring='accuracy')
    model_forward.fit(X, y)
    res = list(
        map(lambda e: e['feature_names'], model_forward.subsets_.values())
    )  # [[len0],[len1],[len2],...,[lenN-1]]
    res.sort(key=len)
    return res
Example No. 17
def stepFeatureSelect(X, y, regressor, num_features=10, direction=False):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    X_train = pd.DataFrame(X_train, columns=list(X))
    X_test = pd.DataFrame(X_test, columns=list(X))
    stepF = sfs(regressor,
                k_features=num_features,
                forward=direction,
                floating=False,
                verbose=2,
                scoring='r2',
                cv=3,
                n_jobs=-1).fit(X_train, y_train)
    return FeatureSelector(stepF, X)
Example No. 18
def select_features(model, X, y, n=10):
    """Input the number of features you want to have"""
    candidate = []
    # Build step forward feature selection
    sfs1 = sfs(model,
               k_features=n,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=5)
    # Perform SFS (plain forward selection; floating=False)
    sfs1 = sfs1.fit(X, y)
    # The index list of the important features
    feat_cols = list(sfs1.k_feature_idx_)
    for idx in feat_cols:
        candidate.append(X.columns[idx])
    return candidate
Example No. 19
def WrapperAlgo(x_train, y_train):
    clsf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    # Build step forward feature selection
    sfs1 = sfs(
        clsf,
        k_features=18,
        forward=True,
        # The floating algorithms add a conditional exclusion (or inclusion)
        # step that can remove features after they have been included (or
        # re-add excluded ones), so a larger number of feature-subset
        # combinations can be sampled; disabled here
        floating=False,
        verbose=2,
        scoring='accuracy',
        cv=5)

    # Perform SFS
    sfs1 = sfs1.fit(x_train, y_train)
    # Which features?
    feat_cols = list(sfs1.k_feature_idx_)
    return feat_cols
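# A hedged sketch of the floating variant (SFFS) described in the comment
# above: the only change is floating=True, which enables the conditional
# exclusion step so already-selected features can be dropped again.
clsf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
sfs_floating = sfs(clsf,
                   k_features=18,
                   forward=True,
                   floating=True,
                   verbose=2,
                   scoring='accuracy',
                   cv=5)
# sfs_floating = sfs_floating.fit(x_train, y_train)  # same data as above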
Example No. 20
def selectFeatures30(X, Y):
    """ Select 30 features using step forward selection"""
    # Build RF classifier to use in feature selection
    clf = RandomForestClassifier(n_estimators=100, n_jobs=-1)

    # Build step forward feature selection
    sfs1 = sfs(clf,
               k_features=30,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=5)

    # Perform SFS
    sfs1 = sfs1.fit(X, Y)
    feat_cols = list(sfs1.k_feature_idx_)
    print(feat_cols)
    return sfs1
Example No. 21
def run_sffs(X_train, X_test, y_train, y_test, clf, normalize, k_features, cv):
    if normalize == 'yes':
        X_train, X_test = normalize_features(X_train, X_test)
    print('Starting SFFS Dimensionality Reduction ..')
    start = time.time()
    sfs1 = sfs(clf,
               k_features=k_features,
               forward=True,
               floating=True,
               verbose=2,
               scoring='accuracy',
               cv=cv,
               n_jobs=-1)
    sfs1 = sfs1.fit(X_train, y_train)

    feat_cols = list(sfs1.k_feature_idx_)
    end = time.time()
    print('\nSFFS done in', end - start, 'seconds\n')
    print('Reduced dimension : ', len(feat_cols))
    return X_train[:, feat_cols], X_test[:, feat_cols]
Example No. 22
 def forward_selection(cls, df, features_count=1):
     if df.name == 'train':
         qwk_scorer = make_scorer(cls.quadratic_weighted_kappa,
                                  greater_is_better=True)
         model = RandomForestClassifier(n_estimators=100, n_jobs=-1)
         X = df.drop('AdoptionSpeed', axis=1)
         y = df['AdoptionSpeed']
         X_train, X_test,\
             y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                random_state=42)
         y_train = y_train.ravel()
         y_test = y_test.ravel()
         sfs1 = sfs(model,
                    k_features=3,
                    forward=True,
                    floating=False,
                    verbose=2,
                    scoring=qwk_scorer,
                    cv=5)
         sfs1 = sfs1.fit(X_train, y_train)
         best_cols = list(sfs1.k_feature_idx_)
         return best_cols
     return None  # df is not the training split; nothing selected
Example No. 23
# %%
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(df.values[:, :-1],
                                                    df.values[:, -1],
                                                    test_size=0.30,
                                                    random_state=42,
                                                    shuffle=False)

y_train = y_train.astype('int')
y_test = y_test.astype('int')

# %%
# clf is a classifier defined in an earlier cell; tscv is assumed to alias
# sklearn's TimeSeriesSplit
sfs1 = sfs(clf,
           k_features='best',
           scoring='accuracy',
           verbose=2,
           forward=True,
           cv=tscv(n_splits=5))

# Perform SFS
sfs1 = sfs1.fit(X_train, y_train)

# %%
print("Best accuracy from sfs:", sfs1.k_score_)
print("Indices selected by sfs:", sfs1.k_feature_idx_)
print("List of selected indices:",
      df.columns[[x for x in (list(sfs1.k_feature_idx_))]])

# %%
# Refit the classifier on the SFS-selected features
clf.fit(X_train[:, list(sfs1.k_feature_idx_)], y_train)

Example No. 24
# reg__model: a regression model fitted in an earlier cell (assumption), with
# x_test as the matching test design matrix
reg__model.summary()
reg__model

predicted_values = reg__model.predict(x_test)


from sklearn.metrics import mean_squared_error

np.sqrt(mean_squared_error(y_test, predicted_values))

np.exp(predicted_values)  # back-transform, assuming the target was log-scaled


import pandas as pd
dataset2.sort_values('income', ascending = False)

from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.linear_model import LinearRegression

model = sfs(LinearRegression(),
            k_features=2,
            forward=False,
            floating=True,
            verbose=3,
            scoring='r2',
            n_jobs=-1).fit(np.array(x_train), y_train)


model.k_feature_idx_
from mlxtend.feature_selection import ExhaustiveFeatureSelector as efs
# exhaustive search over all subsets of size 1..2 (ExhaustiveFeatureSelector
# takes min_features/max_features rather than k_features/forward/floating)
model1 = efs(LinearRegression(),
             min_features=1,
             max_features=2,
             n_jobs=-1,
             scoring='r2').fit(np.array(x_train), y_train)


efs(LinearRegression(), 1, 3, n_jobs=-1, scoring='r2',
    print_progress=True, clone_estimator=True).fit(x_train, y_train)
"""**Building model with the best features and checking the R2 score for the same**"""

# `selector` is the fitted sklearn RFE object from a preceding cell (assumption)
mask = selector.support_
print(f"Best features according to RFE {X_m.columns[mask].values}")

X_m1 = X_m.iloc[:,mask]
# We could have used train test split or cross validation strategies
# for scoring the model but in order to compare with the stats model 
# we will use the whole data
model1 = LinearRegression().fit(X_m1,y_m)
print(f"R2 Score: {model1.score(X_m1,y_m)}")

"""### Forward Selection"""

model = LinearRegression(fit_intercept=False)
sfs1 = sfs(model, k_features=20, forward=True, scoring='r2', cv=5)
sfs1.fit(X_m, y_m)

from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs
import matplotlib.pyplot as plt

fig = plot_sfs(sfs1.get_metric_dict())
plt.title('Forward Selection')
plt.grid()
plt.show()

print(sfs1.k_features, sfs1.k_feature_names_, sep="\n")

index = list(sfs1.k_feature_idx_)
X_m1 = X_m.iloc[:, index]
model1 = LinearRegression().fit(X_m1, y_m)
print(f"R2 Score: {model1.score(X_m1, y_m)}")

"""## Regularization
1. Lasso
Example No. 26
# select a Series from the DataFrame
y = MFB_Data['2']
DT = MFB_Data.drop(['2'], axis=1)
X = DT[:]
# check the shape of y
y.shape

# In[21]:

# Build step forward feature selection
from mlxtend.feature_selection import SequentialFeatureSelector as sfs

# clf: a classifier assumed to be defined in an earlier cell
sfs1 = sfs(clf,
           k_features=56,
           forward=True,
           floating=False,
           verbose=2,
           scoring='accuracy',
           cv=5)
sfs1 = sfs1.fit(X, y)  # fit before reading k_feature_idx_ below

# In[99]:

# Which features?
feat_cols = list(sfs1.k_feature_idx_)
print(feat_cols)

# In[126]:

# check the type and shape of y
print(type(y))
print(y.shape)
Example No. 27
# X is assumed to be sliced from the same my_data array in an earlier cell
y = my_data[0:20000, 0].astype(str)

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

clf = RandomForestClassifier(n_estimators=10)

#clf = SVC(kernel='linear')

#try multiple scoring parameters, like 'accuracy', 'neg_mean_squared_error', None
sfs1 = sfs(clf,
           k_features=10,
           forward=True,
           floating=True,
           verbose=2,
           scoring='accuracy',
           cv=3,
           n_jobs=-1)
sfs1 = sfs1.fit(X_train, y_train)

feat_cols = list(sfs1.k_feature_idx_)
print(feat_cols)

# Build full model with selected features
clf.fit(X_train[:, feat_cols], y_train)
train_accuracy = clf.score(X_train[:, feat_cols], y_train)
test_accuracy = clf.score(X_test[:, feat_cols], y_test)

y_train_pred = clf.predict(X_train[:, feat_cols])
y_test_pred = clf.predict(X_test[:, feat_cols])
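# The scoring comment above suggests trying several scorers; a hedged sketch
# that repeats the selection under different scoring parameters (same clf and
# split as in this example):
for score in ('accuracy', 'f1_macro'):
    sel = sfs(clf, k_features=10, forward=True, floating=True,
              verbose=0, scoring=score, cv=3, n_jobs=-1).fit(X_train, y_train)
    print(score, sel.k_feature_idx_, sel.k_score_)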
Example No. 28
def standardize(X_train, X_test):
    # Sketch of the scaling helper (per the inference note below, a
    # StandardScaler is fit on the train split only; reconstruction assumption)
    sc = StandardScaler()
    X_train = pd.DataFrame(sc.fit_transform(X_train), columns=X_train.columns)
    X_test = pd.DataFrame(sc.transform(X_test), columns=X_test.columns)
    return X_train, X_test


X_train, X_test = standardize(X_train, X_test)

X_train
"""<b>Inference :</b> The above table is fetched upon implementing the Standard Scaling on Train dataset to bring all variables to the Standardized format.

## Feature selection
"""

linreg = LinearRegression()

linreg_forward = sfs(estimator=linreg,
                     k_features=100,
                     forward=True,
                     verbose=2,
                     scoring='r2')
sfs_forward = linreg_forward.fit(X_train, y_train)
"""<b>Inference :</b> building a forward feature selection

It is evident that from for features 31 to 51, the score is constant at 0.86 and decreases form the level 52 to 0.84,

This indicates with 51 sigbificant features, the Model Efficiency can be increased.

Hence we are running the Model with 51 features using the standard Linear Regression technique as done below.
"""

linreg = LinearRegression()

# rerun the selection with the 51 best features identified above
linreg_forward = sfs(estimator=linreg,
                     k_features=51,
                     forward=True,
                     verbose=2,
                     scoring='r2')
sfs_forward = linreg_forward.fit(X_train, y_train)
Example No. 29
def train():
    bankdata = pd.read_csv('trainingbin_.csv')
    X = bankdata.drop('class_label', axis=1)
    y = bankdata['class_label']
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    from sklearn.ensemble import RandomForestClassifier
    from mlxtend.feature_selection import SequentialFeatureSelector as sfs
    from sklearn.svm import SVC
    scaler = QuantileTransformer(output_distribution='uniform')
    X_train = scaler.fit_transform(X_train)
    #y_train= scaler.fit_transform(y_train)
    X_test = scaler.transform(X_test)  # transform only: reuse train quantiles
    #y_test= scaler.fit_transform(y_test)
    #from sklearn.ensemble import RandomForestClassifier
    clf = svm.SVC(kernel='linear', C=8192)
    #clf = SVC(kernel='linear')
    #clf = RandomForestClassifier(n_estimators=100)
    sfs1 = sfs(clf,
               k_features=10,
               forward=True,
               floating=False,
               verbose=2,
               scoring='accuracy')
    sfs1 = sfs1.fit(X_train, y_train)
    # transform (not re-fit) applies the selected columns to each split
    X_train_rfe = sfs1.transform(X_train)
    X_test_rfe = sfs1.transform(X_test)
    #clf = RandomForestClassifier(n_estimators=1000, random_state=42, max_depth=11)
    clf.fit(X_train_rfe, y_train)
    y_train_pred = clf.predict(X_train_rfe)
    from sklearn.metrics import accuracy_score as acc
    print('Training accuracy on selected features: %.3f' %
          acc(y_train, y_train_pred))
    y_test_pred = clf.predict(X_test_rfe)
    print('Testing accuracy on selected features: %.3f' % acc(y_test, y_test_pred))
    #svclassifier = SVC(kernel='rbf', gamma='auto', degree=3)

    #y_pred = test.predict(X_test)
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn import metrics
    from sklearn.metrics import accuracy_score as acc
    print(confusion_matrix(y_test, y_test_pred))
    cnf_matrix = confusion_matrix(y_test, y_test_pred)
    #print(classification_report(y_test,y_pred))
    FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)
    FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)
    TP = np.diag(cnf_matrix)
    TN = cnf_matrix.sum() - (FP + FN + TP)

    FP = FP.astype(float)
    FN = FN.astype(float)
    TP = TP.astype(float)
    TN = TN.astype(float)

    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP / (TP + FN)
    # Specificity or true negative rate
    TNR = TN / (TN + FP)
    # Precision or positive predictive value
    PPV = TP / (TP + FP)
    # Negative predictive value
    NPV = TN / (TN + FN)
    # Fall out or false positive rate
    FPR = FP / (FP + TN)
    # False negative rate
    FNR = FN / (TP + FN)
    # False discovery rate
    FDR = FP / (TP + FP)

    # Overall accuracy
    ACC = (TP + TN) / (TP + FP + FN + TN)

    print("FNR:", sum(FNR) / 55)
    print("FPR:", sum(FPR) / 55)
    print("ACC:", 100 * (sum(ACC) / 55))
Example No. 30
import pandas as pd
import numpy as np
from sklearn import linear_model
import plotly.graph_objs as go
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.preprocessing import scale

df = pd.read_csv('clean_data.csv', index_col=[0])

X = df.drop('lrfs', axis=1)
y = df['lrfs']

model = linear_model.LinearRegression()

sfs1 = sfs(model,
           k_features=(1, 16),
           forward=True,
           floating=True,
           verbose=2,
           scoring='r2')

sfs1 = sfs1.fit(X, y)

print("")

for i in sfs1.k_feature_idx_:
    print(X.columns[i])
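# Follow-up sketch: with k_features=(1, 16) the selector keeps the best-scoring
# subset size in that range; the chosen size and score can be inspected via:
print(len(sfs1.k_feature_idx_), sfs1.k_score_)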