# Text widget that reports which features were selected.
stats = Paragraph(text='', width=1000, name='Selected Features:')

# The first column of df is used as the target; every other column is a
# candidate feature.
y = df[df.columns[:1]].values.ravel()
df1 = df.drop(df.columns[:1], axis=1)

# Keep the 5 features with the best chi-squared score against the target.
selector = SelectKBest(chi2, k=5).fit(df1, y)
X_new = selector.transform(df1)

# get_support() returns one boolean per candidate column; pair each flag with
# its column name and keep the names that were selected. The loop variable is
# deliberately not called `bool`, which would rebind the builtin for all code
# that runs after this point.
mask = selector.get_support()
new_features = [
    feature
    for keep, feature in zip(mask, df.columns[1:].tolist())
    if keep
]

# Publish the selection to the widgets.
features.value = new_features
stats.text = "Top 5 features according to Select K Best (Chi2) : " + str(
    new_features)

# Hold out 25% of the rows for evaluation. No random_state is passed, so the
# split (and therefore the scores below) differs from run to run.
x_train_original, x_test_original, y_train_original, y_test_original = train_test_split(
    X_new, y, test_size=0.25)

# Fit a random forest on the selected features and predict the held-out rows.
clf = RandomForestClassifier()
clf.fit(x_train_original, y_train_original)
predictions = clf.predict(x_test_original)

# With labels=[0, 1] the 2x2 confusion matrix flattens to (tn, fp, fn, tp).
tn, fp, fn, tp = confusion_matrix(y_test_original, predictions,
                                  labels=[0, 1]).ravel()
def _categorical_options(values):
    """Return the sorted distinct entries of *values* as a plain list.

    Falls back to comparing the string form of every entry when the raw
    entries cannot be ordered against each other (np.unique sorts its input
    and raises TypeError for such mixed content).
    """
    try:
        return np.unique(values).tolist()
    except TypeError:
        return np.unique([str(obj) for obj in values]).tolist()


def select_values_2(attr, old, new, w_box, c_data):
    """Replace the value selector in ``w_box`` to match the chosen property.

    Written in the shape of a widget change callback: ``attr`` and ``old`` are
    accepted but not used. The widget placed at ``w_box.children[3]`` depends
    on the dtype of the selected data:

    * object dtype      -> ``MultiSelect`` listing the distinct values
    * datetime dtype    -> ``DateRangeSlider`` plus an "invert selection" box
    * int / float dtype -> ``RangeSlider`` plus an "invert selection" box

    Args:
        attr: Unused.
        old: Unused.
        new: Name of the selected property, or the string ``'None'`` when
            nothing is selected.
        w_box: Container whose ``children[1].value`` names the entry of
            ``c_data`` to read and whose ``children[3]`` is replaced.
        c_data: Mapping indexed first by ``w_box.children[1].value`` and then
            by ``new``. NOTE(review): the result is used like a DataFrame
            (``.iloc[:, 0]``, ``.min().dt``), so the columns are presumably
            multi-level -- confirm against the caller.
    """
    if new == 'None':
        w_box.children[3] = PreText(text='please select a property', width=200)
        return

    # Look the data up once instead of repeating the lookup in every branch.
    selected = c_data[w_box.children[1].value][new]
    dtype = selected.values.dtype
    dtype_name = str(dtype)

    if dtype == 'object':
        # categorical data
        level_3 = MultiSelect(title='value',
                              value=['None'],
                              options=['None'],
                              width=180)
        try:
            options = _categorical_options(
                selected.iloc[:, 0].dropna().values)
            # Without any non-null value there is nothing to offer; keep the
            # 'None' placeholder instead of indexing into an empty list.
            if options:
                level_3.options = options
                # Preselect the first entry on both the normal and the
                # stringified path so the value is always one of the options.
                level_3.value = [options[0]]
        finally:
            # Always show a selector, even if building the options failed.
            w_box.children[3] = column(level_3)
    elif 'datetime' in dtype_name:
        # datetime data
        start = selected.min().dt.date.item()
        end = selected.max().dt.date.item()
        date_slider = DateRangeSlider(title="",
                                      start=start,
                                      end=end,
                                      value=(start, end),
                                      width=180)
        checkbox_group = CheckboxGroup(labels=["invert selection"],
                                       active=[],
                                       width=180)
        w_box.children[3] = column(date_slider, checkbox_group)
    elif 'int' in dtype_name or 'float' in dtype_name:
        # numeric data
        start = selected.min().item()
        end = selected.max().item()
        slider = RangeSlider(start=start,
                             end=end,
                             step=0.1,
                             value=(start, end),
                             title=new + " Range",
                             width=180)
        checkbox_group = CheckboxGroup(labels=["invert selection"],
                                       active=[],
                                       width=180)
        w_box.children[3] = column(slider, checkbox_group)
    else:
        # TODO error message?
        print(
            "Something went wrong, unexpected datatype by clinical data value selecting"
        )