Ejemplo n.º 1
0
# Widget whose text is filled in below with the names of the selected features.
stats = Paragraph(text='', width=1000, name='Selected Features:')

# The first column of df is used as the target; every remaining column is a
# candidate feature.
y = df[df.columns[:1]].values.ravel()
df1 = df.drop(df.columns[:1], axis=1)

# Keep the 5 features that score highest under the chi-squared test.
selector = SelectKBest(chi2, k=5).fit(df1, y)
X_new = selector.transform(df1)
mask = selector.get_support()  # boolean mask over the columns of df1

# Names are read from df1 -- the frame the selector was fitted on -- so they
# always line up with the mask, even if drop() removed more than one column.
# A comprehension also avoids leaving a loop variable that shadows the
# builtin `bool` behind in this scope.
new_features = [
    feature for feature, selected in zip(df1.columns.tolist(), mask)
    if selected
]

features.value = new_features
stats.text = "Top 5 features according to Select K Best (Chi2) : " + str(
    new_features)

# Hold out 25% of the rows for evaluation.
x_train_original, x_test_original, y_train_original, y_test_original = train_test_split(
    X_new, y, test_size=0.25)

clf = RandomForestClassifier()
clf.fit(x_train_original, y_train_original)
predictions = clf.predict(x_test_original)

# labels=[0, 1] pins the matrix to 2x2 so it can be unpacked into four counts.
# NOTE(review): assumes the target is encoded as 0/1 -- confirm against df.
tn, fp, fn, tp = confusion_matrix(y_test_original, predictions,
                                  labels=[0, 1]).ravel()
Ejemplo n.º 2
0
def select_values_2(attr, old, new, w_box, c_data):
    """Rebuild the value-selection widget in ``w_box.children[3]``.

    The widget type depends on the dtype of the data found at
    ``c_data[w_box.children[1].value][new]``:

    * object dtype   -> ``MultiSelect`` listing the unique non-null values
    * datetime dtype -> ``DateRangeSlider`` plus an "invert selection" checkbox
    * int / float    -> ``RangeSlider`` plus an "invert selection" checkbox

    If ``new`` is the string ``'None'`` a ``PreText`` prompt is shown instead.
    For any other dtype a message is printed and the slot is left untouched.

    Args:
        attr: Unused.
        old: Unused.
        new: Name of the selected property, or ``'None'``.
        w_box: Container whose ``children[1].value`` names the entry of
            ``c_data`` to look in and whose ``children[3]`` is replaced.
        c_data: Mapping indexed first by ``w_box.children[1].value`` and then
            by ``new``.

    NOTE(review): the ``(attr, old, new)`` prefix looks like a Bokeh
    ``on_change`` callback with ``w_box`` / ``c_data`` bound by the caller --
    confirm at the registration site.
    """
    if new == 'None':
        w_box.children[3] = PreText(text='please select a property', width=200)
        return

    # Look the selection up once; every branch below works on the same data.
    selected = c_data[w_box.children[1].value][new]
    dtype = selected.values.dtype
    dtype_name = str(dtype)

    if dtype == 'object':  # categorical data
        level_3 = MultiSelect(title='value',
                              value=['None'],
                              options=['None'],
                              width=180)
        raw_values = selected.iloc[:, 0].dropna().values
        try:
            choices = np.unique(raw_values).tolist()
        except TypeError:
            # np.unique sorts its input; values of mixed, unorderable types
            # make that fail, so fall back to their string representations.
            choices = np.unique([str(obj) for obj in raw_values]).tolist()

        # Keep the 'None' placeholder when there is nothing to offer, and
        # always preselect the first option so `value` is one of `options`
        # (previously the string fallback left value=['None'] behind, and an
        # all-null column raised IndexError).
        if choices:
            level_3.options = choices
            level_3.value = [choices[0]]
        w_box.children[3] = column(level_3)

    elif 'datetime' in dtype_name:  # datetime data
        start = selected.min().dt.date.item()
        end = selected.max().dt.date.item()
        date_slider = DateRangeSlider(title="",
                                      start=start,
                                      end=end,
                                      value=(start, end),
                                      width=180)
        checkbox_group = CheckboxGroup(labels=["invert selection"],
                                       active=[],
                                       width=180)
        w_box.children[3] = column(date_slider, checkbox_group)

    elif 'int' in dtype_name or 'float' in dtype_name:  # numeric data
        start = selected.min().item()
        end = selected.max().item()
        slider = RangeSlider(start=start,
                             end=end,
                             step=0.1,
                             value=(start, end),
                             title=new + " Range",
                             width=180)
        checkbox_group = CheckboxGroup(labels=["invert selection"],
                                       active=[],
                                       width=180)
        w_box.children[3] = column(slider, checkbox_group)

    else:
        print(
            "Something went wrong, unexpected datatype by clinical data value selecting"
        )  # TODO error message?