예제 #1
0
def select_values_2(attr, old, new, w_box, c_data):
    """Rebuild the value-selection widget for a newly chosen property.

    The widget placed in ``w_box.children[3]`` depends on the dtype of
    ``c_data[w_box.children[1].value][new]``:

    * object dtype   -> ``MultiSelect`` listing the distinct non-null values
    * datetime dtype -> ``DateRangeSlider`` plus an "invert selection" box
    * int / float    -> ``RangeSlider`` plus an "invert selection" box

    Args:
        attr: Unused (presumably the bokeh ``on_change`` attribute name).
        old: Unused (presumably the previous selection).
        new: Name of the selected property, or the string ``'None'``.
        w_box: Layout whose ``children[1].value`` selects the data set and
            whose ``children[3]`` is replaced.
        c_data: Nested mapping ``c_data[data_set][property]`` of pandas
            objects (assumed to be DataFrames, since ``.iloc[:, 0]`` is used).
    """
    if new == 'None':
        w_box.children[3] = PreText(text='please select a property', width=200)
        return

    data = c_data[w_box.children[1].value][new]
    dtype_name = str(data.values.dtype)

    if data.values.dtype == 'object':  # categorical data
        level_3 = MultiSelect(title='value',
                              value=['None'],
                              options=['None'],
                              width=180)
        try:
            values = data.iloc[:, 0].dropna().values
            try:
                options = np.unique(values).tolist()
            except TypeError:
                # Mixed, unorderable objects: fall back to their text form.
                options = np.unique([str(obj) for obj in values]).tolist()
            # Keep the 'None' placeholder when the column has no usable
            # values, and always select an entry that really is an option
            # (the fallback path used to leave value=['None'] behind).
            if options:
                level_3.options = options
                level_3.value = [options[0]]
        finally:
            # The widget is installed even if building the options failed.
            w_box.children[3] = column(level_3)

    elif 'datetime' in dtype_name:  # datetime data
        start = data.min().dt.date.item()
        end = data.max().dt.date.item()
        date_slider = DateRangeSlider(title="",
                                      start=start,
                                      end=end,
                                      value=(start, end),
                                      width=180)
        checkbox_group = CheckboxGroup(labels=["invert selection"],
                                       active=[],
                                       width=180)
        w_box.children[3] = column(date_slider, checkbox_group)

    elif 'int' in dtype_name or 'float' in dtype_name:  # numeric data
        start = data.min().item()
        end = data.max().item()
        slider = RangeSlider(start=start,
                             end=end,
                             step=0.1,
                             value=(start, end),
                             title=new + " Range",
                             width=180)
        checkbox_group = CheckboxGroup(labels=["invert selection"],
                                       active=[],
                                       width=180)
        w_box.children[3] = column(slider, checkbox_group)

    else:
        print(
            "Something went wrong, unexpected datatype by clinical data value selecting"
        )  # TODO error message?
예제 #2
0
def tab_analysis(csv):
    """Build the bokeh 'Analysis' tab for exploring and labelling a DataFrame.

    The tab has four rows: parameter selection, box plot + correlation
    heatmap, per-key sensor lines (raw / moving average), and a 2-D
    dimension-reduction scatter with tools for deriving regression and
    classification label columns and exporting the result as CSV.

    Args:
        csv: pandas DataFrame to analyse. It is used directly (not copied),
            so the columns added by the handlers ('group_index',
            'reduction_*', 'Regression_*', 'Classification_*') appear on the
            caller's frame as well.

    Returns:
        A ``Panel`` titled 'Analysis' wrapping the whole layout.
    """
    import os  # only needed by the export handler

    # Alias, not a copy: the original code mutates the caller's frame and
    # callers may rely on that, so the behaviour is preserved.
    csv_original = csv

    # ------------------------------------------------------------------
    # Row 2: box plot and correlation heatmap
    # ------------------------------------------------------------------
    box_figure = figure(tools="save",
                        background_fill_color="#EFE8E2",
                        title="Box",
                        plot_width=500,
                        plot_height=500,
                        toolbar_location="below",
                        x_range=[])
    box_figure.add_tools(WheelZoomTool())
    box_figure.add_tools(PanTool())

    corr_figure = figure(plot_width=500,
                         plot_height=500,
                         title="Correlation",
                         toolbar_location=None,
                         tools="",
                         x_axis_location="above",
                         x_range=[],
                         y_range=[])

    # The heatmap glyph, hover tool and colour bar are created once and fed
    # through a data source; re-adding them on every click stacked duplicate
    # glyphs, tools and colour bars on the figure.
    corr_mapper = LinearColorMapper(palette=RdBu[11], low=-1, high=1)
    corr_src = ColumnDataSource(data=dict(level_0=[], level_1=[], value=[]))
    corr_figure.rect(x="level_0",
                     y="level_1",
                     width=1,
                     height=1,
                     source=corr_src,
                     line_color=None,
                     fill_color=transform('value', corr_mapper))
    corr_figure.add_tools(HoverTool(tooltips=[
        ("Corr", "@value"),
    ]))
    corr_figure.add_layout(ColorBar(color_mapper=corr_mapper, location=(0, 0)),
                           'left')

    # Renderers drawn by the previous make_box_plot call, removed on redraw.
    box_glyphs = []

    def make_box_plot(df, param_list):
        """Draw one box (with whiskers and outliers) per column in param_list."""
        cats = list(param_list)

        # Remove the glyphs of the previous plot; anything else attached to
        # the figure (axes/grids in older bokeh versions) is left alone.
        stale = set(id(r) for r in box_glyphs)
        box_figure.renderers = [
            r for r in box_figure.renderers if id(r) not in stale
        ]
        del box_glyphs[:]

        box_figure.x_range.factors = cats
        if not cats:
            return

        # Per-column statistics, indexed in the same order as ``cats`` so the
        # positional pairing with the category names below is correct.
        data = df[cats]
        q1 = data.quantile(q=0.25)
        q2 = data.quantile(q=0.5)
        q3 = data.quantile(q=0.75)
        iqr = q3 - q1
        upper = q3 + 1.5 * iqr
        lower = q1 - 1.5 * iqr

        # Outliers need one (category, value) coordinate pair each.
        outx = []
        outy = []
        for cat in cats:
            values = data[cat].dropna()
            extreme = values[(values > upper[cat]) | (values < lower[cat])]
            outx.extend([cat] * len(extreme))
            outy.extend(extreme.tolist())

        # Stems must not extend beyond the observed minimum / maximum.
        stem_top = np.minimum(upper.values, data.max().values).tolist()
        stem_bottom = np.maximum(lower.values, data.min().values).tolist()
        q1_values = q1.tolist()
        q2_values = q2.tolist()
        q3_values = q3.tolist()

        # stems
        box_glyphs.append(
            box_figure.segment(cats,
                               stem_top,
                               cats,
                               q3_values,
                               line_color="black"))
        box_glyphs.append(
            box_figure.segment(cats,
                               stem_bottom,
                               cats,
                               q1_values,
                               line_color="black"))

        # boxes
        box_glyphs.append(
            box_figure.vbar(cats,
                            0.7,
                            q2_values,
                            q3_values,
                            fill_color="#E08E79",
                            line_color="black"))
        box_glyphs.append(
            box_figure.vbar(cats,
                            0.7,
                            q1_values,
                            q2_values,
                            fill_color="#3B8686",
                            line_color="black"))

        # whiskers (almost-0 height rects simpler than segments)
        box_glyphs.append(
            box_figure.rect(cats, stem_bottom, 0.2, 0.01, line_color="black"))
        box_glyphs.append(
            box_figure.rect(cats, stem_top, 0.2, 0.01, line_color="black"))

        # outliers
        if outx:
            box_glyphs.append(
                box_figure.circle(outx,
                                  outy,
                                  size=6,
                                  color="#F38630",
                                  fill_alpha=0.6))

        box_figure.xgrid.grid_line_color = None
        box_figure.ygrid.grid_line_color = "white"
        box_figure.grid.grid_line_width = 2
        box_figure.xaxis.major_label_text_font_size = "12pt"

    def make_correlation_plot(df, param_list):
        """Show the pairwise correlation of the chosen columns as a heatmap."""
        df_corr = df[list(param_list)].corr().fillna(0)
        df_corr = df_corr.stack().rename("value").reset_index()
        # Fixed names so the glyph's column references hold even when the
        # frame's column index carries a name of its own.
        df_corr.columns = ['level_0', 'level_1', 'value']

        print(df_corr)

        corr_figure.x_range.factors = list(df_corr['level_0'].drop_duplicates())
        corr_figure.y_range.factors = list(df_corr['level_1'].drop_duplicates())
        corr_src.data = dict(level_0=df_corr['level_0'].tolist(),
                             level_1=df_corr['level_1'].tolist(),
                             value=df_corr['value'].tolist())

    box_cor_x = MultiSelect(title="Predictor")
    button_box_corr = Button(label="Analysis response")

    def box_corr_handler():
        """Redraw box plot and heatmap for the selected predictors."""
        param_list = list(box_cor_x.value)
        if not param_list:
            return  # nothing selected, nothing to draw

        make_box_plot(csv_original, param_list)
        make_correlation_plot(csv_original, param_list)

    button_box_corr.on_click(box_corr_handler)

    # ------------------------------------------------------------------
    # Row 1: parameter selection
    # ------------------------------------------------------------------
    param_key = MultiSelect(title="Separator(Maximum 2)")
    param_key.options = list(csv_original.columns)

    param_x = MultiSelect(title="Predictor")
    param_x.options = list(csv_original.columns)

    param_y = Select(title="Response")
    param_y.options = list(csv_original.columns)

    button_set = Button(label="Set parameter")

    key1 = MultiSelect(title="Key 1")
    key2 = MultiSelect(title="Key 2")

    target_x = Select(title="Sensor")
    show_option = CheckboxGroup(labels=["Raw", "Moving average"],
                                active=[0, 1])
    average_select = Slider(start=2,
                            end=30,
                            value=5,
                            step=1,
                            title='Average window')

    # ------------------------------------------------------------------
    # Row 4 (left): dimension reduction
    # ------------------------------------------------------------------
    target_reduction = MultiSelect(title="Target for dimension reduction")
    reduction_method = Select(title="Dimension reduction",
                              options=["PCA", "Autoencoder"])
    button_reduction = Button(label="Show result")
    figure_reduction = figure(tools="save, lasso_select",
                              title="Dimension reduction result",
                              plot_width=500,
                              plot_height=500,
                              toolbar_location="below")

    src = ColumnDataSource(data=dict(x=[], y=[], time=[]))
    # Placeholder range; the real bounds are set once a reduction has run.
    color_mapper = LinearColorMapper(palette='Viridis256', low=0, high=1000)
    figure_reduction.circle('x',
                            'y',
                            source=src,
                            size=5,
                            color={
                                'field': 'time',
                                'transform': color_mapper
                            })
    TOOLTIPS = [("(x,y)", "($x, $y)"), ("Time", "@time")]
    figure_reduction.add_tools(HoverTool(tooltips=TOOLTIPS))

    # Circle marking the manually chosen regression centre / radius.
    src_reduction = ColumnDataSource(
        data=dict(center_x=[0], center_y=[0], radius=[0]))
    figure_reduction.circle("center_x",
                            "center_y",
                            radius="radius",
                            source=src_reduction,
                            alpha=0.3)

    def group_row_positions():
        """Return one array of integer row positions per separator group.

        Without separators the whole frame is a single group, mirroring how
        'group_index' is built in ``set_handler``. Rows whose key is missing
        belong to no group.
        """
        keys = list(param_key.value)
        if not keys:
            return [np.arange(csv_original.shape[0])]
        grouped = csv_original.groupby(keys, sort=False)
        return list(grouped.indices.values())

    def key_lookup(column_name):
        """Map each option text of a key column back to its raw value.

        The key widgets list ``str(value)``; comparing the column against
        the raw value avoids lossy text -> number round trips.
        """
        return dict(
            (str(value), value) for value in csv_original[column_name].unique())

    def set_handler():
        """Apply the separator / predictor choice to the dependent widgets."""
        keys = list(param_key.value)

        if len(keys) >= 1:
            key1.options = [str(v) for v in csv_original[keys[0]].unique()]

        if len(keys) >= 2:
            key2.options = [str(v) for v in csv_original[keys[1]].unique()]

        # Index per group -> consider it as time flow
        if keys:
            csv_original['group_index'] = csv_original.groupby(
                keys).cumcount() + 1
        else:
            csv_original['group_index'] = np.arange(
                1, csv_original.shape[0] + 1)

        # Keep only numeric predictors that actually vary; std() is not
        # defined for text columns and constant columns carry no signal.
        x_list = []
        for col in param_x.value:
            if not pd.api.types.is_numeric_dtype(csv_original[col]):
                continue
            if csv_original[col].std() <= 0.0:
                continue
            x_list.append(col)

        target_x.options = x_list
        target_reduction.options = x_list
        box_cor_x.options = x_list

    button_set.on_click(set_handler)

    # ------------------------------------------------------------------
    # Row 3: sensor values per key
    # ------------------------------------------------------------------
    figure_multi_line = figure(tools="save",
                               title="Sensor value per key",
                               plot_width=1000,
                               plot_height=500,
                               toolbar_location="below")
    # The multi-line glyph and its hover tool are created once; the handler
    # only swaps the data. Re-adding them per click stacked renderers/tools.
    src1 = ColumnDataSource(
        data=dict(xs=[], ys=[], label=[], color_all=[], line_width=[]))
    line_mapper = LinearColorMapper(palette="Viridis256", low=0, high=1)
    figure_multi_line.multi_line('xs',
                                 'ys',
                                 legend='label',
                                 source=src1,
                                 color={
                                     'field': 'color_all',
                                     'transform': line_mapper
                                 },
                                 line_width="line_width")
    figure_multi_line.add_tools(HoverTool(tooltips=[
        ("Keys", "@label"),
    ]))
    button_sensor = Button(label="Show values")

    def sensor_handler():
        """Plot the chosen sensor per selected key (raw and/or moving mean)."""
        if not target_x.options:
            return  # parameters have not been set yet
        if not target_x.value:
            target_x.value = target_x.options[0]

        sensor = target_x.value
        window = int(average_select.value)
        show_raw = 0 in show_option.active
        show_mean = 1 in show_option.active
        keys = list(param_key.value)

        # (label, sensor values) for every line group to draw.
        selections = []
        if not keys:
            selections.append((str(sensor), csv_original[sensor]))
        elif len(keys) == 1:
            raw1 = key_lookup(keys[0])
            for group in key1.value:
                if group not in raw1:
                    continue
                mask = csv_original[keys[0]] == raw1[group]
                selections.append((str(group), csv_original[mask][sensor]))
        elif len(keys) == 2:
            raw1 = key_lookup(keys[0])
            raw2 = key_lookup(keys[1])
            for group1 in key1.value:
                for group2 in key2.value:
                    if group1 not in raw1 or group2 not in raw2:
                        continue
                    mask = ((csv_original[keys[0]] == raw1[group1]) &
                            (csv_original[keys[1]] == raw2[group2]))
                    selections.append((str(group1) + " / " + str(group2),
                                       csv_original[mask][sensor]))
        # More than two separators are not supported: nothing is drawn.

        xs = []
        ys = []
        label_key = []
        line_width = []
        for label, values in selections:
            if show_raw:
                y = values.values
                xs.append(np.arange(y.shape[0]))
                ys.append(y)
                label_key.append(label)
                line_width.append(1)

            if show_mean:
                # The first window-1 points have no mean and stay NaN.
                y = values.rolling(window=window).mean().ffill().values
                xs.append(np.arange(y.shape[0]))
                ys.append(y)
                label_key.append(label)
                line_width.append(3)

        src1.data = dict(xs=xs,
                         ys=ys,
                         label=label_key,
                         color_all=list(range(len(xs))),
                         line_width=line_width)
        line_mapper.high = len(xs) - 1

    button_sensor.on_click(sensor_handler)

    def reduction_handler():
        """Project the chosen columns to 2-D and show them in the scatter."""
        print(reduction_method.options)
        print(reduction_method.value)

        columns = list(target_reduction.value)
        use_pca = (reduction_method.value == ""
                   or reduction_method.value == "PCA")
        if not columns or (use_pca and len(columns) < 2):
            message_y.text = "Select at least two columns for dimension reduction"
            return

        x = csv_original[columns].values

        if use_pca:
            result = decomposition.PCA(n_components=2).fit_transform(x)
        else:
            print("Autoencoder")
            result = ae.auto_encoder(x, 2, epoch=50)

        # MinMaxScaler scales every column independently, so one call equals
        # scaling the two components separately; the 1-D slices avoid storing
        # (n, 1)-shaped columns in the data source and the frame.
        scaled = preprocessing.MinMaxScaler().fit_transform(
            np.asarray(result)[:, :2])
        r_x = scaled[:, 0]
        r_y = scaled[:, 1]

        src.data = dict(x=r_x, y=r_y, time=csv_original['group_index'].values)

        csv_original['reduction_1'] = r_x
        csv_original['reduction_2'] = r_y

        # The centre was computed but never shown before; it is the natural
        # starting point for the "Regression center" inputs.
        message_y.text = ("X center: " + str(np.mean(r_x)) + "\n" +
                          "Y center: " + str(np.mean(r_y)))

        color_mapper.low = float(csv_original['group_index'].min())
        color_mapper.high = float(csv_original['group_index'].max())
        src_reduction.data = dict(center_x=[0], center_y=[0], radius=[0])

        # Reset the classification label columns for the new projection.
        csv_original['Classification_cutPoint'] = np.nan
        # Object dtype: this column later receives text class names.
        csv_original['Classification_manualSelect'] = pd.Series(
            np.nan, index=csv_original.index, dtype=object)

    button_reduction.on_click(reduction_handler)

    # ------------------------------------------------------------------
    # Row 4 (middle): regression labels
    # ------------------------------------------------------------------
    set_y_select = Dropdown(label="Select Y",
                            menu=[
                                ("Linear", "item_1"), ("Weillibul", "item_2"),
                                ("Piecewise", "item_3"), None,
                                ("Dimension-reduction based select", "item_4")
                            ])
    new_y_setter_piecewise = TextInput(value="150",
                                       title="Piecewise cut point")
    new_y_setter_x = TextInput(value="", title="Regression center X")
    new_y_setter_y = TextInput(value="", title="Regression center Y")
    new_y_setter_r = TextInput(value="",
                               title="Radius(out-of-bound value equal 0)")
    button_new_y = Button(label="Set new regression label")

    def set_y_handler():
        """Create the regression label column chosen in ``set_y_select``."""

        def y_linear(length):
            # length-1, ..., 1, 0
            return list(range(length - 1, -1, -1))

        def y_weillibul(length, k=1.5, lmd=0.00002):
            # Decay curve sampled on a 1000-step grid and rescaled so that it
            # runs from ``length`` down towards 0 over the group.
            dist_length = 1000
            y_ = [
                math.pow(math.e, -lmd * math.pow(i, k))
                for i in range(dist_length + 1)
            ]
            y_end = y_[dist_length]

            return [(y_[int(
                (dist_length / length) * i)] - y_end) / (1.0 - y_end) * length
                    for i in range(length)]

        def y_piecewise(length, const=130):
            # Linear countdown capped at ``const``.
            return [v if v < const else const for v in y_linear(length)]

        def assign_per_group(column_name, make_labels):
            # Labels are written by row position, so they stay attached to
            # the right rows even when groups are interleaved in the frame.
            values = [np.nan] * csv_original.shape[0]
            for rows in group_row_positions():
                for row, label in zip(rows, make_labels(len(rows))):
                    values[row] = label
            csv_original[column_name] = values

        choice = set_y_select.value

        if choice == "item_1":
            assign_per_group('Regression_linear', y_linear)
            message_y.text = "Set Y value: Linear"

        elif choice == "item_2":
            assign_per_group('Regression_weillibul', y_weillibul)
            message_y.text = "Set Y value: Weillibul"

        elif choice == "item_3":
            try:
                cut_point = int(new_y_setter_piecewise.value)
            except ValueError:
                message_y.text = "Piecewise cut point must be an integer"
                return

            assign_per_group('Regression_piecewise',
                             lambda length: y_piecewise(length, cut_point))
            message_y.text = "Set Y value: Piecewise"

        elif choice == "item_4":
            try:
                center_x = float(new_y_setter_x.value)
                center_y = float(new_y_setter_y.value)
                radius = float(new_y_setter_r.value)
            except ValueError:
                message_y.text = "Center X, center Y and radius must be numbers"
                return
            if radius <= 0.0:
                message_y.text = "Radius must be greater than 0"
                return
            if ('reduction_1' not in csv_original.columns
                    or 'reduction_2' not in csv_original.columns):
                message_y.text = "Run the dimension reduction first"
                return

            # Show the chosen circle on the reduction figure.
            src_reduction.data = dict(center_x=[center_x],
                                      center_y=[center_y],
                                      radius=[radius])

            points = csv_original[['reduction_1', 'reduction_2']].values
            distance = np.sqrt((center_x - points[:, 0])**2 +
                               (center_y - points[:, 1])**2)
            # 1 at the centre, falling to 0 at the radius; 0 outside of it.
            score = 1.0 - distance / radius
            csv_original['Regression_manualSelect'] = np.where(
                score >= 0.0, score, 0.0)

            message_y.text = "Set Y value: From reduction"

    button_new_y.on_click(set_y_handler)

    # ------------------------------------------------------------------
    # Row 4 (right): classification labels
    # ------------------------------------------------------------------
    new_class_cut = TextInput(value="", title="Enter class cut point")
    new_class_setter = TextInput(value="", title="Enter class name")

    set_y_clss_select = Dropdown(label="Select Y",
                                 menu=[("Manual cut", "item_1"), None,
                                       ("Dimension-reduction based select",
                                        "item_2")])

    button_add_label = Button(label="Add classification label")
    button_new_class = Button(label="Set classification label")

    # Class names assigned through the lasso selection so far.
    labels = []

    def selected_indices():
        """Return the selected scatter rows across bokeh API generations."""
        selection = src.selected
        if hasattr(selection, 'indices'):
            return list(selection.indices)
        return list(selection['1d']['indices'])

    def label_adder_handler():
        """Assign the entered class name to the lasso-selected points."""
        indices = selected_indices()
        print(indices[:10])

        if len(indices) == 0:
            label_notifier.text = "Non selected"
            return

        # Single positional assignment on the frame itself; the chained
        # ``frame[col].iloc[...] = value`` form may write to a temporary.
        column_pos = csv_original.columns.get_loc(
            'Classification_manualSelect')
        csv_original.iloc[indices, column_pos] = new_class_setter.value
        labels.append(new_class_setter.value)
        print("add label")
        label_notifier.text = str(labels)

    button_add_label.on_click(label_adder_handler)

    def label_all_handler():
        """Finalise the classification labels for every row."""
        choice = set_y_clss_select.value

        if choice == "item_1":
            try:
                class_cut = [int(v) for v in new_class_cut.value.split(',')]

                # Rows remaining in each group (group size, ..., 2, 1), i.e.
                # the per-group index reversed. Computed by position so the
                # result does not depend on how pandas aligns the output of
                # ``groupby().transform``.
                remaining = np.full(csv_original.shape[0], np.nan)
                for rows in group_row_positions():
                    remaining[rows] = np.arange(len(rows), 0, -1)

                classes = np.digitize(remaining, bins=class_cut).astype(str)
            except ValueError:
                label_notifier.text = (
                    "Class cut points must be comma-separated integers in "
                    "ascending or descending order")
                return

            csv_original['Classification_cutPoint'] = classes

            label_notifier.text = "Labeling complete \n\nLabel: " + str(
                class_cut)
            del labels[:]

        elif choice == "item_2":
            if 'Classification_manualSelect' not in csv_original.columns:
                label_notifier.text = "Run the dimension reduction first"
                return

            manual = csv_original['Classification_manualSelect']
            if manual.isnull().any():
                print("There is NaN values")
                # Unlabelled rows inherit the label of the row above.
                csv_original['Classification_manualSelect'] = manual.ffill()

            label_notifier.text = "Labeling complete \n\nLabel: " + str(labels)
            del labels[:]

        for column_name in ('Classification_cutPoint',
                            'Classification_manualSelect'):
            if column_name in csv_original.columns:
                print(csv_original[column_name])

    button_new_class.on_click(label_all_handler)

    head_regression = Div(text=""" <b>Set Regression Label</b> """)
    head_classification = Div(text=""" <b>Set Class Label</b> """)
    label_notifier = Paragraph(text=""" - """)

    button_export = Button(label="Export CSV")

    def handler_export():
        """Write the (augmented) frame to ./Export/exported.csv."""
        export_dir = './Export'
        if not os.path.isdir(export_dir):
            os.makedirs(export_dir)
        csv_original.to_csv('./Export/exported.csv', index=False)

    button_export.on_click(handler_export)

    message_y = Paragraph(text=""" - """, width=200, height=200)

    layout = Column(
        Row(param_key, param_x, param_y, button_set),
        Row(Column(box_cor_x, button_box_corr), box_figure, corr_figure),
        Row(Column(key1, key2), figure_multi_line,
            Column(target_x, show_option, average_select, button_sensor)),
        Row(
            Column(reduction_method, target_reduction, button_reduction),
            figure_reduction,
            Column(head_regression, set_y_select, new_y_setter_piecewise,
                   new_y_setter_x, new_y_setter_y, new_y_setter_r,
                   button_new_y, message_y),
            Column(head_classification, set_y_clss_select, new_class_cut,
                   new_class_setter, button_add_label, button_new_class,
                   label_notifier), button_export))

    tab = Panel(child=layout, title='Analysis')

    return tab