コード例 #1
0
ファイル: Pattern_Frame.py プロジェクト: IITDBGroup/cape
class Local_Pattern_Frame:
    """Pop-up window presenting one local pattern as a description plus a plot.

    The window content is a two-column grid: ``load_pattern_description``
    places a text label in column 0 and ``load_pattern_graph`` places a
    matplotlib canvas in column 1.
    """

    def __init__(self, chosen_row=None, pattern_data_df=None, agg_alias=None,
                 data_convert_dict=None, frame_color='light yellow'):
        """Create the pop-up window and its two-column container frame.

        Args:
            chosen_row: Mapping-like record of the selected pattern. The
                methods of this class read its 'variable', 'model', 'stats',
                'param', 'fixed', 'fixed_value', 'theta' and 'predictor'
                entries.
            pattern_data_df: DataFrame holding the data points of the pattern.
            agg_alias: Name of the aggregate column in ``pattern_data_df``.
            data_convert_dict: Handed unchanged to every ``Plotter``.
            frame_color: Background colour used for frames and the label.
        """
        self.chosen_row = chosen_row
        self.pattern_data_df = pattern_data_df
        self.agg_alias = agg_alias
        self.data_convert_dict = data_convert_dict
        self.frame_color = frame_color

        self.pop_up_frame = Toplevel()
        self.pop_up_frame.geometry("%dx%d%+d%+d" % (1200, 800, 250, 125))
        self.pop_up_frame.wm_title("Pattern Detail")

        self.win_frame = Frame(self.pop_up_frame, bg=self.frame_color)
        self.win_frame.pack(fill=BOTH, expand=True)
        # Column 1 (the graph) gets three times the width of column 0 (the text).
        self.win_frame.columnconfigure(0, weight=1)
        self.win_frame.columnconfigure(1, weight=3)
        self.win_frame.rowconfigure(0, weight=1)

    def load_pattern_graph(self):
        """Embed a matplotlib canvas in column 1 and draw the pattern on it.

        A text message is drawn instead of a plot when the data has 100 or
        more rows or when the pattern has more than two variables. Constant
        models are drawn in 2D (one variable) or 3D (otherwise); linear models
        are only drawn for a single variable.
        """
        graph_frame = Frame(self.win_frame, bg=self.frame_color)
        graph_frame.grid(column=1, row=0, sticky='nesw')
        self.figure = Figure(figsize=(5, 5), dpi=130)
        canvas = FigureCanvasTkAgg(self.figure, graph_frame)
        canvas.get_tk_widget().pack(side=TOP, fill=BOTH, expand=True)
        toolbar = NavigationToolbar2Tk(canvas, graph_frame)
        toolbar.update()
        canvas.get_tk_widget().pack(side=TOP, fill=BOTH, expand=1)

        if len(self.pattern_data_df) >= 100:
            self._show_plot_message(
                "Cannot plot because the size of the data is so large!")
        elif len(self.chosen_row['variable']) > 2:
            self._show_plot_message(
                "Cannot plot because the number of dimension of data is higher than 2!")
        elif self.chosen_row['model'] == 'const':
            if len(self.chosen_row['variable']) == 1:
                self._plot_const_2d(self.chosen_row['variable'][0])
            else:
                self._plot_const_3d(self.chosen_row['variable'][0],
                                    self.chosen_row['variable'][1])
        elif self.chosen_row['model'] == 'linear':
            if len(self.chosen_row['variable']) == 1:
                self._plot_linear_2d(self.chosen_row['variable'][0])

        canvas.draw()

    def _show_plot_message(self, message):
        """Draw ``message`` on the figure in place of a plot."""
        self.text_plotter = Plotter(figure=self.figure,
                                    data_convert_dict=self.data_convert_dict,
                                    mode='2D')
        self.text_plotter.add_text(message)

    def _plot_const_2d(self, variable_name):
        """Plot a constant model over one variable, with data and outliers."""
        self.plotter = Plotter(figure=self.figure,
                               data_convert_dict=self.data_convert_dict,
                               mode='2D')
        const = round(self.chosen_row['stats'], 2)
        self.plotter.plot_2D_const(const, label="Pattern Model")
        low_outlier_df, high_outlier_df = self.get_outlier_frame(
            self.chosen_row, self.pattern_data_df)
        self._plot_2d_points(variable_name, low_outlier_df, high_outlier_df)

    def _plot_const_3d(self, x_name, y_name):
        """Plot a constant model over two variables together with the data."""
        self.plotter = Plotter(figure=self.figure,
                               data_convert_dict=self.data_convert_dict,
                               mode='3D')
        const = self.chosen_row['stats']
        draw_const_df = self.pattern_data_df[[x_name, y_name]]
        draw_scatter_df = self.pattern_data_df[[x_name, y_name, self.agg_alias]]
        self.plotter.plot_3D_const(draw_const_df, x=x_name, y=y_name,
                                   z_value=const, label="Pattern Model")
        self.plotter.plot_3D_scatter(draw_scatter_df, x=x_name, y=y_name,
                                     z=self.agg_alias, label=self.agg_alias)
        self.plotter.set_x_label(x_name)
        self.plotter.set_y_label(y_name)
        self.plotter.set_z_label(self.agg_alias)
        self.plotter.set_title("Pattern Graph")

    def _plot_linear_2d(self, variable_name):
        """Plot a linear model over one variable, with data and outliers."""
        self.plotter = Plotter(figure=self.figure,
                               data_convert_dict=self.data_convert_dict,
                               mode='2D')
        params = self.chosen_row['param']
        intercept_value = params['Intercept']
        # The second key of 'param' is taken as the slope's name.
        # NOTE(review): this relies on the key order of 'param' - confirm.
        slope_name = list(params)[1]
        slope_value = params[slope_name]

        draw_line_df = self.pattern_data_df[[variable_name]]
        low_outlier_df, high_outlier_df = self.get_outlier_frame(
            self.chosen_row, self.pattern_data_df)

        self.plotter.plot_2D_linear(draw_line_df, slope=slope_value,
                                    intercept=intercept_value,
                                    label="Pattern Model")
        self._plot_2d_points(variable_name, low_outlier_df, high_outlier_df)

    def _plot_2d_points(self, variable_name, low_outlier_df, high_outlier_df):
        """Scatter the data and any outliers, then label the 2D axes."""
        draw_df = self.pattern_data_df[[variable_name, self.agg_alias]]
        self.plotter.plot_2D_scatter(draw_df, x=variable_name,
                                     y=self.agg_alias, label=self.agg_alias)
        if not low_outlier_df.empty:
            self.plotter.plot_2D_scatter(low_outlier_df, x=variable_name,
                                         y=self.agg_alias,
                                         label='Low Outlier(s)',
                                         color='#98df8a', marker='*', size=250)
        if not high_outlier_df.empty:
            self.plotter.plot_2D_scatter(high_outlier_df, x=variable_name,
                                         y=self.agg_alias,
                                         label='High Outlier(s)',
                                         color='#ff9896', marker='*', size=250)
        self.plotter.set_x_label(variable_name)
        self.plotter.set_y_label(self.agg_alias)
        self.plotter.set_title("Pattern Graph")

    def load_pattern_description(self):
        """Build the pattern sentence and show it as a label in column 0.

        The text names the fixed attribute/value pairs, the aggregate, the
        model type and the variable attributes, followed by the intercept and
        coefficient (non-constant models only) and the goodness of fit.
        """
        fixed_attribute = self.chosen_row['fixed']
        fixed_value = self.chosen_row['fixed_value']
        # str() on both sides so non-string values (e.g. numeric years) work
        # for a single fixed attribute exactly as they do for several.
        pairs = [str(attribute) + ' = ' + str(fixed_value[position])
                 for position, attribute in enumerate(fixed_attribute)]
        fixed_clause = ',\n'.join(pairs)

        modeltype = self.chosen_row['model']
        variable_attribute = ','.join(
            str(name) for name in self.chosen_row['variable'])

        if modeltype == 'const':
            model_str = "\n"
            # Spell out only the model type; attribute names that happen to
            # contain "const" must stay untouched.
            model_label = 'constant'
        else:
            params = self.chosen_row['param']
            Intercept_value = round(params['Intercept'], 2)
            # NOTE(review): the second key of 'param' is used as the
            # coefficient name - this relies on key order; confirm.
            slope_name = list(params)[1]
            slope_value = round(params[slope_name], 2)
            model_str = ("\nIntercept: " + str(Intercept_value) + ',\n '
                         + str(slope_name) + " as Coefficient: "
                         + str(slope_value))
            model_label = modeltype

        theta = ("The goodness of fit of the model is "
                 + str(round(self.chosen_row['theta'], 2)))
        local_desc = ("For " + fixed_clause + ',the ' + self.agg_alias
                      + ' is ' + model_label + ' in ' + variable_attribute + '.')
        raw_pattern_description = local_desc + model_str + theta
        # NOTE: textwrap.wrap replaces every whitespace character (including
        # the "\n" inserted above) with a space before wrapping, so only the
        # 35-column wrapping decides where lines break.
        raw_pattern_description_lists = textwrap.wrap(raw_pattern_description,
                                                      width=35)
        final_pattern_description = '\n'.join(raw_pattern_description_lists)

        pattern_description = Label(self.win_frame,
                                    text=final_pattern_description,
                                    font=('Times New Roman bold', 18),
                                    borderwidth=5, bg=self.frame_color,
                                    relief=SOLID, justify=LEFT)
        pattern_description.grid(column=0, row=0, sticky='nsew')

    def get_outlier_frame(self, chosen_row, pattern_data_df):
        """Split out the rows of ``pattern_data_df`` that count as outliers.

        For a 'const' model a row is an outlier when its aggregate value lies
        more than 1.5 * IQR below the first or above the third quartile. For
        any other model an OLS fit of the aggregate on
        ``chosen_row['predictor']`` is computed and rows whose Cook's distance
        exceeds ``4 / number_of_rows`` are outliers, classified as low or high
        by comparing the observed value with the fitted one.

        Args:
            chosen_row: Pattern record; 'model' and (for non-constant models)
                'predictor' are read.
            pattern_data_df: Data points of the pattern; it is not modified.

        Returns:
            Tuple ``(low_outlier_df, high_outlier_df)`` with the original
            columns of ``pattern_data_df``.
        """
        copy_pattern_df = pattern_data_df.copy()
        # Boolean masks are used instead of DataFrame.query strings so that
        # aggregate aliases which are not valid identifiers keep working.
        observed = copy_pattern_df[self.agg_alias]

        if chosen_row['model'] == 'const':
            Q1 = observed.quantile(0.25)
            Q3 = observed.quantile(0.75)
            IQR = Q3 - Q1
            low_outlier_df = copy_pattern_df[(Q1 - 1.5 * IQR) > observed]
            high_outlier_df = copy_pattern_df[observed > (Q3 + 1.5 * IQR)]
            return low_outlier_df, high_outlier_df

        x_name = chosen_row['predictor']
        # Builtin float replaces the np.float alias removed from NumPy.
        x = copy_pattern_df[x_name].astype(float)
        y = observed.astype(float)
        x = sm.add_constant(x)
        model = sm.OLS(y, x).fit()
        influence_frame = model.get_influence().summary_frame()
        copy_pattern_df['predicted_value'] = model.predict(x)
        copy_pattern_df['cooks_d'] = influence_frame['cooks_d']

        helper_columns = ['predicted_value', 'cooks_d']
        influential = copy_pattern_df['cooks_d'] > 4 / copy_pattern_df.shape[0]
        below_fit = copy_pattern_df[self.agg_alias] < copy_pattern_df['predicted_value']
        above_fit = copy_pattern_df[self.agg_alias] > copy_pattern_df['predicted_value']
        low_outlier_df = copy_pattern_df[below_fit & influential].drop(
            helper_columns, axis=1)
        high_outlier_df = copy_pattern_df[above_fit & influential].drop(
            helper_columns, axis=1)

        return low_outlier_df, high_outlier_df
コード例 #2
0
ファイル: Exp_Frame.py プロジェクト: pdphuong/cape
class Exp_Frame:
    def __init__(self,
                 input_question_df=None,
                 input_explanation_df=None,
                 input_exp_chosen_row=None,
                 input_none_drill_down_df=None,
                 input_drill_down_df=None,
                 input_data_convert_dict=None,
                 frame_color='light yellow'):
        """Open the "Explanation Detail" window and lay out its panels.

        The constructor stores the input frames, parses the pattern string of
        the chosen explanation row and builds one of two layouts: without
        drill-down data a text panel, a Quit button and one graph panel; with
        drill-down data two graph panels side by side above a text panel.

        Args:
            input_question_df: Stored as ``self.question_df``.
            input_explanation_df: Stored as ``self.explanation_df``.
            input_exp_chosen_row: Mapping-like row; its 'From_Pattern',
                'relevent_model', 'relevent_param', 'Score', 'Drill_Down_To',
                'refinement_model' and 'drill_param' entries are read.
            input_none_drill_down_df: Stored as ``self.none_drill_down_df``.
            input_drill_down_df: Optional frame; when given it is stored cast
                to ``object`` dtype and selects the two-graph layout.
            input_data_convert_dict: Stored as ``self.data_convert_dict``.
            frame_color: Background colour of every frame.
        """

        def bordered_panel():
            # Every content panel shares the same ridge border and colour.
            return Frame(self.win_frame,
                         borderwidth=5,
                         relief=RIDGE,
                         bg=self.frame_color)

        def embed_figure(parent):
            # Figure + Tk canvas + navigation toolbar packed into `parent`.
            figure = Figure(figsize=(5, 5), dpi=130)
            canvas = FigureCanvasTkAgg(figure, parent)
            canvas.get_tk_widget().pack(side=TOP, fill=BOTH, expand=True)
            toolbar = NavigationToolbar2Tk(canvas, parent)
            toolbar.update()
            canvas.get_tk_widget().pack(side=TOP, fill=BOTH, expand=1)
            return figure, canvas, toolbar

        self.win = Toplevel()
        self.win.geometry("%dx%d%+d%+d" % (1580, 900, 250, 125))
        self.win.wm_title("Explanation Detail")
        self.frame_color = frame_color
        self.win_frame = Frame(self.win, bg=self.frame_color)
        self.win_frame.pack(fill=BOTH, expand=True)

        self.question_df = input_question_df
        self.explanation_df = input_explanation_df
        self.exp_chosen_row = input_exp_chosen_row
        self.none_drill_down_df = input_none_drill_down_df
        self.drill_down_df = (None if input_drill_down_df is None else
                              input_drill_down_df.astype(object))
        self.data_convert_dict = input_data_convert_dict
        self.drill_exist = False

        # The parsing below implies a pattern string shaped like
        # "[part=value]:pred \u2933 agg" - presumably; verify against the producer.
        self.relevent_pattern = self.exp_chosen_row['From_Pattern']
        pattern_pieces = self.relevent_pattern.split(':')
        partition_piece = pattern_pieces[0]
        self.rel_pattern_part = partition_piece.split('=')[0].strip('[')
        model_pieces = pattern_pieces[1].split(' \u2933 ')
        self.rel_pattern_pred = model_pieces[0]
        self.rel_pattern_agg = model_pieces[1]
        self.rel_pattern_part_value = partition_piece.split('=')[1].strip(']')
        self.rel_pattern_model = self.exp_chosen_row['relevent_model']
        self.rel_param = self.exp_chosen_row['relevent_param']
        self.rel_pattern_part_list = self.rel_pattern_part.split(',')
        self.rel_pattern_pred_list = self.rel_pattern_pred.split(',')
        self.exp_tuple_score = float(self.exp_chosen_row['Score'])
        self.drill_attr = [self.exp_chosen_row['Drill_Down_To'].split(',')]
        self.drill_model = self.exp_chosen_row['refinement_model']
        self.drill_param = self.exp_chosen_row['drill_param']

        # The grid layout depends on whether drill-down data was supplied.
        if self.drill_down_df is not None:
            self.drill_exist = True
            for column in (0, 1):
                self.win_frame.columnconfigure(column, weight=1)
            self.win_frame.rowconfigure(0, weight=2)
            self.win_frame.rowconfigure(1, weight=1)

            self.rel_graph_frame = bordered_panel()
            self.rel_graph_frame.grid(column=0, row=0, sticky='nesw')

            self.drill_graph_frame = bordered_panel()
            self.drill_graph_frame.grid(column=1, row=0, sticky='nesw')

            (self.rel_figure,
             self.rel_canvas,
             self.rel_toolbar) = embed_figure(self.rel_graph_frame)
            (self.drill_figure,
             self.drill_canvas,
             self.drill_toolbar) = embed_figure(self.drill_graph_frame)

            self.exp_frame = bordered_panel()
            self.exp_frame.grid(column=0, columnspan=2, row=1, sticky='nesw')
        else:
            self.win_frame.columnconfigure(0, weight=2)
            self.win_frame.columnconfigure(1, weight=3)
            self.win_frame.rowconfigure(0, weight=8)
            self.win_frame.rowconfigure(1, weight=1)

            self.Quit_Button = Button(self.win_frame,
                                      text="Quit",
                                      width=10,
                                      height=4,
                                      command=self.win.destroy)
            self.Quit_Button.grid(column=0, row=1)

            self.rel_graph_frame = bordered_panel()
            self.rel_graph_frame.grid(column=1,
                                      row=0,
                                      rowspan=2,
                                      sticky='nesw')

            self.exp_frame = bordered_panel()
            self.exp_frame.grid(column=0, row=0, sticky='nesw')

            (self.rel_figure,
             self.rel_canvas,
             self.rel_toolbar) = embed_figure(self.rel_graph_frame)

    def load_exp_graph(self):

        if (self.drill_exist == False):
            self.load_rel_exp_graph()
        else:
            self.load_rel_question_graph()
            self.load_drill_exp_graph()

    def load_rel_exp_graph(self):
        """Plot the relevant pattern with the question and explanation points.

        A text message is drawn instead of a plot when
        ``none_drill_down_df`` has 50 or more rows or when the pattern has
        more than two predictor attributes. Constant models are drawn in 2D
        (one predictor) or 3D (otherwise); linear models are only drawn for a
        single predictor. The canvas is redrawn in every case.
        """
        agg = self.rel_pattern_agg

        if len(self.none_drill_down_df) >= 50:
            # The message must go to the plotter created here; the original
            # code addressed self.rel_plotter, which does not exist yet.
            self.text_plotter = Plotter(
                figure=self.rel_figure,
                data_convert_dict=self.data_convert_dict,
                mode='2D')
            self.text_plotter.add_text(
                "Cannot plot because the size of the data is so large!")

        elif len(self.rel_pattern_pred_list) > 2:
            self.text_plotter = Plotter(
                figure=self.rel_figure,
                data_convert_dict=self.data_convert_dict,
                mode='2D')
            self.text_plotter.add_text(
                "Cannot plot because the number of dimension of data is higher than 2!"
            )

        elif self.rel_pattern_model == 'const':
            if len(self.rel_pattern_pred_list) == 1:
                self.rel_plotter = Plotter(
                    figure=self.rel_figure,
                    data_convert_dict=self.data_convert_dict,
                    mode='2D')
                const = round(float(self.rel_param), 2)
                x = self.rel_pattern_pred_list[0]
                y = agg

                self.rel_plotter.plot_2D_const(const,
                                               label='Explanation Model')

                none_drill_down_df = self.none_drill_down_df[[x, y]].copy()
                question_df = self._rows_matching_pattern_keys(
                    self.question_df, [x, y])
                explanation_df = self._rows_matching_pattern_keys(
                    self.explanation_df, [x, y])

                self.rel_plotter.plot_2D_scatter(question_df,
                                                 x=x,
                                                 y=y,
                                                 color='r',
                                                 marker='v',
                                                 size=250,
                                                 zorder=10,
                                                 label="User Question")
                self.rel_plotter.plot_2D_scatter(explanation_df,
                                                 x=x,
                                                 y=y,
                                                 color='b',
                                                 marker='^',
                                                 size=250,
                                                 zorder=5,
                                                 label="Explanation")
                self.rel_plotter.plot_2D_scatter(none_drill_down_df,
                                                 x=x,
                                                 y=y,
                                                 zorder=0,
                                                 label=agg)
                self.rel_plotter.set_x_label(x)
                self.rel_plotter.set_y_label(y)
                self.rel_plotter.set_title("Pattern Graph")

            else:
                self.rel_plotter = Plotter(
                    figure=self.rel_figure,
                    data_convert_dict=self.data_convert_dict,
                    mode='3D')
                x = self.rel_pattern_pred_list[0]
                y = self.rel_pattern_pred_list[1]
                z = agg

                # float() mirrors the 2D branch so a parameter delivered as
                # text is handled the same way in both.
                const = round(float(self.rel_param), 2)

                none_drill_down_df = self.none_drill_down_df[[x, y, z]].copy()
                question_df = self._rows_matching_pattern_keys(
                    self.question_df, [x, y, z])
                explanation_df = self._rows_matching_pattern_keys(
                    self.explanation_df, [x, y, z])

                # keep=False drops every row that occurs more than once, i.e.
                # the points that are also question or explanation points.
                pattern_only_df = pd.concat(
                    [none_drill_down_df, question_df,
                     explanation_df]).drop_duplicates(keep=False)

                self.rel_plotter.plot_3D_const(none_drill_down_df,
                                               x=x,
                                               y=y,
                                               z_value=const,
                                               label="Explanation Model")
                self.rel_plotter.plot_3D_scatter(none_drill_down_df,
                                                 x=x,
                                                 y=y,
                                                 z=z,
                                                 alpha=0)
                self.rel_plotter.plot_3D_scatter(pattern_only_df,
                                                 x=x,
                                                 y=y,
                                                 z=z,
                                                 label=agg)
                self.rel_plotter.plot_3D_scatter(question_df,
                                                 x=x,
                                                 y=y,
                                                 z=z,
                                                 color='r',
                                                 marker='v',
                                                 size=250,
                                                 label="User Question")
                self.rel_plotter.plot_3D_scatter(explanation_df,
                                                 x=x,
                                                 y=y,
                                                 z=z,
                                                 color='b',
                                                 marker='^',
                                                 size=250,
                                                 label="Explanation")

                self.rel_plotter.set_x_label(x)
                self.rel_plotter.set_y_label(y)
                self.rel_plotter.set_z_label(z)
                self.rel_plotter.set_title("Pattern Graph")

        elif self.rel_pattern_model == 'linear':
            if len(self.rel_pattern_pred_list) == 1:
                self.rel_plotter = Plotter(
                    figure=self.rel_figure,
                    data_convert_dict=self.data_convert_dict,
                    mode='2D')
                x = self.rel_pattern_pred_list[0]
                y = agg

                # Both parameters are coerced the same way.
                intercept_value = float(self.rel_param['Intercept'])
                # NOTE(review): the second key of rel_param is used as the
                # slope - this relies on key order; confirm.
                slope_name = list(self.rel_param)[1]
                slope_value = float(self.rel_param[slope_name])

                draw_line_df = self.none_drill_down_df[[x]].copy()
                none_drill_down_df = self.none_drill_down_df[[x, y]].copy()
                question_df = self._rows_matching_pattern_keys(
                    self.question_df, [x, y])
                explanation_df = self._rows_matching_pattern_keys(
                    self.explanation_df, [x, y])

                self.rel_plotter.plot_2D_linear(draw_line_df,
                                                slope=slope_value,
                                                intercept=intercept_value,
                                                label="Explanation Model")
                self.rel_plotter.plot_2D_scatter(none_drill_down_df,
                                                 x=x,
                                                 y=y,
                                                 label=agg)
                self.rel_plotter.plot_2D_scatter(question_df,
                                                 x=x,
                                                 y=y,
                                                 color='r',
                                                 marker='v',
                                                 size=250,
                                                 zorder=1,
                                                 label="User Question")
                self.rel_plotter.plot_2D_scatter(explanation_df,
                                                 x=x,
                                                 y=y,
                                                 color='b',
                                                 marker='^',
                                                 size=250,
                                                 zorder=2,
                                                 label="Explanation")
                self.rel_plotter.set_x_label(x)
                self.rel_plotter.set_y_label(y)
                self.rel_plotter.set_title("Pattern Graph")

        self.rel_canvas.draw()

    def _rows_matching_pattern_keys(self, other_df, columns):
        """Return the ``none_drill_down_df`` rows that also occur in ``other_df``.

        Rows are matched on the partition and predictor attributes of the
        relevant pattern; the result is restricted to ``columns``.
        """
        common_cols = self.rel_pattern_part_list + self.rel_pattern_pred_list
        merged = pd.merge(self.none_drill_down_df, other_df, on=common_cols)
        # pandas suffixes non-key columns present in both frames with "_x"
        # (left) and "_y" (right); the left-hand values are the ones kept.
        merged = merged.rename(
            index=str,
            columns={(name + "_x"): name for name in columns})
        return merged[columns]

    def load_rel_question_graph(self):

        if (len(self.none_drill_down_df) >= 50):

            self.text_plotter = Plotter(
                figure=self.drill_figure,
                data_convert_dict=self.data_convert_dict,
                mode='2D')
            self.rel_plotter.add_text(
                "Cannot plot because the size of the data is so large!")

        elif (len(self.rel_pattern_pred_list) > 2):

            self.text_plotter = Plotter(
                figure=self.drill_figure,
                data_convert_dict=self.data_convert_dict,
                mode='2D')
            self.rel_plotter.add_text(
                "Cannot plot because the dimension of the data is higher than 2!"
            )

        elif (self.rel_pattern_model == 'const'):
            if (len(self.rel_pattern_pred_list) == 1):
                self.rel_plotter = Plotter(
                    figure=self.rel_figure,
                    data_convert_dict=self.data_convert_dict,
                    mode='2D')
                const = round(float(self.rel_param), 2)

                x = self.rel_pattern_pred_list[0]
                y = self.rel_pattern_agg

                self.rel_plotter.plot_2D_const(const)
                # self.rel_plotter.plot_categorical_scatter_2D(x,y)

                common_cols = self.rel_pattern_part_list + self.rel_pattern_pred_list

                # logger.debug("common_cols for question is ")
                # print(common_cols)

                # logger.debug("self.explanation_df is")
                # logger.debug(self.explanation_df)

                question_df = pd.merge(self.none_drill_down_df,
                                       self.question_df,
                                       on=common_cols)

                explanation_df = pd.merge(self.none_drill_down_df,
                                          self.explanation_df,
                                          on=common_cols)

                # logger.debug("question_df is ")
                # print(question_df)

                question_df = question_df.rename(index=str,
                                                 columns={
                                                     (y + "_x"): y,
                                                     (x + "_x"): x
                                                 })
                question_df = question_df[[x, y]]

                explanation_df = explanation_df.rename(index=str,
                                                       columns={
                                                           (y + "_x"): y,
                                                           (x + "_x"): x
                                                       })
                explanation_df = explanation_df[[x, y]]

                # logger.debug(question_df)

                self.rel_plotter.plot_2D_scatter(question_df,
                                                 x=x,
                                                 y=y,
                                                 color='r',
                                                 marker='v',
                                                 size=250,
                                                 zorder=10,
                                                 label="User Question")
                self.rel_plotter.plot_2D_scatter(copy.deepcopy(
                    self.none_drill_down_df),
                                                 x=x,
                                                 y=y,
                                                 zorder=0,
                                                 label=self.rel_pattern_agg)
                self.rel_plotter.plot_2D_scatter(explanation_df,
                                                 x=x,
                                                 y=y,
                                                 color='b',
                                                 marker='^',
                                                 size=250,
                                                 zorder=0,
                                                 label="Explanation")
                self.rel_plotter.set_x_label(x)
                self.rel_plotter.set_y_label(y)
                self.rel_plotter.set_title("User Question Graph")

            else:

                pass

                # self.rel_plotter = Plotter(figure=self.rel_figure,data_convert_dict=self.data_convert_dict,mode='3D')

                # x = self.rel_pattern_pred_list[0]
                # y = self.rel_pattern_pred_list[1]
                # z = self.rel_pattern_agg

                # const = round(float(self.rel_param),2)

                # none_drill_down_df = self.none_drill_down_df[[x,y,z]]
                # logger.debug(none_drill_down_df)

                # question_df = self.question_df[[x,y,z]]
                # logger.debug(question_df)

                # pattern_only_df = pd.concat([none_drill_down_df,question_df]).drop_duplicates(keep=False)

                # self.rel_plotter.plot_3D_const(none_drill_down_df,x=x,y=y,z_value=const)
                # self.rel_plotter.plot_3D_scatter(none_drill_down_df,x=x,y=y,z=z,alpha=0)
                # self.rel_plotter.plot_3D_scatter(pattern_only_df,x=x,y=y,z=z)
                # self.rel_plotter.plot_3D_scatter(question_df,x=x,y=y,z=z,color='b',marker='s',size=200)

                # self.rel_plotter.set_x_label(x)
                # self.rel_plotter.set_y_label(y)
                # self.rel_plotter.set_z_label(z)
                # self.rel_plotter.set_title("User Question Graph")

        elif (self.rel_pattern_model == 'linear'):
            if (len(self.rel_pattern_pred_list) == 1):

                self.rel_plotter = Plotter(
                    figure=self.rel_figure,
                    data_convert_dict=self.data_convert_dict,
                    mode='2D')
                x = self.rel_pattern_pred_list[0]
                y = self.rel_pattern_agg

                intercept_value = self.rel_param['Intercept']
                slope_name = list(self.rel_param)[1]
                slope_value = float(self.rel_param[slope_name])

                draw_line_df = self.none_drill_down_df[[x]]

                common_cols = self.rel_pattern_part_list + self.rel_pattern_pred_list
                question_df = pd.merge(self.none_drill_down_df,
                                       self.question_df,
                                       on=common_cols)
                logger.debug(self.none_drill_down_df)
                logger.debug(self.explanation_df)
                explanation_df = pd.merge(self.none_drill_down_df,
                                          self.explanation_df,
                                          on=common_cols)

                # logger.debug("question_df is ")
                # print(question_df)

                question_df = question_df.rename(index=str,
                                                 columns={
                                                     (y + "_x"): y,
                                                     (x + "_x"): x
                                                 })
                question_df = question_df[[x, y]]
                logger.debug(question_df)

                explanation_df = explanation_df.rename(index=str,
                                                       columns={
                                                           (y + "_x"): y,
                                                           (x + "_x"): x
                                                       })
                explanation_df = explanation_df[[x, y]]
                logger.debug(explanation_df)

                self.rel_plotter.plot_2D_linear(draw_line_df,
                                                slope=slope_value,
                                                intercept=intercept_value,
                                                label="Relevent Model")
                self.rel_plotter.plot_2D_scatter(copy.deepcopy(
                    self.none_drill_down_df),
                                                 x=x,
                                                 y=y,
                                                 label=self.rel_pattern_agg)
                self.rel_plotter.plot_2D_scatter(question_df,
                                                 x=x,
                                                 y=y,
                                                 color='r',
                                                 marker='v',
                                                 size=150,
                                                 zorder=1,
                                                 label="User Question")
                logger.debug(explanation_df)
                self.rel_plotter.plot_2D_scatter(explanation_df,
                                                 x=x,
                                                 y=y,
                                                 color='b',
                                                 marker='^',
                                                 size=150,
                                                 zorder=0,
                                                 label="Explanation")

                self.rel_plotter.set_x_label(x)
                self.rel_plotter.set_y_label(y)
                self.rel_plotter.set_title("User Question Graph")

        self.rel_canvas.draw()

    def load_drill_exp_graph(self):
        """Draw the refined (drill-down) explanation chart on ``self.drill_figure``.

        A "Cannot plot" notice replaces the chart when
        ``self.none_drill_down_df`` has 50 or more rows, or when the relevant
        pattern has more than two predictor attributes.  Otherwise a 2D chart
        is drawn when ``self.drill_model`` is 'const' or 'linear' and there is
        exactly one drill attribute; every other combination draws nothing.
        ``self.drill_canvas`` is redrawn in all cases.
        """

        def new_plotter():
            # Everything this method draws, notices included, is 2D on the
            # drill-down figure.
            return Plotter(figure=self.drill_figure,
                           data_convert_dict=self.data_convert_dict,
                           mode='2D')

        if len(self.none_drill_down_df) >= 50:
            # Build the plotter here, like the plotting branches below do,
            # rather than relying on one having been created beforehand.
            self.drill_plotter = new_plotter()
            self.drill_plotter.add_text(
                "Cannot plot because the size of the data is so large!")

        elif len(self.rel_pattern_pred_list) > 2:
            self.drill_plotter = new_plotter()
            self.drill_plotter.add_text(
                "Cannot plot because the number of dimension of data is higher than 2!"
            )

        # The model branches are part of the same chain as the guards above:
        # once a notice is shown, no chart may be drawn on top of it.
        elif self.drill_model == 'const':

            # Only a single drill attribute is plotted; other counts are
            # skipped (a 3D rendering is not implemented).
            if len(self.drill_attr) == 1:
                self.drill_plotter = new_plotter()
                const = round(float(self.drill_param), 2)

                # NOTE(review): the x axis is the relevant pattern's predictor
                # here, whereas the linear branch uses the drill attribute --
                # confirm this difference is intended.
                x = self.rel_pattern_pred_list[0]
                y = self.rel_pattern_agg

                self.drill_plotter.plot_2D_const(
                    const, label="Refined Explanation Model")

                # The plotter receives copies -- presumably because it
                # modifies the frame it is given; verify against Plotter.
                self.drill_plotter.plot_2D_scatter(
                    copy.deepcopy(self.explanation_df),
                    x=x,
                    y=y,
                    color='b',
                    marker='^',
                    size=250,
                    zorder=10,
                    label="Explanation")
                logger.debug("After Drilldown explanation:")
                logger.debug(self.drill_plotter.x_max)
                logger.debug(self.drill_plotter.x_min)
                logger.debug(self.drill_plotter.y_max)
                logger.debug(self.drill_plotter.y_min)

                self.drill_plotter.plot_2D_scatter(
                    copy.deepcopy(self.drill_down_df),
                    x=x,
                    y=y,
                    zorder=0,
                    label=self.rel_pattern_agg)
                # Logging does not modify the frame, so no copy is needed.
                logger.debug(self.drill_down_df)
                logger.debug("After Drilldown scatters:")
                logger.debug(self.drill_plotter.x_max)
                logger.debug(self.drill_plotter.x_min)
                logger.debug(self.drill_plotter.y_max)
                logger.debug(self.drill_plotter.y_min)

                self.drill_plotter.set_x_label(x)
                self.drill_plotter.set_y_label(y)
                self.drill_plotter.set_title("Refined Pattern Explanation")

        elif self.drill_model == 'linear':

            if len(self.drill_attr) == 1:
                self.drill_plotter = new_plotter()
                x = self.drill_attr[0]
                y = self.rel_pattern_agg

                intercept_value = self.drill_param['Intercept']
                # The coefficient is taken from the second entry of
                # drill_param, whatever its name is.
                slope_name = list(self.drill_param)[1]
                slope_value = float(self.drill_param[slope_name])

                draw_line_df = self.none_drill_down_df[[x]]

                # Locate the explanation tuple inside the drill-down data.
                common_cols = self.rel_pattern_part_list + self.drill_attr
                explanation_df = pd.merge(self.drill_down_df,
                                          self.explanation_df,
                                          on=common_cols)
                # Columns shared by both frames but not part of the join key
                # come back suffixed with _x / _y; keep the left (drill-down)
                # side under the plain column name.
                explanation_df = explanation_df.rename(index=str,
                                                       columns={
                                                           (y + "_x"): y,
                                                           (x + "_x"): x
                                                       })
                explanation_df = explanation_df[[x, y]]

                self.drill_plotter.plot_2D_linear(draw_line_df,
                                                  slope=slope_value,
                                                  intercept=intercept_value)
                self.drill_plotter.plot_2D_scatter(
                    copy.deepcopy(self.drill_down_df), x=x, y=y)
                self.drill_plotter.plot_2D_scatter(explanation_df,
                                                   x=x,
                                                   y=y,
                                                   zorder=1)
                self.drill_plotter.set_x_label(x)
                self.drill_plotter.set_y_label(y)
                self.drill_plotter.set_title("Pattern Graph")

        self.drill_canvas.draw()

    def load_exp_description(self, user_direction=None):
        """Compose the natural-language explanation and show it in ``self.exp_frame``.

        The text is a ranking sentence chosen from the explanation's ``Score``
        followed by a description of the user question, the relevant pattern
        and the explanation tuple.  Without drill-down data
        (``self.drill_down_df is None``) the shorter template is used and
        wrapped at 50 columns; otherwise the drill-down template is used and
        wrapped at 90 columns.

        Args:
            user_direction: direction of the user question.  The templates
                append "er" to it, so 'high' or 'low' is expected.  Any value
                other than 'high' yields the counterbalance direction 'high'.
        """
        exp_tuple_score = float(self.exp_chosen_row['Score'])

        # (ranking, similarity, deviation) wording for the three score tiers.
        if exp_tuple_score <= 0:
            likelihood_words = ['unlikely', 'not similar', 'slightly']
        elif exp_tuple_score <= 10:
            likelihood_words = ['plausible', 'somewhat similar', '']
        else:
            likelihood_words = ['highly plausible', 'similar', 'extremely']

        ranking_clause = (
            "  This explanation was ranked {} because the counterbalance is {}"
            " to the user question and it deviates {} from the predicted"
            " outcome.").format(*likelihood_words)

        # "column=value" for every column of the user question except the
        # aggregate itself.
        user_question_list = [
            str(column) + "=" + str(values.to_string(index=False))
            for column, values in self.question_df.items()
            if column != self.rel_pattern_agg
        ]

        # Verb agreement with the number of predictor attributes.
        if len(self.rel_pattern_pred_list) > 1:
            predict = 'predict'
        else:
            predict = 'predicts'

        # Partition attributes paired with their fixed values.
        fixed_pair = ",".join(
            attribute + "=" + value
            for attribute, value in zip(self.rel_pattern_part_list,
                                        self.rel_pattern_part_value.split(",")))

        # Predictor attributes paired with the values from the user question.
        variable_pair = ",".join(
            str(attribute) + "=" +
            str(self.question_df[attribute].to_string(index=False))
            for attribute in self.rel_pattern_pred_list)

        if user_direction == 'high':
            counter_dir = 'low'
        else:
            counter_dir = 'high'

        # Describe the first explanation record, leaving out the aggregate and
        # the partition attributes.
        skipped_columns = self.rel_pattern_part.split(',') + [
            self.rel_pattern_agg
        ]
        exp_tuple_dict = self.explanation_df.to_dict('records')[0]
        exp_clause = ','.join(
            str(column) + "=" + str(value)
            for column, value in exp_tuple_dict.items()
            if column not in skipped_columns)

        # Each template starts with a space so that it does not run into the
        # final period of the ranking sentence.
        if self.drill_down_df is None:
            wrap_width = 50
            user_question_clause = ',\n  '.join(user_question_list)
            comprehensive_exp = (
                " Explanation for why {} is {}er than expected for: {}."
                " In general, {} {} {}  for most {}."
                " This is also true for {}."
                " However, for {}, {} is {}er than predicted."
                " This may be explained through the {}er than expected"
                " outcome for {}.").format(
                    self.rel_pattern_agg, user_direction,
                    user_question_clause, str(self.rel_pattern_pred), predict,
                    self.rel_pattern_agg, str(self.rel_pattern_part),
                    fixed_pair, variable_pair, self.rel_pattern_agg,
                    user_direction, counter_dir, exp_clause)
        else:
            wrap_width = 90
            user_question_clause = ','.join(user_question_list)
            comprehensive_exp = (
                " Explanation for why {} is {}er than expected for: {}."
                " Even though like many other {}, {} {} {} for {}(Left Graph),"
                " the fact that  {} is {} can also be explained by \n"
                "{}er than usual number of {} in {}(Right Graph).").format(
                    self.rel_pattern_agg, user_direction,
                    user_question_clause, str(self.rel_pattern_part),
                    str(self.rel_pattern_pred), predict, self.rel_pattern_agg,
                    fixed_pair, user_question_clause, user_direction,
                    counter_dir, self.rel_pattern_agg, exp_clause)

        # Wrapping turns every embedded newline into a space and re-breaks the
        # text at the column limit.
        final_exp_lists = textwrap.fill(ranking_clause + comprehensive_exp,
                                        width=wrap_width)

        # NOTE(review): blanket substring replacement -- it also rewrites any
        # attribute name or value that contains "name"; presumably a
        # dataset-specific relabelling, confirm before reusing elsewhere.
        final_exp_lists = final_exp_lists.replace('name', 'author')
        final_exp_lists = final_exp_lists.replace('\'', '')

        pattern_description = Label(self.exp_frame,
                                    text=final_exp_lists,
                                    font=('Times New Roman bold', 19),
                                    bg=self.frame_color,
                                    relief=SOLID,
                                    justify=LEFT)
        pattern_description.pack(expand=True)
コード例 #3
0
class Local_Pattern_Frame:
    def __init__(self,
                 chosen_row=None,
                 pattern_data_df=None,
                 agg_alias=None,
                 data_convert_dict=None,
                 frame_color='light yellow'):
        """Keep the pattern inputs and open the "Pattern Detail" pop-up window.

        All arguments are stored unchanged on the instance.  A ``Toplevel``
        window of 1200x800 pixels placed at offset (+250, +125) is created and
        filled with a frame whose grid has two columns (weights 1 and 3) and
        a single row.
        """
        # Inputs kept for the load_* methods.
        self.chosen_row, self.pattern_data_df = chosen_row, pattern_data_df
        self.agg_alias, self.data_convert_dict = agg_alias, data_convert_dict
        self.frame_color = frame_color

        # Pop-up window.
        self.pop_up_frame = popup = Toplevel()
        popup.geometry("%dx%d%+d%+d" % (1200, 800, 250, 125))
        popup.wm_title("Pattern Detail")

        # Frame filling the window; column 1 gets three times the weight of
        # column 0.
        self.win_frame = container = Frame(popup, bg=self.frame_color)
        container.pack(fill=BOTH, expand=True)
        for column, weight in ((0, 1), (1, 3)):
            container.columnconfigure(column, weight=weight)
        container.rowconfigure(0, weight=1)

    def load_pattern_graph(self):
        """Embed a matplotlib figure in column 1 of the window and plot the pattern.

        A text notice is shown instead of a chart when ``self.pattern_data_df``
        has 50 or more rows or when the pattern has more than two variable
        attributes.  Otherwise a 'const' model is drawn in 2D (one variable)
        or 3D (any other count), and a 'linear' model is drawn in 2D for one
        variable only.  The canvas is drawn at the end in every case.
        """
        holder = Frame(self.win_frame, bg=self.frame_color)
        holder.grid(column=1, row=0, sticky='nesw')

        self.figure = Figure(figsize=(5, 5), dpi=130)
        canvas = FigureCanvasTkAgg(self.figure, holder)
        canvas.get_tk_widget().pack(side=TOP, fill=BOTH, expand=True)
        nav_bar = NavigationToolbar2Tk(canvas, holder)
        nav_bar.update()
        canvas.get_tk_widget().pack(side=TOP, fill=BOTH, expand=1)

        def make_plotter(mode):
            # All plotters of this method share the figure and the converter.
            return Plotter(figure=self.figure,
                           data_convert_dict=self.data_convert_dict,
                           mode=mode)

        def show_notice(message):
            # Notices are rendered by a dedicated 2D plotter.
            self.text_plotter = make_plotter('2D')
            self.text_plotter.add_text(message)

        if len(self.pattern_data_df) >= 50:
            show_notice(
                "Cannot plot because the size of the data is so large!")

        elif len(self.chosen_row['variable']) > 2:
            show_notice(
                "Cannot plot because the number of dimension of data is higher than 2!"
            )

        elif self.chosen_row['model'] == 'const':
            if len(self.chosen_row['variable']) == 1:
                # Constant model over one variable: horizontal line + points.
                self.plotter = make_plotter('2D')
                x_col = self.chosen_row['variable'][0]
                level = round(self.chosen_row['stats'], 2)
                self.plotter.plot_2D_const(level, label="Pattern Model")

                points = self.pattern_data_df[[x_col, self.agg_alias]]
                logger.debug(points)
                self.plotter.plot_2D_scatter(points,
                                             x=x_col,
                                             y=self.agg_alias,
                                             label=self.agg_alias)

                self.plotter.set_x_label(x_col)
                self.plotter.set_y_label(self.agg_alias)
                self.plotter.set_title("Pattern Graph")
            else:
                # Constant model over the first two variables, drawn in 3D.
                self.plotter = make_plotter('3D')
                x_col = self.chosen_row['variable'][0]
                y_col = self.chosen_row['variable'][1]
                level = self.chosen_row['stats']

                plane_df = self.pattern_data_df[[x_col, y_col]]
                points = self.pattern_data_df[[x_col, y_col, self.agg_alias]]
                self.plotter.plot_3D_const(plane_df,
                                           x=x_col,
                                           y=y_col,
                                           z_value=level,
                                           label="Pattern Model")
                self.plotter.plot_3D_scatter(points,
                                             x=x_col,
                                             y=y_col,
                                             z=self.agg_alias,
                                             label=self.agg_alias)

                self.plotter.set_x_label(x_col)
                self.plotter.set_y_label(y_col)
                self.plotter.set_z_label(self.agg_alias)
                self.plotter.set_title("Pattern Graph")

        elif self.chosen_row['model'] == 'linear':
            if len(self.chosen_row['variable']) == 1:
                # Linear model over one variable: fitted line + points.
                self.plotter = make_plotter('2D')
                x_col = self.chosen_row['variable'][0]

                intercept = self.chosen_row['param']['Intercept']
                # The coefficient is the second entry of the parameter mapping.
                coef_key = list(self.chosen_row['param'])[1]
                slope = float(self.chosen_row['param'][coef_key])

                line_df = self.pattern_data_df[[x_col]]
                points = self.pattern_data_df[[x_col, self.agg_alias]]
                self.plotter.plot_2D_linear(line_df,
                                            slope=slope,
                                            intercept=intercept,
                                            label="Pattern Model")
                self.plotter.plot_2D_scatter(points,
                                             x=x_col,
                                             y=self.agg_alias,
                                             label=self.agg_alias)

                self.plotter.set_x_label(x_col)
                self.plotter.set_y_label(self.agg_alias)
                self.plotter.set_title("Pattern Graph")

        canvas.draw()

    def load_pattern_description(self):
        """Show a textual summary of the chosen pattern in column 0 of the window.

        The summary names the fixed attribute/value pairs, the aggregate, the
        model type and the variable attributes.  For a non-constant model it
        also lists the intercept and the coefficient (rounded to two decimals),
        and it always ends with the goodness of fit.  The text is wrapped at 35
        columns; wrapping turns the embedded newlines into plain spaces.
        """
        row = self.chosen_row

        # One "attribute = value" pair per fixed attribute.  Both sides go
        # through str() so that non-string values cannot break the
        # concatenation, whatever the number of pairs.
        fixed_clause = ',\n'.join(
            str(attribute) + ' = ' + str(value)
            for attribute, value in zip(row['fixed'], row['fixed_value']))

        variable_attribute = ','.join(row['variable'])

        modeltype = row['model']
        if modeltype == 'const':
            model_str = "\n"
        else:
            params = row['param']
            intercept_value = round(params['Intercept'], 2)
            # The coefficient is the second entry of the parameter mapping.
            slope_name = list(params)[1]
            slope_value = round(params[slope_name], 2)
            # The trailing ". " keeps the coefficient apart from the
            # goodness-of-fit sentence that follows it.
            model_str = ("\nIntercept: " + str(intercept_value) + ',\n ' +
                         str(slope_name) + " as Coefficient: " +
                         str(slope_value) + ". ")

        theta = "The goodness of fit of the model is " + str(
            round(row['theta'], 2))

        # Spell out 'const' in the model type only, so attribute names and
        # values that happen to contain "const" are left untouched.
        model_label = modeltype.replace('const', 'constant')
        local_desc = ("For " + fixed_clause + ', the ' + self.agg_alias +
                      ' is ' + model_label + ' in ' + variable_attribute +
                      '.')

        final_pattern_description = textwrap.fill(
            local_desc + model_str + theta, width=35)

        pattern_description = Label(self.win_frame,
                                    text=final_pattern_description,
                                    font=('Times New Roman bold', 18),
                                    borderwidth=5,
                                    bg=self.frame_color,
                                    relief=SOLID,
                                    justify=LEFT)
        pattern_description.grid(column=0, row=0, sticky='nsew')