Esempio n. 1
0
    def evaluate(self, data_frame, array_datas, classifier_algo, pre_evaluate_results=None):
        """
        Display the cross validation results of a classifier.

        Args:
            data_frame: Data frame result the classifier was run on, or None.
            array_datas: Arrays the classifier was run on; only used when
                ``data_frame`` is None.
            classifier_algo: Unused here; kept so the signature matches the
                caller's calling convention.
            pre_evaluate_results: Two-element list ``[cv_output, aux_output]``
                where ``aux_output.data`` unpacks to
                ``(properties, model_data)``. Anything that is not a list is
                treated as a failed pre-evaluation.

        Returns:
            A result object whose ``data`` is
            ``[win, properties, model_data, self.processkFoldCV]`` on
            success, otherwise a ResultObject carrying CommandStatus.Error.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        if not isinstance(pre_evaluate_results, list):
            Printer.Print("Pre evaluation results failed! Attach bug report!")
            return result_object

        # The data frame takes precedence over the arrays when both are given.
        source = data_frame if data_frame is not None else array_datas
        if source is None:
            Printer.Print("Provide one of data frame or array datas")
            return result_object

        # Create the window only after the inputs are validated so that the
        # error path above does not leave an unused window behind.
        win = Window.window()
        result_object = VizContainer.createResult(win, source, ['cval'])

        cv_output, aux_output = pre_evaluate_results
        properties, model_data = aux_output.data

        # The callback stored last lets the figure be recomputed later.
        # NOTE(review): presumably invoked when properties change - confirm.
        result_object.data = [win, properties, model_data, self.processkFoldCV]
        cv_data = cv_output.data
        self.printkValueMessage(cv_data[0])
        self.updateWindow(win, cv_data[1], cv_data[2], model_data[1], properties["title"])
        self.modify_figure.evaluate(result_object)
        return result_object
Esempio n. 2
0
    def preEvaluate(self, data_frame, array_datas, classifier_algo):
        """
        Prepare the feature matrix and run cross validation on it.

        The features come from ``data_frame`` when given, otherwise from
        ``array_datas``. The ground truth column is removed from the
        features, rows with NaNs are dropped, and the features are
        standardised before being handed to ``self.performCV``.

        Args:
            data_frame: Result object whose ``data`` is the data frame and
                whose ``name`` becomes the plot title, or None.
            array_datas: Arrays to convert to a data frame when
                ``data_frame`` is None.
            classifier_algo: Result object whose ``data[0]`` is the
                classifier model.

        Returns:
            On success, a list of two ResultObjects: the first wraps the
            output of ``performCV``, the second wraps
            ``(properties, [X, Y, model])``.
            On failure, a single ResultObject with CommandStatus.Error whose
            ``data`` is the error message (note: not a list).
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        # Get the data frame
        sns.set(color_codes=True)
        if data_frame is not None:
            df = data_frame.data
            cname = data_frame.name
        elif array_datas is not None:
            command_status, df, _, cname = DataGuru.transformArray_to_dataFrame(
                array_datas)
            if command_status == CommandStatus.Error:
                print("Error in getting dataframe!")
                result_object.data = "Error in getting dataframe!"
                return result_object
        else:
            result_object.data = "Please provide data frame or arrays to analyze"
            return result_object

        # Get the ground truth array
        if StatContainer.ground_truth is None:
            # The two literals are concatenated, so the separating space
            # must be part of one of them.
            result_object.data = ("Please set a feature vector to ground truth by " +
                                  "typing set ground truth before using this command")
            return result_object

        # The labels must not remain among the features.
        df = DataGuru.removeGT(df, StatContainer.ground_truth)
        Y = StatContainer.ground_truth.data
        # Remove nans:
        df, Y = DataGuru.removenan(df, Y)

        # Get the classifier model
        model = classifier_algo.data[0]

        # Standardise the features with a scaler fitted on all of X.
        # NOTE(review): the scaler sees every row before cross validation
        # splits them - confirm this is intended.
        X = df.values
        scaler = preprocessing.StandardScaler().fit(X)
        X = scaler.transform(X)

        properties = self.createDefaultProperties()
        properties['title'] = cname
        cv_output = self.performCV(properties, X, Y, model)
        aux_output = (properties, [X, Y, model])

        return [ResultObject(cv_output, None),
                ResultObject(aux_output, None)]
Esempio n. 3
0
    def evaluate(self, array_datas):
        """
        Draw a bar plot of the given arrays grouped by the ground truth.

        The arrays are converted to a data frame, a grouping column is
        attached (the ground truth if it is set and matches the number of
        rows, otherwise a column of ones), and the per-group mean and
        standard deviation are handed to ``self.updateFigure``. A numeric
        grouping column that is not categorical is cut into 10 bins.

        Returns:
            The result object created for the 'bar' plot, whose ``data`` is
            ``[win, properties, [means, errors], self.updateFigure]``, or a
            ResultObject with CommandStatus.Error on failure.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        sns.set(color_codes=True)
        status, frame, _, cname = DataGuru.transformArray_to_dataFrame(
            array_datas)
        if status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)

        n_rows = frame.shape[0]

        # Pick the grouping column: the ground truth when usable, otherwise
        # a constant column under a placeholder name.
        group_col = 'ground_truth'
        group_vals = None
        if StatContainer.ground_truth is not None:
            candidate = StatContainer.filterGroundTruth()
            candidate_name = StatContainer.ground_truth.name
            if len(candidate) == n_rows:
                group_vals = candidate
                group_col = candidate_name
            else:
                print("ground truth does not match with df shape")
                print(len(candidate), n_rows)
        if group_vals is None:
            group_vals = np.ones(n_rows)

        # Attach the grouping column first so that dropping NaN rows keeps
        # it aligned with the data columns.
        frame[group_col] = group_vals
        frame.dropna(inplace=True)
        group_vals = frame[group_col]

        categories = StatContainer.isCategorical(group_vals)
        binned = bool(categories is None
                      and np.issubdtype(group_vals.dtype, np.number))
        if binned:
            # Convert a continuous grouping column to 10 categorical bins.
            frame[group_col] = pd.cut(group_vals, 10)

        if not binned and categories is None:
            Printer.Print("Ground truth could not be mapped to",
                          "categorical array\n")
            Printer.Print("Please clear or select appropriate ground truth")
            return result_object

        grouped = frame.groupby(group_col)
        means = grouped.mean()
        errors = grouped.std()

        string_labels = categories is not None and isinstance(categories[0], str)
        if string_labels:
            short_labels, _ = StatContainer.removeCommonNames(means.index)
            means.index = short_labels
            errors.index = short_labels

        # Rows are the groups, columns are the arrays; transpose when there
        # are at least as many arrays as groups (never for binned groups).
        n_groups, n_arrays = means.shape
        if n_arrays >= n_groups and not binned:
            means = means.T
            errors = errors.T

        properties = self.createDefaultProperties()
        properties['title'] = cname

        longest_label = 0
        if string_labels:
            longest_label = max(len(label) for label in categories)

        many_long_labels = (categories is not None
                            and len(categories) > 5
                            and longest_label > 8)
        if binned or many_long_labels:
            properties["horizontal"] = True
        if binned:
            properties["overwrite_labels"] = True
            properties["ylabel"] = StatContainer.ground_truth.name

        win = Window.window()
        result_object = VizContainer.createResult(win, array_datas, ['bar'])
        result_object.data = [win, properties, [means, errors], self.updateFigure]
        self.updateFigure(result_object.data)
        self.modify_figure.evaluate(result_object)
        return result_object