예제 #1
0
    def evaluate(self, data_frame, array_datas):
        """
        Run Isomap on a data frame or on a dataset of multiple arrays

        The input is turned into a numeric table, rows with NaNs are
        removed, the features are standardized, and the scaled matrix is
        handed to ``self.updateFigure`` for drawing in a new window.

        Parameters:
            data_frame: object exposing ``.data`` and ``.name``; used when
                it is not None (``.data`` is presumably a pandas
                DataFrame -- confirm against caller).
            array_datas: arrays merged into a single frame through
                ``DataGuru.transformArray_to_dataFrame``; used only when
                ``data_frame`` is None.

        Returns:
            The object built by ``VizContainer.createResult`` with its
            ``data`` set to ``[win, properties, [X, Y], self.updateFigure]``,
            or a ``ResultObject`` carrying ``CommandStatus.Error`` when no
            input is given or the arrays cannot be converted.
        """

        # Get the data frame and remember which input produced it
        if data_frame is not None:
            df = DataGuru.convertStrCols_toNumeric(data_frame.data)
            cname = data_frame.name
            source = data_frame
        elif array_datas is not None:
            command_status, df, _, cname = DataGuru.transformArray_to_dataFrame(
                array_datas, useCategorical=True)
            if command_status == CommandStatus.Error:
                return ResultObject(None, None, None, CommandStatus.Error)
            source = array_datas
        else:
            Printer.Print("Please provide data frame or arrays to analyze")
            return ResultObject(None, None, None, CommandStatus.Error)

        # Remove nans (together with the ground truth when one is set)
        Y = None
        if StatContainer.ground_truth is not None:
            df = DataGuru.removeGT(df, StatContainer.ground_truth)
            Y = StatContainer.filterGroundTruth()
            df, Y = DataGuru.removenan(df, Y)
        else:
            # Rebind instead of dropping in place so that a frame shared
            # with the caller is never modified.
            df = df.dropna()

        X = df.values

        # Standardize the extracted data X
        scaler = preprocessing.StandardScaler().fit(X)
        X = scaler.transform(X)

        win = Window.window()

        properties = self.createDefaultProperties()
        properties['title'] = cname

        result_object = VizContainer.createResult(win, source, ['ismp'])

        # updateFigure receives the same payload that is stored on the result
        result_object.data = [win, properties, [X, Y], self.updateFigure]
        self.updateFigure(result_object.data)
        self.modify_figure.evaluate(result_object)
        return result_object
예제 #2
0
파일: NLDR_pca.py 프로젝트: alfarvis/ALFA
    def evaluate(self, data_frame, array_datas):
        """
        Run pca on a data frame or on a dataset of multiple arrays

        The input is turned into a numeric table, rows with NaNs are
        removed, the features are standardized and projected onto two
        principal components, and the projection is drawn as a scatter
        plot in a new window. When a ground truth is set in
        ``StatContainer`` the points are colored by it and a labelled
        colorbar is added.

        Parameters:
            data_frame: object exposing ``.data`` and ``.name``; used when
                it is not None (``.data`` is presumably a pandas
                DataFrame -- confirm against caller).
            array_datas: arrays merged into a single frame through
                ``DataGuru.transformArray_to_dataFrame``; used only when
                ``data_frame`` is None.

        Returns:
            The object built by ``VizContainer.createResult`` for the new
            window, or a ``ResultObject`` carrying ``CommandStatus.Error``
            when no input is given or the arrays cannot be converted.
        """

        # Get the data frame and remember which input produced it
        if data_frame is not None:
            df = DataGuru.convertStrCols_toNumeric(data_frame.data)
            cname = data_frame.name
            source = data_frame
        elif array_datas is not None:
            command_status, df, _, cname = DataGuru.transformArray_to_dataFrame(
                array_datas, useCategorical=True)
            if command_status == CommandStatus.Error:
                return ResultObject(None, None, None, CommandStatus.Error)
            source = array_datas
        else:
            Printer.Print("Please provide data frame or arrays to analyze")
            return ResultObject(None, None, None, CommandStatus.Error)

        # Remove nans (together with the ground truth when one is set)
        Y = None
        if StatContainer.ground_truth is not None:
            df = DataGuru.removeGT(df, StatContainer.ground_truth)
            Y = StatContainer.filterGroundTruth()
            df, Y = DataGuru.removenan(df, Y)
        else:
            # Rebind instead of dropping in place so that a frame shared
            # with the caller is never modified.
            df = df.dropna()

        X = df.values

        # Standardize the extracted data X
        scaler = preprocessing.StandardScaler().fit(X)
        X = scaler.transform(X)

        # Project onto the first two principal components
        pca = PCA(n_components=2)
        pca_res = pca.fit_transform(X)

        win = Window.window()
        f = win.gcf()
        ax = f.add_subplot(111)

        # A colormap only has an effect together with color values, so it
        # is passed only when the ground truth is available.
        scatter_options = {"edgecolor": "None", "alpha": 0.35}
        if Y is not None:
            scatter_options["c"] = Y
            scatter_options["cmap"] = "jet"
        sc = ax.scatter(pca_res[:, 0], pca_res[:, 1], **scatter_options)

        if Y is not None:
            # Attach the colorbar to this window's figure explicitly; the
            # pyplot-level call targets whatever figure is current.
            cbar = f.colorbar(sc, ax=ax)
            cbar.ax.get_yaxis().labelpad = 15
            cbar.ax.set_ylabel(StatContainer.ground_truth.name, rotation=270)

        ax.set_title(cname)
        win.show()

        return VizContainer.createResult(win, source, ['pca'])
예제 #3
0
    def evaluate(self, data_frame, array_datas, target):
        """
        Run clustering on a data frame or on a dataset of multiple arrays

        The input is turned into a table, rows with NaNs are removed, the
        features are standardized and clustered through
        ``self.performOperation``. When a ground truth is set in
        ``StatContainer``, a heatmap of the ground-truth versus
        cluster-label counts is drawn in a new window.

        Parameters:
            data_frame: object exposing ``.data`` and ``.name``; used when
                it is not None (``.data`` is presumably a pandas
                DataFrame -- confirm against caller).
            array_datas: arrays merged into a single frame through
                ``DataGuru.transformArray_to_dataFrame``; used only when
                ``data_frame`` is None.
            target: object whose ``.data`` is searched with
                ``findNumbers`` for the requested number of clusters;
                2 clusters are used when no positive count is found.

        Returns:
            A list of result objects: the heatmap figure result (only when
            a ground truth is set) followed by a ``ResultObject`` holding
            the cluster labels. On invalid input a single ``ResultObject``
            carrying ``CommandStatus.Error`` is returned instead of a list.
        """

        # Get the data frame and remember which input produced it
        if data_frame is not None:
            df = data_frame.data
            cname = data_frame.name
            source = data_frame
        elif array_datas is not None:
            command_status, df, _, cname = DataGuru.transformArray_to_dataFrame(
                array_datas, useCategorical=True)
            if command_status == CommandStatus.Error:
                return ResultObject(None, None, None, CommandStatus.Error)
            source = array_datas
        else:
            Printer.Print("Please provide data frame or arrays to analyze")
            return ResultObject(None, None, None, CommandStatus.Error)

        # Remove nans (together with the ground truth when one is set)
        Y = None
        if StatContainer.ground_truth is not None:
            df = DataGuru.removeGT(df, StatContainer.ground_truth)
            Y = StatContainer.filterGroundTruth()
            df, Y = DataGuru.removenan(df, Y)
        else:
            # df may be the caller's own frame here, so rebind rather than
            # dropping rows in place.
            df = df.dropna()

        X = df.values

        # Standardize the extracted data X
        scaler = preprocessing.StandardScaler().fit(X)
        X = scaler.transform(X)

        # Number of clusters: use the request only if it is still at
        # least 1 after integer conversion, otherwise fall back to 2.
        num_clusters = 2
        numbers = findNumbers(target.data, 1)
        if numbers and numbers[0].data > 0:
            requested = int(numbers[0].data)
            if requested >= 1:
                num_clusters = requested

        kY = self.performOperation(X, num_clusters)
        result_objects = []
        if StatContainer.ground_truth is not None:
            df_res = pd.DataFrame()
            df_res['ground_truth'] = Y
            df_res['clustering_result'] = kY

            # Count how often each ground-truth value falls into each
            # cluster; this table (not the raw label columns) is plotted.
            contingency = pd.crosstab(df_res['ground_truth'],
                                      df_res['clustering_result'])

            win = Window.window()
            f = win.gcf()
            ax = f.add_subplot(111)

            sns.heatmap(contingency, ax=ax)
            win.show()
            result_objects.append(
                VizContainer.createResult(win, source, ['clstr.fig']))

        # The cluster labels themselves are always returned as an array
        result_object = ResultObject(kY, [], DataType.array,
                                     CommandStatus.Success)
        result_object.createName(cname,
                                 command_name="clstr",
                                 set_keyword_list=True)

        result_objects.append(result_object)
        return result_objects