Esempi in Python per IdaKMeansError

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: ibmdbpy.exceptions

Classe/tipologia: IdaKMeansError

Esempi su hotexamples.com: 3

IdaKMeansError in Python: 3 esempi trovati. Questi sono i migliori esempi reali in Python per ibmdbpy.exceptions.IdaKMeansError, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

IdaKMeansError(3)

Metodi utilizzati di frequente

IdaKMeansError (3)

Esempio n. 1

Mostra file

    def predict(self, idadf, column_id=None, outtable=None):
        """
        Apply the K-means clustering model to new data.

        Parameters
        ----------
        idadf : IdaDataFrame
            IdaDataFrame to be used as input.

        column_id : str
            The column of the input table that identifies a unique instance ID.
            Default: the same id column that is specified in the stored procedure to build the model.

        outtable : str
            The name of the output table where the assigned clusters are stored.
            If this parameter is not specified, it is generated automatically.
            If the parameter corresponds to an existing table in the database,
            it is replaced.

        Returns
        -------
        IdaDataFrame
            IdaDataFrame containing the closest cluster for each data point referenced by its ID.
        """
        if not type(idadf).__name__ == 'IdaDataFrame':
            raise TypeError("Argument should be an IdaDataFrame")

        # Check the ID
        if column_id is None:
            column_id = self._column_id
        if column_id not in idadf.columns:
            raise ValueError(
                "No id columns is available in IdaDataFrame:" + column_id +
                ". Either create a new ID column using add_column_id function"
                + " or give the name of a column that can be used as ID")

        if self._idadb is None:
            raise IdaKMeansError("No KMeans model was trained before")

        if outtable is None:
            outtable = idadf._idadb._get_valid_modelname('PREDICT_KMEANS_')
        else:
            if self.outtable:
                outtable = self.outtable
            outtable = ibmdbpy.utils.check_tablename(outtable)
            if idadf._idadb.exists_table(outtable):
                idadf._idadb.drop_table(outtable)

        self.outtable = outtable
        # Create a temporay view
        idadf.internal_state._create_view()
        tmp_view_name = idadf.internal_state.current_state

        if "." in tmp_view_name:
            tmp_view_name = tmp_view_name.split('.')[-1]

        try:
            idadf._idadb._call_stored_procedure("IDAX.PREDICT_KMEANS ",
                                                model=self.modelname,
                                                intable=tmp_view_name,
                                                id=column_id,
                                                outtable=self.outtable)
        except:
            raise
        finally:
            idadf.internal_state._delete_view()
            idadf._idadb.commit()

        self.labels_ = ibmdbpy.IdaDataFrame(idadf._idadb,
                                            outtable,
                                            indexer=column_id)
        return self.labels_

Esempio n. 2

Mostra file

    def _retrieve_KMeans_Model(self, modelname, verbose=False):
        """
        Retrieve information about the model to print the results. The KMEANS 
        IDAX function stores its result in 4 tables:
            * <MODELNAME>_MODEL
            * <MODELNAME>_COLUMNS
            * <MODELNAME>_COLUMN_STATISTICS
            * <MODELNAME>_CLUSTERS

        Parameters
        ----------
        modelname : str
            The name of the model that is retrieved.

        verbose : bol, default: False
            Verbosity mode.
        """
        modelname = ibmdbpy.utils.check_tablename(modelname)

        if self._idadb is None:
            raise IdaKMeansError("No KMeans model was trained before")

        model_main = self._idadb.ida_query('SELECT * FROM "' +
                                           self._idadb.current_schema + '"."' +
                                           modelname + '_MODEL"')
        # Woraround for specific version of ODBC
        model_main.columns = [
            'MODELCLASS', 'COMPARISONTYPE', 'COMPARISONMEASURE', 'NUMCLUSTERS'
        ]
        model_main.columns = [x.upper() for x in model_main.columns]

        col_info = self._idadb.ida_query(
            'SELECT * FROM "' + self._idadb.current_schema + '"."' +
            modelname + '_COLUMNS"', )
        col_info.columns = [
            'COLUMNNAME', 'DATATYPE', 'OPTYPE', 'USAGETYPE', 'COLUMNWEIGHT',
            'AUTOTRANSFORM', 'TRANSFORMEDCOLUMN', 'COMPAREFUNCTION',
            'IMPORTANCE', 'OUTLIERTREATMENT', 'LOWERLIMIT', 'UPPERLIMIT',
            'CLOSURE', 'STATISTICSTYPE'
        ]
        col_info.columns = [x.upper() for x in col_info.columns]

        col_stats = self._idadb.ida_query('SELECT * FROM "' +
                                          self._idadb.current_schema + '"."' +
                                          modelname + '_COLUMN_STATISTICS"')
        col_stats.columns = [
            'CLUSTERID', 'COLUMNNAME', 'CARDINALITY', 'MODE', 'MINIMUM',
            'MAXIMUM', 'MEAN', 'VARIANCE', 'VALIDFREQ', 'MISSINGFREQ',
            'INVALIDFREQ', 'IMPORTANCE'
        ]
        col_stats.columns = [x.upper() for x in col_stats.columns]

        km_out_stat = self._idadb.ida_query('SELECT * FROM "' +
                                            self._idadb.current_schema +
                                            '"."' + modelname + '_CLUSTERS"')
        km_out_stat.columns = [
            'CLUSTERID', 'NAME', 'DESCRIPTION', 'SIZE', 'RELSIZE', 'WITHINSS'
        ]
        km_out_stat.columns = [x.upper() for x in km_out_stat.columns]

        k = model_main.iloc[0][3]
        distance = model_main.iloc[0][2]
        cont_cols = col_info.loc[(col_info['USAGETYPE'] == 'active') &
                                 (col_info['OPTYPE'] == 'continuous'),
                                 ['COLUMNNAME']]
        cat_cols = col_info.loc[(col_info['USAGETYPE'] == 'active') &
                                (col_info['OPTYPE'] == 'categorical'),
                                ['COLUMNNAME']]

        columns = []
        for x in col_stats['COLUMNNAME'].values:
            if x not in columns:
                columns.append(x)

        clusters = km_out_stat['CLUSTERID'].values
        clusters.sort()

        cluster_centers = []
        for cluster in clusters:
            tmp = [cluster]
            for column in columns:
                if column in cont_cols.values:
                    tmp.append(
                        col_stats.loc[(col_stats['CLUSTERID'] == cluster)
                                      & (col_stats['COLUMNNAME'] == column)]
                        ['MEAN'].values[0])
                elif column in cat_cols.values:
                    tmp.append(
                        col_stats.loc[(col_stats['CLUSTERID'] == cluster)
                                      & (col_stats['COLUMNNAME'] == column)]
                        ['MODE'].values[0])
                else:
                    raise TypeError("Unexpected column category")
            cluster_centers.append(tmp)

        centers = pd.DataFrame([tuple(x) for x in cluster_centers])
        centers.columns = ['CLUSTERID'] + columns

        if verbose is True:
            print("MODEL")
            print(model_main)
            print("COLUMNS")
            print(col_info)
            print("COLUMNS_STATISTICS")
            print(col_stats)
            print("CLUSTERS")
            print(km_out_stat)

        result = dict()
        result['withinss'] = km_out_stat['WITHINSS'].values
        result['size'] = km_out_stat['SIZE'].values
        result['relsize'] = km_out_stat['RELSIZE'].values
        result['distance'] = distance

        result['k'] = k
        result['centers'] = centers

        return result

Esempio n. 3

Mostra file

File: kmeans.py Progetto: gmoehler/ibmdbpy

    def _retrieve_KMeans_Model(self, modelname, verbose=False):
        """
        Retrieve information about the model to print the results. The KMEANS 
        IDAX function stores its result in 4 tables:
            * <MODELNAME>_MODEL
            * <MODELNAME>_COLUMNS
            * <MODELNAME>_COLUMN_STATISTICS
            * <MODELNAME>_CLUSTERS

        Parameters
        ----------
        modelname : str
            The name of the model that is retrieved.

        verbose : bol, default: False
            Verbosity mode.
        """
        modelname = ibmdbpy.utils.check_tablename(modelname)

        if self._idadb is None:
            raise IdaKMeansError("No KMeans model was trained before")

        model_main = self._idadb.ida_query('SELECT * FROM "' +
                                           self._idadb.current_schema + '"."' +
                                           modelname + '_MODEL"')
        model_main.columns = [x.upper() for x in model_main.columns]

        col_info = self._idadb.ida_query(
            'SELECT * FROM "' + self._idadb.current_schema + '"."' +
            modelname + '_COLUMNS"', )
        col_info.columns = [x.upper() for x in col_info.columns]

        col_stats = self._idadb.ida_query('SELECT * FROM "' +
                                          self._idadb.current_schema + '"."' +
                                          modelname + '_COLUMN_STATISTICS"')
        col_stats.columns = [x.upper() for x in col_stats.columns]

        km_out_stat = self._idadb.ida_query('SELECT * FROM "' +
                                            self._idadb.current_schema +
                                            '"."' + modelname + '_CLUSTERS"')
        km_out_stat.columns = [x.upper() for x in km_out_stat.columns]

        k = model_main.iloc[0][3]
        distance = model_main.iloc[0][2]
        cont_cols = col_info.loc[(col_info['USAGETYPE'] == 'active') &
                                 (col_info['OPTYPE'] == 'continuous'),
                                 ['COLUMNNAME']]
        cat_cols = col_info.loc[(col_info['USAGETYPE'] == 'active') &
                                (col_info['OPTYPE'] == 'categorical'),
                                ['COLUMNNAME']]

        columns = []
        for x in col_stats['COLUMNNAME'].values:
            if x not in columns:
                columns.append(x)

        clusters = km_out_stat['CLUSTERID'].values
        clusters.sort()

        cluster_centers = []
        for cluster in clusters:
            tmp = [cluster]
            for column in columns:
                if column in cont_cols.values:
                    tmp.append(
                        col_stats.loc[(col_stats['CLUSTERID'] == cluster)
                                      & (col_stats['COLUMNNAME'] == column)]
                        ['MEAN'].values[0])
                elif column in cat_cols.values:
                    tmp.append(
                        col_stats.loc[(col_stats['CLUSTERID'] == cluster)
                                      & (col_stats['COLUMNNAME'] == column)]
                        ['MODE'].values[0])
                else:
                    raise TypeError("Unexpected column category")
            cluster_centers.append(tmp)

        centers = pd.DataFrame([tuple(x) for x in cluster_centers])
        centers.columns = ['CLUSTERID'] + columns

        if verbose is True:
            print("MODEL")
            print(model_main)
            print("COLUMNS")
            print(col_info)
            print("COLUMNS_STATISTICS")
            print(col_stats)
            print("CLUSTERS")
            print(km_out_stat)

        result = dict()
        result['withinss'] = km_out_stat['WITHINSS'].values
        result['size'] = km_out_stat['SIZE'].values
        result['relsize'] = km_out_stat['RELSIZE'].values
        result['distance'] = distance

        result['k'] = k
        result['centers'] = centers

        return result