Beispiel #1
0
    def predict(self,
                idadf,
                outtable=None,
                transaction_id=None,
                item_id=None,
                type="rules",
                limit=1,
                sort=None):
        """
        Apply the rules and patterns of an association rules model to other
        transactions. You can apply all rules or only specific rules according
        to specified criteria.

        Parameters
        ----------
        idadf : IdaDataFrame
            IdaDataFrame to be used as input.

        outtable : str, optional
            The name of the output table in which the mapping between the input
            sequences and the associated rules or patterns is written. If the
            parameter corresponds to an existing table in the database, it is
            replaced. If omitted, a table name is requested from the database
            object (``_get_valid_tablename``).

        transaction_id : str, optional
            The column of the input table that identifies the transaction ID.
            By default, this is the same tid column that is specified in the
            stored procedure to build the model.

        item_id : str, optional
            The column of the input table that identifies an item of the
            transaction. By default, this is the same item column that is
            specified in the stored procedure to build the model.

        type : str, optional, default : "rules"
            The type of information that is written in the output table. The
            following values are possible: 'rules' and 'patterns'.

        limit : int, optional, >=1, default: 1
            The maximum number of rules or patterns that is written in the
            output table for each input sequence.

        sort : str or list, optional
            Keywords that indicate the order in which the rules or patterns
            are written in the output table. The order is descending. A list
            is joined with semicolons; a string is passed through unchanged
            (use semicolons to separate several keywords). The following
            values are possible: 'support', 'confidence', 'lift', and
            'length'. The 'confidence' value can only be specified if the type
            parameter is 'rules'. If the type parameter is 'rules', the
            default is: support;confidence;length. If the type parameter is
            'patterns', the default is: support;lift;length.

        Returns
        -------
        IdaDataFrame
            IdaDataFrame opened on the output table. It is also stored in
            the ``labels_`` attribute.

        Raises
        ------
        TypeError
            If ``idadf`` is not an IdaDataFrame.
        ValueError
            If the transaction or item column is not a column of ``idadf``.
        IdaAssociationRulesError
            If no model was trained before.

        Notes
        -----
        When "type" is set to "rules", it looks like nothing is returned.
        """
        if not isinstance(idadf, ibmdbpy.IdaDataFrame):
            raise TypeError("Argument should be an IdaDataFrame")

        # Only join real sequences: joining a plain string would insert a
        # semicolon between every character ("support" -> "s;u;p;...").
        if sort is not None and not isinstance(sort, str):
            sort = ';'.join(sort)

        if transaction_id is None:
            transaction_id = self.transaction_id
        if item_id is None:
            item_id = self.item_id

        # Check that the ID columns exist in the input data
        if transaction_id not in idadf.columns:
            raise ValueError("Transaction id column %s is not available in "
                             "IdaDataFrame." % transaction_id)
        if item_id not in idadf.columns:
            raise ValueError("Item id column %s is not available in "
                             "IdaDataFrame." % item_id)

        if self._idadb is None:
            raise IdaAssociationRulesError(
                "No Association rules model was trained before.")

        # Resolve the output table from the *argument*. As documented, an
        # existing table with that name is replaced rather than rejected.
        if outtable is None:
            outtable = idadf._idadb._get_valid_tablename('PREDICT_ASSOCRULES_')
        else:
            outtable = ibmdbpy.utils.check_tablename(outtable)
            if idadf._idadb.exists_table(outtable):
                idadf._idadb.drop_table(outtable)

        # Remember the settings of the last prediction on the model object
        self.outtable = outtable
        self.type = type
        self.limit = limit
        self.sort = sort

        # Create a temporary view so the current state of the IdaDataFrame
        # can be handed to the stored procedure as its input table
        idadf.internal_state._create_view()
        tmp_view_name = idadf.internal_state.current_state

        # Keep only the unqualified name if the view name carries a schema
        if "." in tmp_view_name:
            tmp_view_name = tmp_view_name.split('.')[-1]

        try:
            idadf._idadb._call_stored_procedure("IDAX.PREDICT_ASSOCRULES ",
                                                model=self.modelname,
                                                intable=tmp_view_name,
                                                outtable=outtable,
                                                tid=transaction_id,
                                                item=item_id,
                                                type=type,
                                                limit=limit,
                                                sort=sort)
        finally:
            # Always drop the temporary view, even if the procedure failed
            idadf.internal_state._delete_view()
            idadf._cursor.commit()

        self.labels_ = ibmdbpy.IdaDataFrame(idadf._idadb, outtable)
        return self.labels_
Beispiel #2
0
    def _retrieve_AssociationRules_Model(self, modelname, verbose=False):
        """
        Query the tables that hold an association rules model and optionally
        print them. The Association Rules IDAX function stores its result in
        4 tables:
            * <MODELNAME>_ASSOCPATTERNS
            * <MODELNAME>_ASSOCPATTERNS_STATISTICS
            * <MODELNAME>_ASSOCRULES
            * <MODELNAME>_ITEMS

        Each table is read from the current schema and its column names are
        converted to upper case. Nothing is returned.

        Parameters
        ----------
        modelname : str
            The name of the model that is retrieved.
        verbose : bool, default: False
            Verbosity mode. The tables are printed only when this is exactly
            ``True``.

        Raises
        ------
        IdaAssociationRulesError
            If no model was trained before.

        Notes
        -----
        Needs better formatting instead of printing the tables
        """
        modelname = ibmdbpy.utils.check_tablename(modelname)

        if self._idadb is None:
            raise IdaAssociationRulesError(
                "No Association rules model was trained before.")

        def load(suffix):
            # Read one model table and normalise its column names
            statement = ''.join([
                'SELECT * FROM "', self._idadb.current_schema, '"."',
                modelname, suffix, '"'
            ])
            frame = self._idadb.ida_query(statement)
            frame.columns = [name.upper() for name in frame.columns]
            return frame

        # (printed label, table suffix), in the order the tables are queried
        parts = (
            ("assocpatterns", "_ASSOCPATTERNS"),
            ("assocpatterns_stats", "_ASSOCPATTERNS_STATISTICS"),
            ("assocrules", "_ASSOCRULES"),
            ("items", "_ITEMS"),
        )

        # All tables are fetched before anything is printed
        loaded = [(label, load(suffix)) for label, suffix in parts]

        if verbose is True:
            for position, (label, frame) in enumerate(loaded):
                if position:
                    print()
                print(label)
                print(frame)

        return None
Beispiel #3
0
    def _retrieve_AssociationRules_Model(self, modelname, verbose=False):
        """
        Query the tables that hold an association rules model and optionally
        print them. The Association Rules IDAX function stores its result in
        4 tables:
            * <MODELNAME>_ASSOCPATTERNS
            * <MODELNAME>_ASSOCPATTERNS_STATISTICS
            * <MODELNAME>_ASSOCRULES
            * <MODELNAME>_ITEMS

        Each table is read from the current schema and relabelled with a
        fixed list of column names. Nothing is returned.

        Parameters
        ----------
        modelname : str
            The name of the model that is retrieved.
        verbose : bool, default: False
            Verbosity mode. The tables are printed only when this is exactly
            ``True``.

        Raises
        ------
        IdaAssociationRulesError
            If no model was trained before.

        Notes
        -----
        Needs better formatting instead of printing the tables
        """
        modelname = ibmdbpy.utils.check_tablename(modelname)

        if self._idadb is None:
            raise IdaAssociationRulesError(
                "No Association rules model was trained before.")

        # Note: The column names are hardcoded as a workaround for a bug in a
        # specific ODBC Linux driver. If the implementation of the IDA method
        # changes, this may break, but it would not be difficult to fix.
        # Each entry: (printed label, table suffix, expected column names),
        # listed in the order the tables are queried.
        layout = (
            ("assocpatterns", "_ASSOCPATTERNS",
             ("ITEMSETID", "ITEMID")),
            ("assocpatterns_stats", "_ASSOCPATTERNS_STATISTICS",
             ("ITEMSETID", "LENGTH", "COUNT", "SUPPORT", "LIFT", "PRUNED")),
            ("assocrules", "_ASSOCRULES",
             ("RULEID", "ITEMSETID", "BODYID", "HEADID", "CONFIDENCE",
              "PRUNED")),
            ("items", "_ITEMS",
             ("ITEMID", "ITEM", "ITEMNAME", "COUNT", "SUPPORT")),
        )

        # All tables are fetched and relabelled before anything is printed
        loaded = []
        for label, suffix, names in layout:
            statement = ''.join([
                'SELECT * FROM "', self._idadb.current_schema, '"."',
                modelname, suffix, '"'
            ])
            frame = self._idadb.ida_query(statement)
            # Assigning the names fails if the table has a different number
            # of columns than expected
            frame.columns = [name.upper() for name in names]
            loaded.append((label, frame))

        if verbose is True:
            for label, frame in loaded:
                print(label)
                print(frame)
                print(" ")

        return None