Python clfreport Exemples, healthcareai.common.model_eval.clfreport Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : develop_supervised_model.py Projet : zlianggithub/healthcareai-py

    def random_forest(self, cores=4, trees=200, tune=False, debug=False):
        """
        This method builds a random forest estimator for the current model
        type and hands it to model_eval.clfreport for fitting and evaluation.

        Parameters
        ----------
        cores (num) : Number of cores to use (default 4)
        trees (num) : Number of trees in the random forest (default 200)
        tune (boolean) : Whether to tune hyperparameters. Forwarded to
        model_eval.clfreport together with the max_features value(s) obtained
        from model_eval.calculate_rfmtry.
        debug (boolean) : Verbosity of output (default False)

        Returns
        -------
        Nothing. The three values returned by model_eval.clfreport are stored
        as self.y_probab_rf, self.au_roc and self.rfclf. The training column
        names are stored as self.col_list.
        """

        # Only a literal True switches on the estimator's own verbose output
        verbosity = 2 if debug is True else 0

        # TODO: refactor, such that each algo doesn't need an if/else tree
        estimators = {
            'classification': RandomForestClassifier,
            'regression': RandomForestRegressor,
        }
        estimator_class = estimators.get(self.modeltype)

        if estimator_class is None:
            # Unknown model type: keep going with no estimator, as before
            algo = None
        else:
            algo = estimator_class(n_estimators=trees, verbose=verbosity)

        feature_count = len(self.X_test.columns)
        params = {
            'max_features': model_eval.calculate_rfmtry(feature_count,
                                                        self.modeltype)
        }

        self.col_list = self.X_train.columns.values

        report_kwargs = dict(
            modeltype=self.modeltype,
            debug=debug,
            devcheck='yesdev',
            algo=algo,
            X_train=self.X_train,
            y_train=self.y_train,
            X_test=self.X_test,
            y_test=self.y_test,
            param=params,
            cores=cores,
            tune=tune,
            col_list=self.col_list)

        report = model_eval.clfreport(**report_kwargs)
        self.y_probab_rf, self.au_roc, self.rfclf = report

Exemple #2

0

Afficher le fichier

Fichier : develop_supervised_model.py Projet : zlianggithub/healthcareai-py

    def linear(self, cores=4, debug=False):
        """
        This method builds a linear estimator for the current model type
        (cross-validated logistic regression for classification, ordinary
        linear regression for regression) and hands it to
        model_eval.clfreport for fitting and evaluation.

        Parameters
        ----------
        cores (num) : Number of cores to use (default 4)
        debug (boolean) : Verbosity of output (default False)

        Returns
        -------
        Nothing. The two values returned by model_eval.clfreport are stored
        as self.y_probab_linear and self.au_roc.
        """

        # TODO: get GroupLasso working via lightning

        # TODO: see if CV splits needed for linear regress

        # Stays None when the model type is neither of the two known kinds
        algo = None
        if self.modeltype == 'classification':
            algo = LogisticRegressionCV(cv=5)
        elif self.modeltype == 'regression':
            algo = LinearRegression()

        report_kwargs = dict(
            modeltype=self.modeltype,
            debug=debug,
            devcheck='yesdev',
            algo=algo,
            X_train=self.X_train,
            y_train=self.y_train,
            X_test=self.X_test,
            y_test=self.y_test,
            cores=cores)

        report = model_eval.clfreport(**report_kwargs)
        self.y_probab_linear, self.au_roc = report

Exemple #3

0

Afficher le fichier

Fichier : deploy_supervised_model.py Projet : zlianggithub/healthcareai-py

    def deploy(self,
               method,
               cores,
               server,
               dest_db_schema_table,
               trees=200,
               mtry=None,
               use_saved_model=False,
               debug=False):
        """
        This method scores the test set with the chosen algorithm and inserts
        the predictions, along with the top three factors for each row, into
        a SQL Server table.

        Before any modeling is done, a single test row is inserted and rolled
        back to check the destination table. A failure of that test insert is
        reported on the console but does not stop the run.

        Parameters
        ----------
        method (str) : Algorithm to use; either 'linear' or 'rf'
        cores (num) : Passed as n_jobs to the scikit-learn estimator
        server (str) : SQL Server name placed in the ODBC connection string
        dest_db_schema_table (str) : Destination table of the insert. It is
        concatenated into the SQL text, so it must come from a trusted source.
        trees (num) : Number of trees when method is 'rf' (default 200)
        mtry (num) : max_features when method is 'rf'. When not given, it is
        floor(sqrt(number of columns)) for classification and
        floor(number of columns / 3) for regression, but never less than 1.
        use_saved_model (boolean) : Forwarded to model_eval.clfreport and
        model_eval.findtopthreefactors (default False)
        debug (boolean) : Verbosity of output (default False)

        Returns
        -------
        Nothing. Predictions are stored in self.y_pred and inserted into
        dest_db_schema_table. Progress is printed to the console.

        Raises
        ------
        ValueError : If self.modeltype is not 'classification' or
        'regression', or if method is not 'linear' or 'rf'.
        """

        # Fail fast on unsupported combinations. Otherwise no estimator would
        # be selected and a missing (or stale) self.y_pred would be used below.
        if (self.modeltype not in ('classification', 'regression') or
                method not in ('linear', 'rf')):
            raise ValueError(
                "Unsupported combination: modeltype={!r}, method={!r}. "
                "modeltype must be 'classification' or 'regression' and "
                "method must be 'linear' or 'rf'.".format(
                    self.modeltype, method))

        is_classification = self.modeltype == 'classification'

        if debug:
            print("""\ngraincol test shape and cell type before db
            prelim-insert check""")
            print(np.shape(self.graincol_test))
            print(type(self.graincol_test.iloc[0]))

        # The same connection string is used for the test and the real insert
        connection_string = ("""DRIVER={SQL Server Native Client 11.0};
                                   SERVER=""" + server + """;
                                   Trusted_Connection=yes;""")

        if is_classification:
            predictedvalcol = 'PredictedProbNBR'
        else:
            predictedvalcol = 'PredictedValueNBR'

        # NOTE: table and column identifiers cannot be bound as query
        # parameters, so they are concatenated into the statement. They must
        # never come from untrusted input. All row values are bound with '?'.
        insert_head = (
            """insert into """ + dest_db_schema_table + """
                       (BindingID, BindingNM, LastLoadDTS, """ +
            self.graincol + """,""" + predictedvalcol + """,
                       Factor1TXT, Factor2TXT, Factor3TXT)
                       """)

        # First, check the connection by inserting test data (and rolling back)
        cecnxn = pyodbc.connect(connection_string)
        cursor = cecnxn.cursor()

        # The following allows output to work with datetime/datetime2
        dt = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]

        try:
            cursor.execute(
                insert_head + """values (0, 'PyTest', ?, ?, 0.98,
                       'FirstCol', 'SecondCol', 'ThirdCol')""",
                (dt, int(self.graincol_test.iloc[0])))
            cecnxn.rollback()

            print("\nSuccessfully inserted a test row into {}.".format(
                dest_db_schema_table))
            print("SQL insert successfuly rolled back (since it was a test).")

        except pyodbc.DatabaseError:
            # Deliberately best-effort: report the problem and carry on
            print("\nFailed to insert values into {}.".format(
                dest_db_schema_table))
            print("Check that the table exists with right col structure")
            print("Example column structure can be found in the docs")
            print("Your GrainID col might not match that in your input table")

        finally:
            try:
                cecnxn.close()
            except pyodbc.DatabaseError:
                print("""\nAn attempt to complete a transaction has failed.
                No corresponding transaction found. \nPerhaps you don't have
                permission to write to this server.""")

        # Pick the estimator for this modeltype/method combination
        if method == 'linear':
            if is_classification:
                algorithm = LogisticRegression(n_jobs=cores)
            else:
                algorithm = LinearRegression(n_jobs=cores)
        else:
            # TODO: think about moving this to model_eval mtry function
            if not mtry:
                column_count = len(self.X_train.columns.values)
                if is_classification:
                    default_mtry = int(math.floor(math.sqrt(column_count)))
                else:
                    default_mtry = column_count // 3
                # max_features must be a whole number of at least one column;
                # a float would be read by scikit-learn as a fraction.
                mtry = max(1, default_mtry)

            if is_classification:
                forest_class = RandomForestClassifier
            else:
                forest_class = RandomForestRegressor

            algorithm = forest_class(
                n_estimators=trees,
                max_features=mtry,
                n_jobs=cores,
                verbose=(2 if debug is True else 0))

        self.y_pred = model_eval.clfreport(modeltype=self.modeltype,
                                           debug=debug,
                                           devcheck='notdev',
                                           algo=algorithm,
                                           X_train=self.X_train,
                                           y_train=self.y_train,
                                           X_test=self.X_test,
                                           use_saved_model=use_saved_model)

        # Calculate three imp columns
        first_fact, second_fact, third_fact = model_eval. \
            findtopthreefactors(debug,
                                self.X_train,
                                self.y_train,
                                self.X_test,
                                self.modeltype,
                                use_saved_model)

        # Convert to base int/float instead of numpy data type for SQL insert.
        # Positional access is kept because the container types are not
        # visible from this method.
        graincol_baseint = [
            int(self.graincol_test.iloc[i])
            for i in range(len(self.graincol_test))
        ]
        y_pred_baseint = [
            float(self.y_pred[i]) for i in range(len(self.y_pred))
        ]

        # Create columns for export to SQL Server
        X_test_length = len(self.X_test)
        bindingid = [0] * X_test_length
        bindingnm = ['Python'] * X_test_length

        # Create vector with time to the millisecond
        load_time = datetime.datetime.utcnow().strftime(
            '%Y-%m-%d %H:%M:%S.%f')[:-3]
        lastloaddts = [load_time] * X_test_length

        # Put everything into 2-d list for export
        output_2dlist = list(
            zip(bindingid, bindingnm, lastloaddts, graincol_baseint,
                y_pred_baseint, first_fact, second_fact, third_fact))

        if debug:
            print('\nTop rows of 2-d list immediately before insert into db')
            print(pd.DataFrame(output_2dlist[0:3]).head())

        cecnxn = pyodbc.connect(connection_string)
        cursor = cecnxn.cursor()

        try:
            cursor.executemany(
                insert_head + """values (?,?,?,?,?,?,?,?)""", output_2dlist)
            cecnxn.commit()

            # Todo: count and display (via pyodbc) how many rows inserted
            print("\nSuccessfully inserted rows into {}.".format(
                dest_db_schema_table))

        except pyodbc.DatabaseError:
            print("\nFailed to insert values into {}.".format(
                dest_db_schema_table))
            print("Was your test insert successful earlier?")
            print("If so, what has changed with your entity since then?")

        finally:
            try:
                cecnxn.close()
            except pyodbc.DatabaseError:
                print("""\nAn attempt to complete a transaction has failed.
                      No corresponding transaction found. \nPerhaps you don't
                      have permission to write to this server.""")