コード例 #1
0
    def test_unique_values(self):
        # Impact coding the DRG column must not yield more distinct values
        # than the DRG column itself contains.
        frame = pd.read_csv(fixture('iris_classification.csv'),
                            na_values=['None'])

        # Count the source values before the frame is handed to the coder.
        distinct_source = len(frame['DRG'].unique())

        coded = impact_coding_on_a_single_column(frame, 'species', 'DRG')
        distinct_coded = len(coded['DRG_impact_coded'].unique())

        self.assertLessEqual(distinct_coded, distinct_source)
コード例 #2
0
    def test_column_renaming(self):
        # The frame returned by impact coding should contain a column named
        # after the coded column with an '_impact_coded' suffix.
        df = pd.read_csv(fixture('iris_classification.csv'),
                         na_values=['None'])
        code_column_name = 'DRG'
        test_df = impact_coding_on_a_single_column(df, 'species',
                                                   code_column_name)

        # assertIn reports both the expected name and the actual columns on
        # failure, unlike assertTrue(... in ...), which only says
        # "False is not true".
        self.assertIn(code_column_name + '_impact_coded', test_df.columns)
コード例 #3
0
    def setUp(self):
        """Build a classification model object on three fixture columns.

        Only the predicted column and two numeric columns are read from the
        CSV; the literal string 'None' is parsed as a missing value.
        """
        wanted_columns = ['ThirtyDayReadmitFLG', 'SystolicBPNBR', 'LDLNBR']
        csv_path = fixture('HCPyDiabetesClinical.csv')
        frame = pd.read_csv(csv_path,
                            usecols=wanted_columns,
                            na_values=['None'])

        # Seed NumPy's global RNG before the model object is constructed.
        np.random.seed(42)
        model_options = {
            'modeltype': 'classification',
            'df': frame,
            'predictedcol': 'ThirtyDayReadmitFLG',
            'impute': True,
        }
        self.o = DevelopSupervisedModel(**model_options)
コード例 #4
0
    def setUp(self):
        """Build a classification model object and run its linear method.

        The fixture CSV is read with the literal string 'None' parsed as a
        missing value, and the PatientID and InTestWindowFLG columns are
        removed before the model object is created.
        """
        frame = pd.read_csv(fixture('HCPyDiabetesClinical.csv'),
                            na_values=['None'])

        # Remove the columns treated as uninformative for this test.
        uninformative = ['PatientID', 'InTestWindowFLG']
        frame = frame.drop(uninformative, axis=1)

        # Seed NumPy's global RNG before the model object is constructed.
        np.random.seed(42)
        model_options = {
            'modeltype': 'classification',
            'df': frame,
            'predictedcol': 'ThirtyDayReadmitFLG',
            'impute': True,
        }
        self.o = DevelopSupervisedModel(**model_options)
        self.o.linear(cores=1)
コード例 #5
0
    def setUp(self):
        """Build a classification model object and run its random forest.

        The fixture CSV is read with the literal string 'None' parsed as a
        missing value, and the PatientID and InTestWindowFLG columns are
        removed before the model object is created.
        """
        frame = pd.read_csv(fixture('DiabetesClinicalSampleData.csv'),
                            na_values=['None'])

        # Remove the columns treated as uninformative for this test.
        uninformative = ['PatientID', 'InTestWindowFLG']
        frame = frame.drop(uninformative, axis=1)

        # Seed NumPy's global RNG before the model object is constructed.
        np.random.seed(42)
        model_options = {
            'modeltype': 'classification',
            'df': frame,
            'predictedcol': 'ThirtyDayReadmitFLG',
            'impute': True,
        }
        self.o = DevelopSupervisedModel(**model_options)
        self.o.random_forest(cores=1)
コード例 #6
0
    def setUp(self):
        """Build a deployment object for classification and call deploy.

        The fixture CSV is read with the literal string 'None' parsed as a
        missing value and the PatientID column is removed. deploy is then
        called with the 'linear' method, one core, and without a saved model.
        """
        frame = pd.read_csv(fixture('HCPyDiabetesClinical.csv'),
                            na_values=['None'])

        # Remove the column treated as uninformative for this test.
        frame = frame.drop('PatientID', axis=1)

        # Seed NumPy's global RNG before the deployment object is constructed.
        np.random.seed(42)
        deployer_options = {
            'modeltype': 'classification',
            'df': frame,
            'graincol': 'PatientEncounterID',
            'windowcol': 'InTestWindowFLG',
            'predictedcol': 'ThirtyDayReadmitFLG',
            'impute': True,
        }
        self.o = DeploySupervisedModel(**deployer_options)

        destination = '[SAM].[dbo].[HCPyDeployClassificationBASE]'
        self.o.deploy(method='linear',
                      cores=1,
                      server='localhost',
                      dest_db_schema_table=destination,
                      use_saved_model=False)