Ejemplo n.º 1
0
    def test_drop_model_by_object(self):
        """Verify that drop_models() deletes a model when given the model object."""
        unique_name = str(uuid.uuid1()).replace('-', '_')

        # A freshly created model must show up in get_model_names().
        kmeans = ta.KMeansModel(name=unique_name)
        names_after_create = ta.get_model_names()
        self.assertTrue(unique_name in names_after_create,
                        unique_name + " should exist in the list of models")

        # Pass the model object itself (rather than its name) to drop_models().
        dropped_count = ta.drop_models(kmeans)
        self.assertEqual(1, dropped_count, "drop_models() should have deleted one model.")

        # The name must be gone from the listing afterwards.
        names_after_drop = ta.get_model_names()
        self.assertFalse(unique_name in names_after_drop,
                         unique_name + " should not exist in the list of models")
Ejemplo n.º 2
0
    def test_drop_model_by_object(self):
        """Check that a model handed to drop_models() as an object is deleted."""
        name = str(uuid.uuid1()).replace('-', '_')

        # Creation step: the new name is expected in the model listing.
        created_model = ta.KMeansModel(name=name)
        listed = ta.get_model_names()
        self.assertTrue(name in listed, name + " should exist in the list of models")

        # Deletion step: drop by object and expect a count of exactly one.
        number_dropped = ta.drop_models(created_model)
        self.assertEqual(1, number_dropped, "drop_models() should have deleted one model.")

        # Verification step: the name is no longer listed.
        listed = ta.get_model_names()
        self.assertFalse(name in listed, name + " should not exist in the list of models")
Ejemplo n.º 3
0
    def test_model_rename(self):
        """Verify that renaming a model replaces its old name in get_model_names()."""
        model_name = str(uuid.uuid1()).replace('-', '_')
        new_model_name = str(uuid.uuid1()).replace('-', '_')

        # The model is listed under its original name right after creation.
        model = ta.KMeansModel(name=model_name)
        self.assertTrue(model_name in ta.get_model_names(),
                        model_name + " should be in the list of models")

        # Rename by assigning to the model's name attribute.
        model.name = new_model_name

        # Only the new name may remain in the listing.
        self.assertTrue(new_model_name in ta.get_model_names(),
                        new_model_name + " should be in list of models")
        self.assertFalse(model_name in ta.get_model_names(),
                         model_name + " should not be in list of models")
Ejemplo n.º 4
0
    def test_generic_drop_by_name(self):
        """Verify that the generic drop() deletes a model when given its name."""
        unique_name = str(uuid.uuid1()).replace('-', '_')

        # Create the model; only its name is needed afterwards.
        ta.KMeansModel(name=unique_name)
        listed_names = ta.get_model_names()
        self.assertTrue(unique_name in listed_names,
                        unique_name + " should exist in the list of model names")

        # Drop the item by name and expect exactly one deletion.
        deleted_count = ta.drop(unique_name)
        self.assertEqual(1, deleted_count, "drop() should have deleted one item")

        # The model name must no longer be listed.
        remaining_names = ta.get_model_names()
        self.assertFalse(unique_name in remaining_names,
                         unique_name + " should not exist in the list of models")
Ejemplo n.º 5
0
    def test_generic_drop_by_name(self):
        """Check that drop() called with a model's name deletes that model."""
        name = str(uuid.uuid1()).replace('-', '_')

        # Set-up: create a model under the unique name and confirm it is listed.
        ta.KMeansModel(name=name)
        before_drop = ta.get_model_names()
        self.assertTrue(name in before_drop, name + " should exist in the list of model names")

        # Exercise: generic drop() by name should report one deleted item.
        number_deleted = ta.drop(name)
        self.assertEqual(1, number_deleted, "drop() should have deleted one item")

        # Verify: the name has disappeared from the model listing.
        after_drop = ta.get_model_names()
        self.assertFalse(name in after_drop, name + " should not exist in the list of models")
Ejemplo n.º 6
0
    def test_generic_drop_by_object(self):
        """Verify that the generic drop() deletes a model when given the model object."""
        unique_name = str(uuid.uuid1()).replace('-', '_')

        kmeans = ta.KMeansModel(name=unique_name)

        # The newly created model has to appear in the listing.
        listed_names = ta.get_model_names()
        self.assertTrue(unique_name in listed_names,
                        unique_name + " should exist in the list of model names")

        # Drop using the entity itself and expect exactly one deletion.
        deleted_count = ta.drop(kmeans)
        self.assertEqual(1, deleted_count, "drop() should have deleted one item")

        # The model name must no longer be listed.
        remaining_names = ta.get_model_names()
        self.assertFalse(unique_name in remaining_names,
                         unique_name + " should not exist in the list of models")
Ejemplo n.º 7
0
    def test_generic_drop_by_object(self):
        """Check that drop() called with a model object deletes that model."""
        name = str(uuid.uuid1()).replace('-', '_')

        created_model = ta.KMeansModel(name=name)

        # Set-up check: the model is listed after creation.
        before_drop = ta.get_model_names()
        self.assertTrue(name in before_drop, name + " should exist in the list of model names")

        # Exercise: generic drop() by entity should report one deleted item.
        number_deleted = ta.drop(created_model)
        self.assertEqual(1, number_deleted, "drop() should have deleted one item")

        # Verify: the name has disappeared from the model listing.
        after_drop = ta.get_model_names()
        self.assertFalse(name in after_drop, name + " should not exist in the list of models")
Ejemplo n.º 8
0
    def test_model_rename(self):
        """Verify that renaming a model replaces its old name in get_model_names()."""
        model_name = str(uuid.uuid1()).replace('-', '_')
        new_model_name = str(uuid.uuid1()).replace('-', '_')

        # The model is listed under its original name right after creation.
        model = ta.KMeansModel(name=model_name)
        self.assertTrue(model_name in ta.get_model_names(),
                        model_name + " should be in the list of models")

        # Rename by assigning to the model's name attribute.
        model.name = new_model_name

        # Only the new name may remain in the listing.
        self.assertTrue(new_model_name in ta.get_model_names(),
                        new_model_name + " should be in list of models")
        self.assertFalse(model_name in ta.get_model_names(),
                         model_name + " should not be in list of models")
Ejemplo n.º 9
0
    def test_create_duplicate_kmeans_model(self):
        """Verify that creating a second KMeansModel under an existing name raises."""
        unique_name = str(uuid.uuid1()).replace('-', '_')

        # First creation succeeds and the name becomes visible in the listing.
        ta.KMeansModel(name=unique_name)
        listed_names = ta.get_model_names()
        self.assertTrue(unique_name in listed_names,
                        unique_name + " should be in the list of models")

        # Re-using the same name is expected to raise.
        with self.assertRaises(Exception):
            ta.KMeansModel(name=unique_name)
Ejemplo n.º 10
0
    def test_create_duplicate_kmeans_model(self):
        """Check that a KMeansModel name cannot be used for a second model."""
        name = str(uuid.uuid1()).replace('-', '_')

        # Create the original model and confirm that it is listed.
        ta.KMeansModel(name=name)
        current_names = ta.get_model_names()
        self.assertTrue(name in current_names, name + " should be in the list of models")

        # A second model with the identical name is expected to raise.
        with self.assertRaises(Exception):
            ta.KMeansModel(name=name)
Ejemplo n.º 11
0
    def test_duplicate_graph_rename(self):
        """Check that a graph cannot be renamed to a name that is already in use.

        The colliding name is taken in turn from another graph, a frame and a
        model. Each rename is expected to raise, and the names involved are
        expected to still be listed afterwards.
        """
        first_graph_name = str(uuid.uuid1()).replace('-', '_')
        second_graph_name = str(uuid.uuid1()).replace('-', '_')
        taken_model_name = str(uuid.uuid1()).replace('-', '_')
        taken_frame_name = str(uuid.uuid1()).replace('-', '_')

        # Two graphs to rename, plus a model and a frame that occupy names.
        first_graph = ta.Graph(name=first_graph_name)
        second_graph = ta.Graph(name=second_graph_name)
        ta.KMeansModel(name=taken_model_name)
        ta.Frame(name=taken_frame_name)

        # Both graphs must be listed before any rename is attempted.
        for name in (first_graph_name, second_graph_name):
            self.assertTrue(name in ta.get_graph_names(),
                            name + " should exist in list of graphs")

        # Graph -> name of another graph: expected to raise.
        with self.assertRaises(Exception):
            second_graph.name = first_graph_name

        # Neither graph name may have disappeared.
        for name in (first_graph_name, second_graph_name):
            self.assertTrue(name in ta.get_graph_names(),
                            name + " should still exist in list of graphs")

        # Graph -> name of an existing frame: expected to raise.
        with self.assertRaises(Exception):
            first_graph.name = taken_frame_name

        # The first graph and the frame keep their names.
        graph_names = ta.get_graph_names()
        self.assertTrue(graph_names.__contains__(first_graph_name) if False else first_graph_name in graph_names,
                        first_graph_name + " should still exist in the list of graphs")
        frame_names = ta.get_frame_names()
        self.assertTrue(taken_frame_name in frame_names,
                        taken_frame_name + " should still exist in the list of frames")

        # Graph -> name of an existing model: expected to raise.
        with self.assertRaises(Exception):
            first_graph.name = taken_model_name

        # The first graph and the model keep their names.
        graph_names = ta.get_graph_names()
        self.assertTrue(first_graph_name in graph_names,
                        first_graph_name + " should still exist in the list of graphs")
        model_names = ta.get_model_names()
        self.assertTrue(taken_model_name in model_names,
                        taken_model_name + " should still exist in the list of models")
Ejemplo n.º 12
0
    def test_duplicate_frame_rename(self):
        """Check that a frame cannot be renamed to a name that is already in use.

        The colliding name is taken in turn from another frame, a graph and a
        model. Each rename is expected to raise, and the names involved are
        expected to still be listed afterwards.
        """
        first_frame_name = str(uuid.uuid1()).replace('-', '_')
        second_frame_name = str(uuid.uuid1()).replace('-', '_')
        taken_graph_name = str(uuid.uuid1()).replace('-', '_')
        taken_model_name = str(uuid.uuid1()).replace('-', '_')

        # Two frames to rename, plus a graph and a model that occupy names.
        first_frame = ta.Frame(name=first_frame_name)
        second_frame = ta.Frame(name=second_frame_name)
        ta.Graph(name=taken_graph_name)
        ta.KMeansModel(name=taken_model_name)

        # Both frames must be listed before any rename is attempted.
        for name in (first_frame_name, second_frame_name):
            self.assertTrue(name in ta.get_frame_names(),
                            name + " should exist in list of frames")

        # Frame -> name of another frame: expected to raise.
        with self.assertRaises(Exception):
            second_frame.name = first_frame_name

        # Neither frame name may have disappeared.
        for name in (first_frame_name, second_frame_name):
            self.assertTrue(name in ta.get_frame_names(),
                            name + " should still exist in list of frames")

        # Frame -> name of an existing graph: expected to raise.
        with self.assertRaises(Exception):
            first_frame.name = taken_graph_name

        # The first frame and the graph keep their names.
        frame_names = ta.get_frame_names()
        self.assertTrue(first_frame_name in frame_names,
                        first_frame_name + " should still exist in the list of frames")
        graph_names = ta.get_graph_names()
        self.assertTrue(taken_graph_name in graph_names,
                        taken_graph_name + " should still exist in the list of graphs")

        # Frame -> name of an existing model: expected to raise.
        with self.assertRaises(Exception):
            first_frame.name = taken_model_name

        # The first frame and the model keep their names.
        frame_names = ta.get_frame_names()
        self.assertTrue(first_frame_name in frame_names,
                        first_frame_name + " should still exist in the list of frames")
        model_names = ta.get_model_names()
        self.assertTrue(taken_model_name in model_names,
                        taken_model_name + " should still exist in the list of models")
Ejemplo n.º 13
0
    def test_naive_bayes(self):
        """Train a NaiveBayesModel on a CSV-backed frame and check predict()'s columns.

        The CSV is read from a hard-coded HDFS path. The frame and model are
        created under fixed names; an existing frame or model with the same
        name is dropped first. The test passes when the frame returned by
        predict() has the label column, every feature column and a trailing
        'predicted_class' column, in that order.
        """
        label_column = 'GXY'
        # Feature columns in schema order. The first two are int32, the rest
        # float64. The column names are listed once here and reused for the
        # schema, the train() call and the expected output, so they cannot drift.
        int_features = ['Age', 'Sex']
        float_features = [
            'Height', 'Weight', 'BMI', 'DBP', 'SBP', 'HCT', 'MCV', 'RDW_SD',
            'RDW_CV', 'HGB', 'MCH', 'MCHC', 'RBC', 'WBC', 'NEUT1', 'LYMPH',
            'MONO1', 'EO1', 'BASO1', 'NEUT2', 'MONO2', 'EO2', 'BASO2', 'PLT',
            'MPV', 'P_LCR', 'PCT', 'Lymph_3', 'ESR', 'PH', 'PRO', 'GIu',
            'KET', 'BLD', 'BIL', 'URO', 'NIT', 'SG', 'LEU', 'N_QT', 'VC',
            'OB', 'FBG', 'HBsAg', 'HBsAb', 'HBeAg', 'HBeAb', 'HBcAb', 'TBiL',
            'ALT', 'AST', 'AKP', 'GGT', 'ADA', 'TPO', 'Aib', 'Gib', 'A_G',
            'PA', 'AST_ALT', 'BUN', 'Cr', 'UA', 'CK', 'LDH', 'CK_MB',
            'LDH_MB', 'a_HBD', 'TNI', 'Fg', 'K1', 'AFP', 'CEA', 'Free_PSA',
            'CA125', 'CA19_9', 'NSE', 'CA242', 'B_HCG', 'CA15_3', 'CA50',
            'CA72_4', 'HGH', 'SF', 'QJD', 'DCJC', 'MJJC', 'RUT', 'PGI_PGII',
            'Ca2', 'P3', 'K2', 'Na', 'CI',
        ]
        feature_columns = int_features + float_features
        # Built before training so the expectation is independent of anything
        # train()/predict() might do with the list they are given.
        expected_columns = [label_column] + feature_columns + ['predicted_class']

        # NOTE: earlier revisions carried commented-out ta.ignore entries for
        # HPI, PDW, ECG, BCJC1, IRDS and WK; those columns are not in the schema.
        schema = ([(label_column, ta.int32)]
                  + [(column, ta.int32) for column in int_features]
                  + [(column, ta.float64) for column in float_features])

        # Single-argument print(...) prints the same text under Python 2 and 3.
        print("define csv file")
        csv = ta.CsvFile("hdfs://nameservice1/org/intel/hdfsbroker/userspace/ae6a38d3-191f-494f-86a6-3fe1b2255902/e3327582-f475-4dc9-8efa-96070abb606d/000000_1",
                         schema=schema, skip_header_lines=1)

        print("create frame")
        frame_name = 'ModelNaiveBayesFrame'
        exist_frames = ta.get_frame_names()
        if frame_name in exist_frames:
            # The name is fixed, so a frame left over from an earlier run is removed.
            print("Frame exists, delete it")
            ta.drop_frames(frame_name)
        train_frame = ta.Frame(csv, frame_name)

        print("Initializing a NaiveBayesModel object")
        model_name = 'POCModelNaiveBayesModel'
        exist_models = ta.get_model_names()
        if model_name in exist_models:
            # Same reasoning as for the frame: drop a leftover model first.
            print("Model exist, delete")
            ta.drop_models(model_name)
        naive = ta.NaiveBayesModel(name=model_name)

        print("Training the model on the Frame")
        naive.train(train_frame, label_column, feature_columns, num_classes=2)

        print("Predicting on the Frame")
        output = naive.predict(train_frame)

        self.assertEqual(output.column_names, expected_columns)
    def testLinearRegression(self):
        """Train a random forest classifier on a CSV-backed frame and check predict()'s columns.

        Despite its name, this test builds a RandomForestClassifierModel, not a
        linear regression model; the method name is kept so existing test
        selections keep working.

        The CSV is read from a hard-coded HDFS path. The frame and model are
        created under fixed names; an existing frame or model with the same
        name is dropped first. The test passes when the frame returned by
        predict() has the label column, every feature column and a trailing
        'predicted_class' column, in that order.
        """
        label_column = 'GXY'
        # Feature columns in schema order. The first two are int32, the rest
        # float64. The column names are listed once here and reused for the
        # schema, the train() call and the expected output, so they cannot drift.
        int_features = ['Age', 'Sex']
        float_features = [
            'Height', 'Weight', 'BMI', 'DBP', 'SBP', 'HCT', 'MCV', 'RDW_SD',
            'RDW_CV', 'HGB', 'MCH', 'MCHC', 'RBC', 'WBC', 'NEUT1', 'LYMPH',
            'MONO1', 'EO1', 'BASO1', 'NEUT2', 'MONO2', 'EO2', 'BASO2', 'PLT',
            'MPV', 'P_LCR', 'PCT', 'Lymph_3', 'ESR', 'PH', 'PRO', 'GIu',
            'KET', 'BLD', 'BIL', 'URO', 'NIT', 'SG', 'LEU', 'N_QT', 'VC',
            'OB', 'FBG', 'HBsAg', 'HBsAb', 'HBeAg', 'HBeAb', 'HBcAb', 'TBiL',
            'ALT', 'AST', 'AKP', 'GGT', 'ADA', 'TPO', 'Aib', 'Gib', 'A_G',
            'PA', 'AST_ALT', 'BUN', 'Cr', 'UA', 'CK', 'LDH', 'CK_MB',
            'LDH_MB', 'a_HBD', 'TNI', 'Fg', 'K1', 'AFP', 'CEA', 'Free_PSA',
            'CA125', 'CA19_9', 'NSE', 'CA242', 'B_HCG', 'CA15_3', 'CA50',
            'CA72_4', 'HGH', 'SF', 'QJD', 'DCJC', 'MJJC', 'RUT', 'PGI_PGII',
            'Ca2', 'P3', 'K2', 'Na', 'CI',
        ]
        feature_columns = int_features + float_features
        # Built before training so the expectation is independent of anything
        # train()/predict() might do with the list they are given.
        expected_columns = [label_column] + feature_columns + ['predicted_class']

        # NOTE: earlier revisions carried commented-out ta.ignore entries for
        # HPI, PDW, ECG, BCJC1, IRDS and WK; those columns are not in the schema.
        schema = ([(label_column, ta.int32)]
                  + [(column, ta.int32) for column in int_features]
                  + [(column, ta.float64) for column in float_features])

        # Single-argument print(...) prints the same text under Python 2 and 3.
        print("define csv file")
        csv = ta.CsvFile("hdfs://nameservice1/org/intel/hdfsbroker/userspace/9bb351fa-7b17-4a81-b3b0-521639c1d473/d342214b-c4c0-4963-aeaf-5adf054e22b6/000000_1",
                         schema=schema, skip_header_lines=1)

        print("create frame")
        frame_name = 'Random_forest_SampleFrame'
        exist_frames = ta.get_frame_names()
        if frame_name in exist_frames:
            # The name is fixed, so a frame left over from an earlier run is removed.
            print("Frame exists, delete it")
            ta.drop_frames(frame_name)
        frame = ta.Frame(csv, frame_name)

        print("Initializing a RandomForestModel object")
        model_name = 'POCRandom_forest_SampleModel'
        exist_models = ta.get_model_names()
        if model_name in exist_models:
            # Same reasoning as for the frame: drop a leftover model first.
            print("Model exist, delete")
            ta.drop_models(model_name)
        classifier = ta.RandomForestClassifierModel(name=model_name)

        print("Training the model on the Frame")
        classifier.train(frame, label_column, feature_columns, num_classes=2)

        print("Predicting on the Frame")
        output = classifier.predict(frame)

        self.assertEqual(output.column_names, expected_columns)
Ejemplo n.º 15
0
    def test_drop_model_that_does_not_exist(self):
        """Verify that drop_models() reports zero deletions for an unknown name."""
        unused_name = str(uuid.uuid1()).replace('-', '_')

        # Precondition: the freshly generated name is not a known model.
        listed_names = ta.get_model_names()
        self.assertFalse(unused_name in listed_names,
                         unused_name + " should not exist in the list of models")

        # Dropping a name with no model behind it is expected to return 0.
        dropped_count = ta.drop_models(unused_name)
        self.assertEqual(0, dropped_count, "drop_models() shouldn't have deleted any models.")
Ejemplo n.º 16
0
    def test_drop_model_that_does_not_exist(self):
        """Check that dropping a model name that was never created deletes nothing."""
        name = str(uuid.uuid1()).replace('-', '_')

        # Confirm up front that no model is listed under this name.
        current_names = ta.get_model_names()
        self.assertFalse(name in current_names, name + " should not exist in the list of models")

        # drop_models() is expected to report that nothing was deleted.
        number_dropped = ta.drop_models(name)
        self.assertEqual(0, number_dropped, "drop_models() shouldn't have deleted any models.")