Ejemplo n.º 1
0
    def test_frame_drop(self):
        """Drop a named frame by entity and then by name.

        After each drop, the frame name must no longer be reported by
        ``ta.get_frame_names()``.
        """
        frame_name = "test_frame_drop"

        print("define csv file")
        csv = ta.CsvFile("/datasets/classification-compute.csv",
                         schema=[('a', str), ('b', ta.int32),
                                 ('labels', ta.int32),
                                 ('predictions', ta.int32)],
                         delimiter=',',
                         skip_header_lines=1)

        print("create frame")
        frame = ta.Frame(csv, name=frame_name)

        print("dropping frame by entity")
        ta.drop_frames(frame)
        self.assertFalse(frame_name in ta.get_frame_names(),
                         "test_frame_drop should not exist in list of frames")

        # Re-create the frame under the same name so it can be dropped by name.
        frame = ta.Frame(csv, name=frame_name)

        print("dropping frame by name")
        self.assertEqual(1, ta.drop_frames(frame_name),
                         "drop_frames() should have deleted one frame")
        # Query the frame names again: reusing the list fetched before the
        # frame was re-created would not verify this second drop at all.
        self.assertFalse(frame_name in ta.get_frame_names(),
                         "test_frame_drop should not exist in list of frames")
Ejemplo n.º 2
0
    def test_access_refreshes_frames(self):
        """Check which frame operations advance the last_read_date property."""
        source = ta.CsvFile("/datasets/dates.csv",
                            schema=[('start', ta.datetime),
                                    ('id', int),
                                    ('stop', ta.datetime),
                                    ('color', str)],
                            delimiter=',')
        frame_name = "update_last_read"
        if frame_name in ta.get_frame_names():
            ta.drop_frames(frame_name)
        # The frame is named to save it from the garbage-collection test in
        # this suite.
        dates = ta.Frame(source, name=frame_name)

        # Reading last_read_date twice in a row must give the same value.
        first_read = dates.last_read_date
        second_read = dates.last_read_date
        self.assertEqual(first_read, second_read)

        # Reading the schema (a metadata property) must not change the date.
        dates.schema
        after_schema = dates.last_read_date
        self.assertEqual(first_read, after_schema)

        # inspect() is expected to move the date forward.
        dates.inspect()
        after_inspect = dates.last_read_date
        self.assertLess(after_schema, after_inspect)

        # copy() is expected to move the date forward.
        dates.copy()
        after_copy = dates.last_read_date
        self.assertLess(after_inspect, after_copy)

        # bin_column() is expected to move the date forward as well.
        dates.bin_column('id', [3, 5, 8])
        after_bin = dates.last_read_date
        self.assertLess(after_copy, after_bin)
Ejemplo n.º 3
0
    def test_gc_drop_stale_and_finalize(self):
        """Run drop_stale / finalize_dropped against a named and an unnamed frame.

        The expected statuses after each admin call are asserted for both
        frames and for their error frames.
        """
        source = ta.CsvFile("/datasets/dates.csv",
                            schema=[('start', ta.datetime), ('id', int),
                                    ('stop', ta.datetime), ('color', str)],
                            delimiter=',')
        kept_name = "dates_two"
        if kept_name in ta.get_frame_names():
            ta.drop_frames(kept_name)

        unnamed = ta.Frame(source)
        unnamed_errors = unnamed.get_error_frame()
        self.assertIsNotNone(unnamed_errors)
        self.assertIsNone(unnamed_errors.name)
        kept = ta.Frame(source, name=kept_name)
        kept_errors = kept.get_error_frame()
        self.assertIsNotNone(kept_errors)
        self.assertIsNone(kept_errors.name)

        def expect_statuses(unnamed_status, kept_status):
            # Same assertion order every time: unnamed frame, its error frame,
            # named frame, its error frame.
            self.assertEqual(unnamed_status, unnamed.status)
            self.assertEqual(unnamed_status, unnamed_errors.status)
            self.assertEqual(kept_status, kept.status)
            self.assertEqual(kept_status, kept_errors.status)

        # Default drop_stale: the frames are not old enough, nothing changes.
        admin.drop_stale()
        expect_statuses("ACTIVE", "ACTIVE")

        # Nothing has been dropped, so there is nothing to finalize.
        admin.finalize_dropped()
        expect_statuses("ACTIVE", "ACTIVE")

        # With a tiny age threshold the unnamed frame is expected to be dropped.
        admin.drop_stale("1ms")
        expect_statuses("DROPPED", "ACTIVE")

        # Only the unnamed frame and its error frame are dropped, so only
        # they should be finalized.
        admin.finalize_dropped()
        expect_statuses("FINALIZED", "ACTIVE")
Ejemplo n.º 4
0
    def test_drop_frame_that_does_not_exist(self):
        """Dropping a name that matches no frame must report zero deletions."""
        # A uuid-based name (dashes replaced by underscores) is used as a
        # name that should not belong to any existing frame.
        missing_name = str(uuid.uuid1()).replace('-', '_')

        current_names = ta.get_frame_names()
        self.assertFalse(missing_name in current_names,
                         missing_name + " should not exist in the list of frames")

        dropped_count = ta.drop_frames(missing_name)
        self.assertEqual(0, dropped_count,
                         "drop_frames() should not have deleted any frames")
Ejemplo n.º 5
0
    def test_gc_drop_stale_and_finalize(self):
        """Check frame statuses after each drop_stale / finalize_dropped call.

        One unnamed and one named frame are created from the same CSV file;
        their error frames are tracked alongside them.
        """
        dates_csv = ta.CsvFile("/datasets/dates.csv",
                               schema=[('start', ta.datetime),
                                       ('id', int),
                                       ('stop', ta.datetime),
                                       ('color', str)],
                               delimiter=',')
        named_frame_name = "dates_two"
        if named_frame_name in ta.get_frame_names():
            ta.drop_frames(named_frame_name)

        anonymous = ta.Frame(dates_csv)
        anonymous_errors = anonymous.get_error_frame()
        self.assertIsNotNone(anonymous_errors)
        self.assertIsNone(anonymous_errors.name)
        named = ta.Frame(dates_csv, name=named_frame_name)
        named_errors = named.get_error_frame()
        self.assertIsNotNone(named_errors)
        self.assertIsNone(named_errors.name)

        tracked = (anonymous, anonymous_errors, named, named_errors)

        def check(*expected):
            # One expected status per tracked entity, in the order above.
            for wanted, entity in zip(expected, tracked):
                self.assertEqual(wanted, entity.status)

        # Plain drop_stale: frames are not old enough, so nothing changes.
        admin.drop_stale()
        check("ACTIVE", "ACTIVE", "ACTIVE", "ACTIVE")

        # Nothing was dropped, so nothing is finalized.
        admin.finalize_dropped()
        check("ACTIVE", "ACTIVE", "ACTIVE", "ACTIVE")

        # A very small age makes the unnamed frame eligible for dropping.
        admin.drop_stale("1ms")
        check("DROPPED", "DROPPED", "ACTIVE", "ACTIVE")

        # Only the unnamed frame and its error frame were dropped, so only
        # they get finalized.
        admin.finalize_dropped()
        check("FINALIZED", "FINALIZED", "ACTIVE", "ACTIVE")
Ejemplo n.º 6
0
    def test_frame_drop(self):
        """Verify drop_frames() removes a frame given either the entity or its name."""
        frame_name = "test_frame_drop"
        absent_message = "test_frame_drop should not exist in list of frames"

        print("define csv file")
        csv = ta.CsvFile("/datasets/classification-compute.csv",
                         schema=[('a', str),
                                 ('b', ta.int32),
                                 ('labels', ta.int32),
                                 ('predictions', ta.int32)],
                         delimiter=',',
                         skip_header_lines=1)

        print("create frame")
        frame = ta.Frame(csv, name=frame_name)

        print("dropping frame by entity")
        ta.drop_frames(frame)
        self.assertFalse(frame_name in ta.get_frame_names(), absent_message)

        # Create the frame again so the drop-by-name path has something to drop.
        frame = ta.Frame(csv, name=frame_name)

        print("dropping frame by name")
        self.assertEqual(1, ta.drop_frames(frame_name),
                         "drop_frames() should have deleted one frame")
        # The names must be fetched again here; the list obtained after the
        # first drop predates the re-created frame and would always pass.
        self.assertFalse(frame_name in ta.get_frame_names(), absent_message)
Ejemplo n.º 7
0
    def test_generic_drop_duplicate_items(self):
        """A frame passed several times to drop_frames() is counted once."""
        unique_name = str(uuid.uuid1()).replace('-', '_')
        created = ta.Frame(name=unique_name)

        # The freshly created frame must be listed.
        names_before = ta.get_frame_names()
        self.assertTrue(unique_name in names_before,
                        unique_name + " should exist in the list of frame names")

        # The same frame is given twice as an entity and once by name.
        targets = [created, created, unique_name]
        dropped_count = ta.drop_frames(targets)
        self.assertEqual(1, dropped_count,
                         "drop_frames() should have deleted 1 item")

        # After the drop the name must be gone.
        names_after = ta.get_frame_names()
        self.assertFalse(unique_name in names_after,
                         unique_name + " should not be in the list of frame names")
Ejemplo n.º 8
0
    def test_access_refreshes_frames(self):
        """Verify that data-reading operations, unlike metadata reads, bump last_read_date."""
        dates_csv = ta.CsvFile(
            "/datasets/dates.csv",
            schema=[
                ('start', ta.datetime),
                ('id', int),
                ('stop', ta.datetime),
                ('color', str),
            ],
            delimiter=',')
        target_name = "update_last_read"
        if target_name in ta.get_frame_names():
            ta.drop_frames(target_name)
        # Giving the frame a name saves it from the garbage-collection test
        # that lives in the same suite.
        frame = ta.Frame(dates_csv, name=target_name)

        # Two consecutive reads of the property agree with each other.
        baseline = frame.last_read_date
        repeated = frame.last_read_date
        self.assertEqual(baseline, repeated)

        # A schema (metadata) read leaves the date untouched.
        frame.schema
        post_schema = frame.last_read_date
        self.assertEqual(baseline, post_schema)

        # inspect() should advance the date.
        frame.inspect()
        post_inspect = frame.last_read_date
        self.assertLess(post_schema, post_inspect)

        # copy() should advance the date.
        frame.copy()
        post_copy = frame.last_read_date
        self.assertLess(post_inspect, post_copy)

        # bin_column() should advance the date too.
        frame.bin_column('id', [3, 5, 8])
        post_bin = frame.last_read_date
        self.assertLess(post_copy, post_bin)
Ejemplo n.º 9
0
    def test_generic_drop_duplicate_items(self):
        """drop_frames() must report one deletion when one frame is listed repeatedly."""
        name = str(uuid.uuid1()).replace('-', '_')
        frame = ta.Frame(name=name)

        # Sanity check: the new frame shows up in the list of names.
        exists_message = name + " should exist in the list of frame names"
        self.assertTrue(name in ta.get_frame_names(), exists_message)

        # Entity twice plus the name once: still a single frame.
        duplicates = [frame, frame, name]
        self.assertEqual(1, ta.drop_frames(duplicates),
                         "drop_frames() should have deleted 1 item")

        # The frame must no longer be listed.
        gone_message = name + " should not be in the list of frame names"
        self.assertFalse(name in ta.get_frame_names(), gone_message)
Ejemplo n.º 10
0
    def test_naive_bayes(self):
        """Train a NaiveBayesModel on an HDFS CSV file and check the predicted columns.

        The column list is defined once and reused for the CSV schema, the
        training features and the expected output columns, so the three can
        no longer drift apart. 'GXY' is the label column; every other column
        is used as a feature. The predicted frame is expected to contain all
        input columns followed by 'predicted_class'.

        The earlier schema listing carried commented-out ``ta.ignore`` entries
        for HPI, PDW, ECG, BCJC1, IRDS and WK; they are not part of the schema.
        """
        label_column = 'GXY'
        int_feature_columns = ['Age', 'Sex']
        float_feature_columns = [
            'Height', 'Weight', 'BMI', 'DBP', 'SBP', 'HCT', 'MCV', 'RDW_SD',
            'RDW_CV', 'HGB', 'MCH', 'MCHC', 'RBC', 'WBC', 'NEUT1', 'LYMPH',
            'MONO1', 'EO1', 'BASO1', 'NEUT2', 'MONO2', 'EO2', 'BASO2', 'PLT',
            'MPV', 'P_LCR', 'PCT', 'Lymph_3', 'ESR', 'PH', 'PRO', 'GIu',
            'KET', 'BLD', 'BIL', 'URO', 'NIT', 'SG', 'LEU', 'N_QT', 'VC',
            'OB', 'FBG', 'HBsAg', 'HBsAb', 'HBeAg', 'HBeAb', 'HBcAb', 'TBiL',
            'ALT', 'AST', 'AKP', 'GGT', 'ADA', 'TPO', 'Aib', 'Gib', 'A_G',
            'PA', 'AST_ALT', 'BUN', 'Cr', 'UA', 'CK', 'LDH', 'CK_MB',
            'LDH_MB', 'a_HBD', 'TNI', 'Fg', 'K1', 'AFP', 'CEA', 'Free_PSA',
            'CA125', 'CA19_9', 'NSE', 'CA242', 'B_HCG', 'CA15_3', 'CA50',
            'CA72_4', 'HGH', 'SF', 'QJD', 'DCJC', 'MJJC', 'RUT', 'PGI_PGII',
            'Ca2', 'P3', 'K2', 'Na', 'CI',
        ]
        feature_columns = int_feature_columns + float_feature_columns

        print("define csv file")
        # Label and the first two features are int32; all others are float64.
        schema = ([(label_column, ta.int32)] +
                  [(column, ta.int32) for column in int_feature_columns] +
                  [(column, ta.float64) for column in float_feature_columns])
        csv = ta.CsvFile("hdfs://nameservice1/org/intel/hdfsbroker/userspace/ae6a38d3-191f-494f-86a6-3fe1b2255902/e3327582-f475-4dc9-8efa-96070abb606d/000000_1",
                         schema=schema, skip_header_lines=1)

        print("create frame")
        frame_name = 'ModelNaiveBayesFrame'
        exist_frames = ta.get_frame_names()
        if frame_name in exist_frames:
            print("Frame exists, delete it")
            ta.drop_frames(frame_name)
        train_frame = ta.Frame(csv, frame_name)

        # The message used to announce a RandomForestModel although a
        # NaiveBayesModel is what gets created here.
        print("Initializing a NaiveBayesModel object")
        model_name = 'POCModelNaiveBayesModel'
        exist_models = ta.get_model_names()
        if model_name in exist_models:
            print("Model exist, delete")
            ta.drop_models(model_name)
        naive = ta.NaiveBayesModel(name=model_name)

        print("Training the model on the Frame")
        naive.train(train_frame, label_column, feature_columns, num_classes=2)

        print("Predicting on the Frame")
        output = naive.predict(train_frame)

        expected_columns = [label_column] + feature_columns + ['predicted_class']
        self.assertEqual(output.column_names, expected_columns)
    def testLinearRegression(self):
        """Train a classifier on an HDFS CSV file and check the predicted columns.

        NOTE(review): despite the method name, the model built here is a
        ``ta.RandomForestClassifierModel``; a LinearRegressionModel line was
        only present as commented-out code.

        'GXY' is the label column and every other schema column is a feature.
        The predicted frame is expected to contain all input columns followed
        by 'predicted_class'.
        """
        target = 'GXY'
        integer_features = ['Age', 'Sex']
        real_features = [
            'Height', 'Weight', 'BMI', 'DBP', 'SBP', 'HCT', 'MCV', 'RDW_SD',
            'RDW_CV', 'HGB', 'MCH', 'MCHC', 'RBC', 'WBC', 'NEUT1', 'LYMPH',
            'MONO1', 'EO1', 'BASO1', 'NEUT2', 'MONO2', 'EO2', 'BASO2', 'PLT',
            'MPV', 'P_LCR', 'PCT', 'Lymph_3', 'ESR', 'PH', 'PRO', 'GIu',
            'KET', 'BLD', 'BIL', 'URO', 'NIT', 'SG', 'LEU', 'N_QT', 'VC',
            'OB', 'FBG', 'HBsAg', 'HBsAb', 'HBeAg', 'HBeAb', 'HBcAb', 'TBiL',
            'ALT', 'AST', 'AKP', 'GGT', 'ADA', 'TPO', 'Aib', 'Gib', 'A_G',
            'PA', 'AST_ALT', 'BUN', 'Cr', 'UA', 'CK', 'LDH', 'CK_MB',
            'LDH_MB', 'a_HBD', 'TNI', 'Fg', 'K1', 'AFP', 'CEA', 'Free_PSA',
            'CA125', 'CA19_9', 'NSE', 'CA242', 'B_HCG', 'CA15_3', 'CA50',
            'CA72_4', 'HGH', 'SF', 'QJD', 'DCJC', 'MJJC', 'RUT', 'PGI_PGII',
            'Ca2', 'P3', 'K2', 'Na', 'CI',
        ]
        features = integer_features + real_features

        print("define csv file")
        # The label and the first two features are int32; the rest float64.
        schema = [(target, ta.int32)]
        schema.extend((column, ta.int32) for column in integer_features)
        schema.extend((column, ta.float64) for column in real_features)
        csv = ta.CsvFile("hdfs://nameservice1/org/intel/hdfsbroker/userspace/9bb351fa-7b17-4a81-b3b0-521639c1d473/d342214b-c4c0-4963-aeaf-5adf054e22b6/000000_1",
                         schema=schema, skip_header_lines=1)

        print("create frame")
        frame_name = 'Random_forest_SampleFrame'
        if frame_name in ta.get_frame_names():
            print("Frame exists, delete it")
            ta.drop_frames(frame_name)
        frame = ta.Frame(csv, frame_name)

        print("Initializing a RandomForestModel object")
        model_name = 'POCRandom_forest_SampleModel'
        if model_name in ta.get_model_names():
            print("Model exist, delete")
            ta.drop_models(model_name)
        classifier = ta.RandomForestClassifierModel(name=model_name)

        print("Training the model on the Frame")
        classifier.train(frame, target, features, num_classes=2)

        print("Predicting on the Frame")
        output = classifier.predict(frame)

        self.assertEqual(output.column_names,
                         [target] + features + ['predicted_class'])
Ejemplo n.º 12
0
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

import trustedanalytics as ta
ta.connect()

# Drop every frame currently reported by the server, announcing each one.
for name in ta.get_frame_names():
    print('deleting frame: %s' % name)
    ta.drop_frames(name)

# Build a frame named 'employees_frame' from employees.csv, skipping its
# single header line.
employees_frame = ta.Frame(
    ta.CsvFile(
        "employees.csv",
        schema=[
            ('Employee', str),
            ('Manager', str),
            ('Title', str),
            ('Years', ta.int64),
        ],
        skip_header_lines=1),
    'employees_frame')

employees_frame.inspect()

#A bipartite graph
#Notice that this is a funny example since managers are also employees!
#Presumably Steve the manager and Steve the employee are the same person

#Option 1
                                                "." + parameters.table_name))
else:
    raise AtkTestException(
        "Column was NOT added to table {}".format(parameters.database_name +
                                                  "." + parameters.table_name))

print("\n------------ Test: drop_column method ------------")
frame_select.drop_columns('test_column')

frame_drop_columns_list = frame_select.column_names
frame_drop_columns_len = len(frame_drop_columns_list)

# The drop succeeded only if the column count matches column_names_len and
# 'test_column' is no longer listed; anything else is a failure.
if frame_drop_columns_len != column_names_len or 'test_column' in frame_drop_columns_list:
    raise AtkTestException(
        "Column was NOT dropped in table {}".format(
            parameters.database_name + "." + parameters.table_name))

print("Column 'test_column' was dropped in table {}".format(
    parameters.database_name + "." + parameters.table_name))

print("\n------------ Test: drop_frames method ----------------")
ta.drop_frames([frame_select, frame_group])
# Compare the status case-insensitively: with an exact match against
# 'Active', an upper-case status value such as "ACTIVE" would always count as
# "not active" and this check could never fail.
if (str(frame_select.status).upper() != 'ACTIVE' and
        str(frame_group.status).upper() != 'ACTIVE'):
    print("Frames {}, {} removed successfully".format(frame_select,
                                                      frame_group))
else:
    raise AtkTestException(
        "Frames {}, {} NOT deleted. Status of both frames should be Deleted ".
        format(frame_select, frame_group))
Ejemplo n.º 14
0
    def test_drop_frame_that_does_not_exist(self):
        """drop_frames() on an unknown frame name must return 0."""
        # uuid-based name with dashes turned into underscores.
        unknown_name = str(uuid.uuid1()).replace('-', '_')

        listed = unknown_name in ta.get_frame_names()
        self.assertFalse(listed,
                         unknown_name + " should not exist in the list of frames")

        deleted = ta.drop_frames(unknown_name)
        self.assertEqual(0, deleted,
                         "drop_frames() should not have deleted any frames")
Ejemplo n.º 15
0
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import trustedanalytics as ta
ta.connect()

# Drop every frame currently reported by the server, announcing each one.
for name in ta.get_frame_names():
    print('deleting frame: %s' % name)
    ta.drop_frames(name)

# Build a frame named 'employees_frame' from employees.csv, skipping its
# single header line.
employees_frame = ta.Frame(
    ta.CsvFile(
        "employees.csv",
        schema=[
            ('Employee', str),
            ('Manager', str),
            ('Title', str),
            ('Years', ta.int64),
        ],
        skip_header_lines=1),
    'employees_frame')

employees_frame.inspect()

# A bipartite graph.
# Notice that this is a funny example since managers are also employees!
# Presumably Steve the manager and Steve the employee are the same person.

# Option 1

# One vertex type ('Employee') and an undirected 'worksunder' edge type that
# connects Employee vertices to Employee vertices.
graph = ta.Graph()
graph.define_vertex_type('Employee')
graph.define_edge_type('worksunder', 'Employee', 'Employee', directed=False)
Ejemplo n.º 16
0
                          ], skip_header_lines=1);


# In[27]:

# Create the frame, dropping any existing frame of the same name first.
frame_name = 'myframe'
exist_frames = ta.get_frame_names()
if frame_name in exist_frames:
    print("Frame exists, delete it")
    ta.drop_frames(frame_name)

my_frame = ta.Frame(csv, frame_name)
my_frame.inspect(21)


# In[31]:

#feature classify

def transformation_DBP(row):
    #<60一组,60~90每10mmHg一组,≥90一组
    dbp = row.DBP
    if dbp < 60.0:
        dbp = 1;
    if dbp >= 60.0 and dbp < 70.0: