Python PAIContext Examples

Programming Language: Python

Namespace/Package Name: odps.pai.context

Class/Type: PAIContext

Examples at hotexamples.com: 2

Python PAIContext - 2 examples found. These are the top-rated real-world Python examples of odps.pai.context.PAIContext, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

cleanup(1)

odps_data(1)

set_config(1)

Example #1

Show file

File: test_pai_classifiers.py Project: fivejjs/aliyun-odps-python-sdk

 def setUp(self):
     """Prepare the fixture used by every test.

     Runs the base-class ``setUp`` first, then creates the test table and
     finally builds a ``PAIContext`` from ``self.odps``.
     """
     super(TestPaiClassifiers, self).setUp()
     self.create_test_table()
     # Build the context from the ODPS entry object and keep it on the
     # test instance for later use.
     context = PAIContext(self.odps)
     self.pai_context = context

Example #2

Show file

File: test_pai_classifiers.py Project: fivejjs/aliyun-odps-python-sdk

class TestPaiClassifiers(TestBase):
    """Tests that train and apply PAI classifiers on the ionosphere table.

    Every test loads ``IONOSPHERE_TABLE`` through a ``PAIContext``, splits it
    60/40, trains a classifier on the first part using ``class`` as the label
    field, stores the model as ``testOutModel`` and stores the predictions for
    the second part as ``testOut``.
    """

    def setUp(self):
        """Create the test table and a fresh ``PAIContext`` for each test."""
        super(TestPaiClassifiers, self).setUp()
        self.create_test_table()
        self.pai_context = PAIContext(self.odps)

    def tearDown(self):
        """Clean up the context and drop the test table."""
        self.pai_context.cleanup()
        # Use execute_sql, as create_test_table does, rather than run_sql.
        # Per the PyODPS documentation run_sql only submits the statement, so
        # the drop could still be pending when the next test recreates the
        # table.
        self.odps.execute_sql("drop table if exists " + IONOSPHERE_TABLE)
        super(TestPaiClassifiers, self).tearDown()

    def create_test_table(self):
        """(Re)create ``IONOSPHERE_TABLE`` and upload the data file into it.

        The table has 34 ``double`` columns named ``a01`` .. ``a34`` followed
        by a ``bigint`` column named ``class``. Each non-blank line of
        ``IONOSPHERE_FILE_PATH`` is split on commas and uploaded as one record.
        """
        fields = ','.join('a%02d double' % i for i in range(1, 35)) + ', class bigint'
        self.odps.execute_sql("drop table if exists " + IONOSPHERE_TABLE)
        self.odps.execute_sql("create table %s (%s)" % (IONOSPHERE_TABLE, fields))

        upload_ss = self.tunnel.create_upload_session(IONOSPHERE_TABLE)
        writer = upload_ss.open_record_writer(0)

        # The context manager guarantees the data file is closed even when a
        # line fails to parse.
        with open(IONOSPHERE_FILE_PATH, 'r') as data_file:
            for line in data_file:
                if not line.strip():
                    # Skip blank lines (e.g. a trailing newline at the end of
                    # the file) instead of failing on float('').
                    continue
                rec = upload_ss.new_record()
                for i, raw in enumerate(line.split(',')):
                    # Convert according to the column type reported by the
                    # record: 'double' columns become floats, anything else
                    # is parsed as an integer.
                    if rec._columns[i].type == 'double':
                        value = float(raw)
                    else:
                        value = int(raw)
                    rec.set(i, value)
                writer.write(rec)
        writer.close()
        upload_ss.commit([0])

    def _train_and_predict(self, classifier_factory, mock=True):
        """Run the shared split / train / store / predict flow.

        :param classifier_factory: zero-argument callable returning the
            configured classifier; it is called after the dataset has been
            split and labelled, matching the order the tests used originally.
        :param mock: value written to the ``execution.mock`` config entry.
        :return: the prediction result for the second part of the split.
        """
        self.pai_context.set_config("execution.mock", mock)

        dataset = self.pai_context.odps_data(IONOSPHERE_TABLE)
        parts = dataset.split(0.6)

        labeled_data = parts[0].set_label_field("class")
        model = classifier_factory().train(labeled_data)
        model.store_odps("testOutModel")

        predicted = model.predict(parts[1])
        # store_odps is an operational node which will trigger execution of the flow
        predicted.store_odps("testOut")
        return predicted

    def test_mock_logistic_regression(self):
        """Logistic regression with ``execution.mock`` enabled."""
        self._train_and_predict(
            lambda: LogisticRegression(epsilon=0.001).set_max_iter(50))

    def test_mock_xgboost(self):
        """Xgboost with ``execution.mock`` enabled."""
        self._train_and_predict(lambda: Xgboost(silent=1).set_eta(0.3))

    def test_random_forests(self):
        """Random forests with ``execution.mock`` enabled."""
        self._train_and_predict(
            lambda: RandomForests(tree_num=10).set_max_tree_deep(10))

    def test_gbdt_lr(self):
        """GBDT-LR with ``execution.mock`` enabled."""
        self._train_and_predict(
            lambda: GBDTLR(tree_count=500).set_shrinkage(0.05))

    def test_linear_svm(self):
        """Linear SVM with ``execution.mock`` enabled."""
        self._train_and_predict(lambda: LinearSVM(epsilon=0.001).set_cost(1))

    def test_naive_bayes(self):
        """Naive Bayes with ``execution.mock`` enabled."""
        self._train_and_predict(lambda: NaiveBayes())

    def test_logistic_regression(self):
        """Logistic regression with ``execution.mock`` disabled, plus ROC check."""
        predicted = self._train_and_predict(
            lambda: LogisticRegression(epsilon=0.001).set_max_iter(50),
            mock=False)

        fpr, tpr, thresh = roc_curve(predicted, 1, "class")
        # The three ROC arrays must all have the same length.
        assert len(fpr) == len(tpr)
        assert len(thresh) == len(fpr)