def test_tree_depth(self):
    """Run ``TreeDepth`` on the tree-graph edge table and persist the result.

    The edge table is created through ``create_tree_graph``, wrapped in a
    ``DataFrame`` whose ``from_vertex`` / ``to_vertex`` roles point at
    ``flow_out_id`` / ``flow_in_id``, and transformed with ``TreeDepth``.
    A case built by ``gen_check_params_case`` from the expected parameter
    dict is attached before persisting.
    """
    self.create_tree_graph(TREE_GRAPH_EDGE_TABLE)

    edge_table = self.odps.get_table(TREE_GRAPH_EDGE_TABLE)
    edge_df = DataFrame(edge_table).roles(
        from_vertex='flow_out_id', to_vertex='flow_in_id')

    depth_df = TreeDepth().transform(edge_df)

    # NOTE(review): presumably these are the parameters the generated node
    # is expected to carry -- confirm against MLTestBase.gen_check_params_case.
    expected_params = {
        'outputTableName': tn('pyodps_test_ml_tree_depth'),
        'fromVertexCol': 'flow_out_id',
        'workerMem': '4096',
        'inputEdgeTableName': tn('pyodps_test_ml_tree_graph_edge'),
        'toVertexCol': 'flow_in_id',
        'splitSize': '64',
    }
    checked_df = depth_df._add_case(
        self.gen_check_params_case(expected_params))
    checked_df.persist(TREE_DEPTH_TABLE)
def test_quantile(self):
    """Call ``quantile`` on the ionosphere table with ``options.ml.dry_run`` on.

    The frame uses ``class`` as its label role.  A case built by
    ``gen_check_params_case`` from the expected parameter dict is passed
    through ``_cases``, and the result is persisted to
    ``IONOSPHERE_QUANTILE_TABLE``.
    """
    options.ml.dry_run = True

    source_table = self.odps.get_table(IONOSPHERE_TABLE)
    labeled_df = DataFrame(source_table).roles(label='class')

    # Column names a01 .. a34, comma separated.
    column_names = ['a%02d' % idx for idx in range(1, 35)]
    expected_params = {
        'inputTableName': tn('pyodps_test_ml_ionosphere'),
        'outputTableName': tn('pyodps_test_ml_iono_quantile'),
        'colName': ','.join(column_names),
        'N': '100',
    }
    params_case = self.gen_check_params_case(expected_params)

    quantile_df = quantile(labeled_df, _cases=params_case)
    quantile_df.persist(IONOSPHERE_QUANTILE_TABLE)
def test_quantile(self):
    """Call ``quantile`` on the ionosphere table with ``options.runner.dry_run`` on.

    The frame uses ``class`` as its label role.  A case built by
    ``gen_check_params_case`` from the expected parameter dict is passed
    through ``_cases``, and the result is persisted to
    ``IONOSPHERE_QUANTILE_TABLE``.
    """
    options.runner.dry_run = True

    source_table = self.odps.get_table(IONOSPHERE_TABLE)
    labeled_df = DataFrame(source_table).roles(label='class')

    # Column names a01 .. a34, comma separated.
    column_names = ['a%02d' % idx for idx in range(1, 35)]
    expected_params = {
        'inputTableName': tn('pyodps_test_ml_ionosphere'),
        'outputTableName': tn('pyodps_test_ml_iono_quantile'),
        'colName': ','.join(column_names),
        'N': '100',
    }
    params_case = self.gen_check_params_case(expected_params)

    quantile_df = quantile(labeled_df, _cases=params_case)
    quantile_df.persist(IONOSPHERE_QUANTILE_TABLE)
def test_doc2vec(self):
    """Transform ``self.df`` with ``Doc2Vec`` and persist the document output.

    ``Doc2Vec().transform`` is unpacked into three results; only the second
    (the document frame) is used.  A case built by ``gen_check_params_case``
    is attached to it before it is persisted to ``DOC2VEC_DOC_TABLE``.
    """
    outputs = Doc2Vec().transform(self.df)
    # Three-way unpacking is kept so a differently sized result still fails.
    _word_result, doc_result, _ = outputs

    expected_params = {
        'minCount': '5',
        'docColName': 'content',
        'hs': '1',
        'inputTableName': tn('pyodps_test_ml_corpus'),
        'negative': '0',
        'layerSize': '100',
        'sample': '0',
        'randomWindow': '1',
        'window': '5',
        'docIdColName': 'id',
        'iterTrain': '1',
        'alpha': '0.025',
        'cbow': '0',
        'outVocabularyTableName': 'tmp_pyodps__doc2_vec',
        'outputWordTableName': 'tmp_pyodps__doc2_vec',
        'outputDocTableName': tn('pyodps_test_ml_doc2vec_doc_result'),
    }
    params_case = self.gen_check_params_case(expected_params)

    # The return value of _add_case is intentionally not used, as in the
    # original; the frame itself is persisted.
    doc_result._add_case(params_case)
    doc_result.persist(DOC2VEC_DOC_TABLE)
def test_semantic_vector_distance(self):
    """Apply ``semantic_vector_distance`` to ``self.df`` and persist the result.

    A case built by ``gen_check_params_case`` from the expected parameter
    dict is attached to the resulting frame, which is then persisted to
    ``SEMANTIC_DIST_TABLE``.
    """
    distance_df = semantic_vector_distance(self.df)

    expected_params = {
        'topN': '5',
        'outputTableName': tn('pyodps_test_ml_semantic_dist_result'),
        'distanceType': 'euclidean',
        'inputTableName': tn('pyodps_test_ml_corpus'),
    }
    params_case = self.gen_check_params_case(expected_params)

    distance_df._add_case(params_case)
    distance_df.persist(SEMANTIC_DIST_TABLE)
def test_mat_pearson(self):
    """Call ``matrix_pearson`` on the ionosphere table with ``options.runner.dry_run`` on.

    The frame uses ``class`` as its label role; a case built by
    ``gen_check_params_case`` from the expected parameter dict is passed
    through ``_cases``.  The return value of ``matrix_pearson`` is not used.
    """
    options.runner.dry_run = True

    source_table = self.odps.get_table(IONOSPHERE_TABLE)
    labeled_df = DataFrame(source_table).roles(label='class')

    # Column names a01 .. a34, comma separated.
    selected_columns = ['a%02d' % idx for idx in range(1, 35)]
    expected_params = {
        'outputTableName': 'tmp_pyodps_ml_matrix_pearson_0_2_res',
        'selectedColNames': ','.join(selected_columns),
        'inputTableName': tn('pyodps_test_ml_ionosphere'),
    }
    params_case = self.gen_check_params_case(expected_params)

    matrix_pearson(labeled_df, _cases=params_case)
def test_chisquare(self):
    """Call ``chi_square`` on the ionosphere table with ``options.runner.dry_run`` on.

    ``x_col`` is given as the column expression ``df.a01`` and ``y_col`` as
    the column name ``'class'``; a case built by ``gen_check_params_case``
    from the expected parameter dict is passed through ``_cases``.
    """
    options.runner.dry_run = True

    ionosphere_df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
    x_column = ionosphere_df.a01

    expected_params = {
        'yColName': 'class',
        'xColName': 'a01',
        'outputDetailTableName': 'tmp_pyodps_ml_chi_square_0_1_res_2',
        'outputTableName': 'tmp_pyodps_ml_chi_square_0_1_res_1',
        'inputTableName': tn('pyodps_test_ml_ionosphere'),
    }
    params_case = self.gen_check_params_case(expected_params)

    chi_square(ionosphere_df, x_col=x_column, y_col='class',
               _cases=params_case)
def test_chisquare(self):
    """Call ``chi_square`` on the ionosphere table with ``options.ml.dry_run`` on.

    ``x_col`` is given as the column expression ``df.a01`` and ``y_col`` as
    the column name ``'class'``; a case built by ``gen_check_params_case``
    from the expected parameter dict is passed through ``_cases``.
    """
    options.ml.dry_run = True

    ionosphere_df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE))
    x_column = ionosphere_df.a01

    expected_params = {
        'yColName': 'class',
        'xColName': 'a01',
        'outputDetailTableName': 'tmp_pyodps__chi_square',
        'outputTableName': 'tmp_pyodps__chi_square',
        'inputTableName': tn('pyodps_test_ml_ionosphere'),
    }
    params_case = self.gen_check_params_case(expected_params)

    chi_square(ionosphere_df, x_col=x_column, y_col='class',
               _cases=params_case)
def test_mat_pearson(self):
    """Call ``matrix_pearson`` on the ionosphere table with ``options.ml.dry_run`` on.

    The frame uses ``class`` as its label role; a case built by
    ``gen_check_params_case`` from the expected parameter dict is passed
    through ``_cases``.  The return value of ``matrix_pearson`` is not used.
    """
    options.ml.dry_run = True

    source_table = self.odps.get_table(IONOSPHERE_TABLE)
    labeled_df = DataFrame(source_table).roles(label='class')

    # Column names a01 .. a34, comma separated.
    selected_columns = ['a%02d' % idx for idx in range(1, 35)]
    expected_params = {
        'outputTableName': 'tmp_pyodps__matrix_pearson',
        'selectedColNames': ','.join(selected_columns),
        'inputTableName': tn('pyodps_test_ml_ionosphere'),
    }
    params_case = self.gen_check_params_case(expected_params)

    matrix_pearson(labeled_df, _cases=params_case)
def test_tree_depth(self):
    """Run ``TreeDepth`` on the tree-graph edge table and persist the result.

    Steps, in order: create the edge table with ``create_tree_graph``; wrap
    it in a ``DataFrame`` with ``flow_out_id`` as ``from_vertex`` and
    ``flow_in_id`` as ``to_vertex``; transform it with ``TreeDepth``; attach
    a case built by ``gen_check_params_case``; persist to
    ``TREE_DEPTH_TABLE``.
    """
    self.create_tree_graph(TREE_GRAPH_EDGE_TABLE)

    graph_df = DataFrame(self.odps.get_table(TREE_GRAPH_EDGE_TABLE))
    graph_df = graph_df.roles(
        from_vertex='flow_out_id', to_vertex='flow_in_id')

    transformed = TreeDepth().transform(graph_df)

    expected_params = {
        'outputTableName': tn('pyodps_test_ml_tree_depth'),
        'fromVertexCol': 'flow_out_id',
        'workerMem': '4096',
        'inputEdgeTableName': tn('pyodps_test_ml_tree_graph_edge'),
        'toVertexCol': 'flow_in_id',
        'splitSize': '64',
    }
    params_case = self.gen_check_params_case(expected_params)

    result_df = transformed._add_case(params_case)
    result_df.persist(TREE_DEPTH_TABLE)
# KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. from __future__ import print_function import logging from odps.df import DataFrame from odps.ml.classifiers import LogisticRegression from odps.ml.cross_validation import cross_val_score from odps.ml.tests.base import MLTestBase, tn, ci_skip_case logger = logging.getLogger(__name__) IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') class TestCrossValidation(MLTestBase): def setUp(self): super(TestCrossValidation, self).setUp() self.create_ionosphere(IONOSPHERE_TABLE) self.df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE)).roles(label='class') def tearDown(self): super(TestCrossValidation, self).tearDown() @ci_skip_case def test_logistic_regression(self): lr = LogisticRegression(epsilon=0.001).set_max_iter(50) print(cross_val_score(lr, self.df))
from __future__ import print_function from odps.df import DataFrame from odps.config import options from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.regression import * from odps.ml.feature import * from odps.ml.statistics import * from odps.ml.tests.base import MLTestBase, tn, otm, ci_skip_case from odps.ml.metrics import * import logging logger = logging.getLogger(__name__) IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') XGBOOST_OUT_TABLE = tn('pyodps_test_xgboost_out') GBDT_OUT_TABLE = tn('pyodps_test_gbdt_out') LINEAR_REGRESSION_OUT_TABLE = tn('pyodps_test_linear_reg_out') LINEAR_SVR_OUT_TABLE = tn('pyodps_test_linear_svr_out') LASSO_OUT_TABLE = tn('pyodps_test_lasso_out') RIDGE_OUT_TABLE = tn('pyodps_test_ridge_out') MODEL_NAME = tn('pyodps_test_out_model') class TestMLRegression(MLTestBase): def setUp(self): super(TestMLRegression, self).setUp() self.create_ionosphere(IONOSPHERE_TABLE)
# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import textwrap from odps import options from odps.df import DataFrame from odps.ml.tests.base import MLTestBase, tn IRIS_TABLE = tn('pyodps_test_ml_iris') TEMP_TABLE_1_NAME = tn('pyodps_test_mixin_test_table1') TEMP_TABLE_2_NAME = tn('pyodps_test_mixin_test_table2') def _df_roles(df): return dict( (f.name, ','.join(r.name for r in f.role)) for f in df._ml_fields) def _df_continuity(df): return dict((f.name, f.continuity.name) for f in df._ml_fields) def _df_key_value(df): return dict((f.name, repr(f.kv_config) if f.kv_config else '')
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import textwrap

from odps import options
from odps.df import DataFrame
from odps.runner import adapter_from_df
from odps.ml.adapter import merge_data
from odps.ml.tests.base import MLTestBase, tn

IRIS_TABLE = tn("pyodps_test_ml_iris")
TEMP_TABLE_1_NAME = tn("pyodps_test_mixin_test_table1")
TEMP_TABLE_2_NAME = tn("pyodps_test_mixin_test_table2")


def _df_roles(df):
    """Map each adapter field name of ``df`` to its comma-joined role names."""
    roles_by_field = {}
    for field in adapter_from_df(df).fields:
        roles_by_field[field.name] = ",".join(
            role.name for role in field.role)
    return roles_by_field


def _df_continuity(df):
    """Map each adapter field name of ``df`` to the name of its continuity."""
    continuity_by_field = {}
    for field in adapter_from_df(df).fields:
        continuity_by_field[field.name] = field.continuity.name
    return continuity_by_field


def _df_key_value(df):
    """Map each adapter field name of ``df`` to ``repr`` of its ``kv_config``.

    Fields whose ``kv_config`` is falsy map to the empty string.
    """
    kv_by_field = {}
    for field in adapter_from_df(df).fields:
        if field.kv_config:
            kv_by_field[field.name] = repr(field.kv_config)
        else:
            kv_by_field[field.name] = ""
    return kv_by_field
# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import json from odps import utils from odps.ml import utils as ml_utils from odps.ml.tests.base import MLTestBase, tn TEST_LR_MODEL_NAME = tn('pyodps_test_lr_model') TEST_TABLE_MODEL_NAME = tn('pyodps_table_model') TEST_TEMP_TABLE_MODEL_NAME = tn(utils.TEMP_TABLE_PREFIX + 'table_model') IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') class Test(MLTestBase): def testNonTemp(self): model_comment = dict(key='value') model_table_name1 = ml_utils.build_model_table_name(TEST_TABLE_MODEL_NAME, 'st1') self.odps.execute_sql('drop table if exists {0}'.format(model_table_name1)) self.odps.execute_sql('create table if not exists {0} (col1 string) comment \'{1}\' lifecycle 1'.format( model_table_name1, utils.escape_odps_string(json.dumps(model_comment)) )) model_table_name2 = ml_utils.build_model_table_name(TEST_TABLE_MODEL_NAME, 'st2')
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from odps import options, DataFrame from odps.ml.feature import * from odps.ml.expr import PmmlModel from odps.ml.tests.base import MLTestBase, tn TEST_LR_MODEL_NAME = tn('pyodps_test_lr_model') IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') SELECT_FEATURE_OUTPUT_TABLE = tn('pyodps_test_ml_select_feature_output') class Test(MLTestBase): def setUp(self): super(Test, self).setUp() self.create_test_pmml_model(TEST_LR_MODEL_NAME) self.create_ionosphere(IONOSPHERE_TABLE) self.df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE)).label_field('class') self.model = PmmlModel(_source_data=self.odps.get_offline_model(TEST_LR_MODEL_NAME)) options.ml.dry_run = True def test_rf_importance(self): rf_importance(self.df, self.model, core_num=1, core_mem=1024, _cases=self.gen_check_params_case({
# Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. from __future__ import print_function from odps.df import DataFrame from odps.config import options from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.text import * from odps.ml.tests.base import MLTestBase, tn CORPUS_TABLE = tn('pyodps_test_ml_corpus') WORD_TRIPLE_TABLE = tn('pyodps_test_ml_word_triple') SPLITED_TABLE = tn('pyodps_test_ml_splited_text') NOISE_TABLE = tn('pyodps_test_ml_noises') W2V_TABLE = tn('pyodps_test_ml_w2v') TFIDF_TABLE = tn('pyodps_test_ml_tf_idf') LDA_TABLE = tn('pyodps_test_ml_plda') STR_COMP_TABLE = tn('pyodps_test_ml_str_comp') COMP_RESULT_TABLE = tn('pyodps_test_ml_str_comp_result') TOP_N_TABLE = tn('pyodps_test_ml_top_n_result') FILTERED_WORDS_TABLE = tn('pyodps_test_ml_filtered_words_result') KW_EXTRACTED_TABLE = tn('pyodps_test_ml_kw_extracted_result') TEXT_SUMMARIZED_TABLE = tn('pyodps_test_ml_text_summarized_result') COUNT_NGRAM_TABLE = tn('pyodps_test_ml_count_ngram_result')
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function from odps.df import DataFrame from odps.config import options from odps.ml.clustering import * from odps.ml.metrics import * from odps.ml.tests.base import MLTestBase, tn, ci_skip_case import logging logger = logging.getLogger(__name__) IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') IONOSPHERE_CLUSTER_LABEL_TABLE = tn('pyodps_test_ml_iono_cluster_label') IONOSPHERE_CLUSTER_MODEL = tn('pyodps_test_ml_kmeans_model') class TestMLClustering(MLTestBase): def setUp(self): super(TestMLClustering, self).setUp() self.create_ionosphere(IONOSPHERE_TABLE) @ci_skip_case def test_kmeans(self): self.delete_table(IONOSPHERE_CLUSTER_LABEL_TABLE) self.delete_offline_model(IONOSPHERE_CLUSTER_MODEL) df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE)) labeled, model = KMeans(center_count=3).transform(
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function from odps.df import DataFrame from odps.config import options from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.text import * from odps.ml.tests.base import MLTestBase, tn CORPUS_TABLE = tn('pyodps_test_ml_corpus') WORD_TRIPLE_TABLE = tn('pyodps_test_ml_word_triple') SPLITED_TABLE = tn('pyodps_test_ml_splited_text') NOISE_TABLE = tn('pyodps_test_ml_noises') W2V_TABLE = tn('pyodps_test_ml_w2v') TFIDF_TABLE = tn('pyodps_test_ml_tf_idf') LDA_TABLE = tn('pyodps_test_ml_plda') STR_COMP_TABLE = tn('pyodps_test_ml_str_comp') COMP_RESULT_TABLE = tn('pyodps_test_ml_str_comp_result') TOP_N_TABLE = tn('pyodps_test_ml_top_n_result') FILTERED_WORDS_TABLE = tn('pyodps_test_ml_filtered_words_result') KW_EXTRACTED_TABLE = tn('pyodps_test_ml_kw_extracted_result') TEXT_SUMMARIZED_TABLE = tn('pyodps_test_ml_text_summarized_result') COUNT_NGRAM_TABLE = tn('pyodps_test_ml_count_ngram_result') DOC2VEC_DOC_TABLE = tn('pyodps_test_ml_doc2vec_doc_result') SEMANTIC_DIST_TABLE = tn('pyodps_test_ml_semantic_dist_result')
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function from odps.df import DataFrame from odps.config import options from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.statistics import * from odps.ml.tests.base import MLTestBase, tn IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') IRIS_TABLE = tn('pyodps_test_ml_iris') IONOSPHERE_PRINCOMP_TABLE = tn('pyodps_test_ml_iono_princomp') IONOSPHERE_FEATURE_STATS = tn('pyodps_test_ml_iono_feature_stats') IONOSPHERE_REPLACE_WOE = tn('pyodps_test_ml_iono_replace_woe') IONOSPHERE_QUANTILE_TABLE = tn('pyodps_test_ml_iono_quantile') class TestStatistics(MLTestBase): def setUp(self): super(TestStatistics, self).setUp() self.create_ionosphere(IONOSPHERE_TABLE) def test_histograms(self): options.ml.dry_run = True
# # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. from __future__ import print_function from odps.config import options from odps.df import DataFrame from odps.ml.recommend import * from odps.ml.tests.base import MLTestBase, tn USER_ITEM_TABLE = tn("pyodps_test_ml_user_item_table") USER_ITEM_PAYLOAD_TABLE = tn("pyodps_test_ml_user_item_payload_table") ASSOC_RESULT_TABLE = tn("pyodps_test_ml_assoc_result") ETREC_RESULT_TABLE = tn("pyodps_test_ml_etrec_result") ALSCF_RESULT_TABLE = tn("pyodps_test_ml_als_cf_result") ALSCF_RECOMMEND_TABLE = tn("pyodps_test_ml_als_cf_rec") SVDCF_RESULT_TABLE = tn("pyodps_test_ml_svd_cf_result") SVDCF_RECOMMEND_TABLE = tn("pyodps_test_ml_svd_cf_rec") class TestRecommend(MLTestBase): def setUp(self): super(TestRecommend, self).setUp() options.runner.dry_run = True def test_etrec(self):
# under the License. from __future__ import print_function import logging from odps.config import options from odps.df import DataFrame from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.classifiers import * from odps.ml.preprocess import * from odps.ml.tests.base import MLTestBase, tn, ci_skip_case logger = logging.getLogger(__name__) IONOSPHERE_TABLE_ONE_PART = tn(TEMP_TABLE_PREFIX + 'ionosphere_one_part') IONOSPHERE_TABLE_TWO_PARTS = tn(TEMP_TABLE_PREFIX + 'ionosphere_two_parts') IONOSPHERE_NORMALIZED_TABLE = tn(TEMP_TABLE_PREFIX + 'iono_normalized_part') TEST_OUTPUT_TABLE_NAME = tn(TEMP_TABLE_PREFIX + 'out_parted') MODEL_NAME = tn('pyodps_test_out_model') class TestPartitions(MLTestBase): def setUp(self): super(TestPartitions, self).setUp() def tearDown(self): super(TestPartitions, self).tearDown() def test_logistic_one_part_input(self):
# # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. import json from odps import utils from odps.ml import utils as ml_utils from odps.ml.models import TablesModel, PmmlModel, list_tables_model from odps.ml.tests.base import MLTestBase, tn TEST_LR_MODEL_NAME = tn("pyodps_test_lr_model") TEST_TABLE_MODEL_NAME = tn("pyodps_table_model") IONOSPHERE_TABLE = tn("pyodps_test_ml_ionosphere") class TestBaseModel(MLTestBase): def test_odps_model(self): self.create_test_pmml_model(TEST_LR_MODEL_NAME) model = PmmlModel(self.odps.get_offline_model(TEST_LR_MODEL_NAME)) self.assertEqual(model._bind_node.code_name, "pmml_input") self.assertEqual(model._bind_node.parameters["modelName"], TEST_LR_MODEL_NAME) def test_tables_model(self): model_comment = dict(className="odps.ml.models.TablesModel", key="value") model_table_name1 = "".join(
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function from collections import namedtuple from odps.df import DataFrame from odps.ml.text import * from odps.ml.classifiers import * from odps.ml.pipeline import Pipeline, FeatureUnion from odps.ml.pipeline.core import PipelineStep from odps.ml.tests.base import MLTestBase, tn, ci_skip_case CORPUS_TABLE = tn('pyodps_test_ml_corpus') W2V_TABLE = tn('pyodps_test_ml_w2v') TFIDF_TABLE = tn('pyodps_test_ml_tf_idf') LDA_TABLE = tn('pyodps_test_ml_plda') IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') IONOSPHERE_LR_MODEL = tn('pyodps_test_out_model') class MockTransformStep(PipelineStep): def __init__(self, test_cls, step_name, action=None, params=None, outputs=None):
# limitations under the License. from __future__ import print_function import logging from odps.df import DataFrame from odps.config import options from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.classifiers import * from odps.ml.metrics.classification import roc_curve from odps.ml.tests.base import MLTestBase, tn, ci_skip_case logger = logging.getLogger(__name__) IRIS_KV_TABLE = tn('pyodps_test_ml_iris_sparse') LR_TEST_TABLE = tn('pyodps_lr_output_table') XGBOOST_TEST_TABLE = tn('pyodps_xgboost_output_table') MODEL_NAME = tn('pyodps_test_out_model') class TestSparseClassifiers(MLTestBase): def setUp(self): super(TestSparseClassifiers, self).setUp() self.create_iris_kv(IRIS_KV_TABLE) self.df = DataFrame(self.odps.get_table(IRIS_KV_TABLE)).label_field('category').key_value('content') def tearDown(self): super(TestSparseClassifiers, self).tearDown()
# See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function import logging from odps.config import options from odps.df import DataFrame from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.classifiers import * from odps.ml.tests.base import MLTestBase, tn logger = logging.getLogger(__name__) IONOSPHERE_TABLE_ONE_PART = tn(TEMP_TABLE_PREFIX + 'ionosphere_one_part') IONOSPHERE_TABLE_TWO_PARTS = tn(TEMP_TABLE_PREFIX + 'ionosphere_two_parts') TEST_OUTPUT_TABLE_NAME = tn(TEMP_TABLE_PREFIX + 'out_parted') MODEL_NAME = tn('pyodps_test_out_model') class TestPartitions(MLTestBase): def setUp(self): super(TestPartitions, self).setUp() def tearDown(self): super(TestPartitions, self).tearDown() def test_logistic_one_part_input(self): options.ml.dry_run = True
from __future__ import print_function import logging from odps.config import options from odps.df import DataFrame from odps.ml.classifiers import * from odps.ml.feature import * from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.metrics import roc_curve, roc_auc_score, confusion_matrix from odps.ml.tests.base import MLTestBase, tn, ci_skip_case logger = logging.getLogger(__name__) IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') LR_TEST_TABLE = tn('pyodps_lr_output_table') XGBOOST_TEST_TABLE = tn('pyodps_xgboost_output_table') RANDOM_FORESTS_TEST_TABLE = tn('pyodps_random_forests_output_table') GBDT_LR_TEST_TABLE = tn('pyodps_gbdt_lr_output_table') LINEAR_SVM_TEST_TABLE = tn('pyodps_linear_svm_output_table') NAIVE_BAYES_TEST_TABLE = tn('pyodps_naive_bayes_output_table') KNN_TEST_TABLE = tn('pyodps_knn_output_table') MODEL_NAME = tn('pyodps_test_out_model') class Test(MLTestBase): def setUp(self): super(Test, self).setUp()
# software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. import functools from odps.df import DataFrame from odps.df.expr.expressions import CollectionExpr from odps.ml.tests.base import MLTestBase, tn from odps.runner import DFAdapter, adapter_from_df, PartitionSelection from odps.ml.adapter.op import * from odps.ml.utils import KVConfig TEMP_TABLE_1_NAME = tn('pyodps_test_ops_test_table1') TEMP_TABLE_2_NAME = tn('pyodps_test_ops_test_table2') class TestOp(MLTestBase): def test_base_methods(self): fields = [MLField('f%02d' % fid, 'string', FieldRole.FEATURE) for fid in range(5)] fields_set_singleton = list(DFAdapterOperation._set_singleton_role(fields, {'f00': FieldRole.WEIGHT})) self.assertSetEqual(fields_set_singleton[0].role, set([FieldRole.FEATURE, FieldRole.WEIGHT])) fields_set_singleton2 = list(DFAdapterOperation._set_singleton_role(fields_set_singleton, {'f01': FieldRole.WEIGHT})) self.assertSetEqual(fields_set_singleton2[0].role, set([FieldRole.FEATURE, ])) self.assertSetEqual(fields_set_singleton2[1].role, set([FieldRole.FEATURE, FieldRole.WEIGHT])) fields_set_singleton_expect = list(DFAdapterOperation._set_singleton_role(fields_set_singleton2, {'category': FieldRole.LABEL}))
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

from odps.df import DataFrame
from odps.ml.classifiers import LogisticRegression
from odps.ml.cross_validation import cross_val_score
from odps.ml.tests.base import MLTestBase, tn, ci_skip_case

IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere')


class TestCrossValidation(MLTestBase):
    """Cross-validation test running against the ionosphere table."""

    def setUp(self):
        """Create the ionosphere table and expose it as ``self.df``.

        The frame is built from ``IONOSPHERE_TABLE`` with the ``class``
        column assigned the label role.
        """
        super(TestCrossValidation, self).setUp()
        self.create_ionosphere(IONOSPHERE_TABLE)
        ionosphere_table = self.odps.get_table(IONOSPHERE_TABLE)
        self.df = DataFrame(ionosphere_table).roles(label='class')

    def tearDown(self):
        """Delegate to the base-class teardown."""
        super(TestCrossValidation, self).tearDown()

    @ci_skip_case
    def test_logistic_regression(self):
        """Print ``cross_val_score`` for a logistic regression on ``self.df``."""
        estimator = LogisticRegression(epsilon=0.001)
        estimator = estimator.set_max_iter(50)
        cv_result = cross_val_score(estimator, self.df)
        print(cv_result)
# Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. from __future__ import print_function from odps.df import DataFrame from odps.config import options from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.statistics import * from odps.ml.tests.base import MLTestBase, tn IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') IRIS_TABLE = tn('pyodps_test_ml_iris') IONOSPHERE_PRINCOMP_TABLE = tn('pyodps_test_ml_iono_princomp') IONOSPHERE_FEATURE_STATS = tn('pyodps_test_ml_iono_feature_stats') IONOSPHERE_REPLACE_WOE = tn('pyodps_test_ml_iono_replace_woe') IONOSPHERE_QUANTILE_TABLE = tn('pyodps_test_ml_iono_quantile') class TestStatistics(MLTestBase): def setUp(self): super(TestStatistics, self).setUp() self.create_ionosphere(IONOSPHERE_TABLE) def test_histograms(self): options.runner.dry_run = True
# under the License. from __future__ import print_function import logging from odps.df import DataFrame from odps.config import options from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.classifiers import * from odps.ml.metrics.classification import roc_curve from odps.ml.tests.base import MLTestBase, tn, ci_skip_case logger = logging.getLogger(__name__) IRIS_KV_TABLE = tn('pyodps_test_ml_iris_sparse') LR_TEST_TABLE = tn('pyodps_lr_output_table') XGBOOST_TEST_TABLE = tn('pyodps_xgboost_output_table') MODEL_NAME = tn('pyodps_test_out_model') class TestSparseClassifiers(MLTestBase): def setUp(self): super(TestSparseClassifiers, self).setUp() self.create_iris_kv(IRIS_KV_TABLE) self.df = DataFrame(self.odps.get_table(IRIS_KV_TABLE)).label_field('category').key_value('content') def tearDown(self): super(TestSparseClassifiers, self).tearDown()
# KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. from __future__ import print_function from odps.df import DataFrame from odps.config import options from odps.ml.clustering import * from odps.ml.metrics import * from odps.ml.tests.base import MLTestBase, tn, ci_skip_case import logging logger = logging.getLogger(__name__) IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') IONOSPHERE_CLUSTER_LABEL_TABLE = tn('pyodps_test_ml_iono_cluster_label') IONOSPHERE_CLUSTER_MODEL = tn('pyodps_test_ml_kmeans_model') class TestMLClustering(MLTestBase): def setUp(self): super(TestMLClustering, self).setUp() self.create_ionosphere(IONOSPHERE_TABLE) @ci_skip_case def test_kmeans(self): self.delete_table(IONOSPHERE_CLUSTER_LABEL_TABLE) self.delete_offline_model(IONOSPHERE_CLUSTER_MODEL) df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE)) labeled, model = KMeans(center_count=3).transform(df.exclude_fields('class'))
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import functools from odps.df.expr.expressions import CollectionExpr from odps.df.expr.tests.core import MockTable from odps.df.types import validate_data_type from odps.ml.expr.op import * from odps.ml.tests.base import MLTestBase, tn from odps.ml.utils import KVConfig from odps.models.table import TableSchema as Schema TEMP_TABLE_1_NAME = tn('pyodps_test_ops_test_table1') TEMP_TABLE_2_NAME = tn('pyodps_test_ops_test_table2') datatypes = lambda *types: [validate_data_type(t) for t in types] class TestOp(MLTestBase): def testBaseMethods(self): fields = [ MLField('f%02d' % fid, 'string', FieldRole.FEATURE) for fid in range(5) ] fields_set_singleton = list( DFOperation._set_singleton_role(fields, {'f00': FieldRole.WEIGHT})) self.assertSetEqual(fields_set_singleton[0].role, set([FieldRole.FEATURE, FieldRole.WEIGHT]))
# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function from odps.df import DataFrame from odps.ml import merge_data from odps.ml.preprocess import * from odps.ml.tests.base import MLTestBase, tn, ci_skip_case IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') IONOSPHERE_RANDOM_SAMPLE_TABLE = tn('pyodps_test_ml_iono_rand_sample') IONOSPHERE_WEIGHTED_SAMPLE_TABLE = tn('pyodps_test_ml_iono_weight_sample') IONOSPHERE_APPEND_ID_TABLE = tn('pyodps_test_ml_iono_append_id') IONOSPHERE_MERGED_TABLE = tn('pyodps_test_ml_iono_merged') IONOSPHERE_PRINCOMP_TABLE = tn('pyodps_test_ml_iono_princomp') IONOSPHERE_ABNORMAL_TABLE = tn('pyodps_test_ml_iono_abnormal') USER_ITEM_TABLE = tn('pyodps_test_ml_user_item') USER_ITEM_UNPIVOT_TABLE = tn('pyodps_test_ml_unpivot_user_item') class TestPreprocess(MLTestBase): def setUp(self): super(TestPreprocess, self).setUp() self.create_ionosphere(IONOSPHERE_TABLE)
# specific language governing permissions and limitations # under the License. from __future__ import print_function import logging from odps.df import DataFrame from odps.config import options from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.network import * from odps.ml.tests.base import MLTestBase, tn logger = logging.getLogger(__name__) WEIGHTED_GRAPH_EDGE_TABLE = tn('pyodps_test_ml_weighted_graph_edge') WEIGHTED_GRAPH_VERTEX_TABLE = tn('pyodps_test_ml_weighted_graph_node') TREE_GRAPH_EDGE_TABLE = tn('pyodps_test_ml_tree_graph_edge') NODE_DENSITY_TABLE = tn('pyodps_test_ml_node_density') EDGE_DENSITY_TABLE = tn('pyodps_test_ml_edge_density') MAXIMAL_CONNECTED_TABLE = tn('pyodps_test_ml_maximal_connected') TRIANGLE_COUNT_TABLE = tn('pyodps_test_ml_triangle_count') PAGE_RANK_TABLE = tn('pyodps_test_ml_page_rank') LABEL_PROPAGATION_TABLE = tn('pyodps_test_ml_label_prop') K_CORE_TABLE = tn('pyodps_test_ml__k_core') SSSP_TABLE = tn('pyodps_test_ml_sssp') TREE_DEPTH_TABLE = tn('pyodps_test_ml_tree_depth') class Test(MLTestBase):
# See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function import logging from odps.df import DataFrame from odps.config import options from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.network import * from odps.ml.tests.base import MLTestBase, tn logger = logging.getLogger(__name__) WEIGHTED_GRAPH_EDGE_TABLE = tn('pyodps_test_ml_weighted_graph_edge') WEIGHTED_GRAPH_VERTEX_TABLE = tn('pyodps_test_ml_weighted_graph_node') TREE_GRAPH_EDGE_TABLE = tn('pyodps_test_ml_tree_graph_edge') NODE_DENSITY_TABLE = tn('pyodps_test_ml_node_density') EDGE_DENSITY_TABLE = tn('pyodps_test_ml_edge_density') MAXIMAL_CONNECTED_TABLE = tn('pyodps_test_ml_maximal_connected') TRIANGLE_COUNT_TABLE = tn('pyodps_test_ml_triangle_count') PAGE_RANK_TABLE = tn('pyodps_test_ml_page_rank') LABEL_PROPAGATION_TABLE = tn('pyodps_test_ml_label_prop') K_CORE_TABLE = tn('pyodps_test_ml__k_core') SSSP_TABLE = tn('pyodps_test_ml_sssp') TREE_DEPTH_TABLE = tn('pyodps_test_ml_tree_depth') class Test(MLTestBase):
# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. from odps import options, DataFrame from odps.ml import PmmlModel from odps.ml.feature import * from odps.ml.tests.base import MLTestBase, tn TEST_LR_MODEL_NAME = tn('pyodps_test_lr_model') IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') SELECT_FEATURE_OUTPUT_TABLE = tn('pyodps_test_ml_select_feature_output') class Test(MLTestBase): def setUp(self): super(Test, self).setUp() self.create_test_pmml_model(TEST_LR_MODEL_NAME) self.create_ionosphere(IONOSPHERE_TABLE) self.df = DataFrame(self.odps.get_table(IONOSPHERE_TABLE)).label_field('class') self.model = PmmlModel(self.odps.get_offline_model(TEST_LR_MODEL_NAME)) options.runner.dry_run = True def test_rf_importance(self): rf_importance(self.df, self.model, core_num=1, core_mem=1024, _cases=self.gen_check_params_case({
from __future__ import print_function import logging import sys from odps.df import DataFrame from odps.config import options from odps.ml.utils import TEMP_TABLE_PREFIX from odps.ml.algolib import * from odps.ml.algolib.loader import load_classifiers from odps.ml.tests.base import MLTestBase, tn logger = logging.getLogger(__name__) IONOSPHERE_TABLE = tn('pyodps_test_ml_ionosphere') MODEL_NAME = tn('pyodps_test_out_model') class TestAlgoBuild(MLTestBase): def setUp(self): super(TestAlgoBuild, self).setUp() self.create_ionosphere(IONOSPHERE_TABLE) self.register_algorithm() def tearDown(self): super(TestAlgoBuild, self).tearDown() def register_algorithm(self): algo_def = XflowAlgorithmDef('MyNaiveBayes',