def check_print_model_weights(task='classification'):
    """Train a model, save it, and verify ``print_model_weights`` output.

    Depending on ``task``, fits either a LogisticRegression classifier or a
    LinearRegression regressor on synthetic data, saves the model to disk,
    runs the print_model_weights utility's ``main()`` on the saved file while
    capturing its stdout, then parses that output and checks the printed
    intercept and feature weights against the fitted model's attributes.
    """
    # build simple synthetic data matching the requested task
    if task == 'classification':
        train_fs, _ = make_classification_data(train_test_ratio=0.8)
    else:
        train_fs, _, _ = make_regression_data(num_features=4,
                                              train_test_ratio=0.8)

    # fit the corresponding learner
    if task == 'classification':
        learner = Learner('LogisticRegression')
        learner.train(train_fs)
    else:
        learner = Learner('LinearRegression')
        learner.train(train_fs, grid_objective='pearson')

    # persist the model so the utility can load it from disk
    model_file = join(_my_dir, 'output', 'test_print_model_weights.model')
    learner.save(model_file)

    # invoke print_model_weights' main() with stdout/stderr redirected into
    # in-memory buffers so the printed weights can be inspected
    print_model_weights_cmd = [model_file]
    err = ''
    try:
        old_stderr = sys.stderr
        old_stdout = sys.stdout
        sys.stderr = mystderr = StringIO()
        sys.stdout = mystdout = StringIO()
        pmw.main(print_model_weights_cmd)
        out = mystdout.getvalue()
        err = mystderr.getvalue()
    finally:
        # always restore the real streams, even if main() raises
        sys.stderr = old_stderr
        sys.stdout = old_stdout
    print(err)

    # parse the captured output and compare against the fitted model
    if task == 'classification':
        # skip the header line, then drop empty lines; the first remaining
        # row starts with the intercept value
        rows = [line for line in out.split('\n')[1:] if line]
        intercept = safe_float(rows[0].split('\t')[0])
        # remaining rows are tab-separated with the weight in column 0 and
        # the feature name in column 2 (column 1 is unused here —
        # presumably a class label; confirm against the utility's output)
        pairs = []
        for row in rows[1:]:
            cols = row.split('\t')
            pairs.append((cols[2], safe_float(cols[0])))
        # sort by feature name so ordering matches the model's coef_ layout
        feature_values = [weight for _, weight in sorted(pairs)]
        assert_almost_equal(intercept, learner.model.intercept_[0])
        assert_allclose(learner.model.coef_[0], feature_values)
    else:
        # regression output: an "intercept = <value>" line followed by
        # tab-separated weight/feature-name rows
        rows = [line for line in out.split('\n') if line]
        intercept = safe_float(rows[0].split('=')[1])
        pairs = []
        for row in rows[1:]:
            cols = row.split('\t')
            pairs.append((cols[1], safe_float(cols[0])))
        feature_values = [weight for _, weight in sorted(pairs)]
        assert_almost_equal(intercept, learner.model.intercept_)
        assert_allclose(learner.model.coef_, feature_values)
def check_print_model_weights(task='classification'):
    """Train a model, save it, and verify ``print_model_weights`` output.

    Supported tasks: ``classification`` (binary LogisticRegression),
    ``multiclass_classification`` (3-label LogisticRegression),
    ``classification_no_intercept`` (LogisticRegression with
    ``fit_intercept=False``), ``regression`` (LinearRegression), and any
    other value (LinearSVR). The fitted model is saved to disk, the
    print_model_weights utility's ``main()`` is run on the saved file with
    stdout captured, and the printed intercept(s) and feature weights are
    checked against the fitted model's attributes.
    """
    # build simple synthetic data matching the requested task
    if task == 'classification' or task == 'classification_no_intercept':
        train_fs, _ = make_classification_data(train_test_ratio=0.8)
    elif task == 'multiclass_classification':
        train_fs, _ = make_classification_data(train_test_ratio=0.8,
                                               num_labels=3)
    else:
        train_fs, _, _ = make_regression_data(num_features=4,
                                              train_test_ratio=0.8)

    # fit the corresponding learner
    if task == 'classification' or task == 'multiclass_classification':
        learner = Learner('LogisticRegression')
        learner.train(train_fs, grid_objective='f1_score_micro')
    elif task == 'classification_no_intercept':
        learner = Learner('LogisticRegression')
        learner.train(train_fs,
                      grid_objective='f1_score_micro',
                      param_grid=[{'fit_intercept': [False]}])
    elif task == 'regression':
        learner = Learner('LinearRegression')
        learner.train(train_fs, grid_objective='pearson')
    else:
        learner = Learner('LinearSVR')
        learner.train(train_fs, grid_objective='pearson')

    # persist the model so the utility can load it from disk
    model_file = join(_my_dir, 'output', 'test_print_model_weights.model')
    learner.save(model_file)

    # invoke print_model_weights' main() with stdout/stderr redirected into
    # in-memory buffers so the printed weights can be inspected
    print_model_weights_cmd = [model_file]
    err = ''
    try:
        old_stderr = sys.stderr
        old_stdout = sys.stdout
        sys.stderr = mystderr = StringIO()
        sys.stdout = mystdout = StringIO()
        pmw.main(print_model_weights_cmd)
        out = mystdout.getvalue()
        err = mystderr.getvalue()
    finally:
        # always restore the real streams, even if main() raises
        sys.stderr = old_stderr
        sys.stdout = old_stdout
    print(err)

    # parse the captured output and compare against the fitted model
    if task == 'classification':
        # skip the header line; the first remaining row starts with the
        # intercept, later rows are weight<TAB>label<TAB>feature-name
        rows = [line for line in out.split('\n')[1:] if line]
        intercept = safe_float(rows[0].split('\t')[0])
        pairs = []
        for row in rows[1:]:
            cols = row.split('\t')
            pairs.append((cols[2], safe_float(cols[0])))
        # sort by feature name so ordering matches the model's coef_ layout
        feature_values = [weight for _, weight in sorted(pairs)]
        assert_almost_equal(intercept, learner.model.intercept_[0])
        assert_allclose(learner.model.coef_[0], feature_values)
    elif task == 'multiclass_classification':
        # with multiple classes we get one intercept per class followed by
        # per-class weight rows (column 1 holds the class index)
        rows = [line for line in out.split('\n')[1:] if line]
        intercepts = [safe_float(row.split('\t')[0]) for row in rows[0:3]]
        per_class = [[], [], []]
        for row in rows[3:]:
            cols = row.split('\t')
            per_class[int(cols[1])].append((cols[2], safe_float(cols[0])))
        per_class = [[weight for _, weight in sorted(pairs)]
                     for pairs in per_class]
        for class_idx, model_weights in enumerate(learner.model.coef_):
            assert_array_almost_equal(model_weights, per_class[class_idx])
        assert_array_almost_equal(intercepts, learner.model.intercept_)
    elif task == 'classification_no_intercept':
        # without a fitted intercept the output leads with an
        # "intercept = <value>" line instead of a header
        rows = [line for line in out.split('\n') if line]
        intercept = safe_float(rows[0].split('=')[1])
        pairs = []
        for row in rows[1:]:
            cols = row.split('\t')
            pairs.append((cols[2], safe_float(cols[0])))
        feature_values = [weight for _, weight in sorted(pairs)]
        assert_almost_equal(intercept, learner.model.intercept_)
        assert_allclose(learner.model.coef_[0], feature_values)
    elif task == 'regression':
        # "intercept = <value>" line, then weight<TAB>feature-name rows
        rows = [line for line in out.split('\n') if line]
        intercept = safe_float(rows[0].split('=')[1])
        pairs = []
        for row in rows[1:]:
            cols = row.split('\t')
            pairs.append((cols[1], safe_float(cols[0])))
        feature_values = [weight for _, weight in sorted(pairs)]
        assert_almost_equal(intercept, learner.model.intercept_)
        assert_allclose(learner.model.coef_, feature_values)
    else:
        # LinearSVR prints its intercept as a Python-literal list, so parse
        # it with ast.literal_eval before converting each entry
        rows = [line for line in out.split('\n') if line]
        intercept_list = ast.literal_eval(rows[0].split('=')[1].strip())
        intercepts = [safe_float(value) for value in intercept_list]
        pairs = []
        for row in rows[1:]:
            cols = row.split('\t')
            pairs.append((cols[1], safe_float(cols[0])))
        feature_values = [weight for _, weight in sorted(pairs)]
        assert_array_almost_equal(intercepts, learner.model.intercept_)
        assert_allclose(learner.model.coef_, feature_values)
def check_print_model_weights(task='classification'):
    """Train a model, save it, and verify ``print_model_weights`` output.

    Supported tasks: ``classification`` (binary LogisticRegression),
    ``multiclass_classification`` (3-label LogisticRegression),
    ``regression`` (LinearRegression), and any other value (LinearSVR).
    The fitted model is saved to disk, the print_model_weights utility's
    ``main()`` is run on the saved file with stdout captured, and the
    printed intercept(s) and feature weights are checked against the fitted
    model's attributes.
    """
    # build simple synthetic data matching the requested task
    if task == 'classification':
        train_fs, _ = make_classification_data(train_test_ratio=0.8)
    elif task == 'multiclass_classification':
        train_fs, _ = make_classification_data(train_test_ratio=0.8,
                                               num_labels=3)
    else:
        train_fs, _, _ = make_regression_data(num_features=4,
                                              train_test_ratio=0.8)

    # fit the corresponding learner
    if task == 'classification' or task == 'multiclass_classification':
        learner = Learner('LogisticRegression')
        learner.train(train_fs, grid_objective='f1_score_micro')
    elif task == 'regression':
        learner = Learner('LinearRegression')
        learner.train(train_fs, grid_objective='pearson')
    else:
        learner = Learner('LinearSVR')
        learner.train(train_fs, grid_objective='pearson')

    # persist the model so the utility can load it from disk
    model_file = join(_my_dir, 'output', 'test_print_model_weights.model')
    learner.save(model_file)

    # invoke print_model_weights' main() with stdout/stderr redirected into
    # in-memory buffers so the printed weights can be inspected
    print_model_weights_cmd = [model_file]
    err = ''
    try:
        old_stderr = sys.stderr
        old_stdout = sys.stdout
        sys.stderr = mystderr = StringIO()
        sys.stdout = mystdout = StringIO()
        pmw.main(print_model_weights_cmd)
        out = mystdout.getvalue()
        err = mystderr.getvalue()
    finally:
        # always restore the real streams, even if main() raises
        sys.stderr = old_stderr
        sys.stdout = old_stdout
    print(err)

    # parse the captured output and compare against the fitted model
    if task == 'classification':
        # skip the header line; the first remaining row starts with the
        # intercept, later rows are weight<TAB>label<TAB>feature-name
        rows = [line for line in out.split('\n')[1:] if line]
        intercept = safe_float(rows[0].split('\t')[0])
        pairs = []
        for row in rows[1:]:
            cols = row.split('\t')
            pairs.append((cols[2], safe_float(cols[0])))
        # sort by feature name so ordering matches the model's coef_ layout
        feature_values = [weight for _, weight in sorted(pairs)]
        assert_almost_equal(intercept, learner.model.intercept_[0])
        assert_allclose(learner.model.coef_[0], feature_values)
    elif task == 'multiclass_classification':
        # with multiple classes we get one intercept per class followed by
        # per-class weight rows (column 1 holds the class index)
        rows = [line for line in out.split('\n')[1:] if line]
        intercepts = [safe_float(row.split('\t')[0]) for row in rows[0:3]]
        per_class = [[], [], []]
        for row in rows[3:]:
            cols = row.split('\t')
            per_class[int(cols[1])].append((cols[2], safe_float(cols[0])))
        per_class = [[weight for _, weight in sorted(pairs)]
                     for pairs in per_class]
        for class_idx, model_weights in enumerate(learner.model.coef_):
            assert_array_almost_equal(model_weights, per_class[class_idx])
        assert_array_almost_equal(intercepts, learner.model.intercept_)
    elif task == 'regression':
        # "intercept = <value>" line, then weight<TAB>feature-name rows
        rows = [line for line in out.split('\n') if line]
        intercept = safe_float(rows[0].split('=')[1])
        pairs = []
        for row in rows[1:]:
            cols = row.split('\t')
            pairs.append((cols[1], safe_float(cols[0])))
        feature_values = [weight for _, weight in sorted(pairs)]
        assert_almost_equal(intercept, learner.model.intercept_)
        assert_allclose(learner.model.coef_, feature_values)
    else:
        # LinearSVR prints its intercept as a Python-literal list, so parse
        # it with ast.literal_eval before converting each entry
        rows = [line for line in out.split('\n') if line]
        intercept_list = ast.literal_eval(rows[0].split('=')[1].strip())
        intercepts = [safe_float(value) for value in intercept_list]
        pairs = []
        for row in rows[1:]:
            cols = row.split('\t')
            pairs.append((cols[1], safe_float(cols[0])))
        feature_values = [weight for _, weight in sorted(pairs)]
        assert_array_almost_equal(intercepts, learner.model.intercept_)
        assert_allclose(learner.model.coef_, feature_values)