Example #1
def check_print_model_weights(task='classification'):

    # create some simple classification or regression data
    if task == 'classification':
        train_fs, _ = make_classification_data(train_test_ratio=0.8)
    else:
        train_fs, _, _ = make_regression_data(num_features=4,
                                              train_test_ratio=0.8)

    # now train the appropriate model
    if task == 'classification':
        learner = Learner('LogisticRegression')
        learner.train(train_fs)
    else:
        learner = Learner('LinearRegression')
        learner.train(train_fs, grid_objective='pearson')

    # now save the model to disk
    model_file = join(_my_dir, 'output',
                      'test_print_model_weights.model')
    learner.save(model_file)

    # now call print_model_weights main() and capture the output
    print_model_weights_cmd = [model_file]
    err = ''
    try:
        old_stderr = sys.stderr
        old_stdout = sys.stdout
        sys.stderr = mystderr = StringIO()
        sys.stdout = mystdout = StringIO()
        pmw.main(print_model_weights_cmd)
        out = mystdout.getvalue()
        err = mystderr.getvalue()
    finally:
        sys.stderr = old_stderr
        sys.stdout = old_stdout
        print(err)

    # now parse the output of the print_model_weights command
    # and get the intercept and the feature values
    if task == 'classification':
        lines_to_parse = [l for l in out.split('\n')[1:] if l]
        intercept = safe_float(lines_to_parse[0].split('\t')[0])
        feature_values = []
        for ltp in lines_to_parse[1:]:
            fields = ltp.split('\t')
            feature_values.append((fields[2], safe_float(fields[0])))
        feature_values = [t[1] for t in sorted(feature_values)]
        assert_almost_equal(intercept, learner.model.intercept_[0])
        assert_allclose(learner.model.coef_[0], feature_values)
    else:
        lines_to_parse = [l for l in out.split('\n') if l]
        intercept = safe_float(lines_to_parse[0].split('=')[1])
        feature_values = []
        for ltp in lines_to_parse[1:]:
            fields = ltp.split('\t')
            feature_values.append((fields[1], safe_float(fields[0])))
        feature_values = [t[1] for t in sorted(feature_values)]
        assert_almost_equal(intercept, learner.model.intercept_)
        assert_allclose(learner.model.coef_, feature_values)
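
The snippet above (and the two variants that follow) comes from SKLL's test suite and presupposes several names that are not shown: the Learner class, the print_model_weights utility imported as pmw, safe_float, and the test helpers _my_dir, make_classification_data and make_regression_data. The following is a minimal sketch of the imports it appears to rely on; the exact module paths are assumptions and vary between SKLL versions:

import ast
import sys
from io import StringIO
from os.path import join

from numpy.testing import (assert_allclose,
                           assert_almost_equal,
                           assert_array_almost_equal)

from skll import Learner
# assumed location of the print-model-weights script module; newer SKLL
# releases reorganized the command-line utilities, so this path may differ
import skll.utilities.print_model_weights as pmw
# assumed location of safe_float; depending on the SKLL version it may need
# to be imported from skll.data.readers instead
from skll.data import safe_float

# _my_dir, make_classification_data and make_regression_data are helpers
# defined in SKLL's own test utilities, not part of the public API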
Example #2
def check_print_model_weights(task='classification'):

    # create some simple classification or regression data
    if task == 'classification' or task == 'classification_no_intercept':
        train_fs, _ = make_classification_data(train_test_ratio=0.8)
    elif task == 'multiclass_classification':
        train_fs, _ = make_classification_data(train_test_ratio=0.8,
                                                num_labels=3)
    else:
        train_fs, _, _ = make_regression_data(num_features=4,
                                              train_test_ratio=0.8)

    # now train the appropriate model
    if task == 'classification' or task == 'multiclass_classification':
        learner = Learner('LogisticRegression')
        learner.train(train_fs, grid_objective='f1_score_micro')
    elif task == 'classification_no_intercept':
        learner = Learner('LogisticRegression')
        learner.train(train_fs,
                      grid_objective='f1_score_micro',
                      param_grid=[{'fit_intercept': [False]}])
    elif task == 'regression':
        learner = Learner('LinearRegression')
        learner.train(train_fs, grid_objective='pearson')
    else:
        learner = Learner('LinearSVR')
        learner.train(train_fs, grid_objective='pearson')

    # now save the model to disk
    model_file = join(_my_dir, 'output',
                      'test_print_model_weights.model')
    learner.save(model_file)

    # now call print_model_weights main() and capture the output
    print_model_weights_cmd = [model_file]
    err = ''
    try:
        old_stderr = sys.stderr
        old_stdout = sys.stdout
        sys.stderr = mystderr = StringIO()
        sys.stdout = mystdout = StringIO()
        pmw.main(print_model_weights_cmd)
        out = mystdout.getvalue()
        err = mystderr.getvalue()
    finally:
        sys.stderr = old_stderr
        sys.stdout = old_stdout
        print(err)

    # now parse the output of the print_model_weights command
    # and get the intercept and the feature values
    if task == 'classification':
        lines_to_parse = [l for l in out.split('\n')[1:] if l]
        intercept = safe_float(lines_to_parse[0].split('\t')[0])
        feature_values = []
        for ltp in lines_to_parse[1:]:
            fields = ltp.split('\t')
            feature_values.append((fields[2], safe_float(fields[0])))
        feature_values = [t[1] for t in sorted(feature_values)]
        assert_almost_equal(intercept, learner.model.intercept_[0])
        assert_allclose(learner.model.coef_[0], feature_values)
    elif task == 'multiclass_classification':
        # for multiple classes we get an intercept for each class
        # as well as a list of weights for each class

        lines_to_parse = [l for l in out.split('\n')[1:] if l]
        intercept = []
        for intercept_string in lines_to_parse[0:3]:
            intercept.append(safe_float(intercept_string.split('\t')[0]))

        feature_values = [[], [], []]
        for ltp in lines_to_parse[3:]:
            fields = ltp.split('\t')
            feature_values[int(fields[1])].append((fields[2], safe_float(fields[0])))

        for index, weights in enumerate(feature_values):
            feature_values[index] = [t[1] for t in sorted(weights)]

        for index, weights in enumerate(learner.model.coef_):
            assert_array_almost_equal(weights, feature_values[index])

        assert_array_almost_equal(intercept, learner.model.intercept_)
    elif task == 'classification_no_intercept':
        lines_to_parse = [l for l in out.split('\n') if l]
        intercept = safe_float(lines_to_parse[0].split('=')[1])
        feature_values = []
        for ltp in lines_to_parse[1:]:
            fields = ltp.split('\t')
            feature_values.append((fields[2], safe_float(fields[0])))
        feature_values = [t[1] for t in sorted(feature_values)]
        assert_almost_equal(intercept, learner.model.intercept_)
        assert_allclose(learner.model.coef_[0], feature_values)
    elif task == 'regression':
        lines_to_parse = [l for l in out.split('\n') if l]
        intercept = safe_float(lines_to_parse[0].split('=')[1])
        feature_values = []
        for ltp in lines_to_parse[1:]:
            fields = ltp.split('\t')
            feature_values.append((fields[1], safe_float(fields[0])))
        feature_values = [t[1] for t in sorted(feature_values)]
        assert_almost_equal(intercept, learner.model.intercept_)
        assert_allclose(learner.model.coef_, feature_values)
    else:
        lines_to_parse = [l for l in out.split('\n') if l]

        intercept_list = ast.literal_eval(lines_to_parse[0].split('=')[1].strip())
        intercept = []
        for intercept_string in intercept_list:
            intercept.append(safe_float(intercept_string))

        feature_values = []
        for ltp in lines_to_parse[1:]:
            fields = ltp.split('\t')
            feature_values.append((fields[1], safe_float(fields[0])))
        feature_values = [t[1] for t in sorted(feature_values)]

        assert_array_almost_equal(intercept, learner.model.intercept_)
        assert_allclose(learner.model.coef_, feature_values)
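
Example #2 adds multiclass, no-intercept and LinearSVR branches to the same check. A hypothetical nose-style driver (the generator name and the final, unmatched task label are illustrative, not taken from the original suite) could exercise each branch like this:

def test_print_model_weights():
    # every task string handled explicitly by check_print_model_weights,
    # plus one unmatched label that falls through to the LinearSVR branch
    for task in ['classification',
                 'multiclass_classification',
                 'classification_no_intercept',
                 'regression',
                 'linearsvr_regression']:
        yield check_print_model_weights, task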
Example #3
def check_print_model_weights(task='classification'):

    # create some simple classification or regression data
    if task == 'classification':
        train_fs, _ = make_classification_data(train_test_ratio=0.8)
    elif task == 'multiclass_classification':
        train_fs, _ = make_classification_data(train_test_ratio=0.8,
                                                num_labels=3)
    else:
        train_fs, _, _ = make_regression_data(num_features=4,
                                              train_test_ratio=0.8)

    # now train the appropriate model
    if task == 'classification' or task == 'multiclass_classification':
        learner = Learner('LogisticRegression')
        learner.train(train_fs, grid_objective='f1_score_micro')
    elif task == 'regression':
        learner = Learner('LinearRegression')
        learner.train(train_fs, grid_objective='pearson')
    else:
        learner = Learner('LinearSVR')
        learner.train(train_fs, grid_objective='pearson')

    # now save the model to disk
    model_file = join(_my_dir, 'output',
                      'test_print_model_weights.model')
    learner.save(model_file)

    # now call print_model_weights main() and capture the output
    print_model_weights_cmd = [model_file]
    err = ''
    try:
        old_stderr = sys.stderr
        old_stdout = sys.stdout
        sys.stderr = mystderr = StringIO()
        sys.stdout = mystdout = StringIO()
        pmw.main(print_model_weights_cmd)
        out = mystdout.getvalue()
        err = mystderr.getvalue()
    finally:
        sys.stderr = old_stderr
        sys.stdout = old_stdout
        print(err)

    # now parse the output of the print_model_weights command
    # and get the intercept and the feature values
    if task == 'classification':
        lines_to_parse = [l for l in out.split('\n')[1:] if l]
        intercept = safe_float(lines_to_parse[0].split('\t')[0])
        feature_values = []
        for ltp in lines_to_parse[1:]:
            fields = ltp.split('\t')
            feature_values.append((fields[2], safe_float(fields[0])))
        feature_values = [t[1] for t in sorted(feature_values)]
        assert_almost_equal(intercept, learner.model.intercept_[0])
        assert_allclose(learner.model.coef_[0], feature_values)
    elif task == 'multiclass_classification':
        # for multiple classes we get an intercept for each class
        # as well as a list of weights for each class

        lines_to_parse = [l for l in out.split('\n')[1:] if l]
        intercept = []
        for intercept_string in lines_to_parse[0:3]:
            intercept.append(safe_float(intercept_string.split('\t')[0]))

        feature_values = [[], [], []]
        for ltp in lines_to_parse[3:]:
            fields = ltp.split('\t')
            feature_values[int(fields[1])].append((fields[2], safe_float(fields[0])))

        for index, weights in enumerate(feature_values):
            feature_values[index] = [t[1] for t in sorted(weights)]

        for index, weights in enumerate(learner.model.coef_):
            assert_array_almost_equal(weights, feature_values[index])

        assert_array_almost_equal(intercept, learner.model.intercept_)

    elif task == 'regression':
        lines_to_parse = [l for l in out.split('\n') if l]
        intercept = safe_float(lines_to_parse[0].split('=')[1])
        feature_values = []
        for ltp in lines_to_parse[1:]:
            fields = ltp.split('\t')
            feature_values.append((fields[1], safe_float(fields[0])))
        feature_values = [t[1] for t in sorted(feature_values)]
        assert_almost_equal(intercept, learner.model.intercept_)
        assert_allclose(learner.model.coef_, feature_values)
    else:
        lines_to_parse = [l for l in out.split('\n') if l]

        intercept_list = ast.literal_eval(lines_to_parse[0].split('=')[1].strip())
        intercept = []
        for intercept_string in intercept_list:
            intercept.append(safe_float(intercept_string))

        feature_values = []
        for ltp in lines_to_parse[1:]:
            fields = ltp.split('\t')
            feature_values.append((fields[1], safe_float(fields[0])))
        feature_values = [t[1] for t in sorted(feature_values)]

        assert_array_almost_equal(intercept, learner.model.intercept_)
        assert_allclose(learner.model.coef_, feature_values)
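
All three examples capture the output of pmw.main() by swapping sys.stdout and sys.stderr by hand. A sketch of an equivalent capture step using contextlib.redirect_stdout and redirect_stderr (not how the original tests are written, but equivalent for the capture itself; pmw is the print_model_weights module assumed in the imports sketch after Example #1):

import contextlib
from io import StringIO

def capture_print_model_weights(model_file):
    # run pmw.main() with stdout/stderr redirected into in-memory buffers;
    # the real streams are restored automatically when the with-block exits
    mystdout, mystderr = StringIO(), StringIO()
    with contextlib.redirect_stdout(mystdout), \
         contextlib.redirect_stderr(mystderr):
        pmw.main([model_file])
    return mystdout.getvalue(), mystderr.getvalue()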