Example #1
0
from sklearn.pipeline import FeatureUnion
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
import numpy as np

from jsonio import read_infile_as_json, read_outfile_as_json
from features import TopicBOW, TopicNum
from evaluation import evaluate_classification

if __name__ == '__main__':
    train_json, test_json = read_infile_as_json('./data/answered_data_10k.in')
    train_y = np.asarray(map(lambda x: x['__ans__'], train_json))

    test_labels_json = read_outfile_as_json('./data/answered_data_10k.out')
    test_labels_dict = {
        i['question_key']: i['__ans__']
        for i in test_labels_json
    }

    # Output should be ordered according to test_json
    test_y = np.asarray(
        [test_labels_dict[x['question_key']] for x in test_json])

    feature_extractors = FeatureUnion([("TopicBOW", TopicBOW()),
                                       ("TopicNum", TopicNum())])
    m = Pipeline(
        steps=[("features",
                feature_extractors), ('LR', LogisticRegression(penalty="l1"))])

    m.fit(train_json, train_y)
    pred = m.predict(test_json)
from sklearn.pipeline import FeatureUnion
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor
import numpy as np

from jsonio import read_infile_as_json, read_outfile_as_json
from features import ContextTopicFollowers, TopicNum, Anonymous
from evaluation import evaluate_rmsle


if __name__ == '__main__':
    train_json, test_json = read_infile_as_json('./data/input00.in')
    train_y = np.asarray(map(lambda x: x['__ans__'], train_json))

    test_labels_json = read_outfile_as_json('./data/output00.out')
    test_labels_dict = {i['question_key']: i['__ans__'] for i in test_labels_json}

    test_y = np.asarray([test_labels_dict[x['question_key']] for x in test_json])

    feature_extractors = FeatureUnion([('ContextTopicFollowers', ContextTopicFollowers()),
                                       ('TopicNum', TopicNum()),
                                       ('Anonymous', Anonymous())])
    m = Pipeline(steps=[("features", feature_extractors),
                        ('LR', GradientBoostingRegressor(n_estimators=300, max_depth=4))])

    m.fit(train_json, np.log(train_y + 1))
    pred = np.exp(m.predict(test_json)) - 1

    print(evaluate_rmsle(pred, test_y))
from sklearn.pipeline import FeatureUnion
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
import numpy as np

from jsonio import read_infile_as_json, read_outfile_as_json
from features import TopicBOW, TopicNum
from evaluation import evaluate_classification


if __name__ == '__main__':
    train_json, test_json = read_infile_as_json('./data/answered_data_10k.in')
    train_y = np.asarray(map(lambda x: x['__ans__'], train_json))

    test_labels_json = read_outfile_as_json('./data/answered_data_10k.out')
    test_labels_dict = {i['question_key']: i['__ans__'] for i in test_labels_json}

    # Output should be ordered according to test_json
    test_y = np.asarray([test_labels_dict[x['question_key']] for x in test_json])

    feature_extractors = FeatureUnion([("TopicBOW", TopicBOW()), ("TopicNum", TopicNum())])
    m = Pipeline(steps=[("features", feature_extractors),
                        ('LR', LogisticRegression(penalty="l1"))])

    m.fit(train_json, train_y)
    pred = m.predict(test_json)

    print(evaluate_classification(pred, test_y))