Ejemplo n.º 1
0
def predict(model, test_files, test_ids, CoreNLP_train_data, CoreNLP_test_data,
            F, *args, **kwargs):
    """Predict labels for the test documents with a single fitted classifier.

    Five feature blocks are computed through ``features.featurize``, stacked
    column-wise with ``np.hstack``, passed through ``preprocessing.scale`` and
    handed to the classifier's ``predict`` method.

    Args:
        model: 1-tuple whose only element exposes ``predict``.
        test_files: Passed to the 'CoreNLP_sentence_info' featurizer.
        test_ids: Passed to every other featurizer.
        CoreNLP_train_data: Unused here.
        CoreNLP_test_data: CoreNLP data handed to the CoreNLP-based featurizers.
        F: Mapping read at the keys 'LIWC', 'MRC_bag_of_words',
            'dependency_relations' and 'production_rules'.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever the classifier's ``predict`` returns for the scaled matrix.
    """
    classifier, = model

    # Disabled feature block, kept for reference:
    # features.featurize('binary_bag_of_words', F['bag_of_words'], test_ids)
    sentence_info = features.featurize(
        'CoreNLP_sentence_info', None, test_files, CoreNLP_test_data)
    liwc = features.featurize('liwc', F['LIWC'], test_ids)
    mrc_words = features.featurize(
        'MRC_bag_of_words',
        F['MRC_bag_of_words'],
        test_ids,
        project.CoreNLP.tokens_with_key(CoreNLP_test_data),
        binary=True)
    dependencies = features.featurize(
        'dependency_relations', F['dependency_relations'], test_ids,
        CoreNLP_test_data, binary=True)
    productions = features.featurize(
        'production_rules', F['production_rules'], test_ids,
        CoreNLP_test_data, binary=True)

    stacked = np.hstack(
        [sentence_info, liwc, mrc_words, dependencies, productions])

    # NOTE(review): if `preprocessing` is scikit-learn's module, scale()
    # standardizes with this matrix's own statistics, not the training
    # set's -- confirm that is intended.
    X = preprocessing.scale(stacked)

    return classifier.predict(X)
Ejemplo n.º 2
0
def train(train_files, train_ids, Y, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    """Grid-search a PCA + logistic-regression pipeline on the training data.

    Four feature blocks are computed through ``features.featurize`` and
    stacked column-wise. A ``Pipeline`` of ``PCA`` followed by
    ``LogisticRegression`` is then tuned with ``GridSearchCV`` over the
    number of PCA components and the regularization parameter ``C``.

    Args:
        train_files: Passed to the 'CoreNLP_sentence_info' featurizer.
        train_ids: Passed to every other featurizer.
        Y: Training targets handed to ``GridSearchCV.fit``.
        CoreNLP_train_data: CoreNLP data handed to the CoreNLP-based
            featurizers.
        CoreNLP_test_data: Unused here.
        F: Mapping read at the keys 'LIWC', 'MRC_bag_of_words' and
            'production_rules'.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        1-tuple containing the fitted ``GridSearchCV`` object.
    """
    X = np.hstack([
        # Disabled feature block, kept for reference:
        # features.featurize('binary_bag_of_words', F['bag_of_words'], train_ids)
        features.featurize('CoreNLP_sentence_info', None, train_files, CoreNLP_train_data),
        features.featurize('liwc', F['LIWC'], train_ids),
        features.featurize('MRC_bag_of_words', F['MRC_bag_of_words'], train_ids,
                           project.CoreNLP.tokens_with_key(CoreNLP_train_data), binary=True),
        features.featurize('production_rules', F['production_rules'], train_ids,
                           CoreNLP_train_data, binary=True),
    ])

    # No separate up-front PCA fit: the pipeline fits its 'pca' step itself
    # for every grid-search candidate (assuming scikit-learn's Pipeline and
    # GridSearchCV), so a standalone pca.fit(X) would be discarded work.
    pipe = Pipeline(steps=[('pca', PCA()), ('logistic', LogisticRegression())])

    param_grid = {
        'pca__n_components': [25, 50, 100, 250, 500],
        # Three log-spaced values: 1e-4, 1 and 1e4.
        'logistic__C': np.logspace(-4, 4, 3),
    }

    info("Running gridsearch...")
    estimator = GridSearchCV(pipe, param_grid)

    info("Fitting...")
    estimator.fit(X, Y)

    return (estimator,)
Ejemplo n.º 3
0
def train(train_files, train_ids, Y, CoreNLP_train_data, CoreNLP_test_data, F,
          *args, **kwargs):
    """Fit a ``svm.LinearSVC`` on the scaled training feature matrix.

    Five feature blocks are computed through ``features.featurize``, stacked
    column-wise with ``np.hstack`` and passed through
    ``preprocessing.scale`` before fitting.

    Args:
        train_files: Passed to the 'CoreNLP_sentence_info' featurizer.
        train_ids: Passed to every other featurizer.
        Y: Training targets handed to ``fit``.
        CoreNLP_train_data: CoreNLP data handed to the CoreNLP-based
            featurizers.
        CoreNLP_test_data: Unused here.
        F: Mapping read at the keys 'LIWC', 'MRC_bag_of_words',
            'dependency_relations' and 'production_rules'.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        1-tuple containing the fitted classifier.
    """
    # Disabled feature block, kept for reference:
    # features.featurize('binary_bag_of_words', F['bag_of_words'], train_ids)
    sentence_info = features.featurize(
        'CoreNLP_sentence_info', None, train_files, CoreNLP_train_data)
    liwc = features.featurize('liwc', F['LIWC'], train_ids)
    mrc_words = features.featurize(
        'MRC_bag_of_words',
        F['MRC_bag_of_words'],
        train_ids,
        project.CoreNLP.tokens_with_key(CoreNLP_train_data),
        binary=True)
    dependencies = features.featurize(
        'dependency_relations', F['dependency_relations'], train_ids,
        CoreNLP_train_data, binary=True)
    productions = features.featurize(
        'production_rules', F['production_rules'], train_ids,
        CoreNLP_train_data, binary=True)

    stacked = np.hstack(
        [sentence_info, liwc, mrc_words, dependencies, productions])
    X = preprocessing.scale(stacked)

    classifier = svm.LinearSVC()
    classifier.fit(X, Y)

    return (classifier, )
Ejemplo n.º 4
0
def predict(model, test_files, test_ids, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    """Predict test labels by majority vote over three fitted models.

    Five feature blocks are computed through ``features.featurize`` and
    stacked column-wise. The first and third models predict on the raw
    matrix; the second predicts on ``preprocessing.scale`` of it. The three
    prediction vectors are combined with ``majority_vote``.

    Args:
        model: 3-tuple of objects exposing ``predict``.
        test_files: Passed to the 'CoreNLP_sentence_info' featurizer.
        test_ids: Passed to every other featurizer.
        CoreNLP_train_data: Unused here.
        CoreNLP_test_data: CoreNLP data handed to the CoreNLP-based featurizers.
        F: Mapping read at the keys 'LIWC', 'MRC_bag_of_words',
            'dependency_relations' and 'production_rules'.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        The result of ``majority_vote`` over the three prediction vectors.

    Raises:
        AssertionError: If the three prediction vectors and the combined
            result do not all have the same length.
    """
    model_a, model_b, model_c = model

    featurize = features.featurize
    blocks = []
    # Disabled feature block, kept for reference:
    # features.featurize('binary_bag_of_words', F['bag_of_words'], test_ids)
    blocks.append(
        featurize('CoreNLP_sentence_info', None, test_files, CoreNLP_test_data))
    blocks.append(featurize('liwc', F['LIWC'], test_ids))
    blocks.append(
        featurize('MRC_bag_of_words', F['MRC_bag_of_words'], test_ids,
                  project.CoreNLP.tokens_with_key(CoreNLP_test_data), binary=True))
    blocks.append(
        featurize('dependency_relations', F['dependency_relations'], test_ids,
                  CoreNLP_test_data, binary=True))
    blocks.append(
        featurize('production_rules', F['production_rules'], test_ids,
                  CoreNLP_test_data, binary=True))
    X = np.hstack(blocks)

    # Only the second model sees the scaled matrix.
    votes_a = model_a.predict(X)
    votes_b = model_b.predict(preprocessing.scale(X))
    votes_c = model_c.predict(X)
    combined = majority_vote(votes_a, votes_b, votes_c)

    # Sanity check: every vote vector and the result cover the same samples.
    assert len(votes_a) == len(votes_b) == len(votes_c) == len(combined)

    return combined
Ejemplo n.º 5
0
def predict(model, test_files, test_ids, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    """Predict labels for the test documents with one fitted classifier.

    Builds five feature blocks via ``features.featurize``, joins them
    column-wise, applies ``preprocessing.scale`` and returns the
    classifier's predictions for the result.

    Args:
        model: 1-tuple whose only element exposes ``predict``.
        test_files: Passed to the 'CoreNLP_sentence_info' featurizer.
        test_ids: Passed to every other featurizer.
        CoreNLP_train_data: Unused here.
        CoreNLP_test_data: CoreNLP data handed to the CoreNLP-based featurizers.
        F: Mapping read at the keys 'LIWC', 'MRC_bag_of_words',
            'dependency_relations' and 'production_rules'.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever the classifier's ``predict`` returns for the scaled matrix.
    """
    (clf,) = model

    featurize = features.featurize
    columns = []
    # Disabled feature block, kept for reference:
    # features.featurize('binary_bag_of_words', F['bag_of_words'], test_ids)
    columns.append(
        featurize('CoreNLP_sentence_info', None, test_files, CoreNLP_test_data))
    columns.append(featurize('liwc', F['LIWC'], test_ids))
    columns.append(
        featurize('MRC_bag_of_words', F['MRC_bag_of_words'], test_ids,
                  project.CoreNLP.tokens_with_key(CoreNLP_test_data), binary=True))
    columns.append(
        featurize('dependency_relations', F['dependency_relations'], test_ids,
                  CoreNLP_test_data, binary=True))
    columns.append(
        featurize('production_rules', F['production_rules'], test_ids,
                  CoreNLP_test_data, binary=True))

    # NOTE(review): if `preprocessing` is scikit-learn's module, scale()
    # standardizes with this matrix's own statistics, not the training
    # set's -- confirm that is intended.
    X = preprocessing.scale(np.hstack(columns))

    return clf.predict(X)
Ejemplo n.º 6
0
def train(train_files, train_ids, Y, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    """Train a ``svm.LinearSVC`` on scaled, column-stacked training features.

    Builds five feature blocks via ``features.featurize``, joins them
    column-wise, applies ``preprocessing.scale`` and fits the classifier on
    the result.

    Args:
        train_files: Passed to the 'CoreNLP_sentence_info' featurizer.
        train_ids: Passed to every other featurizer.
        Y: Training targets handed to ``fit``.
        CoreNLP_train_data: CoreNLP data handed to the CoreNLP-based
            featurizers.
        CoreNLP_test_data: Unused here.
        F: Mapping read at the keys 'LIWC', 'MRC_bag_of_words',
            'dependency_relations' and 'production_rules'.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        1-tuple containing the fitted classifier.
    """
    featurize = features.featurize
    columns = []
    # Disabled feature block, kept for reference:
    # features.featurize('binary_bag_of_words', F['bag_of_words'], train_ids)
    columns.append(
        featurize('CoreNLP_sentence_info', None, train_files, CoreNLP_train_data))
    columns.append(featurize('liwc', F['LIWC'], train_ids))
    columns.append(
        featurize('MRC_bag_of_words', F['MRC_bag_of_words'], train_ids,
                  project.CoreNLP.tokens_with_key(CoreNLP_train_data), binary=True))
    columns.append(
        featurize('dependency_relations', F['dependency_relations'], train_ids,
                  CoreNLP_train_data, binary=True))
    columns.append(
        featurize('production_rules', F['production_rules'], train_ids,
                  CoreNLP_train_data, binary=True))

    X = preprocessing.scale(np.hstack(columns))

    svc = svm.LinearSVC()
    svc.fit(X, Y)

    return (svc,)
Ejemplo n.º 7
0
def predict(model, test_files, test_ids, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    """Predict labels for the test documents with the supplied fitted model.

    Four feature blocks are computed through ``features.featurize``, stacked
    column-wise with ``np.hstack`` (no scaling is applied) and handed to the
    model's ``predict`` method.

    Args:
        model: 1-tuple whose only element exposes ``predict``.
        test_files: Passed to the 'CoreNLP_sentence_info' featurizer.
        test_ids: Passed to every other featurizer.
        CoreNLP_train_data: Unused here.
        CoreNLP_test_data: CoreNLP data handed to the CoreNLP-based featurizers.
        F: Mapping read at the keys 'LIWC', 'MRC_bag_of_words' and
            'production_rules'.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever the model's ``predict`` returns for the feature matrix.
    """
    (M,) = model

    X = np.hstack([
        # Disabled feature block, kept for reference:
        # features.featurize('binary_bag_of_words', F['bag_of_words'], test_ids)
        features.featurize('CoreNLP_sentence_info', None, test_files, CoreNLP_test_data),
        features.featurize('liwc', F['LIWC'], test_ids),
        features.featurize('MRC_bag_of_words', F['MRC_bag_of_words'], test_ids,
                           project.CoreNLP.tokens_with_key(CoreNLP_test_data), binary=True),
        features.featurize('production_rules', F['production_rules'], test_ids,
                           CoreNLP_test_data, binary=True),
    ])

    info("Predicting...")

    # Use the model passed in by the caller; the name `estimator` is not
    # defined in this function.
    return M.predict(X)
Ejemplo n.º 8
0
def train(train_files, train_ids, Y, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    """Fit the three models of the voting ensemble on the training features.

    Five feature blocks are computed through ``features.featurize`` and
    stacked column-wise. ``LogisticRegression`` and ``BernoulliNB`` are
    fitted on the raw matrix; ``LinearSVC`` is fitted on
    ``preprocessing.scale`` of it.

    Args:
        train_files: Passed to the 'CoreNLP_sentence_info' featurizer.
        train_ids: Passed to every other featurizer.
        Y: Training targets handed to each ``fit`` call.
        CoreNLP_train_data: CoreNLP data handed to the CoreNLP-based
            featurizers.
        CoreNLP_test_data: Unused here.
        F: Mapping read at the keys 'LIWC', 'MRC_bag_of_words',
            'dependency_relations' and 'production_rules'.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        3-tuple ``(logistic regression, linear SVC, Bernoulli NB)`` of the
        fitted models, in that order.
    """
    # Disabled feature block, kept for reference:
    # features.featurize('binary_bag_of_words', F['bag_of_words'], train_ids)
    sentence_info = features.featurize(
        'CoreNLP_sentence_info', None, train_files, CoreNLP_train_data)
    liwc = features.featurize('liwc', F['LIWC'], train_ids)
    mrc_words = features.featurize(
        'MRC_bag_of_words', F['MRC_bag_of_words'], train_ids,
        project.CoreNLP.tokens_with_key(CoreNLP_train_data), binary=True)
    dependencies = features.featurize(
        'dependency_relations', F['dependency_relations'], train_ids,
        CoreNLP_train_data, binary=True)
    productions = features.featurize(
        'production_rules', F['production_rules'], train_ids,
        CoreNLP_train_data, binary=True)

    X = np.hstack(
        [sentence_info, liwc, mrc_words, dependencies, productions])

    ensemble = (LogisticRegression(), LinearSVC(), BernoulliNB())
    logistic, linear_svc, naive_bayes = ensemble

    logistic.fit(X, Y)
    # Only the linear SVC is trained on the scaled matrix.
    linear_svc.fit(preprocessing.scale(X), Y)
    naive_bayes.fit(X, Y)

    return ensemble