def predict(model, test_files, test_ids, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    (M, ) = model
    X = preprocessing.scale(
        np.hstack([
            # features.featurize('binary_bag_of_words', F['bag_of_words'], test_ids)
            features.featurize('CoreNLP_sentence_info', None, test_files, CoreNLP_test_data),
            features.featurize('liwc', F['LIWC'], test_ids),
            features.featurize(
                'MRC_bag_of_words', F['MRC_bag_of_words'], test_ids,
                project.CoreNLP.tokens_with_key(CoreNLP_test_data), binary=True),
            features.featurize('dependency_relations', F['dependency_relations'], test_ids,
                               CoreNLP_test_data, binary=True),
            features.featurize('production_rules', F['production_rules'], test_ids,
                               CoreNLP_test_data, binary=True)
        ]))
    return M.predict(X)
def train(train_files, train_ids, Y, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    X = np.hstack([
        # features.featurize('binary_bag_of_words', F['bag_of_words'], train_ids)
        features.featurize('CoreNLP_sentence_info', None, train_files, CoreNLP_train_data),
        features.featurize('liwc', F['LIWC'], train_ids),
        features.featurize('MRC_bag_of_words', F['MRC_bag_of_words'], train_ids,
                           project.CoreNLP.tokens_with_key(CoreNLP_train_data), binary=True),
        features.featurize('production_rules', F['production_rules'], train_ids,
                           CoreNLP_train_data, binary=True)
    ])

    pca = PCA()
    M = LogisticRegression()

    # This standalone fit is only for inspecting the PCA; the grid search below
    # clones and refits the whole pipeline for every candidate setting.
    info("> Running PCA...")
    pca.fit(X)

    pipe = Pipeline(steps=[('pca', pca), ('logistic', M)])
    n_components = [25, 50, 100, 250, 500]
    Cs = np.logspace(-4, 4, 3)

    info("Running gridsearch...")
    estimator = GridSearchCV(pipe, {'pca__n_components': n_components, 'logistic__C': Cs})

    info("Fitting...")
    estimator.fit(X, Y)

    return (estimator,)
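# Usage sketch (an assumption, not part of the project): after train() returns,
# the fitted GridSearchCV object exposes the hyper-parameters it selected and the
# corresponding cross-validation score, which can be logged with the project's
# info() helper. The name report_grid_search is hypothetical.
def report_grid_search(model):
    (estimator,) = model
    info("Best params: %s" % (estimator.best_params_,))
    info("Best CV score: %.3f" % estimator.best_score_)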
def train(train_files, train_ids, Y, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    X = preprocessing.scale(
        np.hstack([
            # features.featurize('binary_bag_of_words', F['bag_of_words'], train_ids)
            features.featurize('CoreNLP_sentence_info', None, train_files, CoreNLP_train_data),
            features.featurize('liwc', F['LIWC'], train_ids),
            features.featurize(
                'MRC_bag_of_words', F['MRC_bag_of_words'], train_ids,
                project.CoreNLP.tokens_with_key(CoreNLP_train_data), binary=True),
            features.featurize('dependency_relations', F['dependency_relations'], train_ids,
                               CoreNLP_train_data, binary=True),
            features.featurize('production_rules', F['production_rules'], train_ids,
                               CoreNLP_train_data, binary=True)
        ]))

    M = svm.LinearSVC()
    M.fit(X, Y)

    return (M, )
def predict(model, test_files, test_ids, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    (M1, M2, M3) = model
    X = np.hstack([
        # features.featurize('binary_bag_of_words', F['bag_of_words'], test_ids)
        features.featurize('CoreNLP_sentence_info', None, test_files, CoreNLP_test_data),
        features.featurize('liwc', F['LIWC'], test_ids),
        features.featurize('MRC_bag_of_words', F['MRC_bag_of_words'], test_ids,
                           project.CoreNLP.tokens_with_key(CoreNLP_test_data), binary=True),
        features.featurize('dependency_relations', F['dependency_relations'], test_ids,
                           CoreNLP_test_data, binary=True),
        features.featurize('production_rules', F['production_rules'], test_ids,
                           CoreNLP_test_data, binary=True)
    ])

    # M2 was trained on scaled features, so scale here as well; M1 and M3 were not.
    Y1 = M1.predict(X)
    Y2 = M2.predict(preprocessing.scale(X))
    Y3 = M3.predict(X)

    # Combine the three classifiers by majority vote (see the sketch below).
    Y = majority_vote(Y1, Y2, Y3)
    assert len(Y1) == len(Y2) == len(Y3) == len(Y)

    return Y
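# majority_vote() is a project helper not shown in this section. A minimal sketch
# of the element-wise majority it presumably computes is below; this is an
# assumption (hence the _sketch suffix), and the real helper may break ties
# differently. Here ties fall to the label seen first.
from collections import Counter

def majority_vote_sketch(*label_vectors):
    # Each argument is a 1-D array of predicted labels; vote position by position.
    return np.array([Counter(votes).most_common(1)[0][0]
                     for votes in zip(*label_vectors)])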
def predict(model, test_files, test_ids, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    (M,) = model
    X = preprocessing.scale(np.hstack([
        # features.featurize('binary_bag_of_words', F['bag_of_words'], test_ids)
        features.featurize('CoreNLP_sentence_info', None, test_files, CoreNLP_test_data),
        features.featurize('liwc', F['LIWC'], test_ids),
        features.featurize('MRC_bag_of_words', F['MRC_bag_of_words'], test_ids,
                           project.CoreNLP.tokens_with_key(CoreNLP_test_data), binary=True),
        features.featurize('dependency_relations', F['dependency_relations'], test_ids,
                           CoreNLP_test_data, binary=True),
        features.featurize('production_rules', F['production_rules'], test_ids,
                           CoreNLP_test_data, binary=True)
    ]))
    return M.predict(X)
def train(train_files, train_ids, Y, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    X = preprocessing.scale(np.hstack([
        # features.featurize('binary_bag_of_words', F['bag_of_words'], train_ids)
        features.featurize('CoreNLP_sentence_info', None, train_files, CoreNLP_train_data),
        features.featurize('liwc', F['LIWC'], train_ids),
        features.featurize('MRC_bag_of_words', F['MRC_bag_of_words'], train_ids,
                           project.CoreNLP.tokens_with_key(CoreNLP_train_data), binary=True),
        features.featurize('dependency_relations', F['dependency_relations'], train_ids,
                           CoreNLP_train_data, binary=True),
        features.featurize('production_rules', F['production_rules'], train_ids,
                           CoreNLP_train_data, binary=True)
    ]))

    M = svm.LinearSVC()
    M.fit(X, Y)

    return (M,)
def predict(model, test_files, test_ids, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    (M,) = model
    X = np.hstack([
        # features.featurize('binary_bag_of_words', F['bag_of_words'], test_ids)
        features.featurize('CoreNLP_sentence_info', None, test_files, CoreNLP_test_data),
        features.featurize('liwc', F['LIWC'], test_ids),
        features.featurize('MRC_bag_of_words', F['MRC_bag_of_words'], test_ids,
                           project.CoreNLP.tokens_with_key(CoreNLP_test_data), binary=True),
        features.featurize('production_rules', F['production_rules'], test_ids,
                           CoreNLP_test_data, binary=True)
    ])

    info("Predicting...")
    # M is the fitted grid-search estimator returned by train.
    return M.predict(X)
def train(train_files, train_ids, Y, CoreNLP_train_data, CoreNLP_test_data, F, *args, **kwargs):
    X = np.hstack([
        # features.featurize('binary_bag_of_words', F['bag_of_words'], train_ids)
        features.featurize('CoreNLP_sentence_info', None, train_files, CoreNLP_train_data),
        features.featurize('liwc', F['LIWC'], train_ids),
        features.featurize('MRC_bag_of_words', F['MRC_bag_of_words'], train_ids,
                           project.CoreNLP.tokens_with_key(CoreNLP_train_data), binary=True),
        features.featurize('dependency_relations', F['dependency_relations'], train_ids,
                           CoreNLP_train_data, binary=True),
        features.featurize('production_rules', F['production_rules'], train_ids,
                           CoreNLP_train_data, binary=True)
    ])

    M1 = LogisticRegression()
    M2 = LinearSVC()
    M3 = BernoulliNB()

    # LinearSVC is fit on scaled features; LogisticRegression and BernoulliNB on the raw matrix.
    M1.fit(X, Y)
    M2.fit(preprocessing.scale(X), Y)
    M3.fit(X, Y)

    return (M1, M2, M3)
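# The functions above assume module-level imports along these lines. The numpy
# and scikit-learn imports are standard; `features` and `project.CoreNLP` are
# project modules whose names are taken from the call sites, and info() /
# majority_vote() are additional project helpers whose modules are not shown
# here, so these import lines are assumptions rather than the project's actual
# headers.
import numpy as np
from sklearn import preprocessing, svm
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV  # sklearn.grid_search on older scikit-learn
from sklearn.naive_bayes import BernoulliNB
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

import features         # project feature-extraction module (assumed name)
import project.CoreNLP  # project CoreNLP wrapper (assumed name)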