def __init__(self, start_train, end_train, start_test, end_test):
    """Build an empty instance and record the train/test bounds.

    All containers start out empty and the model slots start as None.
    The four bounds are stored on the instance unchanged.
    NOTE(review): the unit of the bounds (e.g. corpus section numbers) is
    not visible here -- confirm against the callers.
    """
    # Helper objects; built in this order because their constructors are
    # defined elsewhere and may have side effects.
    self.sentences = vpe.AllSentences()
    self.annotations = vpe.Annotations()
    self.file_names = Files()
    self.all_auxiliaries = vpe.Auxiliaries()
    self.gold_standard_auxs = vpe.Auxiliaries()

    self.hyperplane = None
    self.features = []

    # Train and test_vectors are lists of csr_matrices in order to save
    # memory.
    self.m = self.m2 = None
    self.train_vectors, self.train_classes = [], []
    self.test_vectors, self.test_classes = [], []

    # Outputs filled in later; each attribute gets its own fresh list.
    self.predictions, self.result_vector = [], []
    self.pre_oversample_length = 0

    # Bounds of the training and test ranges, exactly as passed in.
    self.start_train, self.end_train = start_train, end_train
    self.start_test, self.end_test = start_test, end_test
from numpy import dot
from copy import copy, deepcopy
from random import shuffle

# Lemma lists, one per auxiliary category.
MODALS = [ 'can', 'could', 'may', 'must', 'might', 'will', 'would', 'shall', 'should' ]
BE = ['be']
HAVE = ['have']
DO = ['do']
TO = ['to']
SO = ['so', 'same', 'likewise', 'opposite']

# Flat list of every lemma above (concatenated in category order).
AUX_LEMMAS = MODALS + BE + HAVE + DO + TO + SO

# The same categories kept as separate lists; the elements are the very list
# objects defined above, not copies.
ALL_CATEGORIES = [MODALS, BE, HAVE, DO, TO, SO]

# Evaluated at import time: builds a Files object and asks it to read
# Files.UNIQUE_AUXILIARIES_FILE.
# NOTE(review): `Files` is not imported in the visible part of this module --
# confirm it is in scope before this line runs.
ALL_AUXILIARIES = Files().extract_data_from_file(Files.UNIQUE_AUXILIARIES_FILE)

# Placeholder string; presumably marks a missing dependency -- TODO confirm
# against the code that reads it.
EMPTY_DEP = 'NONE'

""" ---- Exception classes. ---- """

# NOTE(review): the exceptions below derive from BaseException, so an
# `except Exception:` handler will not catch them.

class AuxiliaryHasNoTypeException(BaseException):
    """Exception whose constructor reports an auxiliary that has no category."""

    def __init__(self, aux_name):
        """Print a message naming `aux_name`.

        The base-class initializer is not called, so the message is only
        printed and is not stored on the exception instance.
        """
        print 'The following auxiliary, %s, has no category!' % aux_name

class EmptySentDictException(BaseException):
    """Exception that takes no arguments and carries no message."""

    def __init__(self):
        """Do nothing; the base-class initializer is not called."""
        pass

class GoldStandardComesFromRawException(BaseException):
import word_characteristics as wc
import numpy as np
import nltktree as nt
import warnings
from file_names import Files
from os import listdir
from sys import argv
# NOTE(review): sklearn.cross_validation was deprecated in scikit-learn 0.18
# and removed in 0.20 (KFold now lives in sklearn.model_selection, with a
# different constructor signature). This import pins the file to old releases.
from sklearn.cross_validation import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.ensemble import RandomForestClassifier
from scipy.sparse import csr_matrix, vstack

# Module-level Files instance, created at import time.
files = Files()

# Data file names / locations. The first is a bare file name and the second is
# relative ('../npy_data/...'), so both depend on the working directory.
MRG_DATA_FILE = 'dataset_with_features_ALL_AUXS.npy'
AUTO_PARSE_FILE = '../npy_data/auto_parse_with_features_FULL_DATASET.npy'

# NOTE(review): absolute, machine-specific path -- will not exist on other
# machines.
AUTO_PARSE_XML_DIR = '/Users/kian/Documents/HONOR/xml_annotations/raw_auto_parse/'

class Dataset(object):
    def __init__(self):
        """Create an empty dataset: five empty lists plus `section_ends`."""
        self.sentences = []
        self.auxs = []
        self.gold_auxs = []
        self.X = []
        self.Y = []
        # One entry for each integer key 0..24, all initialised to -1.
        # Presumably keyed by corpus section number, with -1 meaning "no end
        # index recorded yet" -- TODO confirm against add().
        self.section_ends = {k: -1 for k in range(0, 25)}

    def add(self, section):