def trainPosNeg(self):
    """Train and return a positive/negative text classifier.

    Every file found by the corpus readers rooted at ``./positive`` and
    ``./negative`` is read in full, split on single spaces, converted to
    features with ``self.getBigrams`` and paired with the label
    ``'positive'`` or ``'negative'``.

    Returns:
        The ``SklearnClassifier`` wrapping a ``LinearSVC`` after it has
        been trained on the combined labelled feature sets (positive
        examples first, then negative).
    """
    # NOTE(review): ``ptr`` is presumably NLTK's PlaintextCorpusReader and
    # '.*' a pattern selecting every file in the directory -- confirm
    # against the file's imports.
    pos_corpus = ptr("./positive", '.*')
    neg_corpus = ptr("./negative", '.*')

    # One token list per file. The split is on a single space only, so
    # other whitespace stays inside the tokens.
    pos_tokens = [
        pos_corpus.raw(doc_id).split(" ") for doc_id in pos_corpus.fileids()
    ]
    neg_tokens = [
        neg_corpus.raw(doc_id).split(" ") for doc_id in neg_corpus.fileids()
    ]

    # (features, label) pairs in the order the classifier is trained on.
    labelled_feats = [
        (self.getBigrams(tokens), 'positive') for tokens in pos_tokens
    ]
    labelled_feats += [
        (self.getBigrams(tokens), 'negative') for tokens in neg_tokens
    ]

    svm = SklearnClassifier(LinearSVC())
    svm.train(labelled_feats)
    return svm
def trainSubObj(self):
    """Train and return a subjective/objective text classifier.

    Every file found by the corpus readers rooted at ``./subjective`` and
    ``./objective`` is read in full, split on single spaces, converted to
    features with ``self.getBigrams`` and paired with the label
    ``'subjective'`` or ``'objective'``.

    Returns:
        The ``SklearnClassifier`` wrapping a ``LinearSVC`` after it has
        been trained on the combined labelled feature sets (subjective
        examples first, then objective).
    """
    # NOTE(review): ``ptr`` is presumably NLTK's PlaintextCorpusReader and
    # '.*' a pattern selecting every file in the directory -- confirm
    # against the file's imports.
    sub_corpus = ptr("./subjective", '.*')
    obj_corpus = ptr("./objective", '.*')

    # One token list per file. The split is on a single space only, so
    # other whitespace stays inside the tokens.
    sub_tokens = [
        sub_corpus.raw(doc_id).split(" ") for doc_id in sub_corpus.fileids()
    ]
    obj_tokens = [
        obj_corpus.raw(doc_id).split(" ") for doc_id in obj_corpus.fileids()
    ]

    # (features, label) pairs in the order the classifier is trained on.
    labelled_feats = [
        (self.getBigrams(tokens), 'subjective') for tokens in sub_tokens
    ]
    labelled_feats += [
        (self.getBigrams(tokens), 'objective') for tokens in obj_tokens
    ]

    # Linear-kernel SVM left at its default parameters.
    svm = SklearnClassifier(LinearSVC())
    svm.train(labelled_feats)
    return svm