def classify(self, input_text=None):
    """Train a Naive Bayes classifier on the corpora and label a text.

    For every language in ``self.languages`` the corpus found under
    ``self.configs['dirs']['CORPUS_DIR']/<language>`` is read with
    ``get_corpus``; tokens shorter than two characters and tokens present
    in that language's NLTK stop-word list are dropped, and the remaining
    tokens are lower-cased and paired with the language as their label.

    Side effect: ``self.word_features`` is rebuilt from the training
    tokens on every call (``self.extract`` is used as the feature
    extractor both for training and for the prediction).

    :param input_text: text to classify. When omitted or falsy (``None``
        or empty), ``self.input_text`` is classified instead.
    :return: the label chosen by the classifier, i.e. one of the values
        of ``self.languages`` used to tag the training data.
    """
    training_text = []
    for language in self.languages:
        corpusdir = os.path.join(self.configs['dirs']['CORPUS_DIR'], language)
        # A set gives O(1) membership tests; the stop-word list is probed
        # once per corpus token.
        stopwrds = set(stopwords.words(language))
        # NOTE(review): the stop-word test runs on the token *before* it is
        # lower-cased, so capitalised stop words are presumably kept --
        # confirm whether that is intended.
        training_text.extend(
            (w.lower(), language)
            for w in get_corpus(corpusdir)
            if len(w) >= 2 and w not in stopwrds
        )

    self.word_features = self.word_feats(self.get_words_in(training_text))
    training_set = nltk.classify.apply_features(self.extract, training_text)
    # Train exactly once (the classifier used to be trained twice in a row
    # on the same training set, which only doubled the cost).
    classifier = nltk.NaiveBayesClassifier.train(training_set)

    # Honour the explicit argument; previously it was computed into a local
    # that was never used and self.input_text was always classified.
    text = input_text if input_text else self.input_text
    return classifier.classify(self.extract(text))
def main():
    """Classify the bundled German "Snow Queen" sample and print the label.

    Loads ``snow_queen_german.txt`` through ``get_corpus``, builds an
    ``OracleClassifier`` configured from ``configs/dirs.json`` under the
    current working directory, and prints the result of ``classify()``.
    """
    # NOTE(review): both absolute paths below are hard-coded and point at
    # two different home directories -- confirm they exist on the target
    # machine or make them configurable.
    sample_dir = "/Users/spiridoulaoregan/nltk_data/test_data"
    snow_queen = get_corpus(corpusdir=sample_dir,
                            filename="snow_queen_german.txt")

    # Constructor arguments: root_dir, input_text, config_dirs
    config_path = os.path.join(os.getcwd(), "configs", "dirs.json")
    oc = OracleClassifier(root_dir="/home/roulaoregan/algo/nltk_data",
                          input_text=snow_queen,
                          config_dirs=config_path)

    # Single-argument parenthesised form: valid on Python 2 and Python 3,
    # and emits the same text the old `print "classified: ", value`
    # statement did (label, the separating space, then the value).
    print("classified:  %s" % (oc.classify(),))
def main():
    """Classify the bundled German "Snow Queen" sample and print the label.

    Loads ``snow_queen_german.txt`` through ``get_corpus``, builds an
    ``OracleClassifier`` configured from ``configs/dirs.json`` under the
    current working directory, and prints the result of ``classify()``.

    NOTE(review): this is a second definition of ``main`` in the same
    module; it rebinds the name and shadows the ``main`` defined directly
    before it. One of the two definitions can be removed.
    """
    # NOTE(review): both absolute paths below are hard-coded and point at
    # two different home directories -- confirm they exist on the target
    # machine or make them configurable.
    sample_dir = "/Users/spiridoulaoregan/nltk_data/test_data"
    snow_queen = get_corpus(corpusdir=sample_dir,
                            filename="snow_queen_german.txt")

    # Constructor arguments: root_dir, input_text, config_dirs
    config_path = os.path.join(os.getcwd(), "configs", "dirs.json")
    oc = OracleClassifier(root_dir="/home/roulaoregan/algo/nltk_data",
                          input_text=snow_queen,
                          config_dirs=config_path)

    # Single-argument parenthesised form: valid on Python 2 and Python 3,
    # and emits the same text the old `print "classified: ", value`
    # statement did (label, the separating space, then the value).
    print("classified:  %s" % (oc.classify(),))