def setUp(self):
    """Build the train/test split and the shared vocabulary fixture.

    Exposes ``self.train``/``self.test`` (seeded split), ``self.realTest``
    (the held-out test corpus), ``self.corpus`` (vocabulary over all
    phrases) and ``self.size`` (vector size used by the tests).
    """
    self.train, self.test = corpus.make_train_test_split("mySeed")
    # Materialize the test corpus: it is consumed as a list elsewhere
    # (see test_iter_test_corpus_simple) and the `+` concatenation below
    # requires a real sequence — a one-shot iterator would fail here or
    # be exhausted after first use.
    self.realTest = list(corpus.iter_test_corpus())
    self.corpus = buildVocabulary(
        getPhrases(self.train + self.test + self.realTest))
    self.size = 50
def test_iter_test_corpus_simple(self):
    """The toy test corpus yields exactly the four expected datapoints."""
    datapoints = list(corpus.iter_test_corpus())
    self.assertEqual(len(datapoints), 4)
    self.assertEqual(set("1 99 100 123".split()),
                     {d.phraseid for d in datapoints})
    self.assertEqual(set("4 7 8 9".split()),
                     {d.sentenceid for d in datapoints})
    self.assertIn("yo mama so fat", [d.phrase for d in datapoints])
    # Test-set datapoints carry no gold sentiment label.
    self.assertEqual({None}, {d.sentiment for d in datapoints})
def setUp(self):
    """Prepare the corpus fixtures shared by the tests in this case."""
    self.train, self.test = corpus.make_train_test_split("mySeed")
    # list() guards against iter_test_corpus() returning a one-shot
    # iterator: the data is concatenated (and therefore reused) below.
    self.realTest = list(corpus.iter_test_corpus())
    all_data = self.train + self.test + self.realTest
    self.corpus = buildVocabulary(getPhrases(all_data))
    # Vector size used by the tests in this case.
    self.size = 50
if __name__ == "__main__":
    # Train a predictor from a JSON config, time fit/predict, and emit a
    # Kaggle-style "PhraseId,Sentiment" CSV on stdout.
    import argparse
    import csv
    import json
    import sys
    import time  # fix: `time` was used below but never imported

    from samr.corpus import iter_corpus, iter_test_corpus
    from samr.predictor import PhraseSentimentPredictor

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filename")
    args = parser.parse_args()
    # fix: close the config file deterministically instead of leaking it.
    with open(args.filename) as config_file:
        config = json.load(config_file)

    t1 = int(round(time.time() * 1000))
    predictor = PhraseSentimentPredictor(**config)
    predictor.fit(list(iter_corpus()))
    t2 = int(round(time.time() * 1000))

    test = list(iter_test_corpus())
    prediction = predictor.predict2(test)
    t3 = int(round(time.time() * 1000))

    # fix: report timings on stderr — stdout carries the CSV submission,
    # and the original `print t2-t1, t3-t2` (Python-2-only syntax) would
    # have corrupted it with a stray line.
    sys.stderr.write("%d %d\n" % (t2 - t1, t3 - t2))

    writer = csv.writer(sys.stdout)
    writer.writerow(("PhraseId", "Sentiment"))
    for datapoint, sentiment in zip(test, prediction):
        writer.writerow((datapoint.phraseid, sentiment))
value = float(value) except ValueError: pass new[key] = value return new if __name__ == "__main__": import argparse import json import csv import sys from samr.corpus import iter_corpus, iter_test_corpus from samr.predictor import PhraseSentimentPredictor parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("filename") config = parser.parse_args() config = json.load(open(config.filename)) predictor = PhraseSentimentPredictor(**config) predictor.fit(list(iter_corpus())) test = list(iter_test_corpus()) prediction = predictor.predict(test) writer = csv.writer(sys.stdout) writer.writerow(("PhraseId", "Sentiment")) for datapoint, sentiment in zip(test, prediction): writer.writerow((datapoint.phraseid, sentiment))