def setUp(self):
    self.train, self.test = corpus.make_train_test_split("mySeed")
    # iter_test_corpus() returns an iterator, so materialise it before
    # concatenating it with the train/test lists below.
    self.realTest = list(corpus.iter_test_corpus())
    self.corpus = buildVocabulary(getPhrases(self.train + self.test + self.realTest))
    # self.training = buildVocabulary(getPhrases(self.train))
    # self.testing = buildVocabulary(getPhrases(self.test))
    self.size = 50
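
buildVocabulary and getPhrases are not defined anywhere in this listing, so the sketch below is only a guess at their intent (the names come from the snippet above, the bodies are assumptions): pull the raw phrase text out of each datapoint and map every distinct token to an integer index.

def getPhrases(datapoints):
    # Collect the raw phrase text of every corpus datapoint.
    return [dp.phrase for dp in datapoints]


def buildVocabulary(phrases):
    # Assign a stable integer index to each distinct lowercase token.
    vocab = {}
    for phrase in phrases:
        for token in phrase.lower().split():
            vocab.setdefault(token, len(vocab))
    return vocab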
Example #2
File: test_corpus.py Project: DeeeFOX/samr
def test_iter_test_corpus_simple(self):
    # iter_test_corpus() should yield exactly 4 datapoints in this fixture.
    test = list(corpus.iter_test_corpus())
    self.assertEqual(len(test), 4)
    self.assertEqual(set("1 99 100 123".split()), set(x.phraseid for x in test))
    self.assertEqual(set("4 7 8 9".split()), set(x.sentenceid for x in test))
    self.assertIn("yo mama so fat", [x.phrase for x in test])
    # Test-set datapoints carry no sentiment label.
    self.assertEqual(set([None]), set(x.sentiment for x in test))
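
The assertions above rely on every item yielded by corpus.iter_test_corpus() exposing phraseid, sentenceid, phrase and sentiment attributes, with sentiment left as None for the unlabeled test set. A minimal sketch of such a record, assuming a namedtuple along the lines of samr's Datapoint, is:

from collections import namedtuple

# Hypothetical record with exactly the fields the test inspects; the values
# below are illustrative, not taken from the real test fixture.
Datapoint = namedtuple("Datapoint", "phraseid sentenceid phrase sentiment")

sample = Datapoint(phraseid="1", sentenceid="4", phrase="an unlabeled phrase", sentiment=None)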
Example #5
if __name__ == "__main__":
    import argparse
    import json
    import csv
    import sys
    import time

    from samr.corpus import iter_corpus, iter_test_corpus
    from samr.predictor import PhraseSentimentPredictor

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filename")
    args = parser.parse_args()
    with open(args.filename) as f:
        config = json.load(f)

    # Time fitting the predictor on the full labeled corpus (milliseconds).
    t1 = int(round(time.time() * 1000))
    predictor = PhraseSentimentPredictor(**config)
    predictor.fit(list(iter_corpus()))
    t2 = int(round(time.time() * 1000))

    # Time prediction over the unlabeled test corpus.
    test = list(iter_test_corpus())
    prediction = predictor.predict2(test)
    t3 = int(round(time.time() * 1000))

    print(t2 - t1, t3 - t2)

    # Emit a Kaggle-style submission as CSV on stdout.
    writer = csv.writer(sys.stdout)
    writer.writerow(("PhraseId", "Sentiment"))
    for datapoint, sentiment in zip(test, prediction):
        writer.writerow((datapoint.phraseid, sentiment))
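
One detail worth noting about the script above: the timing line is printed to stdout, the same stream the CSV writer uses, so redirecting the script's output into a submission file also captures the two timings. A small variation (an assumption about the intent, not part of the original) reports the timings on stderr so that stdout stays a valid CSV:

import sys
import time

t1 = int(round(time.time() * 1000))
# ... fit the predictor and run the prediction here, as in the block above ...
t2 = int(round(time.time() * 1000))

# stderr keeps the timing out of the redirected CSV submission.
print("elapsed: {} ms".format(t2 - t1), file=sys.stderr)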
Example #6
                    value = float(value)
                except ValueError:
                    pass
        new[key] = value
    return new
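
# The snippet above starts mid-function; its opening lines are not part of this
# listing. Purely as an illustration (the name coerce_values and the exact
# behaviour are assumptions, not the original code), a self-contained helper
# with the same tail could look like this: walk a flat dict and turn
# numeric-looking string values into floats, leaving everything else untouched.
def coerce_values(config):
    new = {}
    for key, value in config.items():
        if isinstance(value, str):
            try:
                value = float(value)
            except ValueError:
                # Not a number: keep the original string.
                pass
        new[key] = value
    return new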


if __name__ == "__main__":
    import argparse
    import json
    import csv
    import sys

    from samr.corpus import iter_corpus, iter_test_corpus
    from samr.predictor import PhraseSentimentPredictor

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filename")
    args = parser.parse_args()
    with open(args.filename) as f:
        config = json.load(f)

    # Fit on the labeled corpus, then predict over the unlabeled test corpus.
    predictor = PhraseSentimentPredictor(**config)
    predictor.fit(list(iter_corpus()))
    test = list(iter_test_corpus())
    prediction = predictor.predict(test)

    # Emit a Kaggle-style submission as CSV on stdout.
    writer = csv.writer(sys.stdout)
    writer.writerow(("PhraseId", "Sentiment"))
    for datapoint, sentiment in zip(test, prediction):
        writer.writerow((datapoint.phraseid, sentiment))
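
Writing the submission to sys.stdout leaves the choice of destination to the caller, who typically redirects stdout into a file. If writing straight to a file is preferred, the tail of the script could be replaced by something like the following sketch (submission.csv is an illustrative name; test and prediction are reused from the script above):

import csv

# Sketch: write the submission to a file instead of stdout.
with open("submission.csv", "w", newline="") as out:
    writer = csv.writer(out)
    writer.writerow(("PhraseId", "Sentiment"))
    for datapoint, sentiment in zip(test, prediction):
        writer.writerow((datapoint.phraseid, sentiment))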