    def test_simple_duplicates(self):
        dupe = Datapoint(phraseid="a", sentenceid="b", phrase="b a", sentiment="1")
        # Train has a lot of "2" sentiments
        train = [Datapoint(phraseid=str(i),
                           sentenceid=str(i),
                           phrase="a b",
                           sentiment="2") for i in range(10)]
        train.append(dupe)
        test = [Datapoint(*dupe)]
        predictor = PhraseSentimentPredictor(duplicates=True)
        predictor.fit(train)
        predicted = predictor.predict(test)[0]
        self.assertEqual(predicted, "1")
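
    # Note: Datapoint is assumed to be a namedtuple with the fields
    # (phraseid, sentenceid, phrase, sentiment); that is what makes
    # Datapoint(*dupe) above a field-for-field copy of the train duplicate.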
    def test_simple_predict(self):
        train, test = corpus.make_train_test_split("inhaler")
        predictor = PhraseSentimentPredictor()
        predictor.fit(train)
        predictions = predictor.predict(test)

        # Same number of predictions as input values
        self.assertEqual(len(predictions), len(test))

        # Predicted labels were seen during training
        train_labels = set(x.sentiment for x in train)
        predicted_labels = set(predictions)
        self.assertEqual(predicted_labels - train_labels, set())
    def test_simple_error_matrix(self):
        train, test = corpus.make_train_test_split("reflektor", proportion=0.4)
        predictor = PhraseSentimentPredictor()
        predictor.fit(train)
        error = predictor.error_matrix(test)
        for real, predicted in error.keys():
            self.assertNotEqual(real, predicted)

        score = predictor.score(test)
        assert score > 0, "Test is valid only if score is more than 0"
        N = float(len(test))
        wrong = sum(len(xs) for xs in error.values())
        self.assertEqual((N - wrong) / N, score)
if __name__ == "__main__":
    import argparse
    import json
    import csv
    import sys
    import time

    from samr.corpus import iter_corpus, iter_test_corpus
    from samr.predictor import PhraseSentimentPredictor

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filename")
    config = parser.parse_args()
    config = json.load(open(config.filename))

    # Time the fit and predict phases in milliseconds
    t1 = int(round(time.time() * 1000))
    predictor = PhraseSentimentPredictor(**config)
    predictor.fit(list(iter_corpus()))
    t2 = int(round(time.time() * 1000))

    test = list(iter_test_corpus())
    prediction = predictor.predict(test)
    t3 = int(round(time.time() * 1000))

    print(t2 - t1, t3 - t2)

    writer = csv.writer(sys.stdout)
    writer.writerow(("PhraseId", "Sentiment"))
    for datapoint, sentiment in zip(test, prediction):
        writer.writerow((datapoint.phraseid, sentiment))
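
# A minimal, hypothetical config for the scripts in this file: the JSON dict
# is expanded as PhraseSentimentPredictor(**config), so its keys must match
# the predictor's keyword arguments (`duplicates` is the only one shown here).
#
#     {"duplicates": true}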
Example #8

def _coerce_config_values(config):
    # Reconstruction of a truncated helper; the original header was cut
    # off, so this name is hypothetical. It rebuilds a config dict,
    # coercing string values to float where possible.
    new = {}
    for key, value in config.items():
        if isinstance(value, str):
            try:
                value = float(value)
            except ValueError:
                pass
        new[key] = value
    return new


if __name__ == "__main__":
    import argparse
    import json
    import csv
    import sys

    from samr.corpus import iter_corpus, iter_test_corpus
    from samr.predictor import PhraseSentimentPredictor

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filename")
    config = parser.parse_args()
    config = json.load(open(config.filename))

    predictor = PhraseSentimentPredictor(**config)
    predictor.fit(list(iter_corpus()))
    test = list(iter_test_corpus())
    prediction = predictor.predict(test)

    writer = csv.writer(sys.stdout)
    writer.writerow(("PhraseId", "Sentiment"))
    for datapoint, sentiment in zip(test, prediction):
        writer.writerow((datapoint.phraseid, sentiment))
    def test_fit_returns_self(self):
        train, _ = corpus.make_train_test_split("defiant order")
        predictor = PhraseSentimentPredictor()
        s = predictor.fit(train)
        self.assertEqual(predictor, s)
Example #11
import time


class PrintPartialCV:
    # The class header and __init__ were truncated in extraction; this
    # reconstruction supplies the state used by report() below.
    def __init__(self):
        self.i = 0
        self.last = time.time()

    def report(self, score):
        new = time.time()
        self.i += 1
        print("individual {}-th fold score={}% took {} seconds".format(
            self.i, score * 100, new - self.last))
        self.last = new


if __name__ == "__main__":
    import argparse
    import json

    from samr.evaluation import cross_validation
    from samr.predictor import PhraseSentimentPredictor

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filename")
    config = parser.parse_args()
    config = json.load(open(config.filename))

    factory = lambda: PhraseSentimentPredictor(**config)
    factory()  # Run once to check config is ok

    report = PrintPartialCV()
    result = cross_validation(factory,
                              seed="robot rock",
                              callback=report.report)

    print("10-fold cross validation score {}%".format(result * 100))
    def runThroughSetup(self, **kwargs):
        # Fit on the first `samples` training datapoints and return the
        # held-out test score as a string
        predictor = PhraseSentimentPredictor(**kwargs)
        predictor.fit(self.train[:self.samples])
        return str(predictor.score(self.test))
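
# Hypothetical usage of the helper above, assuming a TestCase whose setUp()
# defines self.train, self.test and self.samples:
#
#     score = self.runThroughSetup(duplicates=True)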