    def test_simple_duplicates(self):
        dupe = Datapoint(phraseid="a",
                         sentenceid="b",
                         phrase="b a",
                         sentiment="1")
        # Train has a lot of "2" sentiments
        train = [
            Datapoint(phraseid=str(i),
                      sentenceid=str(i),
                      phrase="a b",
                      sentiment="2") for i in range(10)
        ]
        train.append(dupe)
        test = [Datapoint(*dupe)]
        predictor = PhraseSentimentPredictor(duplicates=True)
        predictor.fit(train)
        predicted = predictor.predict(test)[0]
        self.assertEqual(predicted, "1")
    def test_simple_predict(self):
        train, test = corpus.make_train_test_split("inhaler")
        predictor = PhraseSentimentPredictor()
        predictor.fit(train)
        predictions = predictor.predict(test)

        # Same number of predictions as input values
        self.assertEqual(len(predictions), len(test))

        # Predicted labels were seen during training
        train_labels = set(x.sentiment for x in train)
        predicted_labels = set(predictions)
        self.assertEqual(predicted_labels - train_labels, set())
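
# A minimal harness sketch for the test methods above, assuming they belong to
# a unittest.TestCase subclass; the samr module paths and the class name below
# are assumptions, not shown in the original snippet.
import unittest

from samr.data import Datapoint
from samr import corpus
from samr.predictor import PhraseSentimentPredictor


class PhraseSentimentPredictorTest(unittest.TestCase):
    # test_simple_predict and test_simple_duplicates (shown above) would be
    # defined here.
    pass


if __name__ == "__main__":
    unittest.main()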
Example #5
# Reconstructed helper (the original snippet was truncated above this point);
# its name and exact string handling are assumed, not shown in the source.
def fix_json_dict(config):
    new = {}
    for key, value in config.items():
        if isinstance(value, str):
            try:
                value = float(value)
            except ValueError:
                pass
        new[key] = value
    return new
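
# Note: this helper is presumably applied to the JSON config parsed in the
# __main__ block below, e.g. config = fix_json_dict(json.load(f)); the call
# site is not visible in the truncated snippet, so this is only an assumption.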


if __name__ == "__main__":
    import argparse
    import json
    import csv
    import sys

    from samr.corpus import iter_corpus, iter_test_corpus
    from samr.predictor import PhraseSentimentPredictor
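
    # Build a Kaggle-style submission: read predictor options from the JSON
    # config file named on the command line, fit on the full training corpus,
    # and write (PhraseId, Sentiment) rows as CSV to stdout.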

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filename")
    args = parser.parse_args()
    with open(args.filename) as config_file:
        config = json.load(config_file)

    predictor = PhraseSentimentPredictor(**config)
    predictor.fit(list(iter_corpus()))
    test = list(iter_test_corpus())
    prediction = predictor.predict(test)

    writer = csv.writer(sys.stdout)
    writer.writerow(("PhraseId", "Sentiment"))
    for datapoint, sentiment in zip(test, prediction):
        writer.writerow((datapoint.phraseid, sentiment))
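
    # Example invocation (the script and file names here are illustrative):
    #     python generate_submission.py sample_config.json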

    # Second pass: score an auxiliary CSV and echo each row with its predicted
    # sentiment appended. The file path and its column layout (PhraseId,
    # SentenceId and Phrase as the first three columns) are assumptions; the
    # original snippet was partly broken here.
    from samr.data import Datapoint  # module path assumed, not shown above

    with open("../data/outfinal2.csv") as dataf:
        header_csv = dataf.readline().rstrip("\n")
        data = dataf.read().splitlines()

    test = [Datapoint(*(row[:3] + [None])) for row in csv.reader(data)]
    prediction = predictor.predict(test)

    print(header_csv + ",Sentiment")
    for line, sentiment in zip(data, prediction):
        print(line + "," + str(sentiment))