Beispiel #1
0
                    value = float(value)
                except ValueError:
                    pass
        new[key] = value
    return new


if __name__ == "__main__":
    # Script entry point: build a PhraseSentimentPredictor from a JSON config
    # file given on the command line, fit it, and print its test score.
    import argparse
    import json
    import csv
    import sys
    # `time` is used for the fit timing below; importing it here means the
    # block does not depend on an import elsewhere in the file.
    import time

    from corpus import iter_corpus, iter_test_corpus
    from predictor import PhraseSentimentPredictor

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filename")
    args = parser.parse_args()

    # The parsed JSON object is passed to the predictor as keyword arguments.
    # The `with` block closes the file handle deterministically instead of
    # leaving it to the garbage collector.
    with open(args.filename) as config_file:
        config = json.load(config_file)

    start = time.time()
    predictor = PhraseSentimentPredictor(**config)
    predictor.fit(list(iter_corpus()))
    # Elapsed wall-clock seconds covers predictor construction plus fit().
    # Single-argument print(...) emits the same text under the Python 2 print
    # statement and the Python 3 print function.
    print("fitting takes " + str(time.time() - start))

    test = list(iter_test_corpus())
    #prediction = predictor.predict(test)
    # NOTE(review): the meaning of the second argument ('test') is defined by
    # PhraseSentimentPredictor.score, which is not visible here -- confirm.
    score = predictor.score(test, 'test')
    print("test score {}%".format(score * 100))
    print('programme finished!')
Beispiel #2
0
    import csv
    import os
    from transformations import ExtractText

    # Build the vocabulary file only when it does not exist yet.
    vocabulary_path = './data/vocabulary'
    if not os.path.exists(vocabulary_path):
        datapoints = list(iter_corpus())
        et = ExtractText()
        X = et.transform(datapoints)
        # Collect every whitespace-separated token, lower-cased and de-duplicated.
        vocabulary = set()
        for datap in X:
            vocabulary.update(w.lower() for w in datap.split())
        vocabulary = sorted(vocabulary)
        # 'wb' is the mode the Python 2 csv module expects for output files;
        # NOTE(review): under Python 3 this would need text mode with
        # newline='' instead.
        with open(vocabulary_path, 'wb') as f:
            # One word per row, in sorted order.
            csv.writer(f).writerows([voc] for voc in vocabulary)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filename")
    args = parser.parse_args()

    # The parsed JSON object is passed to the predictor as keyword arguments.
    # The `with` block closes the file handle deterministically instead of
    # leaving it to the garbage collector.
    with open(args.filename) as config_file:
        config = json.load(config_file)

    factory = lambda: PhraseSentimentPredictor(**config)
    factory()  # Run once to check config is ok

    # NOTE(review): `report` is not passed to analyse() or used again in the
    # visible code; kept because constructing it may have side effects.
    report = PrintPartialCV()
    analyse(factory)

    # Single-argument print(...) emits the same text under the Python 2 print
    # statement and the Python 3 print function.
    print("Analysis finished!")