Esempio n. 1
0
 def __init__(self, wd=None, save=None):
     """Set up empty text/corpus containers and optional working-dir state.

     Args:
         wd: Optional working directory. When truthy, the process changes
             into it (``os.chdir``), ``self.reddit`` is set to the
             second-to-last "/"-separated component of ``str(wd)`` (the
             final directory name when ``wd`` ends with a trailing slash),
             and ``self.cutoff`` is set to ``compute_cutoff(wd)``.
         save: Optional string flag. ``self.save`` becomes True only when
             the string starts with "y" or "Y"; otherwise it stays False.
     """
     # Always define these attributes so instances created without a
     # working directory do not raise AttributeError when they are read.
     self.reddit = None
     self.cutoff = None
     if wd:
         # chdir comes first, matching the original order, in case
         # compute_cutoff depends on the current directory.
         os.chdir(wd)
         self.reddit = str(wd).split("/")[-2]
         self.cutoff = compute_cutoff(wd)

     self.popular_texts = []
     self.unpopular_texts = []
     self.corpus = []
     self.all_texts = []
     self.word_counts = defaultdict(int)
     self.vectorizer = None

     # Case-insensitive: any non-empty answer starting with "y" enables it.
     self.save = bool(save) and save.lower().startswith("y")
    print 'F1 Score:', F1score
    print 'Confusion matrix:'
    print confusion

    return F1score


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Builds a Naive Bayes model for classification.")
    parser.add_argument("filepath", help="Argument must be the filepath where the text files are located")
    parser.add_argument("topic_type", help="topic_type is either bow, tfidf or lda")
    parser.add_argument("valid_or_test", help="Either v or t to test against validation or test set")
    parser.add_argument("--num_topics", default=10, help="The amount of topics to be grabbed from the LDA model")
    args = parser.parse_args()

    cutoff = compute_cutoff(args.filepath)

    print "Classifying the initial data."
    classify_initial_data(args.filepath, cutoff, SOURCES)
    training_data = make_data(SOURCES, args.filepath)

    if args.valid_or_test[0].lower() == "v":
        print "Classifying the validation data."
        validation_filepath = args.filepath + "/validation"
        classify_initial_data(validation_filepath, cutoff, VALIDATION)
        validation_data = make_data(VALIDATION, validation_filepath)
        create_Naive_Bayes(training_data, validation_data, args.topic_type, args.num_topics)

    else:
        print "Classifying the testing data."
        testing_filepath = args.filepath + "/testing"