# NOTE(review): this chunk arrived with all newlines collapsed onto one line,
# which commented out everything after the first '#' and fused statements
# into a SyntaxError. Reconstructed below. The opening of the
# ValidationMonitor call was truncated in the source and is restored from
# the near-identical sibling chunk of this script — confirm against the
# original file.
val_monitor = skflow.monitors.ValidationMonitor(X_val, y_val,
                                                early_stopping_rounds=200,
                                                n_classes=3,
                                                batch_size=10,
                                                print_steps=20)
classifier = skflow.TensorFlowEstimator(model_fn=char_cnn_model,
                                        n_classes=3,
                                        steps=100,
                                        optimizer='Adam',
                                        learning_rate=0.01,
                                        continue_training=True)

# Write results to file:
f = fileWriter.initFile("../TextFiles/FindingsAndResults/ex9/ex9")

# Continuously train for 1000 steps & predict on test set.
i = 0
print("Initiating training...")
fileWriter.writeTextToFile("Initiating training...", f)
while i < 11:
    print(80 * '=')
    fileWriter.writeTextToFile(80 * '=', f)
    # continue_training=True above makes each fit() call resume from the
    # previous iteration's weights rather than reinitialising.
    classifier.fit(X_train, y_train, val_monitor,
                   logdir='../TextFiles/logs/cnn_on_characters')
    pred_stances = classifier.predict(X_val)
    score = metrics.accuracy_score(y_val, pred_stances)
    print("Accuracy: %f" % score)
    fileWriter.writeTextToFile("Accuracy: %f" % score, f)
    print(classification_report(y_val, pred_stances, digits=4))
    # NOTE(review): source truncated here — the rest of the loop body is
    # missing. The visible code never increments `i` (presumably an
    # `i += 1` follows in the original — confirm), otherwise this loop
    # never terminates.
# NOTE(review): this chunk arrived with all newlines collapsed onto one line,
# which made everything after the first '#' dead comment text. Reconstructed
# below into its original multi-line form.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load some categories from the training set
# Uncomment the following to do the analysis on all the categories
#categories = None

data = pd.read_csv('../TextFiles/data/tcp_train.csv', sep='\t')
# Old sklearn (<0.18) StratifiedKFold signature: labels first, n_folds kwarg.
cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)
print("%d documents" % len(data))
write.writeTextToFile("%d documents" % len(data), file)
print("%d categories" % 3)
write.writeTextToFile("%d categories" % 3, file)
print()

###############################################################################
# define a pipeline combining a text feature extractor with a simple
# classifier
pipeline = Pipeline([
    ('features', FeatureUnion([
        ('unigram_word', Pipeline([
            ('vect', CountVectorizer())
        ])),
        ('td-idf', Pipeline([
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer())
            # NOTE(review): source truncated here — the remainder of this
            # FeatureUnion/Pipeline definition (and any classifier step)
            # is missing; the closing brackets below were added only to
            # keep the chunk syntactically valid. Confirm against the
            # original file.
        ]))
    ]))
])
# NOTE(review): this chunk arrived with all newlines collapsed onto one line;
# because it began with '#', the entire line was dead comment text.
# Reconstructed below into its original multi-line form.

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load
strength = 'soft'
#data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
data = ptd.getTrainingData()
# Drop abstracts with no stance so only FOR/AGAINST (and similar) remain.
data = data[data.Stance != 'NONE']
# Old sklearn (<0.18) StratifiedKFold signature: labels first, n_folds kwarg.
cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)
print("%d training documents" % len(data.Abstract))
write.writeTextToFile("%d training documents" % len(data.Abstract), file)
print("%d categories" % 3)
write.writeTextToFile("%d categories" % 3, file)
print()

###############################################################################
# Classifiers
# MultinomialNB(), BernoulliNB(), SVM(), LinearSVM(), SGDClassifier(), LogisticRegression()
clf = MultinomialNB()
print("Using train, validation and test approach with clf {}".format(clf))
write.writeTextToFile(
    "Using train, validation and test approach with clf {}".format(clf), file)

###############################################################################
# define a pipeline combining a text feature extractor with a simple classifier
# NOTE(review): this chunk arrived with all newlines collapsed onto one line,
# which made everything after the first '#' dead comment text. Reconstructed
# below. The opening of the logging.basicConfig call was truncated in the
# source and is restored from the identical call in the sibling scripts —
# confirm against the original file.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load some categories from the training set
# Uncomment the following to do the analysis on all the categories
#categories = None

# index_col=0 — the CSV's first column is used as the row index here,
# unlike the sibling scripts which keep it as data.
data = pd.read_csv(open('../../TextFiles/data/tcp_train.csv'), sep='\t',
                   index_col=0)
# Old sklearn (<0.18) StratifiedKFold signature: labels first, n_folds kwarg.
cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)
print("%d documents" % len(data))
write.writeTextToFile("%d documents" % len(data), file)
print("%d categories" % 3)
write.writeTextToFile("%d categories" % 3, file)
print()

###############################################################################
# define a pipeline combining a text feature extractor with a simple
# classifier
pipeline = Pipeline([
    ('vect', CountVectorizer()),
    #('tfidf', TfidfTransformer()),
    #('clf', MultinomialNB()),
    #('clf', LinearSVC()),
    ('clf', LogisticRegression()),
    #('clf', SVC()),
])
# NOTE(review): this chunk arrived with all newlines collapsed onto one line,
# which commented out everything after the first '#' and fused statements
# into a SyntaxError. Reconstructed below into its original multi-line form.
val_monitor = skflow.monitors.ValidationMonitor(X_val, y_val,
                                                early_stopping_rounds=200,
                                                n_classes=3,
                                                batch_size=10,
                                                print_steps=20)
classifier = skflow.TensorFlowEstimator(model_fn=char_cnn_model,
                                        n_classes=3,
                                        steps=100,
                                        optimizer='Adam',
                                        learning_rate=0.01,
                                        continue_training=True)

# Write results to file:
f = fileWriter.initFile("../TextFiles/FindingsAndResults/ex9/ex9")

# Continuously train for 1000 steps & predict on test set.
i = 0
print("Initiating training...")
fileWriter.writeTextToFile("Initiating training...", f)
while i < 11:
    print(80 * '=')
    fileWriter.writeTextToFile(80 * '=', f)
    # continue_training=True above makes each fit() call resume from the
    # previous iteration's weights rather than reinitialising.
    classifier.fit(X_train, y_train, val_monitor,
                   logdir='../TextFiles/logs/cnn_on_characters')
    pred_stances = classifier.predict(X_val)
    score = metrics.accuracy_score(y_val, pred_stances)
    print("Accuracy: %f" % score)
    fileWriter.writeTextToFile("Accuracy: %f" % score, f)
    print(classification_report(y_val, pred_stances, digits=4))
    fileWriter.writeTextToFile(
        classification_report(y_val, pred_stances, digits=4), f)
    # NOTE(review): source truncated mid-call here — the remaining
    # fbeta_score arguments are missing. average='macro' is assumed from
    # the variable name `macro_f`; confirm against the original file.
    # The visible code also never increments `i` (presumably an `i += 1`
    # follows in the original), otherwise this loop never terminates.
    macro_f = fbeta_score(y_val, pred_stances, 1.0,
                          average='macro')
# NOTE(review): this chunk arrived with all newlines collapsed onto one line;
# because it began with '#', the entire line was dead comment text.
# Reconstructed below into its original multi-line form.

###############################################################################
# Load
strength = 'soft'
#data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
data = ptd.getTrainingData()

# Collapse the endorsement labels into a binary stance-vs-no-stance target.
binaryStances = []
for endorse in data.Endorse.tolist():
    binaryStances.append(ptd.getAbstractStanceVsNoStance(strength, endorse))

# Old sklearn (<0.18) StratifiedKFold signature: labels first, n_folds kwarg.
cv = StratifiedKFold(binaryStances, n_folds=10, shuffle=True, random_state=1)
print("%d training documents" % len(data.Abstract))
write.writeTextToFile("%d training documents" % len(data.Abstract), file)
print("%d categories" % 3)
write.writeTextToFile("%d categories" % 3, file)
print()

###############################################################################
# Classifiers
# MultinomialNB(), BernoulliNB(), SVM(), LinearSVM(), SGDClassifier(), LogisticRegression()
clf = MultinomialNB()
print("Using train, validation and test approach with clf {}".format(clf))
write.writeTextToFile("Using train, validation and test approach with clf {}".format(clf), file)

###############################################################################
# define a pipeline combining a text feature extractor with a simple classifier
pipeline = Pipeline([
    # NOTE(review): source truncated here — the pipeline steps are missing
    # (sibling scripts use CountVectorizer plus the classifier in `clf`);
    # the closing bracket below was added only to keep the chunk
    # syntactically valid. Confirm against the original file.
])