# Grid-search experiment script (ex3), restored from a whitespace-mangled
# one-line fragment. Loads the TCP training data and builds a stratified
# 10-fold CV split over the Stance column.
import logging  # FIX: `logging` was used below but never imported

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer

import System.Utilities.write_to_file as write

#print(__doc__)

# Results file for this grid-search run.
# NOTE(review): `file` shadows the Python 2 builtin; kept because later
# (unseen) parts of the script may reference it by this name.
file = write.initFile("GridSearch-results ex3")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load some categories from the training set
# Uncomment the following to do the analysis on all the categories
#categories = None

data = pd.read_csv('../TextFiles/data/tcp_train.csv', sep='\t')
# Stratified 10-fold split on the stance labels; fixed seed for repeatability.
cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)
# NOTE(review): whitespace-mangled fragment of a skflow character-CNN training
# script, restored to multi-line form. `X_train`, `y_train`, `X_val`, `y_val`,
# `skflow`, `char_cnn_model`, `fileWriter` and `metrics` are defined earlier
# in the original file (not visible in this chunk).

# Early-stopping monitor evaluated on the validation split.
val_monitor = skflow.monitors.ValidationMonitor(
    X_val, y_val,
    early_stopping_rounds=200,
    n_classes=3,
    batch_size=10,
    print_steps=20)

# Character-CNN estimator; `continue_training=True` lets repeated fit() calls
# below resume from the previous state.
classifier = skflow.TensorFlowEstimator(
    model_fn=char_cnn_model,
    n_classes=3,
    steps=100,
    optimizer='Adam',
    learning_rate=0.01,
    continue_training=True)

# Write results to file:
f = fileWriter.initFile("../TextFiles/FindingsAndResults/ex9/ex9")

# Continuously train for 1000 steps & predict on test set.
i = 0
print("Initiating training...")
fileWriter.writeTextToFile("Initiating training...", f)

while i < 11:
    print(80 * '=')
    fileWriter.writeTextToFile(80 * '=', f)
    classifier.fit(X_train, y_train, val_monitor,
                   logdir='../TextFiles/logs/cnn_on_characters')
    pred_stances = classifier.predict(X_val)
    score = metrics.accuracy_score(y_val, pred_stances)
    # NOTE(review): fragment is truncated here — the loop body continues in
    # the original source. No `i += 1` is visible, which would otherwise make
    # this loop run forever; confirm against the full script.
# Grid-search setup script (tuned logistic regression), restored from a
# whitespace-mangled one-line fragment.
import logging  # FIX: `logging` was used below but never imported

import numpy as np
import pandas as pd  # FIX: `pd` was used below but never imported
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.cross_validation import StratifiedKFold
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression

import System.Utilities.write_to_file as write

#print(__doc__)

# Results file for this grid-search run.
file = write.initFile("GridSearch-results-tuned-LR")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load some categories from the training set
# Uncomment the following to do the analysis on all the categories
#categories = None

# FIX: pass the path directly instead of a bare `open()` handle that was
# never closed — read_csv opens and closes the file itself.
data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t', index_col=0)
# Whitespace-mangled fragment of experiment script ex12 (LinearSVC, part 2),
# restored to multi-line form.
import logging

import pandas as pd
import System.Utilities.write_to_file as write
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import SGDClassifier, LogisticRegression

# Results file for this experiment run.
file = write.initFile("ex12-linearSVC-part2")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load
strength = 'soft'

#data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
# NOTE(review): `ptd` is used here but never imported in this fragment —
# presumably a project data-loading module imported elsewhere; confirm.
data = ptd.getTrainingData()
# Drop rows without a stance label before building the CV split.
data = data[data.Stance != 'NONE']
cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)
# Whitespace-mangled fragment of experiment script ex12 (SVC), restored to
# multi-line form.
import logging

import pandas as pd
import System.Utilities.write_to_file as write
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import SGDClassifier, LogisticRegression

# Results file for this experiment run.
file = write.initFile("ex12-SVC")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load
strength = 'soft'

#data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
# NOTE(review): `ptd` is used here but never imported in this fragment —
# presumably a project data-loading module imported elsewhere; confirm.
data = ptd.getTrainingData()

binaryStances = []
# FIX: the fragment ended with a dangling `for ...:` header and no body
# (syntax error from lost newlines). Body restored from the near-identical
# duplicate of this script later in the file: map each Endorse value to a
# binary stance-vs-no-stance label.
for endorse in data.Endorse.tolist():
    binaryStances.append(ptd.getAbstractStanceVsNoStance(strength, endorse))
# NOTE(review): the following line is a whitespace-mangled fragment — its
# original newlines were lost, so everything after the leading `#` marker
# (the tail of a `char_cnn_model` function, a ValidationMonitor /
# TensorFlowEstimator setup, and a training loop) is swallowed by the
# comment. The `def` that the `return skflow.models.logistic_regression(...)`
# statement belongs to is not visible in this chunk, so the code cannot be
# safely reconstructed here; restore line breaks from the original script.
# NOTE(review): no `i += 1` is visible inside the `while i<11:` loop — verify
# the original increments `i`, otherwise the loop never terminates.
# Max across each filter to get useful features for classification. pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1]) # Apply regular WX + B and classification. return skflow.models.logistic_regression(pool2, y) val_monitor = skflow.monitors.ValidationMonitor(X_val, y_val, early_stopping_rounds=200, n_classes=3, batch_size=10, print_steps=20) classifier = skflow.TensorFlowEstimator(model_fn=char_cnn_model, n_classes=3, steps=100, optimizer='Adam', learning_rate=0.01, continue_training=True) # Write results to file: f = fileWriter.initFile("../TextFiles/FindingsAndResults/ex9/ex9") # Continuously train for 1000 steps & predict on test set. i = 0 print("Initiating training...") fileWriter.writeTextToFile("Initiating training...", f) while i<11: print(80 * '=') fileWriter.writeTextToFile(80 * '=' , f) classifier.fit(X_train, y_train, val_monitor, logdir='../TextFiles/logs/cnn_on_characters') pred_stances = classifier.predict(X_val) score = metrics.accuracy_score(y_val, pred_stances) print("Accuracy: %f" % score) fileWriter.writeTextToFile("Accuracy: %f" % score, f)
# Experiment script ex7 (GloVe features), restored from a whitespace-mangled
# one-line fragment. Loads the TCP training data, a stratified CV split, and
# pre-computed GloVe vectors for the abstracts.
import logging  # FIX: `logging` was used below but never imported

import numpy as np
import pandas as pd  # FIX: `pd` was used below but never imported

import System.Utilities.write_to_file as write
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import SGDClassifier, LogisticRegression
from System.DataProcessing.GloveVectorizer.glove_transformer import GloveVectorizer

# Results file for this experiment run.
file = write.initFile("ex7_glove")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load
strength = 'soft'

data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
# Stratified 10-fold split on the stance labels; fixed seed for repeatability.
cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)

# Pre-trained 200-d GloVe vectors pickled for the TCP abstracts.
# NOTE(review): absolute, machine-specific path — consider making it relative.
glove_vecs = pd.read_pickle('/home/henrik/tmp/System/DataProcessing/GloveVectorizer/vectors/glove.6B.200d_tcp_abstracts.pkl')
# Grid-search setup script (tuned logistic regression, with CV split),
# restored from a whitespace-mangled one-line fragment.
import logging  # FIX: `logging` was used below but never imported

import numpy as np
import pandas as pd  # FIX: `pd` was used below but never imported
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.cross_validation import StratifiedKFold
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression

import System.Utilities.write_to_file as write

#print(__doc__)

# Results file for this grid-search run.
file = write.initFile("GridSearch-results-tuned-LR")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load some categories from the training set
# Uncomment the following to do the analysis on all the categories
#categories = None

# FIX: pass the path directly instead of a bare `open()` handle that was
# never closed — read_csv opens and closes the file itself.
data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t', index_col=0)
# Stratified 10-fold split on the stance labels; fixed seed for repeatability.
cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)
# Experiment script ex7 (GloVe features), restored from a whitespace-mangled
# one-line fragment.
import logging  # FIX: `logging` was used below but never imported

import numpy as np
import pandas as pd  # FIX: `pd` was used below but never imported

import System.Utilities.write_to_file as write
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import SGDClassifier, LogisticRegression
from System.DataProcessing.GloveVectorizer.glove_transformer import GloveVectorizer

# Results file for this experiment run.
file = write.initFile("ex7_glove")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load
strength = 'soft'

data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
# Stratified 10-fold split on the stance labels; fixed seed for repeatability.
cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)

# FIX: the fragment ended with an unbalanced `pd.read_pickle(` call
# (truncation); closing paren restored per the identical call in the
# duplicate of this script earlier in the file.
glove_vecs = pd.read_pickle(
    '/home/henrik/tmp/System/DataProcessing/GloveVectorizer/vectors/glove.6B.200d_tcp_abstracts.pkl'
)
# Whitespace-mangled fragment of experiment script ex12 (SVC), restored to
# multi-line form. Builds binary stance-vs-no-stance labels from the Endorse
# column.
import logging

import pandas as pd
import System.Utilities.write_to_file as write
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import SGDClassifier, LogisticRegression

# Results file for this experiment run.
file = write.initFile("ex12-SVC")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load
strength = 'soft'

#data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
# NOTE(review): `ptd` is used here but never imported in this fragment —
# presumably a project data-loading module imported elsewhere; confirm.
data = ptd.getTrainingData()

# Collapse each Endorse value into a binary stance / no-stance label.
binaryStances = []
for endorse in data.Endorse.tolist():
    binaryStances.append(ptd.getAbstractStanceVsNoStance(strength, endorse))
    # NOTE(review): fragment is truncated here — the script continues in the
    # original source.