Example #1
import logging
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer

import pandas as pd
import System.Utilities.write_to_file as write

#print(__doc__)

file = write.initFile("GridSearch-results ex3")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')


###############################################################################
# Load some categories from the training set

# Uncomment the following to do the analysis on all the categories
#categories = None

data = pd.read_csv('../TextFiles/data/tcp_train.csv', sep='\t')

cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)
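
# A minimal sketch of the grid search these imports point toward (the text column
# `Abstract` and the parameter grid are assumptions, not taken from this snippet):
pipeline = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', LinearSVC()),
])
grid = GridSearchCV(pipeline,
                    param_grid={'clf__C': [0.1, 1, 10]},
                    cv=cv,
                    n_jobs=-1)
grid.fit(data.Abstract, data.Stance)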
Example #2
import skflow  # standalone skflow package (later merged into tensorflow.contrib.learn)
from sklearn import metrics
# `fileWriter`, `char_cnn_model`, and the X_train/X_val/y_train/y_val splits are
# defined earlier in the source file and are elided from this snippet.

val_monitor = skflow.monitors.ValidationMonitor(X_val,
                                                y_val,
                                                early_stopping_rounds=200,
                                                n_classes=3,
                                                batch_size=10,
                                                print_steps=20)
classifier = skflow.TensorFlowEstimator(model_fn=char_cnn_model,
                                        n_classes=3,
                                        steps=100,
                                        optimizer='Adam',
                                        learning_rate=0.01,
                                        continue_training=True)

# Write results to file:
f = fileWriter.initFile("../TextFiles/FindingsAndResults/ex9/ex9")
# Continue training in 100-step increments (11 iterations, ~1000 steps total)
# and predict on the validation set each round.
i = 0
print("Initiating training...")
fileWriter.writeTextToFile("Initiating training...", f)
while i < 11:
    print(80 * '=')
    fileWriter.writeTextToFile(80 * '=', f)
    classifier.fit(X_train,
                   y_train,
                   val_monitor,
                   logdir='../TextFiles/logs/cnn_on_characters')

    pred_stances = classifier.predict(X_val)

    score = metrics.accuracy_score(y_val, pred_stances)
    print("Accuracy: %f" % score)
    fileWriter.writeTextToFile("Accuracy: %f" % score, f)
    i += 1  # advance the counter; the source omits this, so the loop never terminates
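
# Note: with early_stopping_rounds=200 the monitor can halt a fit call early when
# the validation metric stops improving, and continue_training=True makes each
# successive fit resume from the previous weights instead of reinitializing.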
Example #3
import logging
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.cross_validation import StratifiedKFold
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression

import System.Utilities.write_to_file as write

#print(__doc__)

file = write.initFile("GridSearch-results-tuned-LR")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load some categories from the training set

# Uncomment the following to do the analysis on all the categories
#categories = None

data = pd.read_csv('../../TextFiles/data/tcp_train.csv',
                   sep='\t',
                   index_col=0)
Example #4
import logging
import pandas as pd
import System.Utilities.write_to_file as write
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import SGDClassifier, LogisticRegression

file = write.initFile("ex12-linearSVC-part2")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load
strength = 'soft'

#data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
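# `ptd` is a project-local data-loading module; its import is elided in this snippet.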
data = ptd.getTrainingData()
data = data[data.Stance != 'NONE']

cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)
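
# A minimal sketch of the FeatureUnion + FunctionTransformer pattern these imports
# point toward (the `Abstract` column and the single 'bow' branch are assumptions):
def select_text(frame):
    return frame.Abstract

union = FeatureUnion([
    ('bow', Pipeline([
        ('select', FunctionTransformer(select_text, validate=False)),
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
    ])),
    # further feature branches would be appended here
])
pipeline = Pipeline([('features', union), ('clf', LinearSVC())])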
Example #5
import logging
import pandas as pd
import System.Utilities.write_to_file as write
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import SGDClassifier, LogisticRegression

file = write.initFile("ex12-SVC")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')


###############################################################################
# Load
strength = 'soft'

#data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
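# `ptd` is a project-local data-loading module; its import is elided in this snippet.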
data = ptd.getTrainingData()

binaryStances = []
for endorse in data.Endorse.tolist():
    binaryStances.append(ptd.getAbstractStanceVsNoStance(strength, endorse))
Example #6
import tensorflow as tf
import skflow  # standalone skflow package (later merged into tensorflow.contrib.learn)
from sklearn import metrics
# `fileWriter` and the X_train/X_val/y_train/y_val splits are defined earlier
# in the source file and are elided from this snippet.

def char_cnn_model(X, y):
    # The earlier layers (byte one-hot encoding, first convolution/pooling, and
    # the second convolution producing `conv2`) are elided in the source snippet.
    with tf.variable_scope('CNN_Layer2'):
        # Max across each filter to get useful features for classification.
        pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1])
    # Apply regular WX + B and classification.
    return skflow.models.logistic_regression(pool2, y)

val_monitor = skflow.monitors.ValidationMonitor(X_val, y_val,
                                                early_stopping_rounds=200,
                                                n_classes=3,
                                                batch_size=10,
                                                print_steps=20)
classifier = skflow.TensorFlowEstimator(model_fn=char_cnn_model, n_classes=3,
                                        steps=100, optimizer='Adam', learning_rate=0.01,
                                        continue_training=True)

# Write results to file:
f = fileWriter.initFile("../TextFiles/FindingsAndResults/ex9/ex9")
# Continue training in 100-step increments (11 iterations, ~1000 steps total)
# and predict on the validation set each round.
i = 0
print("Initiating training...")
fileWriter.writeTextToFile("Initiating training...", f)
while i < 11:
    print(80 * '=')
    fileWriter.writeTextToFile(80 * '=', f)
    classifier.fit(X_train,
                   y_train,
                   val_monitor,
                   logdir='../TextFiles/logs/cnn_on_characters')

    pred_stances = classifier.predict(X_val)

    score = metrics.accuracy_score(y_val, pred_stances)
    print("Accuracy: %f" % score)
    fileWriter.writeTextToFile("Accuracy: %f" % score, f)
Example #7
import logging
import pandas as pd
import System.Utilities.write_to_file as write
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import SGDClassifier, LogisticRegression

from System.DataProcessing.GloveVectorizer.glove_transformer import GloveVectorizer

file = write.initFile("ex7_glove")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')


###############################################################################
# Load
strength = 'soft'

data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')

cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)

glove_vecs = pd.read_pickle('/home/henrik/tmp/System/DataProcessing/GloveVectorizer/vectors/glove.6B.200d_tcp_abstracts.pkl')
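
# A hedged sketch of where `glove_vecs` plausibly feeds in; GloveVectorizer's
# constructor signature is an assumption based on its name, not on this snippet:
pipeline = Pipeline([
    ('glove', GloveVectorizer(glove_vecs)),
    ('clf', LogisticRegression()),
])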
Example #8
import logging
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.cross_validation import StratifiedKFold
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression

import System.Utilities.write_to_file as write

#print(__doc__)

file = write.initFile("GridSearch-results-tuned-LR")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')


###############################################################################
# Load some categories from the training set

# Uncomment the following to do the analysis on all the categories
#categories = None

data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t', index_col=0)

cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)
Example #9
import logging
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer

import pandas as pd
import System.Utilities.write_to_file as write

#print(__doc__)

file = write.initFile("GridSearch-results ex3")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load some categories from the training set

# Uncomment the following to do the analysis on all the categories
#categories = None

data = pd.read_csv('../TextFiles/data/tcp_train.csv', sep='\t')

cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)
Example #10
import logging
import pandas as pd
import System.Utilities.write_to_file as write
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import SGDClassifier, LogisticRegression

from System.DataProcessing.GloveVectorizer.glove_transformer import GloveVectorizer

file = write.initFile("ex7_glove")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load
strength = 'soft'

data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')

cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)

glove_vecs = pd.read_pickle(
    '/home/henrik/tmp/System/DataProcessing/GloveVectorizer/vectors/glove.6B.200d_tcp_abstracts.pkl'
)
Example #11
import logging
import pandas as pd
import System.Utilities.write_to_file as write
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC, SVC
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import FunctionTransformer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import SGDClassifier, LogisticRegression

file = write.initFile("ex12-SVC")

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load
strength = 'soft'

#data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
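# `ptd` is a project-local data-loading module; its import is elided in this snippet.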
data = ptd.getTrainingData()

binaryStances = []
for endorse in data.Endorse.tolist():
    binaryStances.append(ptd.getAbstractStanceVsNoStance(strength, endorse))
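
# A plausible next step, mirroring the earlier examples (an assumption, since the
# snippet ends here): stratify the folds on the derived binary labels.
cv = StratifiedKFold(binaryStances, n_folds=10, shuffle=True, random_state=1)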