Example #1
0
                                                n_classes=3,
                                                batch_size=10,
                                                print_steps=20)
# Character-level CNN estimator: 3 stance classes, Adam optimiser with a
# 0.01 learning rate, 100 training steps per fit() call, and
# continue_training=True so successive fit() calls resume from the
# previously learned weights instead of reinitialising.
classifier = skflow.TensorFlowEstimator(
    model_fn=char_cnn_model,
    n_classes=3,
    steps=100,
    optimizer='Adam',
    learning_rate=0.01,
    continue_training=True,
)

# Open the results file that mirrors everything printed to stdout below.
f = fileWriter.initFile("../TextFiles/FindingsAndResults/ex9/ex9")
# Continuously train for 1000 steps & predict on test set.
i = 0
startup_msg = "Initiating training..."
print(startup_msg)
fileWriter.writeTextToFile(startup_msg, f)
# Train/evaluate loop: each pass resumes CNN training (continue_training=True
# on the estimator), then scores the model on the validation split and logs
# the results both to stdout and to the results file `f`.
# NOTE(review): `i` is never incremented in the visible span, so as shown the
# loop would not terminate — presumably `i += 1` appears below the cut; verify.
while i < 11:
    # Visual separator between iterations, echoed to the results file.
    print(80 * '=')
    fileWriter.writeTextToFile(80 * '=', f)
    # Resume training; val_monitor handles periodic validation/early stopping,
    # TensorBoard events go to the given logdir.
    classifier.fit(X_train,
                   y_train,
                   val_monitor,
                   logdir='../TextFiles/logs/cnn_on_characters')

    # Predict stances for the validation set with the current weights.
    pred_stances = classifier.predict(X_val)

    # Overall accuracy on the validation split.
    score = metrics.accuracy_score(y_val, pred_stances)
    print("Accuracy: %f" % score)
    fileWriter.writeTextToFile("Accuracy: %f" % score, f)

    # Per-class precision/recall/F1 breakdown.
    print(classification_report(y_val, pred_stances, digits=4))
Example #2
0
# Emit INFO-and-above records with a timestamped "time level message" prefix.
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)


###############################################################################
# Load some categories from the training set

# Uncomment the following to do the analysis on all the categories
#categories = None

# Tab-separated training corpus; the Stance column drives the CV stratification.
data = pd.read_csv('../TextFiles/data/tcp_train.csv', sep='\t')

# 10-fold stratified cross-validation with a fixed seed for reproducible splits.
cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)

# Report corpus size to stdout and to the results file.
doc_count_msg = "%d documents" % len(data)
print(doc_count_msg)
write.writeTextToFile(doc_count_msg, file)
cat_count_msg = "%d categories" % 3
print(cat_count_msg)
write.writeTextToFile(cat_count_msg, file)
print()

###############################################################################
# define a pipeline combining a text feature extractor with a simple
# classifier
pipeline = Pipeline([
    ('features', FeatureUnion([
        ('unigram_word', Pipeline([
            ('vect', CountVectorizer())
        ])),
        ('td-idf', Pipeline([
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer())
Example #3
0
# Display progress logs on stdout: INFO-and-above with a timestamped prefix.
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)

###############################################################################
# Load
strength = 'soft'

#data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
# Fetch the training set via the project helper, then drop unlabeled rows.
data = ptd.getTrainingData()
data = data[data.Stance != 'NONE']

# 10-fold stratified CV with a fixed seed for reproducible splits.
cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)

# Report corpus size to stdout and to the results file.
n_docs_msg = "%d training documents" % len(data.Abstract)
print(n_docs_msg)
write.writeTextToFile(n_docs_msg, file)
n_cats_msg = "%d categories" % 3
print(n_cats_msg)
write.writeTextToFile(n_cats_msg, file)
print()

###############################################################################
# Classifiers
# MultinomialNB(), BernoulliNB(), SVM(), LinearSVM(), SGDClassifier(), LogisticRegression()
clf = MultinomialNB()

# Announce the chosen classifier on stdout and in the results file.
clf_msg = "Using train, validation and test approach with clf {}".format(clf)
print(clf_msg)
write.writeTextToFile(clf_msg, file)

###############################################################################
# define a pipeline combining a text feature extractor with a simple classifier
Example #4
0
                    format='%(asctime)s %(levelname)s %(message)s')

###############################################################################
# Load some categories from the training set

# Uncomment the following to do the analysis on all the categories
#categories = None

# Pass the path directly so pandas opens *and closes* the file itself; the
# original wrapped the path in a bare open() whose handle was never closed.
data = pd.read_csv('../../TextFiles/data/tcp_train.csv',
                   sep='\t',
                   index_col=0)

# 10-fold stratified CV with a fixed seed for reproducible splits.
cv = StratifiedKFold(data.Stance, n_folds=10, shuffle=True, random_state=1)

# Report corpus size to stdout and to the results file.
print("%d documents" % len(data))
write.writeTextToFile("%d documents" % len(data), file)
print("%d categories" % 3)
write.writeTextToFile("%d categories" % 3, file)
print()

###############################################################################
# define a pipeline combining a text feature extractor with a simple
# classifier
# Bag-of-words counts fed straight into logistic regression; the commented
# entries are the alternative stages/classifiers that were tried.
steps = [
    ('vect', CountVectorizer()),
    #('tfidf', TfidfTransformer()),
    #('clf', MultinomialNB()),
    #('clf', LinearSVC()),
    ('clf', LogisticRegression()),
    #('clf', SVC()),
]
pipeline = Pipeline(steps)
# Validation monitor: evaluates on (X_val, y_val) in batches of 10, reports
# every 20 steps, and stops training after 200 rounds without improvement.
val_monitor = skflow.monitors.ValidationMonitor(
    X_val,
    y_val,
    early_stopping_rounds=200,
    n_classes=3,
    batch_size=10,
    print_steps=20,
)
# Character-level CNN estimator: 3 stance classes, Adam optimiser with a
# 0.01 learning rate, 100 training steps per fit() call, and
# continue_training=True so successive fit() calls resume from the
# previously learned weights instead of reinitialising.
classifier = skflow.TensorFlowEstimator(
    model_fn=char_cnn_model,
    n_classes=3,
    steps=100,
    optimizer='Adam',
    learning_rate=0.01,
    continue_training=True,
)

# Open the results file that mirrors everything printed to stdout below.
f = fileWriter.initFile("../TextFiles/FindingsAndResults/ex9/ex9")
# Continuously train for 1000 steps & predict on test set.
i = 0
startup_msg = "Initiating training..."
print(startup_msg)
fileWriter.writeTextToFile(startup_msg, f)
# Train/evaluate loop: each pass resumes CNN training (continue_training=True
# on the estimator), then scores the model on the validation split and logs
# accuracy plus the per-class report to stdout and the results file `f`.
# NOTE(review): `i` is never incremented in the visible span, so as shown the
# loop would not terminate — presumably `i += 1` appears below the cut; verify.
while i<11:
    # Visual separator between iterations, echoed to the results file.
    print(80 * '=')
    fileWriter.writeTextToFile(80 * '=' , f)
    # Resume training; val_monitor handles periodic validation/early stopping,
    # TensorBoard events go to the given logdir.
    classifier.fit(X_train, y_train, val_monitor, logdir='../TextFiles/logs/cnn_on_characters')

    # Predict stances for the validation set with the current weights.
    pred_stances = classifier.predict(X_val)

    # Overall accuracy on the validation split.
    score = metrics.accuracy_score(y_val, pred_stances)
    print("Accuracy: %f" % score)
    fileWriter.writeTextToFile("Accuracy: %f" % score, f)

    # Per-class precision/recall/F1 breakdown, printed and written to file.
    print (classification_report(y_val, pred_stances, digits=4))
    fileWriter.writeTextToFile(classification_report(y_val, pred_stances, digits=4), f)
    macro_f = fbeta_score(y_val, pred_stances, 1.0,
Example #6
0
###############################################################################
# Load
strength = 'soft'

#data = pd.read_csv('../../TextFiles/data/tcp_train.csv', sep='\t')
data = ptd.getTrainingData()

# Collapse each Endorse label into a binary stance / no-stance target.
binaryStances = [
    ptd.getAbstractStanceVsNoStance(strength, endorse)
    for endorse in data.Endorse.tolist()
]


# 10-fold CV stratified on the binary targets, fixed seed for reproducibility.
cv = StratifiedKFold(binaryStances, n_folds=10, shuffle=True, random_state=1)

# Report corpus size to stdout and to the results file.
n_docs_msg = "%d training documents" % len(data.Abstract)
print(n_docs_msg)
write.writeTextToFile(n_docs_msg, file)
n_cats_msg = "%d categories" % 3
print(n_cats_msg)
write.writeTextToFile(n_cats_msg, file)
print()

###############################################################################
# Classifiers
# MultinomialNB(), BernoulliNB(), SVM(), LinearSVM(), SGDClassifier(), LogisticRegression()
clf = MultinomialNB()

# Announce the chosen classifier on stdout and in the results file.
clf_msg = "Using train, validation and test approach with clf {}".format(clf)
print(clf_msg)
write.writeTextToFile(clf_msg, file)

###############################################################################
# define a pipeline combining a text feature extractor with a simple classifier
pipeline = Pipeline([