Beispiel #1
0
# Kernel identifiers for the experiment. Not referenced again in the visible
# part of this script -- presumably consumed further down; confirm.
kernel_list = ["custom", "linear", "poly_2","poly_3", "rbf"]

# results -> k -> kernel
# NOTE(review): the arrow notation suggests results is nested per k and then
# per kernel; it starts as an empty list and is not filled in the visible code.
results = []

# NOTE(review): neither limit is referenced in the visible code; the training
# queries below hard-code 400 instead of using limit_1 -- confirm intent.
limit_1 = 400 # training set
limit_2 = 400 # validation set


# k_list is not defined in the visible part of the file; it must be bound
# before this loop runs.
for k in k_list:
	print "Running experiment for k=" + str(k) + "..."
	# A fresh classifier is built for every k, so the download and the LSA
	# step are repeated on each iteration.
	ab = ABClassifier()
	ab.download_cursors(limit_unlabeled = 5000, limit_labeled = 5000)
	#ab.download_tweet_cursors(limit_unlabeled = 800, limit_labeled = 1000)
	ab.run_lsa(k=k)

	#pos_cursor_training = ab.labeled_collection.find({"bullying_label":"1"},timeout=False).limit(400)
	#neg_cursor_training = ab.labeled_collection.find({"bullying_label":"0"},timeout=False).limit(400)

	# Cursors over labeled documents with bully == True / False, capped at 400
	# each. NOTE(review): timeout=False presumably disables the server-side
	# cursor timeout (legacy pymongo keyword) -- confirm against the driver
	# version in use.
	pos_cursor_training = ab.labeled_collection.find({"bully":True},timeout=False).limit(400)
	neg_cursor_training = ab.labeled_collection.find({"bully":False},timeout=False).limit(400)

	# Accumulators, reset on every iteration. Nothing in the visible code
	# appends to them -- presumably filled later in the loop body.
	training = []
	tlabels = []

	pos_validation = []
	pos_vlabels = []
	neg_validation = []
	neg_vlabels = []
Beispiel #2
0
import sys
sys.path.append('..')
from ABClassifier.ABClassifier import ABClassifier
import numpy as np


# Compute pairwise similarities for the positive-labeled, negative-labeled and
# unlabeled context vectors and dump each set to a CSV file in the current
# working directory.
ab = ABClassifier()

# Pipeline order matters: the context vector lists read below are only
# accessed after download -> LSA (k=100) -> compute_context_vectors have run.
ab.download_cursors(limit_unlabeled=1000, limit_labeled=1000)
ab.run_lsa(k=100)
ab.compute_context_vectors()

# Each result is mapping-like (only its .values() are written out).
pos_labeled_pws = ab.pairwise_similarity(ab.pos_labeled_cv_list)
neg_labeled_pws = ab.pairwise_similarity(ab.neg_labeled_cv_list)
unlabeled_pws = ab.pairwise_similarity(ab.unlabeled_cv_list)

# Parenthesised single-argument form prints the same text on Python 2 and 3.
print("done getting pws")

# One loop replaces three copy-pasted save stanzas; output order is unchanged
# (pos, neg, unlabeled).
for csv_name, pws in (
    ('pos_labeled.csv', pos_labeled_pws),
    ('neg_labeled.csv', neg_labeled_pws),
    ('unlabeled.csv', unlabeled_pws),
):
    # list(...) is required on Python 3, where .values() is a view and
    # np.array(view) would produce a 0-d object array that savetxt rejects.
    # On Python 2 it is a harmless copy of the list.
    np.savetxt(csv_name, np.array(list(pws.values())), delimiter=",")

Beispiel #3
0
    -Uses ONLY twitter data for training/validation

"""

import sys

sys.path.append('../..')
from ABClassifier.ABClassifier import ABClassifier
import numpy as np
import os

# Output directory: handed to compute_context_vectors() and used as the prefix
# of the CSV path written at the end of this section.
# NOTE(review): nothing visible here creates the directory -- confirm it exists
# before the script runs.
save_location = '../../experiment_data/experiment_9'

# Build the classifier from tweet cursors only (see the module docstring), run
# LSA with k=150, then compute context vectors, passing the output directory.
ab = ABClassifier()
ab.download_tweet_cursors(limit_unlabeled=2500, limit_labeled=2500)
ab.run_lsa(k=150)
ab.compute_context_vectors(save_location=save_location)

print "Performing pairwise similarity measures..."

# One pairwise-similarity result per context vector list. Each result exposes
# .values(), so it is presumably a dict -- confirm in ABClassifier.
pos_labeled_pws = ab.pairwise_similarity(ab.pos_labeled_cv_list)
neg_labeled_pws = ab.pairwise_similarity(ab.neg_labeled_cv_list)
unlabeled_pws = ab.pairwise_similarity(ab.unlabeled_cv_list)

print "Done."

print "Saving..."

# NOTE(review): np.array(d.values()) relies on Python 2, where .values()
# returns a list (the print statements already make this file Python 2 only).
x = np.array(pos_labeled_pws.values())
# np.asarray on an existing ndarray returns it unchanged, so `a` is `x`.
a = np.asarray(x)
# Only the positive-labeled similarities are saved in the visible code.
np.savetxt(save_location + '/pw_pos.csv', a, delimiter=",")
Beispiel #4
0
import sys
sys.path.append('..')
from ABClassifier.ABClassifier import ABClassifier
import numpy as np

# Compute pairwise similarities for the positive-labeled, negative-labeled and
# unlabeled context vectors (10000-document limits) and dump each set to a CSV
# file in the current working directory.
ab = ABClassifier()

# Pipeline order matters: the context vector lists read below are only
# accessed after download -> LSA (k=100) -> compute_context_vectors have run.
ab.download_cursors(limit_unlabeled=10000, limit_labeled=10000)
ab.run_lsa(k=100)
ab.compute_context_vectors()

# Each result is mapping-like (only its .values() are written out).
pos_labeled_pws = ab.pairwise_similarity(ab.pos_labeled_cv_list)
neg_labeled_pws = ab.pairwise_similarity(ab.neg_labeled_cv_list)
unlabeled_pws = ab.pairwise_similarity(ab.unlabeled_cv_list)

# Parenthesised single-argument form prints the same text on Python 2 and 3.
print("done getting pws")

# One loop replaces three copy-pasted save stanzas; output order is unchanged
# (pos, neg, unlabeled).
for csv_name, pws in (
    ('pos_labeled.csv', pos_labeled_pws),
    ('neg_labeled.csv', neg_labeled_pws),
    ('unlabeled.csv', unlabeled_pws),
):
    # list(...) is required on Python 3, where .values() is a view and
    # np.array(view) would produce a 0-d object array that savetxt rejects.
    # On Python 2 it is a harmless copy of the list.
    np.savetxt(csv_name, np.array(list(pws.values())), delimiter=",")
Beispiel #5
0
    -Uses ONLY twitter data for training/validation

"""


import sys
sys.path.append('../..')
from ABClassifier.ABClassifier import ABClassifier
import numpy as np
import os

# Output directory handed to compute_context_vectors().
# NOTE(review): nothing visible here creates the directory -- confirm it exists
# before the script runs.
save_location = '../../experiment_data/experiment_9'

# Build the classifier from tweet cursors only (see the module docstring), run
# LSA with k=150, then compute context vectors, passing the output directory.
ab = ABClassifier()
ab.download_tweet_cursors(limit_unlabeled = 2500, limit_labeled = 2500)
ab.run_lsa(k=150)
ab.compute_context_vectors(save_location = save_location)

print "Performing pairwise similarity measures..."

# One pairwise-similarity result per context vector list. Each result exposes
# .values(), so it is presumably a dict -- confirm in ABClassifier.
pos_labeled_pws = ab.pairwise_similarity(ab.pos_labeled_cv_list)
neg_labeled_pws = ab.pairwise_similarity(ab.neg_labeled_cv_list)
unlabeled_pws   = ab.pairwise_similarity(ab.unlabeled_cv_list)

print "Done."


print "Saving..."

# NOTE(review): np.array(d.values()) relies on Python 2, where .values()
# returns a list (the print statements already make this file Python 2 only).
x = np.array(pos_labeled_pws.values())
# np.asarray on an existing ndarray returns it unchanged, so `a` is `x`.
a = np.asarray(x)