kernel_list = ["custom", "linear", "poly_2","poly_3", "rbf"] # results -> k -> kernal results = [] limit_1 = 400 #training set limit_2 = 400 #validation set for k in k_list: print "Running experiment for k=" + str(k) + "..." ab = ABClassifier() ab.download_cursors(limit_unlabeled = 5000, limit_labeled = 5000) #ab.download_tweet_cursors(limit_unlabeled = 800, limit_labeled = 1000) ab.run_lsa(k=k) #pos_cursor_training = ab.labeled_collection.find({"bullying_label":"1"},timeout=False).limit(400) #neg_cursor_training = ab.labeled_collection.find({"bullying_label":"0"},timeout=False).limit(400) pos_cursor_training = ab.labeled_collection.find({"bully":True},timeout=False).limit(400) neg_cursor_training = ab.labeled_collection.find({"bully":False},timeout=False).limit(400) training = [] tlabels = [] pos_validation = [] pos_vlabels = [] neg_validation = [] neg_vlabels = []
# Script: run LSA over the downloaded documents, compute context vectors,
# measure pairwise similarity within each group (positive-labeled,
# negative-labeled, unlabeled) and dump each group's values to a CSV file
# in the current working directory.
import sys
sys.path.append('..')  # make the ABClassifier package importable from the parent directory

from ABClassifier.ABClassifier import ABClassifier
import numpy as np

ab = ABClassifier()
ab.download_cursors(limit_unlabeled=1000, limit_labeled=1000)
ab.run_lsa(k=100)
ab.compute_context_vectors()

# All three similarity passes run before anything is written, matching the
# original ordering of the script.
pos_labeled_pws = ab.pairwise_similarity(ab.pos_labeled_cv_list)
neg_labeled_pws = ab.pairwise_similarity(ab.neg_labeled_cv_list)
unlabeled_pws = ab.pairwise_similarity(ab.unlabeled_cv_list)
# Single-argument parenthesised form prints identically on Python 2 and 3.
print("done getting pws")

# Each result is used only through .values() (presumably a dict -- confirm
# against ABClassifier.pairwise_similarity). list(...) materialises the
# values so np.array builds a real array on Python 3 as well, where
# dict.values() is a view; on Python 2 it is just a copy of the list.
for csv_name, pws in (
    ('pos_labeled.csv', pos_labeled_pws),
    ('neg_labeled.csv', neg_labeled_pws),
    ('unlabeled.csv', unlabeled_pws),
):
    np.savetxt(csv_name, np.array(list(pws.values())), delimiter=",")
-Uses ONLY twitter data for training/validation """ import sys sys.path.append('../..') from ABClassifier.ABClassifier import ABClassifier import numpy as np import os save_location = '../../experiment_data/experiment_9' ab = ABClassifier() ab.download_tweet_cursors(limit_unlabeled=2500, limit_labeled=2500) ab.run_lsa(k=150) ab.compute_context_vectors(save_location=save_location) print "Performing pairwise similarity measures..." pos_labeled_pws = ab.pairwise_similarity(ab.pos_labeled_cv_list) neg_labeled_pws = ab.pairwise_similarity(ab.neg_labeled_cv_list) unlabeled_pws = ab.pairwise_similarity(ab.unlabeled_cv_list) print "Done." print "Saving..." x = np.array(pos_labeled_pws.values()) a = np.asarray(x) np.savetxt(save_location + '/pw_pos.csv', a, delimiter=",")
# Script: larger run (10000 unlabeled / 10000 labeled limit) of the
# pairwise-similarity export. Runs LSA, computes context vectors, measures
# pairwise similarity within each group and writes one CSV per group to the
# current working directory.
import sys
sys.path.append('..')  # make the ABClassifier package importable from the parent directory

from ABClassifier.ABClassifier import ABClassifier
import numpy as np

ab = ABClassifier()
ab.download_cursors(limit_unlabeled=10000, limit_labeled=10000)
ab.run_lsa(k=100)
ab.compute_context_vectors()

# Compute every group's similarities first, then save, as the original did.
pos_labeled_pws = ab.pairwise_similarity(ab.pos_labeled_cv_list)
neg_labeled_pws = ab.pairwise_similarity(ab.neg_labeled_cv_list)
unlabeled_pws = ab.pairwise_similarity(ab.unlabeled_cv_list)
# Single-argument parenthesised form prints identically on Python 2 and 3.
print("done getting pws")

# The results are only consumed via .values() (presumably dicts -- confirm
# against ABClassifier.pairwise_similarity). Wrapping in list(...) keeps
# np.array from producing a 0-d object array when .values() is a Python 3
# view; the redundant second np.asarray conversion is dropped.
for csv_name, pws in (
    ('pos_labeled.csv', pos_labeled_pws),
    ('neg_labeled.csv', neg_labeled_pws),
    ('unlabeled.csv', unlabeled_pws),
):
    np.savetxt(csv_name, np.array(list(pws.values())), delimiter=",")
-Uses ONLY twitter data for training/validation """ import sys sys.path.append('../..') from ABClassifier.ABClassifier import ABClassifier import numpy as np import os save_location = '../../experiment_data/experiment_9' ab = ABClassifier() ab.download_tweet_cursors(limit_unlabeled = 2500, limit_labeled = 2500) ab.run_lsa(k=150) ab.compute_context_vectors(save_location = save_location) print "Performing pairwise similarity measures..." pos_labeled_pws = ab.pairwise_similarity(ab.pos_labeled_cv_list) neg_labeled_pws = ab.pairwise_similarity(ab.neg_labeled_cv_list) unlabeled_pws = ab.pairwise_similarity(ab.unlabeled_cv_list) print "Done." print "Saving..." x = np.array(pos_labeled_pws.values()) a = np.asarray(x)