timeout=False).limit(400) neg_cursor_training = ab.labeled_collection.find({ "bully": False }, timeout=False).limit(400) training = [] tlabels = [] pos_validation = [] pos_vlabels = [] neg_validation = [] neg_vlabels = [] for p in pos_cursor_training: cv = ab.get_context_vector(p["text"]) training.append(cv / np.linalg.norm(cv)) tlabels.append(1) for n in neg_cursor_training: cv = ab.get_context_vector(n["text"]) training.append(cv / np.linalg.norm(cv)) tlabels.append(-1) pos_cursor_validation = ab.db["tweets"].find({"bullying_label": "1"}, timeout=False) neg_cursor_validation = ab.db["tweets"].find({"bullying_label": "0"}, timeout=False) #get validation data #pos_cursor_validation = ab.labeled_collection.find({"bullying_label":"1"},timeout=False).skip(400) #neg_cursor_validation = ab.labeled_collection.find({"bullying_label":"0"},timeout=False).skip(400)
ab.run_lsa(k=k) context_tweets = [ "Literally go f**k yourself, because you're honestly pathetic.", "f**k you f****n w***e go f**k yourself stupid bitch", "but this bad I want to kick her ass cuz she thinks she's a hard chola like sit ur fat ass down lol", "course he did he's a whipped bitch that will say anything to make u happy, unlike ur mum who called u fat", "Fat pig. You're disgusting.", "From some illiterate online keyboard warrior? Go back to sucking your butt buddy's fat junk.", "God Says; Evil Don't Know The Way. You are gay with AIDS & your sin cost you your anointing! This is why you use DUST!" "f**k you stupid f****t f*g" ] tweet_cvs = [] for c in context_tweets: tweet_cvs.append(ab.get_context_vector(c)) for i in xrange(0,1): for t in thresholds: print "Running experiment for t = " + str(t) print "Starting classification..." unlabeled_cursor = ab.db.tweets.find({"bullying_label" : {'$exists' :True}}, timeout = False) positive_set = [] negative_set = [] for u in unlabeled_cursor:
# Disabled alternative source for the negative training cursor:
#neg_cursor_training = ab.labeled_collection.find({"bullying_label":"0"},timeout=False).limit(400)

# Training data: at most 400 documents per class from the labeled collection,
# selected by the boolean "bully" field.
pos_cursor_training = ab.labeled_collection.find(
    {"bully": True}, timeout=False).limit(400)
neg_cursor_training = ab.labeled_collection.find(
    {"bully": False}, timeout=False).limit(400)

training, tlabels = [], []
pos_validation, pos_vlabels = [], []
neg_validation, neg_vlabels = [], []

# Positives first (label 1), then negatives (label -1). Every context vector
# is divided by its np.linalg.norm before being stored.
# NOTE(review): a vector with zero norm would produce NaN/inf entries here;
# confirm get_context_vector never returns an all-zero vector.
for class_label, class_cursor in ((1, pos_cursor_training),
                                  (-1, neg_cursor_training)):
    for doc in class_cursor:
        vec = ab.get_context_vector(doc["text"])
        training.append(vec / np.linalg.norm(vec))
        tlabels.append(class_label)

# Validation data comes from the "tweets" collection, where the label is
# stored as the strings "1" / "0" under "bullying_label".
pos_cursor_validation = ab.db["tweets"].find(
    {"bullying_label": "1"}, timeout=False)
neg_cursor_validation = ab.db["tweets"].find(
    {"bullying_label": "0"}, timeout=False)

#get validation data
# Disabled alternative: validate on the labeled collection, skipping the
# first 400 documents.
#pos_cursor_validation = ab.labeled_collection.find({"bullying_label":"1"},timeout=False).skip(400)
#neg_cursor_validation = ab.labeled_collection.find({"bullying_label":"0"},timeout=False).skip(400)
unlabeled_cursor = ab.db.tweets.find({"bullying_label" : {'$exists' :True}}, timeout = False) pos_cursor = ab.labeled_collection.find({"bully":True},timeout=False).limit(unlabeled_cursor.count()) neg_cursor = ab.labeled_collection.find({"bully":False},timeout=False).limit(unlabeled_cursor.count()) unl = [] pos = [] neg = [] for u,p,n in zip(unlabeled_cursor, pos_cursor, neg_cursor): u_obj = {} p_obj = {} n_obj = {} u_obj["text"] = u["text"] u_obj["cv"] = ab.get_context_vector(u_obj["text"]) u_obj["bullying_label"] = u["bullying_label"] unl.append(u_obj) p_obj["text"] = p["text"] p_obj["cv"] = ab.get_context_vector(p_obj["text"]) pos.append(p_obj) n_obj["text"] = n["text"] n_obj["cv"] = ab.get_context_vector(n_obj["text"]) neg.append(n_obj) positive_set = [] negative_set = [] for u in unl:
# Fetch the working data (same cap for unlabeled and labeled documents), then
# run LSA with the configured k before any context vector is requested.
ab.download_cursors(limit_unlabeled=limit_1, limit_labeled=limit_1)
ab.run_lsa(k=k)

# Hard-coded example tweets; each one is converted to a context vector below.
# NOTE: the last two entries used to be fused into a single string by implicit
# string-literal concatenation because the separating comma was missing.
context_tweets = [
    "Literally go f**k yourself, because you're honestly pathetic.",
    "f**k you f****n w***e go f**k yourself stupid bitch",
    "but this bad I want to kick her ass cuz she thinks she's a hard chola like sit ur fat ass down lol",
    "course he did he's a whipped bitch that will say anything to make u happy, unlike ur mum who called u fat",
    "Fat pig. You're disgusting.",
    "From some illiterate online keyboard warrior? Go back to sucking your butt buddy's fat junk.",
    "God Says; Evil Don't Know The Way. You are gay with AIDS & your sin cost you your anointing! This is why you use DUST!",
    "f**k you stupid f****t f*g",
]

# One context vector per example tweet, in the same order as context_tweets.
tweet_cvs = []
for c in context_tweets:
    tweet_cvs.append(ab.get_context_vector(c))

# xrange(0, 1) yields a single pass; the outer loop is kept as written.
for i in xrange(0, 1):
    for t in thresholds:
        # A parenthesised single-argument print emits the same text whether it
        # is parsed as a Python 2 statement or a Python 3 call.
        print("Running experiment for t = " + str(t))
        print("Starting classification...")

        # Every tweet that carries a "bullying_label" field.
        unlabeled_cursor = ab.db.tweets.find(
            {"bullying_label": {'$exists': True}}, timeout=False)

        positive_set = []
        negative_set = []
}, timeout=False).limit( unlabeled_cursor.count()) unl = [] pos = [] neg = [] for u, p, n in zip(unlabeled_cursor, pos_cursor, neg_cursor): u_obj = {} p_obj = {} n_obj = {} u_obj["text"] = u["text"] u_obj["cv"] = ab.get_context_vector(u_obj["text"]) u_obj["bullying_label"] = u["bullying_label"] unl.append(u_obj) p_obj["text"] = p["text"] p_obj["cv"] = ab.get_context_vector(p_obj["text"]) pos.append(p_obj) n_obj["text"] = n["text"] n_obj["cv"] = ab.get_context_vector(n_obj["text"]) neg.append(n_obj) positive_set = [] negative_set = [] for u in unl:
# xrange(0, 1) yields a single pass; the outer loop is kept as written.
for i in xrange(0, 1):
    for t in thresholds:
        # A parenthesised single-argument print emits the same text whether it
        # is parsed as a Python 2 statement or a Python 3 call.
        print("Running experiment for t = " + str(t))
        print("Starting classification...")

        # Tweets to classify, plus up to limit_1 reference documents per class.
        unlabeled_cursor = ab.db.tweets.find(
            {"bullying_label": {'$exists': True}}, timeout=False)
        pos_cursor = ab.labeled_collection.find(
            {"bully": True}, timeout=False).limit(limit_1)
        neg_cursor = ab.labeled_collection.find(
            {"bully": False}, timeout=False).limit(limit_1)

        # Mean context vector of each class, accumulated as running sums.
        p_avg = np.zeros(k)
        n_avg = np.zeros(k)
        pair_count = 0
        for p, n in zip(pos_cursor, neg_cursor):
            p_avg = p_avg + ab.get_context_vector(p["text"])
            n_avg = n_avg + ab.get_context_vector(n["text"])
            pair_count += 1

        # Divide by the number of vectors actually summed, not by limit_1:
        # zip stops at the shorter cursor, either class may hold fewer than
        # limit_1 documents, and limit(0) means "no limit" in PyMongo, which
        # would otherwise turn this into a division by zero. With no pairs the
        # averages simply stay at zero.
        if pair_count:
            p_avg = p_avg / pair_count
            n_avg = n_avg / pair_count

        positive_set = []
        negative_set = []
        # NOTE(review): this loop body continues beyond the visible fragment.
        for u in unlabeled_cursor:
            u_obj = {}
            u_obj["text"] = u["text"]
            u_obj["cv"] = ab.get_context_vector(u_obj["text"])
{"bullying_label": { '$exists': True }}, timeout=False) pos_cursor = ab.labeled_collection.find({ "bully": True }, timeout=False).limit(limit_1) neg_cursor = ab.labeled_collection.find({ "bully": False }, timeout=False).limit(limit_1) p_avg = np.zeros(k) n_avg = np.zeros(k) for p, n in zip(pos_cursor, neg_cursor): p_avg = p_avg + ab.get_context_vector(p["text"]) n_avg = n_avg + ab.get_context_vector(n["text"]) p_avg = p_avg / limit_1 n_avg = n_avg / limit_1 positive_set = [] negative_set = [] for u in unlabeled_cursor: u_obj = {} u_obj["text"] = u["text"] u_obj["cv"] = ab.get_context_vector(u_obj["text"]) u_obj["bullying_label"] = u["bullying_label"]