Beispiel #1
0
                                                     timeout=False).limit(400)
    # Non-bully training documents, capped at 400 by the cursor limit.
    neg_cursor_training = ab.labeled_collection.find(
        {"bully": False}, timeout=False).limit(400)

    # Parallel lists: training[i] is a normalised context vector and
    # tlabels[i] is the class label that belongs to it.
    training, tlabels = [], []

    # Containers for the validation split; nothing is added to them in
    # this section.
    pos_validation, pos_vlabels = [], []
    neg_validation, neg_vlabels = [], []

    # Documents from pos_cursor_training are labelled 1 and documents from
    # neg_cursor_training are labelled -1; the positive cursor is consumed
    # first, so all positives precede all negatives in `training`.
    for train_cursor, train_label in ((pos_cursor_training, 1),
                                      (neg_cursor_training, -1)):
        for train_doc in train_cursor:
            cv = ab.get_context_vector(train_doc["text"])
            # Divide the vector by its norm before storing it.
            # NOTE(review): an all-zero cv would make this a division by
            # zero (NaN entries) -- confirm get_context_vector cannot
            # return one.
            cv_norm = np.linalg.norm(cv)
            training.append(cv / cv_norm)
            tlabels.append(train_label)

    # Validation cursors read the "tweets" collection and split it on the
    # string-valued "bullying_label" field ("1" vs. "0").
    pos_cursor_validation = ab.db["tweets"].find(
        {"bullying_label": "1"}, timeout=False)
    neg_cursor_validation = ab.db["tweets"].find(
        {"bullying_label": "0"}, timeout=False)
    #get validation data
    #pos_cursor_validation = ab.labeled_collection.find({"bullying_label":"1"},timeout=False).skip(400)
    #neg_cursor_validation = ab.labeled_collection.find({"bullying_label":"0"},timeout=False).skip(400)
Beispiel #2
0
ab.run_lsa(k=k)
# Hand-picked example tweets; a context vector is computed for each entry
# right after this list (presumably they act as abusive reference examples
# -- confirm with the experiment description).
# Every entry must be followed by a comma: Python silently concatenates
# adjacent string literals, which previously merged the last two tweets
# into a single element.
context_tweets = [
	"Literally go f**k yourself, because you're honestly pathetic.",
	"f**k you f****n w***e go f**k yourself stupid bitch",
	"but this bad I want to kick her ass cuz she thinks she's a hard chola like sit ur fat ass down lol",
	"course he did he's a whipped bitch that will say anything to make u happy, unlike ur mum who called u fat",
	"Fat pig. You're disgusting.",
	"From some illiterate online keyboard warrior? Go back to sucking your butt buddy's fat junk.",
	"God Says; Evil Don't Know The Way. You are gay with AIDS & your sin cost you your anointing! This is why you use DUST!",
	"f**k you stupid f****t f*g",
]


# One context vector per seed tweet, in the same order as context_tweets.
tweet_cvs = [ab.get_context_vector(c) for c in context_tweets]

# xrange(0,1) yields only i = 0, so the outer loop body runs exactly once.
for i in xrange(0,1):
	# One experiment per value in `thresholds` (defined outside this section).
	for t in thresholds:

		print "Running experiment for t = " + str(t)

		print "Starting classification..."

		# Despite the name, this selects tweets that DO have a
		# "bullying_label" field ($exists: True).
		# NOTE(review): timeout=False presumably disables the server-side
		# idle timeout for the cursor (PyMongo 2.x option) -- confirm
		# against the installed driver version.
		unlabeled_cursor   = ab.db.tweets.find({"bullying_label" : {'$exists' :True}}, timeout = False)

		# Result buckets, reset for every threshold.
		positive_set = []
		negative_set = []


		for u in unlabeled_cursor:
Beispiel #3
0
	#neg_cursor_training = ab.labeled_collection.find({"bullying_label":"0"},timeout=False).limit(400)

	# Training cursors: at most 400 documents per class from the labeled
	# collection, selected by the boolean "bully" field.
	pos_cursor_training = ab.labeled_collection.find(
		{"bully": True}, timeout=False).limit(400)
	neg_cursor_training = ab.labeled_collection.find(
		{"bully": False}, timeout=False).limit(400)

	# Parallel lists: training[i] is a normalised context vector and
	# tlabels[i] is the class label that belongs to it.
	training, tlabels = [], []

	# Containers for the validation split; nothing is added to them in
	# this section.
	pos_validation, pos_vlabels = [], []
	neg_validation, neg_vlabels = [], []

	# bully=True documents are labelled 1, bully=False documents -1; the
	# positive cursor is consumed first, so all positives precede all
	# negatives in `training`.
	for train_cursor, train_label in ((pos_cursor_training, 1),
			(neg_cursor_training, -1)):
		for train_doc in train_cursor:
			cv = ab.get_context_vector(train_doc["text"])
			# Divide the vector by its norm before storing it.
			# NOTE(review): an all-zero cv would make this a division by
			# zero (NaN entries) -- confirm get_context_vector cannot
			# return one.
			cv_norm = np.linalg.norm(cv)
			training.append(cv / cv_norm)
			tlabels.append(train_label)

	# Validation cursors read the "tweets" collection and split it on the
	# string-valued "bullying_label" field ("1" vs. "0").
	pos_cursor_validation = ab.db["tweets"].find(
		{"bullying_label": "1"}, timeout=False)
	neg_cursor_validation = ab.db["tweets"].find(
		{"bullying_label": "0"}, timeout=False)
	#get validation data
	#pos_cursor_validation = ab.labeled_collection.find({"bullying_label":"1"},timeout=False).skip(400)
	#neg_cursor_validation = ab.labeled_collection.find({"bullying_label":"0"},timeout=False).skip(400)
Beispiel #4
0
		# Despite the name, this selects tweets that DO have a
		# "bullying_label" field ($exists: True).
		unlabeled_cursor = ab.db.tweets.find({"bullying_label": {'$exists': True}}, timeout=False)

		# Ask the server for the size once and reuse it: calling count()
		# per cursor costs an extra round-trip and could give the two
		# labeled cursors different limits if the collection changes
		# in between.
		unlabeled_total = unlabeled_cursor.count()
		pos_cursor = ab.labeled_collection.find({"bully": True}, timeout=False).limit(unlabeled_total)
		neg_cursor = ab.labeled_collection.find({"bully": False}, timeout=False).limit(unlabeled_total)

		# Each list holds dicts with the tweet "text" and its context
		# vector "cv"; entries in `unl` also carry "bullying_label".
		unl = []
		pos = []
		neg = []

		# zip stops at the shortest cursor, so the three lists always end
		# up with the same number of entries.
		for u, p, n in zip(unlabeled_cursor, pos_cursor, neg_cursor):
			u_text = u["text"]
			unl.append({
				"text": u_text,
				"cv": ab.get_context_vector(u_text),
				"bullying_label": u["bullying_label"],
			})

			p_text = p["text"]
			pos.append({
				"text": p_text,
				"cv": ab.get_context_vector(p_text),
			})

			n_text = n["text"]
			neg.append({
				"text": n_text,
				"cv": ab.get_context_vector(n_text),
			})

		positive_set = []
		negative_set = []

		for u in unl:
Beispiel #5
0
ab.download_cursors(limit_unlabeled=limit_1, limit_labeled=limit_1)
ab.run_lsa(k=k)
# Hand-picked example tweets; a context vector is computed for each entry
# right after this list (presumably they act as abusive reference examples
# -- confirm with the experiment description).
# Every entry must be followed by a comma: Python silently concatenates
# adjacent string literals, which previously merged the last two tweets
# into a single element.
context_tweets = [
    "Literally go f**k yourself, because you're honestly pathetic.",
    "f**k you f****n w***e go f**k yourself stupid bitch",
    "but this bad I want to kick her ass cuz she thinks she's a hard chola like sit ur fat ass down lol",
    "course he did he's a whipped bitch that will say anything to make u happy, unlike ur mum who called u fat",
    "Fat pig. You're disgusting.",
    "From some illiterate online keyboard warrior? Go back to sucking your butt buddy's fat junk.",
    "God Says; Evil Don't Know The Way. You are gay with AIDS & your sin cost you your anointing! This is why you use DUST!",
    "f**k you stupid f****t f*g",
]

# One context vector per seed tweet, in the same order as context_tweets.
tweet_cvs = [ab.get_context_vector(c) for c in context_tweets]

# xrange(0, 1) yields only i = 0, so the outer loop body runs exactly once.
for i in xrange(0, 1):
    # One experiment per value in `thresholds` (defined outside this section).
    for t in thresholds:

        print "Running experiment for t = " + str(t)

        print "Starting classification..."

        # Despite the name, this selects tweets that DO have a
        # "bullying_label" field ($exists: True).
        # NOTE(review): timeout=False presumably disables the server-side
        # idle timeout for the cursor (PyMongo 2.x option) -- confirm
        # against the installed driver version.
        unlabeled_cursor = ab.db.tweets.find(
            {"bullying_label": {
                '$exists': True
            }}, timeout=False)

        # Result buckets, reset for every threshold.
        positive_set = []
        negative_set = []
Beispiel #6
0
        },
                                                timeout=False).limit(
                                                    unlabeled_cursor.count())

        # Each list holds dicts with the tweet "text" and its context
        # vector "cv"; entries in `unl` also carry "bullying_label".
        unl, pos, neg = [], [], []

        # zip stops at the shortest cursor, so the three lists always end
        # up with the same number of entries.
        for u, p, n in zip(unlabeled_cursor, pos_cursor, neg_cursor):
            u_text = u["text"]
            unl.append({
                "text": u_text,
                "cv": ab.get_context_vector(u_text),
                "bullying_label": u["bullying_label"],
            })

            p_text = p["text"]
            pos.append({
                "text": p_text,
                "cv": ab.get_context_vector(p_text),
            })

            n_text = n["text"]
            neg.append({
                "text": n_text,
                "cv": ab.get_context_vector(n_text),
            })

        positive_set = []
        negative_set = []

        for u in unl:
Beispiel #7
0
for i in xrange(0,1):
	for t in thresholds:

		print "Running experiment for t = " + str(t)
		print "Starting classification..."

		unlabeled_cursor   = ab.db.tweets.find({"bullying_label" : {'$exists' :True}}, timeout = False)
		pos_cursor = ab.labeled_collection.find({"bully":True},timeout=False).limit(limit_1)
		neg_cursor = ab.labeled_collection.find({"bully":False},timeout=False).limit(limit_1)


		# Running sums of the positive / negative context vectors
		# (length-k arrays), turned into averages below.
		p_avg = np.zeros(k)
		n_avg = np.zeros(k)
		# Number of (positive, negative) pairs actually read from the cursors.
		pair_count = 0
		for p,n in zip(pos_cursor, neg_cursor):

			p_avg = p_avg + ab.get_context_vector(p["text"])

			n_avg = n_avg + ab.get_context_vector(n["text"])

			pair_count += 1

		# Divide by the number of vectors that were summed, not by limit_1:
		# a cursor may yield fewer than limit_1 documents, and limit(0)
		# means "no limit" in PyMongo, so limit_1 is not a reliable
		# denominator (and is a division by zero when it is 0).
		# With no pairs at all both averages stay all-zero.
		if pair_count:
			p_avg = p_avg/pair_count
			n_avg = n_avg/pair_count


		positive_set = []
		negative_set = []

		for u in unlabeled_cursor:

			u_obj = {}
			u_obj["text"] = u["text"]
			u_obj["cv"]   = ab.get_context_vector(u_obj["text"])
Beispiel #8
0
            {"bullying_label": {
                '$exists': True
            }}, timeout=False)
        pos_cursor = ab.labeled_collection.find({
            "bully": True
        }, timeout=False).limit(limit_1)
        neg_cursor = ab.labeled_collection.find({
            "bully": False
        },
                                                timeout=False).limit(limit_1)

        # Running sums of the positive / negative context vectors
        # (length-k arrays), turned into averages below.
        p_avg = np.zeros(k)
        n_avg = np.zeros(k)
        # Number of (positive, negative) pairs actually read from the cursors.
        pair_count = 0
        for p, n in zip(pos_cursor, neg_cursor):

            p_avg = p_avg + ab.get_context_vector(p["text"])

            n_avg = n_avg + ab.get_context_vector(n["text"])

            pair_count += 1

        # Divide by the number of vectors that were summed, not by limit_1:
        # a cursor may yield fewer than limit_1 documents, and limit(0)
        # means "no limit" in PyMongo, so limit_1 is not a reliable
        # denominator (and is a division by zero when it is 0).
        # With no pairs at all both averages stay all-zero.
        if pair_count:
            p_avg = p_avg / pair_count
            n_avg = n_avg / pair_count

        positive_set = []
        negative_set = []

        for u in unlabeled_cursor:

            u_obj = {}
            u_obj["text"] = u["text"]
            u_obj["cv"] = ab.get_context_vector(u_obj["text"])
            u_obj["bullying_label"] = u["bullying_label"]