Example #1
0
def main():
    """Print a score for every post whose net day score reaches ``threshold``.

    For each entry of ``data.posts`` the net score is ``day[1] - day[2]``.
    Entries at or above the module-level ``threshold`` are kept, keyed by
    their title.  Each kept title is tokenized with ``token.get_tokens`` and
    scored as the mean of ``log10(word_score * weight)`` over its tokens,
    using the two tables returned by ``metrics()``.

    Everything is written to stdout; nothing is returned.
    """
    # Keyed by title, so posts sharing a title collapse to the last one seen.
    significant_posts = {}
    for post in data.posts:
        try:
            entry = data.posts[post]
            day = entry["day"]
            net_score = day[1] - day[2]
            if net_score >= threshold:
                significant_posts[entry["title"]] = net_score
        except KeyError:
            # A post without "day" or "title" cannot be rated; skip it.
            continue

    print(len(significant_posts))

    word_scores, weights = metrics()

    for title, net_score in significant_posts.items():
        tokens = {}
        token.get_tokens(title, tokens, 0, 1)
        if not tokens:
            # Nothing to average over; skipping avoids a division by zero.
            continue

        # Restart the total for every post so that one title's sum does not
        # leak into the average printed for the next title.
        score = 0
        for t in tokens:
            # NOTE(review): log10 gives nan/-inf when the product is not
            # positive; confirm whether such tokens should be skipped.
            score += np.log10(word_scores[t] * weights[t])

        print(title)
        print("Post Score: " + str(net_score) + " Score: " +
              str(score / len(tokens)) + "\n")
Example #2
0
def evaluate(category_probabilities, post):
    """Choose a category name for ``post`` from per-category token tables.

    ``category_probabilities`` maps a category name to a mapping of
    token -> value (presumably a probability; confirm against the caller).
    For every token of the post's title, the natural log of the value is
    added to the score of each category whose table contains that token.
    Each score is then multiplied by the category's share of all table
    entries, and the name with the smallest resulting score is returned.

    Args:
        category_probabilities: mapping of category name -> token table.
        post: key into ``data.posts``; its ``'title'`` is tokenized.

    Returns:
        The category name whose scaled score is lowest.
    """
    prob_per_category = defaultdict(float)

    # Number of token entries summed over every category table.
    total_value = sum(len(table) for table in category_probabilities.values())

    tokens = {}
    token.get_tokens(data.posts[post]['title'], tokens, 0, 0)

    for t in tokens:
        for key, value in category_probabilities.items():
            if t in value:
                prob_per_category[key] += math.log(value[t])

    # Walk a snapshot of the keys because the entries are rewritten in place.
    for key in list(prob_per_category):
        # float() keeps the share fractional under Python 2's integer "/".
        share = len(category_probabilities[key]) / float(total_value)
        print(share)
        # Store the scaled score back in the mapping; multiplying only the
        # loop variable would throw the result away.
        prob_per_category[key] *= share

    # DEBUG OUTPUT
    # Indexing the defaultdict also creates missing "successful" / "fail"
    # entries at 0.0, so both always take part in the min() below.
    print("Probability of true:\t" + str(prob_per_category["successful"]))
    print("Probability of lie:\t" + str(prob_per_category["fail"]))

    # NOTE(review): the smallest score wins, as before; confirm that this is
    # the intended selection rule.
    return min(prob_per_category.items(), key=operator.itemgetter(1))[0]
Example #3
0
def evaluate(category_probabilities, post):
	"""Choose a category name for ``post`` from per-category token tables.

	``category_probabilities`` maps a category name to a mapping of
	token -> value (presumably a probability; confirm against the caller).
	For every token of the post's title, the natural log of the value is
	added to the score of each category whose table contains that token.
	Each score is then multiplied by the category's share of all table
	entries, and the name with the smallest resulting score is returned.

	Args:
		category_probabilities: mapping of category name -> token table.
		post: key into ``data.posts``; its ``'title'`` is tokenized.

	Returns:
		The category name whose scaled score is lowest.
	"""
	prob_per_category = defaultdict(float)

	# Number of token entries summed over every category table.
	total_value = sum(len(table) for table in category_probabilities.values())

	tokens = {}
	token.get_tokens(data.posts[post]['title'], tokens, 0, 0)

	for t in tokens:
		for key, value in category_probabilities.items():
			if t in value:
				prob_per_category[key] += math.log(value[t])

	# Walk a snapshot of the keys because the entries are rewritten in place.
	for key in list(prob_per_category):
		# float() keeps the share fractional under Python 2's integer "/".
		share = len(category_probabilities[key]) / float(total_value)
		print(share)
		# Store the scaled score back in the mapping; multiplying only the
		# loop variable would throw the result away.
		prob_per_category[key] *= share

	# DEBUG OUTPUT
	# Indexing the defaultdict also creates missing "successful" / "fail"
	# entries at 0.0, so both always take part in the min() below.
	print("Probability of true:\t" + str(prob_per_category["successful"]))
	print("Probability of lie:\t" + str(prob_per_category["fail"]))

	# NOTE(review): the smallest score wins, as before; confirm that this is
	# the intended selection rule.
	return min(prob_per_category.items(), key=operator.itemgetter(1))[0]
Example #4
0
def vocabulary(posts):
    """Tally tokens over the titles of the given posts.

    Args:
        posts: iterable of keys into ``data.posts``.

    Returns:
        A ``defaultdict(int)`` mapping each token to the number of posts
        whose title yielded it (each post's tokens are collected in a dict
        by ``token.get_tokens``, so a token counts once per post).
    """
    counts = defaultdict(int)
    for post_id in posts:
        found = {}
        title = data.posts[post_id]['title']
        token.get_tokens(title, found, 0, 1)
        for word in found:
            counts[word] = counts[word] + 1
    return counts
Example #5
0
def vocabulary(posts):
	"""Tally tokens over the titles of the given posts.

	Args:
		posts: iterable of keys into ``data.posts``.

	Returns:
		A ``defaultdict(int)`` mapping each token to the number of posts
		whose title yielded it (each post's tokens are collected in a dict
		by ``token.get_tokens``, so a token counts once per post).
	"""
	tally = defaultdict(int)
	for post_key in posts:
		title_tokens = {}
		token.get_tokens(data.posts[post_key]['title'], title_tokens, 0, 1)
		for tok in title_tokens.keys():
			tally[tok] += 1
	return tally
Example #6
0
def main():
	"""Print a score for every post whose net day score reaches ``threshold``.

	For each entry of ``data.posts`` the net score is ``day[1] - day[2]``.
	Entries at or above the module-level ``threshold`` are kept, keyed by
	their title.  Each kept title is tokenized with ``token.get_tokens`` and
	scored as the mean of ``log10(word_score * weight)`` over its tokens,
	using the two tables returned by ``metrics()``.

	Everything is written to stdout; nothing is returned.
	"""
	# Keyed by title, so posts sharing a title collapse to the last one seen.
	significant_posts = {}
	for post in data.posts:
		try:
			entry = data.posts[post]
			day = entry["day"]
			net_score = day[1] - day[2]
			if net_score >= threshold:
				significant_posts[entry["title"]] = net_score
		except KeyError:
			# A post without "day" or "title" cannot be rated; skip it.
			continue

	print(len(significant_posts))

	word_scores, weights = metrics()

	for title, net_score in significant_posts.items():
		tokens = {}
		token.get_tokens(title, tokens, 0, 1)
		if not tokens:
			# Nothing to average over; skipping avoids a division by zero.
			continue

		# Restart the total for every post so that one title's sum does not
		# leak into the average printed for the next title.
		score = 0
		for t in tokens:
			# NOTE(review): log10 gives nan/-inf when the product is not
			# positive; confirm whether such tokens should be skipped.
			score += np.log10(word_scores[t] * weights[t])

		print(title)
		print("Post Score: " + str(net_score) + " Score: " +
			str(score / len(tokens)) + "\n")
Example #7
0
def metrics():
    """Build per-token score and weight tables from every post in ``data.posts``.

    Each post's title is tokenized with ``token.get_tokens`` and every token
    is credited with the post's net day score, ``day[1] - day[2]``.  A post
    that raises ``KeyError`` while being read (for instance one without
    ``'title'`` or ``'day'``) is skipped and contributes nothing.

    The largest ``day[1]`` seen and the key of the post it belongs to are
    printed before returning.

    Returns:
        A ``(word_scores, weights)`` pair:
        ``word_scores`` is a ``defaultdict(int)`` of token -> mean net score
        (a float) over the posts that produced the token;
        ``weights`` is a ``defaultdict(float)`` of token ->
        ``1 - count / total``, where ``count`` is the number of posts that
        produced the token and ``total`` is the sum of all counts.
    """
    word_scores = defaultdict(int)
    num_words = defaultdict(int)
    total_num_words = 0
    highest_post = 0
    title = ""

    for post in data.posts:
        tokens = {}
        try:
            entry = data.posts[post]
            token.get_tokens(entry['title'], tokens, 0, 1)
            # Read the day figures before touching the tables.  An augmented
            # assignment on a defaultdict creates the key first, so a
            # KeyError raised afterwards would leave a token in word_scores
            # with no count, and the averaging below would divide by zero.
            day = entry['day']
            upvotes = day[1]
            net_score = upvotes - day[2]
        except KeyError:
            continue

        for t in tokens:
            word_scores[t] += net_score
            num_words[t] += 1
            total_num_words += 1

        if upvotes > highest_post:
            title = post
            highest_post = upvotes

    weights = defaultdict(float)
    for key, value in num_words.items():
        weights[key] = 1 - (float(value) / total_num_words)

    # Snapshot the keys because the values are replaced while looping.
    for key in list(word_scores):
        # float() keeps the mean from being truncated by Python 2's "/".
        word_scores[key] = word_scores[key] / float(num_words[key])

    print(highest_post)
    print(title)
    return word_scores, weights
Example #8
0
def metrics():
	"""Build per-token score and weight tables from every post in ``data.posts``.

	Each post's title is tokenized with ``token.get_tokens`` and every token
	is credited with the post's net day score, ``day[1] - day[2]``.  A post
	that raises ``KeyError`` while being read (for instance one without
	``'title'`` or ``'day'``) is skipped and contributes nothing.

	The largest ``day[1]`` seen and the key of the post it belongs to are
	printed before returning.

	Returns:
		A ``(word_scores, weights)`` pair:
		``word_scores`` is a ``defaultdict(int)`` of token -> mean net score
		(a float) over the posts that produced the token;
		``weights`` is a ``defaultdict(float)`` of token ->
		``1 - count / total``, where ``count`` is the number of posts that
		produced the token and ``total`` is the sum of all counts.
	"""
	word_scores = defaultdict(int)
	num_words = defaultdict(int)
	total_num_words = 0
	highest_post = 0
	title = ""

	for post in data.posts:
		tokens = {}
		try:
			entry = data.posts[post]
			token.get_tokens(entry['title'], tokens, 0, 1)
			# Read the day figures before touching the tables.  An augmented
			# assignment on a defaultdict creates the key first, so a
			# KeyError raised afterwards would leave a token in word_scores
			# with no count, and the averaging below would divide by zero.
			day = entry['day']
			upvotes = day[1]
			net_score = upvotes - day[2]
		except KeyError:
			continue

		for t in tokens:
			word_scores[t] += net_score
			num_words[t] += 1
			total_num_words += 1

		if upvotes > highest_post:
			title = post
			highest_post = upvotes

	weights = defaultdict(float)
	for key, value in num_words.items():
		weights[key] = 1 - (float(value) / total_num_words)

	# Snapshot the keys because the values are replaced while looping.
	for key in list(word_scores):
		# float() keeps the mean from being truncated by Python 2's "/".
		word_scores[key] = word_scores[key] / float(num_words[key])

	print(highest_post)
	print(title)
	return word_scores, weights