def main():
    """Print a word-based score for the title of every significant post.

    A post is significant when ``day[1] - day[2]`` is at least the
    module-level ``threshold``; posts that raise ``KeyError`` while being
    read are skipped.  The number of significant titles is printed first.
    Then, for each title, the function prints the title, its net score and
    the mean of ``log10(word_scores[t] * weights[t])`` over the title's
    tokens, using the tables returned by ``metrics()``.
    """
    significant_posts = defaultdict(int)
    for post in data.posts:
        try:
            entry = data.posts[post]
            # Compute the net score once and reuse it for the test and the
            # stored value.
            net_score = entry["day"][1] - entry["day"][2]
            if net_score >= threshold:
                # Keyed by title, so posts sharing a title overwrite each
                # other.
                significant_posts[entry['title']] = net_score
        except KeyError:
            continue
    print(len(significant_posts))

    word_scores, weights = metrics()
    for title, net_score in significant_posts.iteritems():
        tokens = {}
        # presumably fills `tokens` in place with the title's tokens as
        # keys -- confirm against token.get_tokens
        token.get_tokens(title, tokens, 0, 1)

        # Start from zero for every title; a single running total would mix
        # the scores of all previously printed titles into this one.
        score = 0
        for t in tokens:
            score += np.log10(word_scores[t] * weights[t])

        # A title without tokens has no defined mean; report nan instead of
        # dividing by zero.
        if tokens:
            average = score / len(tokens)
        else:
            average = float("nan")

        print(title)
        print("Post Score: " + str(net_score) + " Score: " + str(average) + "\n")
def evaluate(category_probabilities, post):
    """Return the category with the smallest summed log value for a post.

    Args:
        category_probabilities: mapping of category name to a table that
            maps tokens to positive numbers (each is passed to math.log).
        post: key into ``data.posts``; only the post's 'title' is used.

    Returns:
        The category key whose accumulated ``math.log`` total is lowest.
        The keys "successful" and "fail" always take part in that
        comparison, with a total of 0.0 if no title token matched them.
    """
    scores = defaultdict(float)

    # Combined number of entries across all category tables.
    table_total = sum(
        len(table) for table in category_probabilities.itervalues())

    title_tokens = {}
    # presumably fills `title_tokens` in place with the title's tokens as
    # keys -- confirm against token.get_tokens
    token.get_tokens(data.posts[post]['title'], title_tokens, 0, 0)

    for tok in title_tokens:
        for category, table in category_probabilities.iteritems():
            if tok in table:
                scores[category] += math.log(table[tok])

    # NOTE(review): this per-category ratio is only printed; it is never
    # folded into `scores`.  With two ints it is also floor division under
    # Python 2 unless true division is enabled for the module.  Confirm
    # whether it was meant to weight the scores.
    for category in scores:
        print(len(category_probabilities[category]) / table_total)

    # DEBUG OUTPUT.  Reading these two keys from the defaultdict inserts
    # them with 0.0 when absent, which is why they always reach min() below.
    print("Probability of true:\t" + str(scores["successful"]))
    print("Probability of lie:\t" + str(scores["fail"]))

    # NOTE(review): this selects the *lowest* total -- confirm min (rather
    # than max) is the intended choice.
    return min(scores, key=scores.get)
def evaluate(category_probabilities, post):
    """Return the category with the smallest summed log value for a post.

    Args:
        category_probabilities: mapping of category name to a table that
            maps tokens to positive numbers (each is passed to math.log).
        post: key into ``data.posts``; only the post's 'title' is used.

    Returns:
        The category key whose accumulated ``math.log`` total is lowest.
        The keys "successful" and "fail" always take part in that
        comparison, with a total of 0.0 if no title token matched them.
    """
    scores = defaultdict(float)

    # Combined number of entries across all category tables.
    table_total = sum(
        len(table) for table in category_probabilities.itervalues())

    title_tokens = {}
    # presumably fills `title_tokens` in place with the title's tokens as
    # keys -- confirm against token.get_tokens
    token.get_tokens(data.posts[post]['title'], title_tokens, 0, 0)

    for tok in title_tokens:
        for category, table in category_probabilities.iteritems():
            if tok in table:
                scores[category] += math.log(table[tok])

    # NOTE(review): this per-category ratio is only printed; it is never
    # folded into `scores`.  With two ints it is also floor division under
    # Python 2 unless true division is enabled for the module.  Confirm
    # whether it was meant to weight the scores.
    for category in scores:
        print(len(category_probabilities[category]) / table_total)

    # DEBUG OUTPUT.  Reading these two keys from the defaultdict inserts
    # them with 0.0 when absent, which is why they always reach min() below.
    print("Probability of true:\t" + str(scores["successful"]))
    print("Probability of lie:\t" + str(scores["fail"]))

    # NOTE(review): this selects the *lowest* total -- confirm min (rather
    # than max) is the intended choice.
    return min(scores, key=scores.get)
def vocabulary(posts):
    """Tally, per token, how many of the given posts produced it.

    Args:
        posts: iterable of keys into ``data.posts``.

    Returns:
        A ``defaultdict(int)`` mapping each token key to the number of
        posts whose title token dict contained it.  A token is counted at
        most once per post because the per-post tokens are dict keys.
    """
    counts = defaultdict(int)
    for post_key in posts:
        title = data.posts[post_key]['title']
        title_tokens = {}
        # presumably fills `title_tokens` in place with the title's tokens
        # as keys -- confirm against token.get_tokens
        token.get_tokens(title, title_tokens, 0, 1)
        for tok in title_tokens:
            counts[tok] += 1
    return counts
def main():
    """Print a word-based score for the title of every significant post.

    A post is significant when ``day[1] - day[2]`` is at least the
    module-level ``threshold``; posts that raise ``KeyError`` while being
    read are skipped.  The number of significant titles is printed first.
    Then, for each title, the function prints the title, its net score and
    the mean of ``log10(word_scores[t] * weights[t])`` over the title's
    tokens, using the tables returned by ``metrics()``.
    """
    significant_posts = defaultdict(int)
    for post in data.posts:
        try:
            entry = data.posts[post]
            # Compute the net score once and reuse it for the test and the
            # stored value.
            net_score = entry["day"][1] - entry["day"][2]
            if net_score >= threshold:
                # Keyed by title, so posts sharing a title overwrite each
                # other.
                significant_posts[entry['title']] = net_score
        except KeyError:
            continue
    print(len(significant_posts))

    word_scores, weights = metrics()
    for title, net_score in significant_posts.iteritems():
        tokens = {}
        # presumably fills `tokens` in place with the title's tokens as
        # keys -- confirm against token.get_tokens
        token.get_tokens(title, tokens, 0, 1)

        # Start from zero for every title; a single running total would mix
        # the scores of all previously printed titles into this one.
        score = 0
        for t in tokens:
            score += np.log10(word_scores[t] * weights[t])

        # A title without tokens has no defined mean; report nan instead of
        # dividing by zero.
        if tokens:
            average = score / len(tokens)
        else:
            average = float("nan")

        print(title)
        print("Post Score: " + str(net_score) + " Score: " + str(average) + "\n")
def metrics():
    """Build per-token average scores and frequency weights over all posts.

    For every post in ``data.posts`` that can be read without a
    ``KeyError``, each token of its title accumulates the post's
    ``day[1] - day[2]`` and a count of one.  The largest ``day[1]`` seen
    and the key of the post that had it are printed before returning.

    Returns:
        (word_scores, weights):
            word_scores -- defaultdict mapping token to its accumulated
                net score divided by the number of posts it appeared in.
            weights -- defaultdict mapping token to
                ``1 - count / total``, where ``total`` is the sum of all
                token counts.
    """
    word_scores = defaultdict(int)
    num_words = defaultdict(int)
    total_num_words = 0
    highest_post = 0
    top_post_key = ""

    for post in data.posts:
        tokens = {}
        # Read everything that can raise KeyError *before* touching the
        # accumulators.  `word_scores[t] += ...` on a defaultdict inserts
        # `t` before its right-hand side is evaluated, so a failure in the
        # middle of an update would leave a token in word_scores that has
        # no count, and the averaging loop below would divide by zero.
        try:
            entry = data.posts[post]
            # presumably fills `tokens` in place with the title's tokens
            # as keys -- confirm against token.get_tokens
            token.get_tokens(entry['title'], tokens, 0, 1)
            upvotes = entry['day'][1]
            # presumably the downvote count -- confirm against the loader
            downvotes = entry['day'][2]
        except KeyError:
            continue

        for t in tokens:
            word_scores[t] += upvotes
            word_scores[t] -= downvotes
            num_words[t] += 1
            total_num_words += 1

        if upvotes > highest_post:
            top_post_key = post
            highest_post = upvotes

    weights = defaultdict(float)
    for key, value in num_words.iteritems():
        weights[key] = 1 - (float(value) / total_num_words)

    # NOTE(review): with integer vote counts this is floor division under
    # Python 2, so the averages are truncated -- confirm that is intended.
    for key, value in word_scores.iteritems():
        word_scores[key] = value / num_words[key]

    print(highest_post)
    print(top_post_key)
    return word_scores, weights
def metrics():
    """Build per-token average scores and frequency weights over all posts.

    For every post in ``data.posts`` that can be read without a
    ``KeyError``, each token of its title accumulates the post's
    ``day[1] - day[2]`` and a count of one.  The largest ``day[1]`` seen
    and the key of the post that had it are printed before returning.

    Returns:
        (word_scores, weights):
            word_scores -- defaultdict mapping token to its accumulated
                net score divided by the number of posts it appeared in.
            weights -- defaultdict mapping token to
                ``1 - count / total``, where ``total`` is the sum of all
                token counts.
    """
    word_scores = defaultdict(int)
    num_words = defaultdict(int)
    total_num_words = 0
    highest_post = 0
    top_post_key = ""

    for post in data.posts:
        tokens = {}
        # Read everything that can raise KeyError *before* touching the
        # accumulators.  `word_scores[t] += ...` on a defaultdict inserts
        # `t` before its right-hand side is evaluated, so a failure in the
        # middle of an update would leave a token in word_scores that has
        # no count, and the averaging loop below would divide by zero.
        try:
            entry = data.posts[post]
            # presumably fills `tokens` in place with the title's tokens
            # as keys -- confirm against token.get_tokens
            token.get_tokens(entry['title'], tokens, 0, 1)
            upvotes = entry['day'][1]
            # presumably the downvote count -- confirm against the loader
            downvotes = entry['day'][2]
        except KeyError:
            continue

        for t in tokens:
            word_scores[t] += upvotes
            word_scores[t] -= downvotes
            num_words[t] += 1
            total_num_words += 1

        if upvotes > highest_post:
            top_post_key = post
            highest_post = upvotes

    weights = defaultdict(float)
    for key, value in num_words.iteritems():
        weights[key] = 1 - (float(value) / total_num_words)

    # NOTE(review): with integer vote counts this is floor division under
    # Python 2, so the averages are truncated -- confirm that is intended.
    for key, value in word_scores.iteritems():
        word_scores[key] = value / num_words[key]

    print(highest_post)
    print(top_post_key)
    return word_scores, weights