def POST(self):
    """Handle a submitted, human-marked form.

    Reads ``answer``, ``url``, ``title``, ``image``, ``emotional_score``,
    ``quality_score`` and ``body`` from the request. If any of them is
    missing (i.e. still equal to its default), the result of
    ``web.badrequest()`` is returned.

    Otherwise the body text is converted to a vector and stored through
    ``Store.Record.create`` with label 1 when the answer is exactly
    ``"yes"`` and label 0 for anything else. Only in the ``"yes"`` case is
    the document additionally passed to ``Scraper.store_in_solr``.

    Per the original author's notes, the stored yes/no label is meant to
    feed further training of the machine learning filter.
    """
    web.header('Content-type', 'application/json')
    form = web.input(answer='', url='', title='', image='',
                     emotional_score=-1, quality_score=-1, body='')

    # A field that still equals its default was not supplied by the client.
    text_fields = (form.answer, form.url, form.title, form.image, form.body)
    score_fields = (form.emotional_score, form.quality_score)
    missing_text = any(value == '' for value in text_fields)
    missing_score = any(value == -1 for value in score_fields)
    if missing_text or missing_score:
        return web.badrequest()

    # The label is recorded for every valid submission, accepted or not.
    approved = "yes" == form.answer
    label = 1 if approved else 0
    Store.Record.create(CosineSimilarity.text_to_vector(form.body), label)

    # Only approved documents are forwarded to Solr.
    if approved:
        Scraper.store_in_solr(
            form.url,
            form.emotional_score,
            form.quality_score,
            (form.title, form.body, form.image))
def get_quality_score(self, bodyList):
    """Return a combined quality score for the text pieces in ``bodyList``.

    Two signals are blended:

    * the sum, over every element of ``bodyList``, of
      ``CosineSimilarity.similarity(element, GOLD_STD)``;
    * the first element of the result of classifying the vector of the
      space-joined ``bodyList`` with a fresh ``NearestNeighborFilter``.

    The result is ``0.58 * cosine_sum + 0.42 * neighbor_class``. The
    combined value is also printed before being returned.
    """
    # Accumulate similarity against the gold standard, piece by piece.
    similarity_total = 0
    for piece in bodyList:
        similarity_total = similarity_total + CosineSimilarity.similarity(piece, GOLD_STD)

    # Classify the whole document at once; the classifier's result is
    # indexable and only its first element is used.
    document_vector = CosineSimilarity.text_to_vector(" ".join(bodyList))
    neighbor_label = NearestNeighborFilter().classify(document_vector)[0]

    # Weighted average of the two filters.
    combined = 0.58 * similarity_total + 0.42 * neighbor_label
    print("Combined score: {0}".format(combined))
    return combined