def test_insert_media(self):
    """Tests that inserted media can be retrieved by ASIN, title, and creator."""
    # can we look the movie up by its ASIN?
    movie = queries.find_media_by_asin('0440419395')
    self.is_batman_movie(movie)
    # by its title?
    by_title = queries.find_media_by_title('Batman: The Return of the Force')
    self.is_batman_movie(by_title[0])
    # by its creator?
    by_creator = queries.find_media_by_creator('Dario')
    self.is_batman_movie(by_creator[0])
def test_clean(self):
    """Tests cleaning the database of comments and emotions."""
    movie = queries.find_media_by_asin('0440419395')
    # seed a comment so there is something for clean_media to remove
    queries.insert_comment(movie.media_id, 11, 7, 0.9, 0.4, -0.5, 0.6, 0.3)
    queries.clean_media(movie.media_id)
    remaining_comments = queries.find_comments_for_media(movie.media_id)
    remaining_emotions = queries.find_emotions_for_media(movie.media_id)
    self.assertEqual(len(remaining_comments), 0)
    self.assertEqual(len(remaining_emotions), 0)
def handleReview(asin, list_of_review_dicts, productapi, producttype):
    """Build a product dict for *asin*, run emotion processing, and push the
    resulting JSON to S3.

    Inserts the media row if it is new, otherwise cleans its old
    comments/emotions and bumps last_updated. Products with no Amazon info
    or no detectable emotions are skipped (and removed, in the latter case).

    Args:
        asin: Amazon product identifier.
        list_of_review_dicts: raw review dicts; each must contain "overall",
            "helpful", "unixReviewTime", and "reviewText".
        productapi: unused here; kept for caller compatibility.
        producttype: media type string stored with the product.
    """
    global i  # progress counter maintained by the caller; used only in log output
    product_dict = dict()
    product_dict["comments"] = list()
    try:
        product_dict = add_amazon_info_to_dict(asin, product_dict)
    except AmazonInfoNotFoundError:
        print("Couldn't find amazon info for product", i, " skipping")
        return
    # add the ASIN to the dict
    product_dict["asin"] = asin
    product_dict["type"] = producttype
    product = queries.find_media_by_asin(asin)
    if product is None:
        queries.insert_media(product_dict["title"], product_dict["creator"],
                             product_dict["description"], producttype, asin,
                             int(time.time()))
    else:
        # existing row: drop stale comments/emotions and refresh the timestamp
        queries.clean_media(product.media_id)
        queries.update_media(product.media_id, int(time.time()))
    for review in list_of_review_dicts:
        comment_dict = dict()
        # if these dont exist in some of them, then so help me god
        comment_dict["rating"] = review["overall"]
        comment_dict["helpful"] = review["helpful"]
        comment_dict["unixtime"] = int(review["unixReviewTime"])
        comment_dict["text"] = review["reviewText"]
        product_dict["comments"].append(comment_dict)
    # now process this dict in comment_processing
    filename = product_dict["title"] + "$$$" + asin
    try:
        processed_dict = calculateVectorsForAllComments(product_dict, g)
    except NoEmotionsFoundError:
        # REMOVE the media from the table since we don't want it anymore
        queries.remove_media(asin)
        print("couldnt find any emotions for product: ", i, "Skipping")
        return
    # create the summary
    processed_dict["summary"] = html.unescape(return_summary(processed_dict))
    processed_json = json.dumps(processed_dict, indent=4)
    print("Adding product with asin: ", asin, "to S3 ---", i)
    push_to_S3(filename, processed_json)
def test_updating_media(self):
    """Tests updating media emotions and the last_updated column."""
    bat = queries.find_media_by_asin('0440419395')
    # testing updating the emotions
    queries.insert_media_emotion(bat.media_id, 'cool')
    queries.insert_media_emotion(bat.media_id, 'dark')
    emotions = queries.find_emotions_for_media(bat.media_id)
    self.assertTrue('dark' in emotions,
                    "Didn't find all emotions for media")
    self.assertTrue('cool' in emotions,
                    "Didn't find all emotions for media")
    # test updating the last_updated column
    queries.update_media(bat.media_id, 20)
    # re-fetch the row before asserting: the previously loaded object may be
    # stale, so check what is actually persisted in the database
    bat = queries.find_media_by_asin('0440419395')
    self.assertEqual(bat.last_updated, 20,
                     'Did not update date properly')
def calculateVectorsForAllComments(dictFromJSON, g):
    """Score every comment of a product for relevancy and emotion.

    Drops comments whose cosine relevancy to the product description is
    below 0.15, persists each surviving comment and the most popular sentic
    emotions to the database, and annotates dictFromJSON with the popular
    emotions, overall rating, and the sorted comment list.

    Args:
        dictFromJSON: product dict with "asin", "description" and "comments".
        g: emotion-graph handle passed through to emotions().

    Returns:
        The same dictFromJSON, annotated in place.

    Raises:
        NoEmotionsFoundError: if no compound or no sentic emotions were found.
    """
    compound_emotion_dict = collections.defaultdict(int)
    sentic_emotion_dict = collections.defaultdict(int)
    processed_comments = list()
    overall_rating = 0.0
    # the product model from the DB
    product = queries.find_media_by_asin(dictFromJSON["asin"])
    tokenized_docs = buildListOfTokenizedDocuments(dictFromJSON)
    # the description vector is independent of the comment, so build it once
    # instead of once per comment
    vectorized_desc = calculateVector(tokenizeDocument(dictFromJSON["description"]),
                                      tokenized_docs)
    for comment in dictFromJSON["comments"]:
        vectorized_comment = calculateVector(tokenizeDocument(comment["text"]),
                                             tokenized_docs)
        comment["vector_space"] = vectorized_comment
        relevancy = getCosine(vectorized_comment, vectorized_desc)
        if relevancy < 0.15:
            continue
        comment["relevancy"] = relevancy
        # add emotional score
        try:
            comment_emotion = emotions(comment["text"], g)
        except ConceptError:
            print("Not enough concepts to do anything useful - skipping this product")
            continue
        comment["emotion_vector"] = comment_emotion.emotion_vector
        compound_emotions = comment_emotion.get_compound_emotion()
        sentic_values = comment_emotion.get_all_sentic_values()
        sentic_values = [value.name for value in sentic_values if value is not None]
        compound_emotions_list = []
        for compound_emotion, strength in compound_emotions:
            compound_emotions_list.append(
                {"compound_emotion": compound_emotion.name, "strength": strength.name}
            )
        comment["compound_emotions"] = compound_emotions_list
        comment["sentic_emotions"] = sentic_values
        # tally how often each emotion appears across all comments
        for compound in comment["compound_emotions"]:
            compound_emotion_dict[compound["compound_emotion"]] += 1
        for sentic in comment["sentic_emotions"]:
            sentic_emotion_dict[sentic] += 1
        overall_rating += float(comment["rating"])
        # persist the comment; argument order matches the call below:
        # insert_comment(media_id, date, relevancy, pleasantness, attention,
        #                sensitivity, aptitude, polarity)
        queries.insert_comment(product.media_id, comment["unixtime"],
                               comment["relevancy"],
                               comment["emotion_vector"]["pleasantness"],
                               comment["emotion_vector"]["attention"],
                               comment["emotion_vector"]["sensitivity"],
                               comment["emotion_vector"]["aptitude"],
                               comment["emotion_vector"]["polarity"])
        comment["text"] = html.unescape(comment["text"])
        processed_comments.append(comment)
    popular_compound_emotions = []
    if len(compound_emotion_dict) == 0:
        raise NoEmotionsFoundError("No compound emotions found")
    # take the (up to) 3 most frequent compound emotions; bounding by the dict
    # size avoids max() blowing up when fewer than 3 distinct emotions exist
    for _ in range(min(3, len(compound_emotion_dict))):
        popular_emotion = max(compound_emotion_dict, key=compound_emotion_dict.get)
        popular_compound_emotions.append(popular_emotion)
        compound_emotion_dict.pop(popular_emotion)
    popular_sentic_emotions = []
    if len(sentic_emotion_dict) == 0:
        raise NoEmotionsFoundError("No sentic emotions found")
    for _ in range(min(3, len(sentic_emotion_dict))):
        popular_sentic = max(sentic_emotion_dict, key=sentic_emotion_dict.get)
        popular_sentic_emotions.append(popular_sentic)
        sentic_emotion_dict.pop(popular_sentic)
        # add emotion to the database
        queries.insert_media_emotion(product.media_id, popular_sentic)
    dictFromJSON["popular_compound_emotions"] = popular_compound_emotions
    dictFromJSON["popular_sentic_emotions"] = popular_sentic_emotions
    if len(processed_comments) > 0:
        rating = overall_rating / len(processed_comments)
        rating = float("{0:.2f}".format(rating))
        dictFromJSON["overall_rating"] = rating
    dictFromJSON["comments"] = sort_list_of_dicts(processed_comments)
    # get rid of escaped html characters in description
    dictFromJSON["description"] = html.unescape(dictFromJSON["description"])
    return dictFromJSON