def test_review():
    """Round-trip a Review through to_dict/from_dict and print the copy."""
    text = "I don't love her any more"
    original = Review('abc', text)
    # Serialize, rebuild from the serialized form, then dump the rebuilt
    # object so the round trip can be inspected by eye.
    restored = Review.from_dict(original.to_dict())
    # Single-argument print(...) is equivalent to the Python 2 print
    # statement form.
    print(restored.to_dict())
def test_review():
    """Build a Review, copy it via its dict form, and print the copy's dict."""
    source_review = Review('abc', "I don't love her any more")
    as_dict = source_review.to_dict()
    copied_review = Review.from_dict(as_dict)
    # Printing the copy's dict shows whether from_dict(to_dict()) kept the
    # content; nothing is asserted here.
    copied_dict = copied_review.to_dict()
    print(copied_dict)
def sentence_stream(colname):
    """Lazily yield a Review for every document in ``popcorn[colname]``.

    Args:
        colname: name of the collection inside the ``popcorn`` database.

    Yields:
        Objects built by ``Review.from_dict`` from each document; the
        ``words`` field is excluded by the query projection.
    """
    client = pymongo.MongoClient()
    try:
        collection = client.popcorn[colname]
        cursor = collection.find({}, {'words': 0})
        for doc in cursor:
            yield Review.from_dict(doc)
    finally:
        # Runs when the stream is exhausted, when an exception propagates,
        # and when the consumer stops early (generator close), so the
        # client connection is never leaked.
        client.close()
def sentence_stream(colname):
    """Stream the documents of ``popcorn[colname]`` as Review objects.

    Args:
        colname: collection name within the ``popcorn`` database.

    Yields:
        One ``Review.from_dict(document)`` per stored document. The
        projection ``{'words': 0}`` leaves the ``words`` field out of each
        fetched document.
    """
    client = pymongo.MongoClient()
    try:
        for document in client.popcorn[colname].find({}, {'words': 0}):
            yield Review.from_dict(document)
    finally:
        # A generator may be abandoned before exhaustion or may fail inside
        # from_dict; closing in ``finally`` releases the client in every
        # one of those cases.
        client.close()
def get_all_known_aspect_sentences(dbname):
    """Yield every sentence whose aspect is not "Unknown".

    Args:
        dbname: name of the database holding the ``reviews`` collection.

    Yields:
        Sentence objects taken from ``review.sentences`` whose ``aspect``
        attribute differs from ``"Unknown"``.
    """
    client = MongoClient()
    try:
        review_collection = client[dbname]["reviews"]
        # Only fetch reviews that contain at least one sentence with a
        # known aspect, and only their "sentences" field.
        query_condition = {
            "sentences": {"$elemMatch": {"aspect": {"$ne": "Unknown"}}}
        }
        cursor = review_collection.find(query_condition, {"sentences": 1})
        for doc in cursor:
            review = Review.from_dict(doc)
            # A matched review can still hold "Unknown" sentences next to
            # the known ones, so filter again per sentence.
            for sentence in review.sentences:
                if sentence.aspect != "Unknown":
                    yield sentence
    finally:
        # Also reached when the consumer stops iterating early or an
        # exception escapes, so the client is always released.
        client.close()
def test_load_review_words():
    """Print the raw text and words of up to the first 10 'airbnb' reviews.

    Reads from the ``reviews`` collection of the ``airbnb`` database and
    prints a numbered banner followed by ``review.sent.raw`` and
    ``review.sent.words`` for each loaded review.
    """
    client = MongoClient()
    try:
        collection = client['airbnb']['reviews']
        # limit(10) caps the cursor, so a collection with fewer than ten
        # documents simply ends the loop instead of raising StopIteration
        # from a bare next() call.
        cursor = collection.find({}).limit(10)
        for index, doc in enumerate(cursor):
            review = Review.from_dict(doc)
            print("*************** {} ***************".format(index + 1))
            print("raw: {}".format(review.sent.raw))
            print("words: {}".format(review.sent.words))
    finally:
        # Release the connection even if loading or printing fails.
        client.close()
def load_reviews_save_sentiment_sentences(dbname, classifier):
    """Extract sentences from every review and store them in a collection.

    For each document in ``<dbname>.reviews`` the review is rebuilt with
    ``Review.from_dict``, passed to ``review_to_sentences`` together with
    ``classifier``, and the resulting sentences (as dicts) are inserted into
    ``<dbname>.sentiment_sentences``. A progress line is printed per review.

    Args:
        dbname: name of the database to read from and write to.
        classifier: forwarded unchanged to ``review_to_sentences``.
    """
    client = MongoClient()
    try:
        db = client[dbname]
        reviews_collection = db["reviews"]
        sentisent_collection = db["sentiment_sentences"]

        for index, rd in enumerate(reviews_collection.find({})):
            review = Review.from_dict(rd)
            sentence_dicts = [
                s.to_dict() for s in review_to_sentences(review, classifier)
            ]
            # insert_many rejects an empty document list, so only write
            # when the review produced at least one sentence.
            if sentence_dicts:
                sentisent_collection.insert_many(sentence_dicts)
            print("{}-th review extract {} sentences and saved".format(
                index + 1, len(sentence_dicts)))
    finally:
        # Close the client even when extraction or insertion raises.
        client.close()
def update_add_neg_suffix(dbname, query_condition):
    """Rebuild sentence words from raw text and persist the ones that changed.

    Every review in ``<dbname>.reviews`` matching ``query_condition`` is
    loaded with only ``sentences.raw`` and ``sentences.words``. Each sentence
    is rebuilt with ``Sentence.from_raw(raw, stop_words)``; when the rebuilt
    word set differs from the stored one, the stored ``words`` of that
    sentence is overwritten. A progress line is printed per review.

    Args:
        dbname: name of the database holding the ``reviews`` collection.
        query_condition: filter passed to ``find`` to select the reviews.

    Raises:
        Exception: if an update does not modify exactly one document.
    """
    stop_words = common.make_stop_words()
    client = MongoClient()
    try:
        review_collection = client[dbname]['reviews']
        cursor = review_collection.find(
            query_condition, {"sentences.raw": 1, "sentences.words": 1})

        for rindex, rd in enumerate(cursor):
            review = Review.from_dict(rd)

            # Maps "sentences.<position>.words" to the new word list for
            # each sentence whose words changed.
            update_content = {}
            for sindex, sent in enumerate(review.sentences):
                new_sent = Sentence.from_raw(sent.raw, stop_words)
                # Compared as sets: order and duplicates alone do not
                # trigger a rewrite.
                if set(new_sent.words) != set(sent.words):
                    key = "sentences.{}.words".format(sindex)
                    update_content[key] = new_sent.words

            if update_content:
                result = review_collection.update_one(
                    {"_id": review.id}, {"$set": update_content})
                if result.modified_count != 1:
                    raise Exception(
                        "failed to update review<{}>".format(review.id))

            print("{}-th review updated {} sentences".format(
                rindex + 1, len(update_content)))
    finally:
        # The failure path above raises on purpose; make sure the client
        # is still closed when it does.
        client.close()