コード例 #1
0
ファイル: tests.py プロジェクト: stasi009/MyKaggle
def test_review():
    """Round-trip a Review through to_dict()/from_dict() and print the result.

    Smoke test only: the rebuilt review's dict is printed for manual
    inspection, nothing is asserted.
    """
    txt = "I don't love her any more"
    original = Review('abc', txt)
    as_dict = original.to_dict()

    rebuilt = Review.from_dict(as_dict)
    # Single-argument call form prints the same on Python 2 and Python 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print(rebuilt.to_dict())
コード例 #2
0
def test_review():
    """Round-trip a Review through to_dict()/from_dict() and print the result.

    Smoke test only: the rebuilt review's dict is printed for manual
    inspection, nothing is asserted.
    """
    txt = "I don't love her any more"
    original = Review('abc', txt)
    as_dict = original.to_dict()

    rebuilt = Review.from_dict(as_dict)
    # Single-argument call form prints the same on Python 2 and Python 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print(rebuilt.to_dict())
コード例 #3
0
def sentence_stream(colname):
    """Lazily yield a Review for every document in ``popcorn[colname]``.

    Documents are fetched with the ``words`` field excluded by the
    projection, and each one is converted with ``Review.from_dict``.

    Args:
        colname: name of the collection inside the ``popcorn`` database.

    Yields:
        One ``Review`` per document returned by the query.
    """
    client = pymongo.MongoClient()
    try:
        collection = client.popcorn[colname]

        cursor = collection.find({}, {'words': 0})
        for d in cursor:
            yield Review.from_dict(d)
    finally:
        # Runs on normal exhaustion, on an exception, and when the consumer
        # abandons the generator early (GeneratorExit), so the client is
        # always released.
        client.close()
コード例 #4
0
def sentence_stream(colname):
    """Lazily yield a Review for every document in ``popcorn[colname]``.

    Documents are fetched with the ``words`` field excluded by the
    projection, and each one is converted with ``Review.from_dict``.

    Args:
        colname: name of the collection inside the ``popcorn`` database.

    Yields:
        One ``Review`` per document returned by the query.
    """
    client = pymongo.MongoClient()
    try:
        collection = client.popcorn[colname]

        cursor = collection.find({}, {'words': 0})
        for d in cursor:
            yield Review.from_dict(d)
    finally:
        # Runs on normal exhaustion, on an exception, and when the consumer
        # abandons the generator early (GeneratorExit), so the client is
        # always released.
        client.close()
コード例 #5
0
ファイル: db_tasks.py プロジェクト: stasi009/OpinionMining728
def get_all_known_aspect_sentences(dbname):
    """Lazily yield every sentence whose aspect is not ``"Unknown"``.

    Queries ``<dbname>.reviews`` for reviews containing at least one
    sentence with an aspect other than ``'Unknown'``, loading only the
    ``sentences`` field, then filters the sentences of each matching review.

    Args:
        dbname: name of the MongoDB database holding the ``reviews``
            collection.

    Yields:
        Each sentence object of a matching review whose ``aspect`` differs
        from ``"Unknown"``.
    """
    client = MongoClient()
    try:
        review_collection = client[dbname]["reviews"]

        query_condition = {"sentences": {'$elemMatch': {'aspect': {'$ne': 'Unknown'}}}}
        cursor = review_collection.find(query_condition, {"sentences": 1})
        for d in cursor:
            review = Review.from_dict(d)
            # $elemMatch selects whole reviews, so a matching review can
            # still contain "Unknown" sentences that must be skipped here.
            for sentence in review.sentences:
                if sentence.aspect != "Unknown":
                    yield sentence
    finally:
        # Also runs when the consumer stops iterating early or an exception
        # is raised, so the client is not leaked.
        client.close()
コード例 #6
0
ファイル: tests.py プロジェクト: stasi009/LearnMyMachine
def test_load_review_words():
    """Print the raw text and words of up to 10 reviews from ``airbnb.reviews``.

    Smoke test only: each document is converted with ``Review.from_dict``
    and its ``sent.raw`` / ``sent.words`` are printed for manual inspection.
    If the collection holds fewer than 10 documents, only those are printed
    instead of failing with StopIteration.
    """
    client = MongoClient()
    try:
        collection = client['airbnb']['reviews']
        # Let the server cap the result set rather than pulling documents
        # one at a time with next() for a fixed count.
        cursor = collection.find({}).limit(10)

        for index, d in enumerate(cursor):
            review = Review.from_dict(d)
            print("*************** {} ***************".format(index + 1))
            print("raw: {}".format(review.sent.raw))
            print("words: {}".format(review.sent.words))
    finally:
        # Release the connection even if conversion or printing fails.
        client.close()
コード例 #7
0
def load_reviews_save_sentiment_sentences(dbname,classifier):
    """Extract sentences from every review and store them in a new collection.

    Reads all documents of ``<dbname>.reviews``, converts each to a
    ``Review``, runs ``review_to_sentences(review, classifier)`` on it, and
    inserts the resulting sentence dicts into
    ``<dbname>.sentiment_sentences``. A progress line is printed per review.

    Args:
        dbname: name of the MongoDB database to read from and write to.
        classifier: passed through unchanged to ``review_to_sentences``.
    """
    client = MongoClient()
    try:
        db = client[dbname]
        reviews_collection = db["reviews"]
        sentisent_collection = db["sentiment_sentences"]

        review_cursor = reviews_collection.find({})
        for index, rd in enumerate(review_cursor):
            review = Review.from_dict(rd)

            sentence_dicts = [s.to_dict() for s in review_to_sentences(review, classifier)]
            # Skip the write entirely when nothing was extracted.
            if sentence_dicts:
                sentisent_collection.insert_many(sentence_dicts)

            print("{}-th review extract {} sentences and saved".format(index+1,len(sentence_dicts)))
    finally:
        # Release the connection even if extraction or insertion fails.
        client.close()
コード例 #8
0
ファイル: db_tasks.py プロジェクト: stasi009/OpinionMining728
def update_add_neg_suffix(dbname,query_condition):
    """Re-tokenize stored sentences and update those whose word set changed.

    For every review in ``<dbname>.reviews`` matching ``query_condition``,
    each sentence is rebuilt from its raw text with
    ``Sentence.from_raw(raw, stop_words)``. When the set of resulting words
    differs from the stored one, that sentence's ``words`` field is
    overwritten in place. A progress line is printed per review.

    Args:
        dbname: name of the MongoDB database holding the ``reviews``
            collection.
        query_condition: MongoDB filter selecting the reviews to process.

    Raises:
        Exception: if an update does not modify exactly one document.
    """
    stop_words = common.make_stop_words()
    client = MongoClient()
    try:
        review_collection = client[dbname]['reviews']

        # Only the raw text and current words of each sentence are needed.
        projection = {"sentences.raw":1,"sentences.words":1}
        cursor = review_collection.find(query_condition, projection)
        for rindex, rd in enumerate(cursor):
            review = Review.from_dict(rd)

            # Maps "sentences.<index>.words" -> new word list, so that one
            # $set rewrites only the sentences that actually changed.
            update_content = {}
            for sindex, sent in enumerate(review.sentences):
                new_sent = Sentence.from_raw(sent.raw, stop_words)
                # Set comparison: order and duplicates are ignored.
                if set(new_sent.words) != set(sent.words):
                    update_content["sentences.{}.words".format(sindex)] = new_sent.words

            if update_content:
                result = review_collection.update_one({"_id":review.id},{"$set":update_content})
                if result.modified_count != 1:
                    raise Exception("failed to update review<{}>".format(review.id))

            print("{}-th review updated {} sentences".format(rindex+1,len(update_content)))
    finally:
        # Release the connection on success and on the failure raised above.
        client.close()