Ejemplo n.º 1
0
class UserUpdater:
    """Computes each user's average review usefulness and stores it in Mongo.

    Targets users in the ``user`` collection that have reviews
    (``review_count != 0``) but no ``average_usefulness`` field yet.
    """

    def __init__(self):
        self.query = MongoQuery()
        self.users = None  # lazy cache filled by get_all_users()
        self.logger = logging.Logger('UserUpdateLogger', level=logging.INFO)
        self.logger.addHandler(
            logging.FileHandler(filename=os.path.join(LOG_PATH,
                                                      'user_update.log'),
                                mode='a+'))

    def get_all_users(self):
        """Return (and cache) users with reviews but no average_usefulness.

        Only ``user_id``, ``votes`` and ``review_count`` are projected.
        """
        if not self.users:
            where = [('review_count', {"$ne": 0}),
                     ('average_usefulness', {"$exists": False})]
            projection = ['user_id', 'votes', 'review_count']
            self.users = list(
                self.query.find_all_by(collection_name='user',
                                       query_list=where,
                                       fields=projection))

        return self.users

    def update_user(self, user, avg_usr_usefulness):
        """Persist *avg_usr_usefulness* on *user* and log the update.

        ``avg_usr_usefulness`` is expected to be a string (see
        calculate_average_usefulness) because it is concatenated into the
        log message below.
        """
        runtime = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
        user_id = user['user_id']

        self.logger.info(runtime + " - Set average_usefulness = " +
                         avg_usr_usefulness + " to user with ID: " + user_id)
        self.query.find_and_update('user', [('user_id', user_id)],
                                   [('average_usefulness', avg_usr_usefulness)])

    def update_all_users_with_user_usefulness(self):
        """Compute and store average usefulness for every pending user."""
        users = self.get_all_users()
        print(len(users))

        # BUG FIX: the counter used to start at 1 and be incremented before
        # the update, so the final log overstated the number of updated
        # users by one and the progress print fired on the 999th user.
        counter = 0
        for user in users:
            avg_usr_usefulness = self.calculate_average_usefulness(user)
            self.update_user(user, avg_usr_usefulness)

            counter += 1
            if counter % 1000 == 0:
                print(str(counter) + " users updated.")

        self.logger.info("Execution finished. " + str(counter) +
                         " users updated.")
        return users

    @staticmethod
    def calculate_average_usefulness(user):
        """Return useful votes / review count, formatted to two decimals.

        NOTE(review): the value is returned (and stored) as a string, not a
        number — confirm downstream consumers expect that.
        """
        return "%.2f" % (user['votes']['useful'] / user['review_count'])
Ejemplo n.º 2
0
 def __init__(self):
     """Set up the Mongo query helper, the lazy user cache and a file logger."""
     self.query = MongoQuery()
     self.users = None
     log_file = os.path.join(LOG_PATH, 'user_update.log')
     handler = logging.FileHandler(filename=log_file, mode='a+')
     self.logger = logging.Logger('UserUpdateLogger', level=logging.INFO)
     self.logger.addHandler(handler)
Ejemplo n.º 3
0
 def __init__(self):
     """Initialise the API clients, collection settings and the NLP file logger."""
     self.alchemy_api = AlchemyAPI()
     self.query = MongoQuery()
     self.collection_name = 'review_category'
     self.max_reviews_per_business = 1000
     self.top_businesses_limit = 10
     handler = logging.FileHandler(filename=os.path.join(LOG_PATH, 'nlp.log'),
                                   mode='a+')
     self.logger = logging.Logger('NLPLogger', level=logging.INFO)
     self.logger.addHandler(handler)
     self.__broadcaster = None
Ejemplo n.º 4
0
 def __init__(self):
     """Initialise API clients, collection settings and the NLP file logger."""
     self.alchemy_api = AlchemyAPI()
     self.query = MongoQuery()
     self.collection_name = 'review_category'
     self.max_reviews_per_business = 1000
     self.top_businesses_limit = 10
     self.logger = logging.Logger('NLPLogger', level=logging.INFO)
     nlp_log = os.path.join(LOG_PATH, 'nlp.log')
     self.logger.addHandler(logging.FileHandler(filename=nlp_log, mode='a+'))
     self.__broadcaster = None
Ejemplo n.º 5
0
class NLPHandler(object):
    """Drives AlchemyAPI NLP analysis over the Yelp ``review_category`` data.

    Builds the mixed collection from review/business/user documents, runs
    sentiment and combined (entities/concepts/keywords/taxonomy) calls, and
    stores the results back through ``MongoQuery``.
    """

    def __init__(self):
        self.alchemy_api = AlchemyAPI()
        self.query = MongoQuery()
        self.collection_name = 'review_category'
        # Businesses with more reviews than this are excluded from the
        # top-businesses aggregation below.
        self.max_reviews_per_business = 1000
        self.top_businesses_limit = 10
        self.logger = logging.Logger('NLPLogger', level=logging.INFO)
        self.logger.addHandler(
            logging.FileHandler(filename=os.path.join(LOG_PATH, 'nlp.log'),
                                mode='a+'))
        self.__broadcaster = None

    def set_broadcaster(self, b):
        """Store a broadcaster object (not used by any method visible here)."""
        self.__broadcaster = b

    def get_combined_result(self, review_text=''):
        """Run AlchemyAPI 'combined' analysis (with sentiment) on the text.

        Returns the raw response dict on success; raises NLPValueError
        when the API reports a non-OK status.
        """
        opts = {'sentiment': 1}
        response = self.alchemy_api.combined('text', review_text, options=opts)

        if response['status'] == 'OK':
            return response
        raise NLPValueError('Error in combined call: ' +
                            response['statusInfo'])

    def get_sentiment_result(self, review_text=''):
        """Run AlchemyAPI sentiment analysis on the text.

        Returns the raw response dict on success; raises NLPValueError
        when the API reports a non-OK status.
        """
        response = self.alchemy_api.sentiment('text', review_text)
        if response['status'] == 'OK':
            return response
        raise NLPValueError('Error in sentiment analysis call: ' +
                            response['statusInfo'])

    def create_mixed_collection(self):
        """Rebuild review_category by joining review, business and user docs.

        Documents are inserted in batches of 5000.  NOTE(review): a trailing
        partial batch is never flushed, so documents past the last full batch
        are dropped — confirm whether that is intentional.
        """
        db_connector = DBConnector(os.path.join(CONFIG_PATH, 'config.json'))
        db_connector.connect()
        db_connector.reset_database_name('yelp', 'review_category')
        t_collection = db_connector.get_database_name('yelp',
                                                      'review_category')

        business_fields = ['business_id', 'categories']
        user_fields = ['user_id', 'elite', 'votes']
        review_fields = [
            'business_id', 'review_id', 'text', 'user_id', 'stars', 'date'
        ]

        documents = self.query.find_all('review', review_fields)

        index = 0
        batch_number = 5000
        batch_documents = [None] * batch_number  # reusable batch buffer

        for review_doc in documents:
            try:
                business_id = review_doc['business_id']
                user_id = review_doc['user_id']

                business_doc = self.query.find_one(
                    'business', [('business_id', business_id)],
                    business_fields)
                user_doc = self.query.find_one('user', [('user_id', user_id)],
                                               user_fields)

                batch_documents[index % batch_number] = {
                    'review_id': review_doc['review_id'],
                    'business_id': business_id,
                    'user_id': user_id,
                    'elite': len(user_doc['elite']),
                    'useful': user_doc['votes']['useful'],
                    'categories': business_doc['categories'],
                    'text': review_doc['text'],
                    'stars': review_doc['stars'],
                    'date': review_doc['date'],
                }

                if (index + 1) % batch_number == 0:
                    # NOTE(review): Collection.insert() is deprecated in
                    # modern PyMongo in favour of insert_many() — confirm the
                    # driver version in use.
                    t_collection.insert(batch_documents)
                    print("\n" + str(index + 1) + "\n")
                index += 1
            except Exception:
                # BUG FIX: was a bare `except:` with a Python 2 print
                # statement (a SyntaxError under Python 3).
                print('Unexpected error:', sys.exc_info()[0],
                      ', for index ', index)
                raise
        db_connector.disconnect()

    def find_top_category(self):
        """Return the category id with the largest number of reviews."""
        pipeline = [
            {"$unwind": "$categories"},
            {"$group": {"_id": "$categories", "count": {"$sum": 1}}},
            {"$sort": {"count": -1}},
            {"$limit": 1},
        ]
        return self.query.aggregate(self.collection_name, pipeline,
                                    True)[0]['_id']

    def find_top_businesses_of_category(self, category, top_businesses=10):
        """Return the most-reviewed businesses in *category*.

        Businesses whose review count exceeds max_reviews_per_business are
        filtered out before the limit is applied.
        """
        pipeline = [
            {"$unwind": "$categories"},
            {"$match": {"categories": category}},
            {"$group": {"_id": "$business_id", "count": {"$sum": 1}}},
            {"$sort": {"count": -1}},
            {"$match": {"count": {"$lte": self.max_reviews_per_business}}},
            {"$limit": top_businesses},
        ]
        return self.query.aggregate(self.collection_name, pipeline, True)

    def update_mixed_collection_with_sentiment(self, business_doc):
        """Fetch and store docSentiment for reviews of *business_doc* lacking it."""
        counter = 0

        query_list = [('business_id', business_doc['_id'])]
        field_list = ['review_id', 'text', 'sentiment']

        reviews = list(
            self.query.find_all_by(self.collection_name, query_list,
                                   field_list))

        for review in reviews:
            if 'sentiment' in review:
                continue

            review_id = review['review_id']
            sentiment = None
            try:
                sentiment = self.get_sentiment_result(
                    review['text'])['docSentiment']
            except NLPValueError as err:
                # BUG FIX: err.message does not exist on Python 3
                # exceptions; use str(err) instead.
                print("{} is having problem with err: {}".format(
                    review_id, str(err)))

            if sentiment is not None:  # BUG FIX: was `sentiment != None`
                self.query.find_and_update(self.collection_name,
                                           [('review_id', review_id)],
                                           [('sentiment', sentiment)])

                counter += 1
                if counter % 50 == 0:
                    print(str(counter) + " reviews updated.")

    def print_uncalled_top_ten_business(self):
        """Print, per top business, how many reviews still lack sentiment."""
        top_category = self.find_top_category()
        top_ten_business_id_docs = self.find_top_businesses_of_category(
            top_category, self.top_businesses_limit)

        for business in top_ten_business_id_docs:
            reviews = list(
                self.query.find_all_by(self.collection_name,
                                       [('business_id', business['_id'])],
                                       ['sentiment']))

            count = sum(1 for review in reviews if 'sentiment' not in review)

            print(
                str(count) + " reviews not updated for business " +
                str(business['_id']))

    # CAREFUL WITH THIS ONE!!! IT HAS MEMORY ISSUES - RUN IT MANY TIMES AND CLEAR CACHE OF PC EVERYTIME
    # IF IT TAKES TOO MUCH TIME, IT NEEDS INDEX
    def update_mixed_collection_with_review_votes(self):
        """Copy each review's useful-vote count into review_useful."""
        counter = 0

        review_fields = ['votes']
        review_category_query = [('review_useful', {"$exists": False})]

        review_categories = list(
            self.query.find_all_by(self.collection_name, review_category_query,
                                   ['review_id']))

        for review_category in review_categories:
            query_list = [('review_id', review_category['review_id'])]
            review = self.query.find_one('review', query_list, review_fields)

            self.query.find_and_update(
                self.collection_name, query_list,
                [('review_useful', review['votes']['useful'])])

            counter += 1
            if counter % 10000 == 0:
                print(str(counter) + " reviews finished.")

    def run_handler(self):
        """Run sentiment updates for the top businesses of the top category."""
        top_category = self.find_top_category()
        top_ten_business_id_docs = self.find_top_businesses_of_category(
            top_category, self.top_businesses_limit)

        for business in top_ten_business_id_docs:
            run_time = datetime.now().strftime('%Y/%m/%d %H:%M:%S')
            self.logger.info(run_time + " - Business '" +
                             str(business['_id']) + "' started.")

            print(
                "Starting AlchemyAPI calls. Please check nlp.log inside 'logs' folder for business_id"
            )

            try:
                self.update_mixed_collection_with_sentiment(business)
                self.logger.info("Business " + str(business['_id']) +
                                 " finished.")
            except NLPValueError as err:
                # BUG FIX: err.message does not exist on Python 3 exceptions.
                self.logger.exception(run_time + " - " + str(err))

    def run2_handler(self):
        """Run combined analysis for up to 200 reviews of BUS_ID, log a summary."""
        business_id = BUS_ID
        limit = 200
        run_info = {'run_date': datetime.now().strftime('%Y/%m/%d %H:%M:%S')}

        documents = self.query.find_all_by(
            'review_category', [('business_id', business_id)],
            ['review_id', 'text', 'combined_result'])

        # NOTE(review): Cursor.count() was removed in PyMongo 4 — confirm the
        # installed driver still supports it.
        total_reviews = documents.count()
        run_info['total_review'] = total_reviews
        success = 0
        failure_reviews = []

        print("get uncalled review: {}".format(documents.count()))
        i = 0
        for doc in documents:
            review_id = doc['review_id']

            if 'combined_result' in doc:
                continue

            # BUG FIX: the original compared type(result) against the STRING
            # 'NLPValueError', which is always False.  get_combined_result
            # signals failure by raising, so catch the exception instead.
            try:
                result = self.get_combined_result(doc['text'])
            except NLPValueError:
                failure_reviews.append(review_id)
            else:
                data = {
                    key: result.get(key, [])
                    for key in ('entities', 'concepts', 'keywords', 'taxonomy')
                }

                self.query.find_and_update('review_category',
                                           [('review_id', review_id)],
                                           [('combined_result', data)])

                success += 1
                # BUG FIX: the modulo check used to run before the increment,
                # so the very first update printed "0 reviews updated."
                if success % 50 == 0:
                    print(str(success) + " reviews updated.")

            i += 1
            if i == limit:
                break

        run_info['success'] = success
        run_info['fail'] = total_reviews - success
        if success != total_reviews:
            run_info['failure_ids'] = failure_reviews

        pprint(run_info)

        with open('./run2_log.txt', 'w+') as outfile:
            json.dump(run_info, outfile, indent=4, ensure_ascii=False)

    def tee_perform(self):
        """Fetch one hard-coded review, print its sentiment and store it."""
        #['u-Gbz-uGIIKC0SN2MwXtLw', 'FyCc8g7LCVpU4BGCz-WUog']
        review_id = 'FyCc8g7LCVpU4BGCz-WUog'
        query_list = [('review_id', review_id)]
        review = self.query.find_one('review_category', query_list,
                                     ['review_id', 'text'])

        if review is not None:
            # BUG FIX: the original called get_sentiment_result twice,
            # doubling the remote API calls; reuse the first response.
            sentiment_res = self.get_sentiment_result(review['text'])
            pprint(sentiment_res)
            sentiment = sentiment_res['docSentiment']

            self.query.find_and_update(self.collection_name, query_list,
                                       [('sentiment', sentiment)])
Ejemplo n.º 6
0
 def __init__(self):
     """Create the MongoDB query helper."""
     self.query = MongoQuery()
Ejemplo n.º 7
0
class Plotter:
    """Plots review-category statistics: a pie of top categories and a bar
    chart of the most-reviewed businesses in the top category."""

    def __init__(self):
        self.query = MongoQuery()

    def find_top_category(self):
        """Return the category id with the most reviews in review_category."""
        pipeline = [
            {"$unwind": "$categories"},
            {"$group": {"_id": "$categories", "count": {"$sum": 1}}},
            {"$sort": {"count": -1}},
            {"$limit": 1},
        ]
        return self.query.aggregate('review_category', pipeline, True)[0]['_id']

    def find_top_businesses_of_category(self, category, top_businesses=3):
        """Return the *top_businesses* most-reviewed business ids in *category*."""
        pipeline = [
            {"$unwind": "$categories"},
            {"$match": {"categories": category}},
            {"$group": {"_id": "$business_id", "count": {"$sum": 1}}},
            {"$sort": {"count": -1}},
            {"$limit": top_businesses},
        ]
        return self.query.aggregate('review_category', pipeline, True)

    def plot_top_ten_categories(self):
        """Show a pie chart of the nine most-reviewed categories plus 'Others'."""

        def percentage(part, whole):
            # Integer percentage of part over whole.
            return int(100 * float(part) / float(whole))

        def calculate_rest_count(categories, whole):
            # BUG FIX: the original had `return sum` INSIDE the loop, so it
            # returned after summing only the first category, and it ignored
            # the `whole` parameter entirely.  The "Others" slice is whatever
            # lies outside the top categories.
            top_total = sum(item['count'] for item in categories)
            return whole - top_total

        pipeline = [
            {"$unwind": "$categories"},
            {"$group": {"_id": "$categories", "count": {"$sum": 1}}},
            {"$sort": {"count": -1}},
            {"$limit": 9},
        ]

        col_name = 'review_category'

        categories = list(self.query.aggregate(col_name, pipeline, True))
        total_categories_no = self.query.count(col_name)
        rest_count = calculate_rest_count(categories, total_categories_no)

        fracs = []
        category_labels = []
        should_explode = True
        should_append_rest = True
        expl = []

        colors = cm.Set1(np.arange(10) / 10.)

        for category in categories:
            # Explode only the first (largest) slice.
            if should_explode:
                expl.append(0.1)
                should_explode = False
            else:
                expl.append(0)

            review_count = category['count']

            # Insert the "Others" slice once, as soon as per-category counts
            # drop below the remainder.
            if review_count < rest_count and should_append_rest:
                fracs.append(percentage(rest_count, total_categories_no))
                category_labels.append("Others")
                expl.append(0)

                should_append_rest = False

            fracs.append(percentage(review_count, total_categories_no))
            category_labels.append(category['_id'])

        labels = tuple(category_labels)
        explode = tuple(expl)

        # make a square figure and axes
        figure(1, figsize=(6, 6))
        ax = axes([0.1, 0.1, 0.8, 0.8])

        pie(fracs,
            colors=colors,
            explode=explode,
            labels=labels,
            autopct='%1.1f%%',
            shadow=True,
            startangle=90)
        # startangle=90 rotates the chart counter-clockwise by 90 degrees so
        # plotting starts on the positive y-axis instead of the x-axis.
        mpl.rcParams['font.size'] = 15.0
        title('Top Business Categories', bbox={'facecolor': '0.8', 'pad': 5})

        show()

    def plot_top_ten_businesses_of_top_category(self):
        """Show a bar chart of the top 3 businesses of the top category."""
        top_category = self.find_top_category()
        top_ten_business_id_docs = self.find_top_businesses_of_category(
            top_category, 3)

        # Resolve each business id to its display name.
        businesses_for_plot = []
        for item in top_ten_business_id_docs:
            business_id = item['_id']
            business_name = self.query.find_one('business',
                                                [('business_id', business_id)],
                                                ['name'])['name']
            businesses_for_plot.append((item['count'], business_name))

        counts = [count for count, _ in businesses_for_plot]
        category_labels = [name for _, name in businesses_for_plot]

        colors = cm.Set1(np.arange(3) / 3.)

        labels = tuple(category_labels)
        business_counts = tuple(counts)
        ind = np.arange(3)  # the x locations for the groups
        width = 0.35  # bar width

        fig, ax = plt.subplots()

        rects1 = ax.bar(
            ind,
            business_counts,  # data
            width,  # bar width
            color=colors,  # bar colour
            error_kw={
                'ecolor': 'Tomato',  # error-bars colour
                'linewidth': 2
            })  # error-bar width

        # Renamed from `axes`, which shadowed the pylab axes() function used
        # in plot_top_ten_categories.
        current_axes = plt.gca()
        current_axes.set_ylim([0, 5000])  # y-axis bounds

        ax.set_ylabel('Reviews')
        ax.set_title('Most Popular Businesses')
        ax.set_xticks(ind + width)
        ax.set_xticklabels(labels)

        ax.legend(tuple(rects1), labels)
        # BUG FIX: 'legend.linewidth' is not a valid rcParam in modern
        # Matplotlib and makes rcParams.update raise KeyError.
        mpl.rcParams.update({'legend.fontsize': 25})

        def autolabel(rects):
            # Annotate each bar with its integer height.
            for rect in rects:
                height = rect.get_height()
                ax.text(
                    rect.get_x() + rect.get_width() / 2.,
                    1.05 * height,
                    '%d' % int(height),
                    ha='center',
                    va='bottom')

        autolabel(rects1)

        plt.show()
Ejemplo n.º 8
0
class NLPHandler(object):
    """Drives AlchemyAPI NLP analysis over the Yelp 'review_category' data.

    Builds the mixed collection, runs sentiment and combined calls, and
    stores the results back through MongoQuery.
    """

    def __init__(self):
        self.alchemy_api = AlchemyAPI()
        self.query = MongoQuery()
        self.collection_name = 'review_category'
        # Businesses with more reviews than this are excluded from the
        # top-businesses aggregation below.
        self.max_reviews_per_business = 1000
        self.top_businesses_limit = 10
        self.logger = logging.Logger('NLPLogger', level=logging.INFO)
        self.logger.addHandler(logging.FileHandler(filename=os.path.join(LOG_PATH, 'nlp.log'), mode='a+'))
        self.__broadcaster = None

    def set_broadcaster(self, b):
        """Store a broadcaster object (not used by any method visible here)."""
        self.__broadcaster = b

    def get_combined_result(self, review_text=''):
        """Run AlchemyAPI 'combined' analysis (with sentiment) on the text.

        Returns the raw response dict on success; raises NLPValueError on a
        non-OK status.
        """
        opts = {'sentiment': 1}
        response = self.alchemy_api.combined('text', review_text, options=opts)

        if response['status'] == 'OK':
            return response
        raise NLPValueError('Error in combined call: ' + response['statusInfo'])

    def get_sentiment_result(self, review_text=''):
        """Run AlchemyAPI sentiment analysis on the text.

        Returns the raw response dict on success; raises NLPValueError on a
        non-OK status.
        """
        response = self.alchemy_api.sentiment('text', review_text)
        if response['status'] == 'OK':
            return response
        raise NLPValueError('Error in sentiment analysis call: ' + response['statusInfo'])

    def create_mixed_collection(self):
        """Rebuild review_category by joining review, business and user docs.

        Inserts in batches of 5000.  NOTE(review): a trailing partial batch
        is never flushed — documents past the last full batch are dropped.
        """
        db_connector = DBConnector(os.path.join(CONFIG_PATH, 'config.json'))
        db_connector.connect()
        db_connector.reset_database_name('yelp', 'review_category')
        t_collection = db_connector.get_database_name('yelp', 'review_category')

        business_fields = ['business_id', 'categories']
        user_fields = ['user_id', 'elite', 'votes']
        review_fields = ['business_id', 'review_id', 'text', 'user_id', 'stars', 'date']

        documents = self.query.find_all('review', review_fields)

        index = 0
        batch_number = 5000
        batch_documents = [None] * batch_number  # reusable batch buffer

        for review_doc in documents:
            try:
                business_id = review_doc['business_id']
                user_id = review_doc['user_id']

                business_doc = self.query.find_one('business', [('business_id', business_id)], business_fields)
                user_doc = self.query.find_one('user', [('user_id', user_id)], user_fields)

                batch_documents[index % batch_number] = {
                    'review_id': review_doc['review_id'],
                    'business_id': business_id,
                    'user_id': user_id,
                    'elite': len(user_doc['elite']),
                    'useful': user_doc['votes']['useful'],
                    'categories': business_doc['categories'],
                    'text': review_doc['text'],
                    'stars': review_doc['stars'],
                    'date': review_doc['date'],
                }

                if (index + 1) % batch_number == 0:
                    # NOTE(review): Collection.insert() is deprecated in
                    # modern PyMongo in favour of insert_many().
                    t_collection.insert(batch_documents)
                    print("\n" + str(index + 1) + "\n")
                index += 1
            except Exception:
                # BUG FIX: was a bare `except:` with a Python 2 print
                # statement (a SyntaxError under Python 3).
                print('Unexpected error:', sys.exc_info()[0], ', for index ', index)
                raise
        db_connector.disconnect()

    def find_top_category(self):
        """Return the category id with the largest number of reviews."""
        pipeline = [
            {"$unwind": "$categories"},
            {"$group": {"_id": "$categories", "count": {"$sum": 1}}},
            {"$sort": {"count": -1}},
            {"$limit": 1}
        ]
        return self.query.aggregate(self.collection_name, pipeline, True)[0]['_id']

    def find_top_businesses_of_category(self, category, top_businesses=10):
        """Return the most-reviewed businesses in *category*, excluding those
        above max_reviews_per_business."""
        pipeline = [
            {"$unwind": "$categories"},
            {"$match": {"categories": category}},
            {"$group": {"_id": "$business_id", "count": {"$sum": 1}}},
            {"$sort": {"count": -1}},
            {"$match": {"count": {"$lte": self.max_reviews_per_business}}},
            {"$limit": top_businesses}
        ]
        return self.query.aggregate(self.collection_name, pipeline, True)

    def update_mixed_collection_with_sentiment(self, business_doc):
        """Fetch and store docSentiment for reviews of *business_doc* lacking it."""
        counter = 0

        query_list = [('business_id', business_doc['_id'])]
        field_list = ['review_id', 'text', 'sentiment']

        reviews = list(self.query.find_all_by(self.collection_name, query_list, field_list))

        for review in reviews:
            if 'sentiment' in review:
                continue

            review_id = review['review_id']
            sentiment = None
            try:
                sentiment = self.get_sentiment_result(review['text'])['docSentiment']
            except NLPValueError as err:
                # BUG FIX: err.message does not exist on Python 3 exceptions.
                print("{} is having problem with err: {}".format(review_id, str(err)))

            if sentiment is not None:  # BUG FIX: was `sentiment != None`
                self.query.find_and_update(self.collection_name, [('review_id', review_id)], [('sentiment', sentiment)])

                counter += 1
                if counter % 50 == 0:
                    print(str(counter) + " reviews updated.")

    def print_uncalled_top_ten_business(self):
        """Print, per top business, how many reviews still lack sentiment."""
        top_category = self.find_top_category()
        top_ten_business_id_docs = self.find_top_businesses_of_category(top_category, self.top_businesses_limit)

        for business in top_ten_business_id_docs:
            reviews = list(self.query.find_all_by(self.collection_name, [('business_id', business['_id'])], ['sentiment']))

            count = sum(1 for review in reviews if 'sentiment' not in review)

            print(str(count) + " reviews not updated for business " + str(business['_id']))

    # CAREFUL WITH THIS ONE!!! IT HAS MEMORY ISSUES - RUN IT MANY TIMES AND CLEAR CACHE OF PC EVERYTIME
    # IF IT TAKES TOO MUCH TIME, IT NEEDS INDEX
    def update_mixed_collection_with_review_votes(self):
        """Copy each review's useful-vote count into review_useful."""
        counter = 0

        review_fields = ['votes']
        review_category_query = [('review_useful', {"$exists": False})]

        review_categories = list(self.query.find_all_by(self.collection_name, review_category_query, ['review_id']))

        for review_category in review_categories:
            query_list = [('review_id', review_category['review_id'])]
            review = self.query.find_one('review', query_list, review_fields)

            self.query.find_and_update(self.collection_name, query_list, [('review_useful', review['votes']['useful'])])

            counter += 1
            if counter % 10000 == 0:
                print(str(counter) + " reviews finished.")

    def run_handler(self):
        """Run sentiment updates for the top businesses of the top category."""
        top_category = self.find_top_category()
        top_ten_business_id_docs = self.find_top_businesses_of_category(top_category, self.top_businesses_limit)

        for business in top_ten_business_id_docs:
            run_time = datetime.now().strftime('%Y/%m/%d %H:%M:%S')
            self.logger.info(run_time + " - Business '" + str(business['_id']) + "' started.")

            print("Starting AlchemyAPI calls. Please check nlp.log inside 'logs' folder for business_id")

            try:
                self.update_mixed_collection_with_sentiment(business)
                self.logger.info("Business " + str(business['_id']) + " finished.")
            except NLPValueError as err:
                # BUG FIX: err.message does not exist on Python 3 exceptions.
                self.logger.exception(run_time + " - " + str(err))

    def run2_handler(self):
        """Run combined analysis for up to 200 reviews of BUS_ID, log a summary."""
        business_id = BUS_ID
        limit = 200
        run_info = {'run_date': datetime.now().strftime('%Y/%m/%d %H:%M:%S')}

        documents = self.query.find_all_by('review_category', [('business_id', business_id)], ['review_id', 'text', 'combined_result'])

        # NOTE(review): Cursor.count() was removed in PyMongo 4 — confirm the
        # installed driver still supports it.
        total_reviews = documents.count()
        run_info['total_review'] = total_reviews
        success = 0
        failure_reviews = []

        print("get uncalled review: {}".format(documents.count()))
        i = 0
        for doc in documents:
            review_id = doc['review_id']

            if 'combined_result' in doc:
                continue

            # BUG FIX: the original compared type(result) against the STRING
            # 'NLPValueError', which is always False.  get_combined_result
            # signals failure by raising, so catch the exception instead.
            try:
                result = self.get_combined_result(doc['text'])
            except NLPValueError:
                failure_reviews.append(review_id)
            else:
                data = {key: result.get(key, []) for key in ('entities', 'concepts', 'keywords', 'taxonomy')}

                self.query.find_and_update('review_category', [('review_id', review_id)], [('combined_result', data)])

                success += 1
                # BUG FIX: the modulo check used to run before the increment,
                # so the very first update printed "0 reviews updated."
                if success % 50 == 0:
                    print(str(success) + " reviews updated.")

            i += 1
            if i == limit:
                break

        run_info['success'] = success
        run_info['fail'] = total_reviews - success
        if success != total_reviews:
            run_info['failure_ids'] = failure_reviews

        pprint(run_info)

        with open('./run2_log.txt', 'w+') as outfile:
            json.dump(run_info, outfile, indent=4, ensure_ascii=False)

    def tee_perform(self):
        """Fetch one hard-coded review, print its sentiment and store it."""
        #['u-Gbz-uGIIKC0SN2MwXtLw', 'FyCc8g7LCVpU4BGCz-WUog']
        review_id = 'FyCc8g7LCVpU4BGCz-WUog'
        query_list = [('review_id', review_id)]
        review = self.query.find_one('review_category', query_list, ['review_id', 'text'])

        if review is not None:
            # BUG FIX: the original called get_sentiment_result twice,
            # doubling the remote API calls; reuse the first response.
            sentiment_res = self.get_sentiment_result(review['text'])
            pprint(sentiment_res)
            sentiment = sentiment_res['docSentiment']

            self.query.find_and_update(self.collection_name, query_list, [('sentiment', sentiment)])