def process():
    """Classify candidate users and persist the location found for each.

    Loads users from ``pre_user`` whose ``review_count`` is greater than 2,
    runs ``reviewer_classifier`` on each of them while reporting progress on
    stdout, prints how many users received each classification code, and
    finally writes every user that obtained a location back to ``pre_user``
    with that location stored under ``local``.
    """
    # The classifier's first step discards users with too few reviews, so
    # that filtering is pushed down into the query.
    # NOTE(review): the classifier is described as discarding users with
    # fewer than 2 reviews, yet `$gt: 2` also excludes users with exactly
    # 2 reviews -- confirm the intended threshold.
    candidates = mongo_functions.mongo_get(
        collection='pre_user',
        filter={'review_count': {'$gt': 2}},
        fields={'reviews': 1, 'review_count': 1, 'grouped_reviews': 1},
        page_size=2000000
    )
    total = len(candidates)
    code_counts = {}
    classified = []

    print('\n')
    for position, user in enumerate(candidates, start=1):
        # '\r' rewinds to the start of the line so the counter is redrawn
        # in place instead of scrolling.
        sys.stdout.write('\rProcessing user {}/{}...'.format(position, total))
        sys.stdout.flush()

        location, code = reviewer_classifier(user)
        if code is not None:
            code_counts[code] = code_counts.get(code, 0) + 1
        if location is not None:
            user['local'] = location
            classified.append(user)

    print('\nSummary:')
    for code in sorted(code_counts):
        print('{} users classified as: {}'.format(code_counts[code], classifier_code_map[code]))

    mongo_functions.batch_update(classified, collection='pre_user', update='{"$set": item}')
def prepare():
    """Derive the city centre and city tiles of every metropolitan area.

    For each document in ``pre_metropolitan_area`` the businesses located in
    the area's level-10 tiles are grouped by level-15 tile.  The tile holding
    the most businesses is used as the seed passed to ``_group`` and as the
    source of the ``city_center`` coordinates.  Each processed area gets:

    * ``city_center``     -- ``tile_functions.tile_center`` of the seed tile,
    * ``city_tiles15``    -- first element of the ``_group`` result,
    * ``city_businesses`` -- number of businesses inside those tiles.

    Areas without any business are reported and left without these fields
    instead of aborting the whole run.  All areas are written back to
    ``pre_metropolitan_area`` at the end.
    """
    areas = mongo_functions.mongo_get(collection='pre_metropolitan_area')

    for area in areas:
        businesses = mongo_functions.mongo_get(
            collection='pre_business',
            filter={'tile10': {'$in': area['tiles']}})
        tile15_dict = _group_business_by_tile15(businesses)

        if not tile15_dict:
            # Without businesses there is no densest tile to seed the city
            # from; indexing the (empty) ranking would raise IndexError and
            # lose the results of every area processed so far.
            # (assumes the grouping helper yields an empty mapping for an
            # empty business list -- TODO confirm)
            print('no businesses found for area {}, skipping'.format(
                area.get('_id')))
            continue

        # Tile with the most businesses; on ties the first tile in the
        # mapping's iteration order wins, as a stable descending sort would.
        densest_tile = max(tile15_dict,
                           key=lambda tile: len(tile15_dict[tile]))

        city_area = _group(set(tile15_dict), densest_tile)
        city_tiles = city_area[0]

        # NOTE: upserting the individual city businesses into
        # 'pre_city_business' is currently disabled; only their count is
        # stored on the area.
        city_business_count = sum(len(tile15_dict[tile])
                                  for tile in city_tiles)

        # Tile ids are formatted as '<x>_<y>'.
        tile_parts = densest_tile.split('_')
        area['city_center'] = list(
            tile_functions.tile_center(int(tile_parts[0]),
                                       int(tile_parts[1]), 15))
        area['city_tiles15'] = city_tiles
        area['city_businesses'] = city_business_count

    mongo_functions.batch_update(areas,
                                 collection='pre_metropolitan_area',
                                 update="{'$set': item}")
# Example no. 3
# 0
def yearly_dissimilarity_ratio():
    """Store a per-year dissimilarity ratio on every reviewed business.

    For each metropolitan area the reviews are split by year via
    ``_reviews_per_year``.  Within a year, the numbers of unique visitors
    and unique residents of the city are computed once and passed, together
    with each business' reviews, to ``business_dissimilarity_ratio``.  The
    results are collected as ``{year: ratio}`` under ``ratio_yearly`` and
    written to ``pre_business`` once per area.
    """
    areas = mongo_functions.mongo_get(collection='pre_metropolitan_area',
                                      fields={})
    for area in areas:
        ratios_by_business = {}

        for year, reviews in _reviews_per_year(area).items():
            visitors = city_unique_visitors_get(reviews, area)
            residents = city_unique_residents_get(reviews, area)
            visitor_count = len(visitors)
            resident_count = len(residents)
            year_key = str(year)

            grouped_reviews = business_reviews_dict_group(reviews)
            for business_id, business_reviews in grouped_reviews.items():
                entry = ratios_by_business.setdefault(
                    business_id,
                    {'_id': business_id, 'ratio_yearly': {}})
                ratio = business_dissimilarity_ratio(business_reviews,
                                                     visitor_count,
                                                     resident_count, area)
                # Round-trip through a fixed 9-decimal string so the stored
                # value carries at most nine decimal places.
                entry['ratio_yearly'][year_key] = float(
                    '{0:.9f}'.format(ratio))

        mongo_functions.batch_update(
            list(ratios_by_business.values()),
            collection='pre_business',
            update='{"$set": {"ratio_yearly": item["ratio_yearly"]}}')
def prepare():
    """Tag each review with the ``local`` value of the user who wrote it.

    For every metropolitan area, the reviews whose ``city_area`` matches the
    area are loaded, the ``local`` field of their authors is fetched from
    ``pre_user`` in chunks of 100000 ids, and each review whose author has
    a ``local`` value gets it copied into ``user_from``.  Only those reviews
    are written back to ``pre_review``.

    Any error while collecting user ids or loading users is printed and
    ends the whole function, leaving the remaining areas unprocessed.
    """
    chunk_size = 100000

    for area in mongo_functions.mongo_get(collection='pre_metropolitan_area',
                                          fields={}):
        reviews = mongo_functions.mongo_get(collection='pre_review',
                                            filter={'city_area': area['_id']},
                                            fields={'user_id': 1})

        # Deduplicate the authors before querying for them.
        try:
            pending_ids = list({review['user_id'] for review in reviews})
        except Exception as e:
            print(e)
            return

        # Look the users up chunk by chunk to keep each `$in` list bounded.
        try:
            user_type_dict = {}
            for start in range(0, len(pending_ids), chunk_size):
                id_chunk = pending_ids[start:start + chunk_size]
                located_users = mongo_functions.mongo_get(
                    collection='pre_user',
                    filter={
                        '_id': {'$in': id_chunk},
                        'local': {'$exists': True},
                    },
                    fields={'local': 1})
                for user in located_users:
                    user_type_dict[user['_id']] = user['local']
        except Exception as e:
            print(e)
            return

        # Drain `reviews` from the end so the source list shrinks while the
        # tagged list grows.
        tagged_reviews = []
        while reviews:
            review = reviews.pop()
            try:
                review['user_from'] = user_type_dict[review['user_id']]
            except KeyError:
                # Author has no `local` value (or the review has no
                # `user_id`): leave the review untouched.
                continue
            except Exception as e:
                print(e)
                continue
            tagged_reviews.append(review)

        mongo_functions.batch_update(
            tagged_reviews,
            collection='pre_review',
            update='{"$set": {"user_from": item["user_from"]}}')
# Example no. 5
# 0
def prepare():
    """Store ``norm_categories`` on the businesses of every metropolitan area.

    Builds the category dictionary once with ``category_dict_prepare``,
    then, area by area, loads the businesses located in the area's level-10
    tiles, hands them to ``business_prepare`` together with that dictionary
    and writes each business' ``norm_categories`` back to ``pre_business``.

    The return value of ``business_prepare`` is ignored and the update
    expression reads ``norm_categories`` from each item, so the helper is
    presumably expected to fill that key in place -- verify against it.
    """
    category_dict = category_dict_prepare()
    areas = mongo_functions.mongo_get(
        collection='pre_metropolitan_area',
        fields={'tiles': 1, 'name': 1})

    for area in areas:
        print('processing business for ', area['_id'])

        located_in_area = {"tile10": {'$in': area['tiles']}}
        wanted_fields = {'name': 1, 'categories': 1}
        business = mongo_functions.mongo_get(collection='pre_business',
                                             filter=located_in_area,
                                             fields=wanted_fields)

        business_prepare(business, category_dict)

        mongo_functions.batch_update(
            business,
            collection='pre_business',
            update='{"$set": {"norm_categories": item["norm_categories"]}}')
# Example no. 6
# 0
def main():
    """Store the overall dissimilarity ratio of every reviewed business.

    For each metropolitan area, loads the area's reviews that carry a
    ``user_from`` value, counts the city's unique visitors and unique
    residents, groups the reviews per business and saves the result of
    ``business_dissimilarity_ratio`` for each business as ``raw_ratio`` in
    ``pre_business``.
    """
    for area in mongo_functions.mongo_get(collection='pre_metropolitan_area',
                                          fields={}):
        # Only reviews whose author origin is known take part in the ratio.
        review_filter = {
            'city_area': area['_id'],
            'user_from': {'$exists': True},
        }
        review_fields = {
            'business_id': 1,
            'user_id': 1,
            'tile15': 1,
            'tile18': 1,
            'user_from': 1,
        }
        reviews = mongo_functions.mongo_get(collection='pre_review',
                                            filter=review_filter,
                                            fields=review_fields)

        visitors = city_unique_visitors_get(reviews, area)
        residents = city_unique_residents_get(reviews, area)
        total_visitors = len(visitors)
        total_residents = len(residents)

        reviews_by_business = business_reviews_dict_group(reviews)
        business_list = [
            {
                '_id': business_id,
                'raw_ratio': business_dissimilarity_ratio(
                    business_reviews, total_visitors, total_residents, area),
            }
            for business_id, business_reviews in reviews_by_business.items()
        ]

        mongo_functions.batch_update(
            business_list,
            collection='pre_business',
            update='{"$set": {"raw_ratio": item["raw_ratio"]}}')