Example #1
0
def get_radius_of_region(center_point, polygons):
    """Return the mean distance from ``center_point`` to the region's points.

    Every point of every polygon in ``polygons`` is measured against
    ``center_point`` with ``geo.distance`` and the results are averaged.

    Returns ``None`` when ``polygons`` is empty or otherwise falsy, and ``0``
    when the polygons exist but contain no points at all.
    """
    if not polygons:
        return None

    # Flatten polygon -> point in the same order a nested loop would visit.
    distances = [geo.distance(center_point, vertex)
                 for ring in polygons
                 for vertex in ring]

    if not distances:
        return 0

    return sum(distances) / len(distances)
Example #2
0
def get_radius_of_region(center_point, polygons):
    """Average the ``geo.distance`` from ``center_point`` to each polygon point.

    ``polygons`` is an iterable of polygons, each an iterable of points.
    A falsy ``polygons`` yields ``None``; polygons that hold no points
    yield ``0``.
    """
    if polygons:
        total = 0
        count = 0
        # Walk every point of every polygon in order, accumulating as we go.
        for vertex in (pt for ring in polygons for pt in ring):
            total += geo.distance(center_point, vertex)
            count += 1
        return total / count if count else 0

    return None
Example #3
0
def _mean_or_zero(values):
    """Return the float mean of the non-None items of ``values`` (0 if none)."""
    present = [value for value in values if value is not None]
    if not present:
        return 0
    return float(sum(present)) / len(present)


def _build_regions_info(regions):
    """Map region id -> area/center/radius/rst_num, dropping unusable regions."""
    regions_info = {r['id']:
                    {'area': region_area_to_polygons(r['area'], reverse=True)}
                    for r in regions}
    region_ids_to_del = []
    for region_id, region in regions_info.items():
        center = get_centerpoint_of_region(region['area'], only_one=True)
        # A region without a center is dropped below regardless of its
        # radius, so do not measure distances against a missing center.
        radius = get_radius_of_region(center, region['area']) if center else None
        region['center'] = center
        region['radius'] = radius
        region['rst_num'] = 0
        if not (center and radius):
            region_ids_to_del.append(region_id)
    for region_id in region_ids_to_del:
        del regions_info[region_id]
    return regions_info


def _find_containing_region(rst_point, region_items):
    """Return (id, region) of the first region whose area holds the point."""
    for region_id, region in region_items:
        if not region['area']:
            continue
        for polygon in region['area']:
            if geo.is_in_region(rst_point, polygon, xy_reverse=True):
                return region_id, region
    return None, None


def _find_nearby_region(rst_point, region_items, max_distance_to_center,
                        max_region_rsts_num):
    """Return (id, region, dist) of the first non-full region within reach."""
    min_dist = float('inf')
    for region_id, region in region_items:
        current_dist = geo.distance(rst_point, region['center'])
        if current_dist < min_dist and \
                region['rst_num'] < max_region_rsts_num:
            min_dist = current_dist
            # NOTE(review): this stops at the first candidate within reach,
            # which is not necessarily the nearest region overall; kept
            # as-is -- confirm whether true nearest-region is intended.
            if min_dist < max_distance_to_center + region['radius']:
                return region_id, region, min_dist
    return None, None, None


def group_all_restaurants(city_id, center_point, max_distance_to_center,
                          max_region_rsts_num):
    """Assign each queried restaurant of a city to a region and log the result.

    Restaurants are processed in ascending order of their distance to
    ``center_point``.  Each one ends up in one of three states:

    a. inside a region's area (and that region is not full),
    b. near a region: its distance to the region center is below
       ``max_distance_to_center`` plus the region radius,
    c. ungrouped (``region_id`` stays ``None``).

    The call that would persist the assignment is commented out, so this
    function currently only logs the computed assignments.

    :param city_id: id of the city whose restaurants and regions are queried.
    :param center_point: point passed to ``geo.distance`` together with each
        restaurant's ``(lng, lat)`` tuple (presumably also ``(lng, lat)`` --
        confirm against callers).
    :param max_distance_to_center: slack added to a region's radius when
        testing whether a restaurant is near that region.
    :param max_region_rsts_num: a region stops accepting restaurants once
        its count reaches this number.
    :return: ``True``.
    """
    # for logging
    rst_in_region_num = 0
    rst_near_region_num = 0
    rst_far_from_region_num = 0
    exc_num = 0

    # 1. get all valid and not BOD restaurants
    rsts = inner.query_all_rsts(city_ids=[city_id], is_valid=1, is_premium=0,
                                region_type=region_base.REGION_TYPE_CBD)
    rsts_info = [{'id': r['id'],
                  'lng': r['longitude'],
                  'lat': r['latitude']} for r in rsts]
    rst_len = len(rsts_info)
    log.info('Step 1: query. all rsts to group count is {}'.format(rst_len))

    # 2. sort restaurants by their distance to center of the city
    for rst in rsts_info:
        rst['center_dist'] = geo.distance(center_point,
                                          (rst['lng'], rst['lat']))
        rst['min_region_dist'] = rst['region_id'] = None

    sorted_rsts = sorted(rsts_info, key=lambda rest: rest['center_dist'])
    # Guard against an empty query result instead of indexing [-1] blindly.
    farthest_dist = sorted_rsts[-1]['center_dist'] if sorted_rsts else None
    log.info('Step 2: sorted complete, farest dist is {}'.
             format(farthest_dist))

    # 3. calculate each region's center point and radius
    # NOTE(review): step 1 uses region_base.REGION_TYPE_CBD while this call
    # uses a bare REGION_TYPE_CBD -- confirm both refer to the same constant.
    regions = region_inner.query_regions(city_ids=[city_id],
                                         type_codes=[REGION_TYPE_CBD])
    regions_info = _build_regions_info(regions)
    log.info('Step 3: calculate center and radius')

    # 4. main loop for restaurants, there are three kinds of results
    #  for each restaurant:
    #       a. in certain region
    #       b. near certain region (no more than `max_distance_to_center`)
    #       c. others (drop into ungrouped region)
    # The set of regions is fixed from here on (only their counters change),
    # so materialize the items once instead of once per restaurant.
    region_items = list(regions_info.items())
    for index, rst in enumerate(sorted_rsts):
        try:
            if index % 100 == 0:
                log.info('grouping rate {}%'.format(float(index) / rst_len * 100))

            rst_point = (rst['lng'], rst['lat'])

            # a. if in certain region (a full region falls through to b.)
            region_id, region = _find_containing_region(rst_point,
                                                        region_items)
            if region is not None and \
                    region['rst_num'] < max_region_rsts_num:
                rst['region_id'] = region_id
                region['rst_num'] += 1
                rst_in_region_num += 1  # for logging
                continue

            # b. if near some region
            region_id, region, dist = _find_nearby_region(
                rst_point, region_items, max_distance_to_center,
                max_region_rsts_num)
            if region is not None:
                # Record the chosen region on the restaurant; without this
                # the near-region match is counted but never reported.
                rst['region_id'] = region_id
                rst['min_region_dist'] = dist
                region['rst_num'] += 1
                rst_near_region_num += 1  # for logging
                continue

            # c. others
            rst['region_id'] = None
            rst_far_from_region_num += 1  # for logging
        except Exception as e:
            # Best effort: one bad restaurant must not abort the whole run.
            exc_num += 1
            print(e)

    # Averages are for logging only; they fall back to 0 when there is
    # nothing to average so an empty city cannot raise ZeroDivisionError.
    average_rst_region_dist = _mean_or_zero(
        rst['min_region_dist'] for rst in sorted_rsts)
    region_average_rst_num = _mean_or_zero(
        region['rst_num'] for region in regions_info.values())
    region_average_radius = _mean_or_zero(
        region['radius'] for region in regions_info.values())

    log.info('Step 4: Main grouping. total_rst_num: {}, total_region_num: {}, '
             'rst_in_region: {}, rst_near_region: {}, other_rsts: {};'
             'ave rst region dist: {}, ave region rst num: {},'
             ' ave region radius {}. total exc {}.'.
             format(rst_len, len(regions_info), rst_in_region_num,
                    rst_near_region_num, rst_far_from_region_num,
                    average_rst_region_dist, region_average_rst_num,
                    region_average_radius, exc_num))

    # 5. update restaurant region
    for index, rst in enumerate(sorted_rsts):
        try:
            if index % 200 == 0:
                log.info('updating rate {}%'.format(float(index) / rst_len * 100))
            # Persisting is currently disabled; only the assignment is logged.
            # inner.update_restaurant_region(rst['id'], rst['region_id'])
            log.info('rst_id: {}, region_id: {}'.format(rst['id'], rst['region_id']))

        except Exception as e:
            exc_num += 1
            print(e)
    for region_id, region in regions_info.items():
        log.info('{}: {}'.format(region_id, region['rst_num']))

    log.info('done.')
    return True