Example #1
0
def all_gold_dst():
    """Compute the distance between all gold regions and the query ones for
    all metrics.

    The query is always the first gold 'paris' geometry of a district. For
    every other city and every metric, it is passed to `interpret_query`, and
    each gold region returned by `get_gold_desc` for that city and district
    is then compared with `generic_distance`.

    Returns:
        dict mapping '<city>_<district>_<metric>' to the list of distances
        obtained for that combination. Combinations without any gold
        candidate are reported on stdout and left out of the result.

    Raises:
        AssertionError: if GROUND_TRUTH is empty (i.e. not loaded yet).
    """
    assert GROUND_TRUTH, 'load GROUND_TRUTH before calling'
    districts = list(GROUND_TRUTH)
    # The cities are read from the first district only. `next(iter(...))` and
    # `list(...)` are used because dict views can neither be indexed nor
    # mutated on Python 3; on Python 2 they behave like the former
    # `.items()[0]` / `.keys()` calls.
    first_entry = next(iter(GROUND_TRUTH.values()))
    cities = list(first_entry['gold'])
    cities.remove('paris')
    metrics = ['cluster', 'emd', 'emd-lmnn', 'jsd']
    results = {}
    for city, district in i.product(cities, districts):
        geo = GROUND_TRUTH[district]['gold']['paris'][0]['geometry']
        for metric in metrics:
            name = '_'.join([city, district, metric])
            info = interpret_query('paris', city, geo, metric)
            # Only three of the six returned fields are needed here.
            _, target_city, target_desc, regions_distance, _, _ = info
            support = target_desc[1]
            candidates = get_gold_desc(target_city, district)
            if not candidates:
                print(name + ' is empty')
                continue
            current_dsts = []
            for region in candidates:
                features, _, weights, _ = region
                # Regions with fewer than three weights are skipped for the
                # 'cluster' metric (see the message below).
                if metric == 'cluster' and weights.size < 3:
                    print("{}: can't make three clusters".format(name))
                    continue
                dst = generic_distance(metric, regions_distance, features,
                                       weights, support)
                # NOTE: 'leftover' is not in `metrics` above, so this branch
                # is currently never taken; it is kept in case that metric is
                # added back to the list.
                if metric == 'leftover':
                    dst = emd_leftover.collect_matlab_output(1)
                    clean_tmp_mats()
                current_dsts.append(dst)
            results[name] = current_dsts
    return results
Example #2
0
def all_gold_dst():
    """Compute the distance between all gold regions and the query ones for
    all metrics.

    The query is always the first gold 'paris' geometry of a district. For
    every other city and every metric, it is passed to `interpret_query`, and
    each gold region returned by `get_gold_desc` for that city and district
    is then compared with `generic_distance`.

    Returns:
        dict mapping '<city>_<district>_<metric>' to the list of distances
        obtained for that combination. Combinations without any gold
        candidate are reported on stdout and left out of the result.

    Raises:
        AssertionError: if GROUND_TRUTH is empty (i.e. not loaded yet).
    """
    assert GROUND_TRUTH, 'load GROUND_TRUTH before calling'
    districts = list(GROUND_TRUTH)
    # The cities are read from the first district only. `next(iter(...))` and
    # `list(...)` are used because dict views can neither be indexed nor
    # mutated on Python 3; on Python 2 they behave like the former
    # `.items()[0]` / `.keys()` calls.
    first_entry = next(iter(GROUND_TRUTH.values()))
    cities = list(first_entry['gold'])
    cities.remove('paris')
    metrics = ['cluster', 'emd', 'emd-lmnn', 'jsd']
    results = {}
    for city, district in i.product(cities, districts):
        geo = GROUND_TRUTH[district]['gold']['paris'][0]['geometry']
        for metric in metrics:
            name = '_'.join([city, district, metric])
            info = interpret_query('paris', city, geo, metric)
            # Only three of the six returned fields are needed here.
            _, target_city, target_desc, regions_distance, _, _ = info
            support = target_desc[1]
            candidates = get_gold_desc(target_city, district)
            if not candidates:
                print(name + ' is empty')
                continue
            current_dsts = []
            for region in candidates:
                features, _, weights, _ = region
                # Regions with fewer than three weights are skipped for the
                # 'cluster' metric (see the message below).
                if metric == 'cluster' and weights.size < 3:
                    print("{}: can't make three clusters".format(name))
                    continue
                dst = generic_distance(metric, regions_distance, features,
                                       weights, support)
                # NOTE: 'leftover' is not in `metrics` above, so this branch
                # is currently never taken; it is kept in case that metric is
                # added back to the list.
                if metric == 'leftover':
                    dst = emd_leftover.collect_matlab_output(1)
                    clean_tmp_mats()
                current_dsts.append(dst)
            results[name] = current_dsts
    return results
Example #3
0
def batch_matching(query_city='paris'):
    """Match preselected regions of `query_city` into the other target
    cities.

    Reads 'static/ground_truth.json' and, for every (target city, district,
    metric, radius) combination, pulls the first item out of `best_match`
    unless a file named after that combination already exists in OTMPDIR.

    Side effects:
        * the module global OTMPDIR is rebound to its
          'www_comparaison_<query_city>' sub-directory, which is created if
          possible (NOTE(review): a second call appends the suffix again);
        * the module global QUERY_NAME is set before each `best_match` call;
        * 'static/cpresets.js' is rewritten after each metric.

    Args:
        query_city: city whose gold regions serve as queries; it must be one
            of the cities listed for the first district of the ground truth.

    Raises:
        AssertionError: if `query_city` is not among those cities.
    """
    import ujson
    global QUERY_NAME
    global OTMPDIR
    with open('static/ground_truth.json') as gt:
        regions = ujson.load(gt)
    districts = sorted(regions.keys())
    # `next(iter(...))` rather than `.values()[0]`: dict views cannot be
    # indexed on Python 3, and both forms pick the same entry on Python 2.
    first_entry = next(iter(regions.values()))
    cities = sorted(first_entry['gold'].keys())
    assert query_city in cities
    cities.remove(query_city)
    OTMPDIR = os.path.join(OTMPDIR, 'www_comparaison_'+query_city)
    try:
        os.mkdir(OTMPDIR)
    except OSError:
        # Best effort, e.g. the directory is left over from a previous run.
        pass
    for city in cities:
        print(city)
        for neighborhood in districts:
            print(neighborhood)
            possible_regions = regions[neighborhood]['gold'].get(query_city)
            rgeo = choose_query_region(possible_regions)
            if not rgeo:
                continue
            # Alternative metric set:
            # ['jsd', 'emd', 'cluster', 'emd-lmnn', 'leftover']
            for metric in ['emd-itml', 'emd-tsne']:
                print(metric)
                for radius in np.linspace(200, 500, 5):
                    print(radius)
                    QUERY_NAME = '{}_{}_{}_{}.my'.format(city, neighborhood,
                                                         int(radius),
                                                         metric)
                    out_path = os.path.join(OTMPDIR, QUERY_NAME)
                    logging.info('will write: '+str(out_path))
                    # Skip combinations whose output file is already there.
                    if os.path.isfile(out_path):
                        continue
                    # The `next()` builtin works on Python 2.6+ and 3, unlike
                    # the generator's `.next()` method.
                    res, values, _ = next(best_match(query_city, city, rgeo,
                                                     radius, metric=metric))
                    # NOTE(review): this unconditional `continue` makes the
                    # rest of the loop body unreachable, so `regions` is
                    # never updated. It is kept to preserve behaviour; if
                    # the code below is re-enabled, note that `cities` is
                    # the local list here (it has no `euclidean_to_geo`) and
                    # that `radius` would be rebound.
                    continue
                    distance, r_vids, center, radius = res
                    print(distance)
                    if center is None:
                        result = {'dst': distance, 'metric': metric,
                                  'nb_venues': 0}
                    else:
                        center = cities.euclidean_to_geo(city, center)
                        result = {'geo': {'type': 'circle',
                                          'center': center, 'radius': radius},
                                  'dst': distance, 'metric': metric,
                                  'nb_venues': len(r_vids)}
                    regions[neighborhood][city].append(result)
                # Written once per metric, after all radii were processed.
                with open('static/cpresets.js', 'w') as out:
                    out.write('var PRESETS =' + ujson.dumps(regions) + ';')
Example #4
0
def batch_matching(query_city='paris'):
    """Match preselected regions of `query_city` into the other target
    cities.

    Reads 'static/ground_truth.json' and, for every (target city, district,
    metric, radius) combination, pulls the first item out of `best_match`
    unless a file named after that combination already exists in OTMPDIR.

    Side effects:
        * the module global OTMPDIR is rebound to its
          'www_comparaison_<query_city>' sub-directory, which is created if
          possible (NOTE(review): a second call appends the suffix again);
        * the module global QUERY_NAME is set before each `best_match` call;
        * 'static/cpresets.js' is rewritten after each metric.

    Args:
        query_city: city whose gold regions serve as queries; it must be one
            of the cities listed for the first district of the ground truth.

    Raises:
        AssertionError: if `query_city` is not among those cities.
    """
    import ujson
    global QUERY_NAME
    global OTMPDIR
    with open('static/ground_truth.json') as gt:
        regions = ujson.load(gt)
    districts = sorted(regions.keys())
    # `next(iter(...))` rather than `.values()[0]`: dict views cannot be
    # indexed on Python 3, and both forms pick the same entry on Python 2.
    first_entry = next(iter(regions.values()))
    cities = sorted(first_entry['gold'].keys())
    assert query_city in cities
    cities.remove(query_city)
    OTMPDIR = os.path.join(OTMPDIR, 'www_comparaison_' + query_city)
    try:
        os.mkdir(OTMPDIR)
    except OSError:
        # Best effort, e.g. the directory is left over from a previous run.
        pass
    for city in cities:
        print(city)
        for neighborhood in districts:
            print(neighborhood)
            possible_regions = regions[neighborhood]['gold'].get(query_city)
            rgeo = choose_query_region(possible_regions)
            if not rgeo:
                continue
            # Alternative metric set:
            # ['jsd', 'emd', 'cluster', 'emd-lmnn', 'leftover']
            for metric in ['emd-itml', 'emd-tsne']:
                print(metric)
                for radius in np.linspace(200, 500, 5):
                    print(radius)
                    QUERY_NAME = '{}_{}_{}_{}.my'.format(
                        city, neighborhood, int(radius), metric)
                    out_path = os.path.join(OTMPDIR, QUERY_NAME)
                    logging.info('will write: ' + str(out_path))
                    # Skip combinations whose output file is already there.
                    if os.path.isfile(out_path):
                        continue
                    # The `next()` builtin works on Python 2.6+ and 3, unlike
                    # the generator's `.next()` method.
                    res, values, _ = next(
                        best_match(query_city,
                                   city,
                                   rgeo,
                                   radius,
                                   metric=metric))
                    # NOTE(review): this unconditional `continue` makes the
                    # rest of the loop body unreachable, so `regions` is
                    # never updated. It is kept to preserve behaviour; if
                    # the code below is re-enabled, note that `cities` is
                    # the local list here (it has no `euclidean_to_geo`) and
                    # that `radius` would be rebound.
                    continue
                    distance, r_vids, center, radius = res
                    print(distance)
                    if center is None:
                        result = {
                            'dst': distance,
                            'metric': metric,
                            'nb_venues': 0
                        }
                    else:
                        center = cities.euclidean_to_geo(city, center)
                        result = {
                            'geo': {
                                'type': 'circle',
                                'center': center,
                                'radius': radius
                            },
                            'dst': distance,
                            'metric': metric,
                            'nb_venues': len(r_vids)
                        }
                    regions[neighborhood][city].append(result)
                # Written once per metric, after all radii were processed.
                with open('static/cpresets.js', 'w') as out:
                    out.write('var PRESETS =' + ujson.dumps(regions) + ';')