예제 #1
0
def get_zones(key, path, layer, epss, epst, **kwargs):
    """Cluster fuzzy-match data and render the resulting zones as a Google
    Maps HTML page under ``data/results/``.

    Parameters
    ----------
    key : str
        Google Maps API key substituted into the HTML template.
    path, layer, epss, epst :
        Forwarded verbatim to ``clusterer``, which (re)builds the clusters
        and returns the hash that identifies this clustering run.
    **kwargs :
        ``pool_size`` (optional): when int(pool_size) > 1, regions are
        computed in a multiprocessing pool of that size.
        ``filename`` (optional): output HTML file name; a UUID-based name
        is generated when absent.
        Remaining kwargs are passed through to ``clusterer``.

    Side effects: writes the rendered HTML file and prints its path.
    """
    results_dir = 'data/results/'

    cluster_hash = clusterer(path, layer, epss, epst, **kwargs)

    # Select the cluster CSVs belonging to this run.
    # Raw string + escaped dot: the original pattern's bare '.' before
    # 'csv' matched any character, and the non-raw '\d' raised a
    # DeprecationWarning on modern Python.
    cluster_dir = 'data/fuzzy-matches/clusters/'
    regex = re.compile(r'^(\d+)%s\.csv$' % cluster_hash)
    # Pass paths (not open handles) to read_csv so pandas opens and closes
    # the files itself; the original leaked one descriptor per file and
    # used the 'rU' mode removed in Python 3.11.
    cluster_files = [
        cluster_dir + filename
        for filename in os.listdir(cluster_dir) if regex.match(filename)
    ]

    # draw regions, in parallel when a pool size > 1 was requested
    if 'pool_size' in kwargs and int(kwargs['pool_size']) > 1:
        pool = mp.Pool(int(kwargs['pool_size']))
        result = pool.map(Distribution.get_region,
                          [pd.read_csv(f) for f in cluster_files])
        pool.close()
        pool.join()
    else:
        result = [
            Distribution.get_region(pd.read_csv(f)) for f in cluster_files
        ]

    # create json for plotting on Google Maps
    print('INFO: creating plot object')
    region_jsons = []
    for region in result:
        df = '{lat: ' + region['lat'].map(str) + ', lng: ' + region['lon'].map(
            str) + '}'
        region_jsons.append('[' + df.str.cat(sep=',') + ']')
    # join instead of quadratic '+='; the trailing comma matches the
    # original accumulation loop's output exactly
    regions = (','.join(region_jsons) + ',') if region_jsons else ''

    # create HTML file with plot and finish
    with open('templates/google-shape.html', 'r') as file:
        template = file.read()

    with open('%stotalizer%s.json' % (cluster_dir, cluster_hash)) as file:
        totalizer = file.read()

    template = (template
                .replace('<?=LIST?>', regions)
                .replace('<?=KEY?>', key)
                .replace('<?=DATA?>', totalizer))

    # honour an explicit output name, otherwise generate a unique one
    if 'filename' in kwargs:
        filename = kwargs['filename']
    else:
        filename = 'regions-fuzzymatcher-' + IdGenerator.uuid4().hex + '.html'

    with open(results_dir + filename, 'w+') as file:
        file.write(template)

    print(results_dir + filename)
예제 #2
0
def get_region(df, columns):
    """Reduce *df* to two coordinate columns, compute its region via
    ``Distribution.get_region``, and serialize it as a JSON array string of
    ``{"lat": ..., "lng": ..., "teta": ...}`` objects.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; must contain the two columns named in *columns*.
    columns : sequence
        Two column labels, in (latitude, longitude) order.

    Returns
    -------
    str
        e.g. ``'[{"lat": 1.0, "lng": 2.0, "teta": 0.5},...]'``.
    """
    # .copy() prevents pandas' SettingWithCopyWarning when renaming the
    # columns of a slice, and guarantees the caller's frame is untouched.
    df = df[columns].copy()
    df.columns = ['lat', 'lon']
    df = Distribution.get_region(df)
    df = ('{"lat": ' + df['lat'].map(str) + ', "lng": ' + df['lon'].map(str)
          + ', "teta": ' + df['teta'].map(str) + '}')
    return '[' + df.str.cat(sep=',') + ']'
예제 #3
0
def quantify_connected_components(key,
                                  path,
                                  layer,
                                  origin,
                                  distance_precision,
                                  time_precision,
                                  confidence=2,
                                  **kwargs):
    """Load fuzzy-match CSVs, bucketize them spatially/temporally, keep the
    statistically dense buckets, label connected components across worker
    processes, and render the labeled regions as a Google Maps HTML page.

    Parameters
    ----------
    key : str
        Google Maps API key substituted into the HTML template.
    path : str
        Directory containing the match CSV files.
    layer : str
        Column prefix selecting which layer's lat/lon/timestamp to use.
    origin :
        Passed to ``Bucketizer.bucketize_dataframe`` as the bucketing origin.
    distance_precision, time_precision :
        Bucket sizes passed to the bucketizer (also embedded in the output
        file name).
    confidence : int, default 2
        Number of standard deviations above the mean bucket count a bucket
        must reach to be kept.
    **kwargs :
        ``pool_size`` (optional): worker count for loading and labeling.
        ``file_regex`` (optional): compiled regex; only matching files load.
        ``filename`` (optional): overrides the generated output base name.

    Returns
    -------
    str
        ``'data/results/' + fname`` (base path of the written artifacts:
        a ``.csv``, a ``-totalizer.json`` and a ``.html`` file).
    """
    # load data -- in parallel when pool_size > 1, otherwise sequentially
    result = []
    counts = []
    if 'pool_size' in kwargs.keys() and int(kwargs['pool_size']) > 1:
        pool = mp.Pool(int(kwargs['pool_size']))
        # Manager-backed list so worker processes can append to `counts`
        counts = mp.Manager().list()
        result = pool.map(
            load_matches_csv,
            [(path, file, counts)
             for file in os.listdir(path) if 'file_regex' not in kwargs.keys()
             or kwargs['file_regex'].match(file)])
        pool.close()
        pool.join()
    else:
        # normalize pool_size so the labeling pool below gets 1 worker
        kwargs['pool_size'] = 1
        for file in os.listdir(path):
            if 'file_regex' not in kwargs.keys() or kwargs['file_regex'].match(
                    file):
                result.append(load_matches_csv((path, file, counts)))

    # merge per-file frames and keep only this layer's columns
    frame = pd.concat(list(result))
    frame.reset_index(inplace=True)
    frame = frame[[
        layer + '_lat', layer + '_lon', layer + '_timestamp', 'score_spatial',
        'score_temporal'
    ]]
    frame.columns = [
        'lat', 'lon', 'timestamp', 'score_spatial', 'score_temporal'
    ]

    # assign each row to a (lat_bucket, lon_bucket) cell
    frame = Bucketizer.bucketize_dataframe(frame, origin, distance_precision,
                                           time_precision)

    # per-bucket aggregation: 'timestamp' carries the row count, the rest
    # are averaged
    frame = frame.groupby(by=['lat_bucket', 'lon_bucket']).agg({
        'timestamp':
        'count',
        'lat':
        'mean',
        'lon':
        'mean',
        'score_spatial':
        'mean',
        'score_temporal':
        'mean',
    })

    frame = frame.rename(columns={'timestamp': 'count'})

    # keep only buckets whose count exceeds mean + confidence * std
    frame['mark'] = frame['count'] > (frame['count'].mean() +
                                      confidence * frame['count'].std())
    frame = frame[frame['mark']]
    # label 0 == "unlabeled"; workers overwrite it below
    frame['label'] = 0

    # shared lock + Manager namespace let every worker read/write the same
    # frame during connected-component labeling
    # NOTE(review): `connected_compenents_labeling` (sic) is defined
    # elsewhere in the project under that misspelled name.
    lock = Lock()
    p = Pool(kwargs['pool_size'], initargs=(lock, ), initializer=init_child)
    mgr = Manager()
    ns = mgr.Namespace()

    ns.frame = frame

    # one task per worker; each presumably labels a share of the frame --
    # TODO confirm against connected_compenents_labeling
    args = [ns] * kwargs['pool_size']
    p.map_async(connected_compenents_labeling, args)

    p.close()
    p.join()

    # output base name encodes the run parameters unless overridden
    fname = 'regions-fuzzymatcher-l%s-tp%d-dp%d-c%d' % (
        layer, time_precision, distance_precision, confidence)
    if 'filename' in kwargs.keys():
        fname = kwargs['filename']

    # pull the labeled frame back out of the manager namespace
    frame = ns.frame.copy()
    frame.to_csv('data/results/%s.csv' % fname)
    # drop unlabeled buckets and group by component label
    frame = frame[frame['label'] != 0]
    frame = frame.groupby(by='label')

    # get metadata about clusters: per-label mean scores and total count,
    # serialized as a JSON array
    totalizer = frame[['score_spatial', 'score_temporal']].mean()
    totalizer['count'] = frame['count'].sum()
    totalizer = '{"score_spatial": ' + totalizer['score_spatial'].map(
        str) + ', "score_temporal": ' + totalizer['score_temporal'].map(
            str) + ', "count": ' + totalizer['count'].map(str) + '}'
    totalizer = totalizer.str.cat(sep=',')

    with open('data/results/%s-totalizer.json' % fname, 'w+') as file:
        file.write('[' + totalizer + ']')

    # draw regions: one region per labeled component
    result = []
    if 'pool_size' in kwargs.keys() and int(kwargs['pool_size']) > 1:
        pool = mp.Pool(int(kwargs['pool_size']))
        result = pool.map(Distribution.get_region,
                          [gdf for (name, gdf) in frame])
        pool.close()
        pool.join()
    else:
        for (name, gdf) in frame:
            result.append(Distribution.get_region(gdf))

    # create json for ploting on Google Maps
    print('INFO: creating plot object')
    regions = ''
    for region in result:
        df = '{"lat": ' + region['lat'].map(
            str) + ', "lng": ' + region['lon'].map(str) + '}'
        json = '[' + df.str.cat(sep=',') + ']'
        regions = regions + json + ','

    # create HTML file with plot and finish
    with open('templates/google-shape.html', 'r') as file:
        template = file.read()

    template = template.replace('<?=LIST?>',
                                regions).replace('<?=KEY?>', key).replace(
                                    '<?=DATA?>', totalizer)

    results_dir = 'data/results/'
    with open(results_dir + fname + '.html', 'w+') as file:
        file.write(template)

    return results_dir + fname