import os
import re
import multiprocessing as mp

import pandas as pd
import uuid as IdGenerator  # assumed alias: the code below calls IdGenerator.uuid4()

# clusterer, Distribution, Bucketizer, load_matches_csv, init_child and
# connected_compenents_labeling are project-local helpers; their import
# paths are assumed to be defined elsewhere in this package.


def get_zones(key, path, layer, epss, epst, **kwargs):
    results_dir = 'data/results/'
    cluster_hash = clusterer(path, layer, epss, epst, **kwargs)

    # draw regions: load every cluster file produced for this hash and turn
    # each one into a region
    result = []
    cluster_dir = 'data/fuzzy-matches/clusters/'
    # raw string with an escaped dot; the original non-raw '^(\d+)%s.csv$'
    # left '.' matching any character
    regex = re.compile(r'^(\d+)%s\.csv$' % cluster_hash)
    if 'pool_size' in kwargs and int(kwargs['pool_size']) > 1:
        pool = mp.Pool(int(kwargs['pool_size']))
        # pass paths straight to read_csv: the old open(..., 'rU') leaked
        # file handles, and the 'rU' mode was removed in Python 3.11
        result = pool.map(Distribution.get_region, [
            pd.read_csv(os.path.join(cluster_dir, filename))
            for filename in os.listdir(cluster_dir) if regex.match(filename)
        ])
        pool.close()
        pool.join()
    else:
        for filename in os.listdir(cluster_dir):
            if regex.match(filename):
                result.append(
                    Distribution.get_region(
                        pd.read_csv(os.path.join(cluster_dir, filename))))

    # create JSON for plotting on Google Maps
    print('INFO: creating plot object')
    regions = ''
    for region in result:
        df = ('{lat: ' + region['lat'].map(str) + ', lng: ' +
              region['lon'].map(str) + '}')
        region_json = '[' + df.str.cat(sep=',') + ']'  # avoid shadowing json
        regions = regions + region_json + ','

    # create HTML file with plot and finish
    with open('templates/google-shape.html', 'r') as file:
        template = file.read()
    with open('%stotalizer%s.json' % (cluster_dir, cluster_hash)) as file:
        totalizer = file.read()
    template = template.replace('<?=LIST?>', regions).replace(
        '<?=KEY?>', key).replace('<?=DATA?>', totalizer)
    if 'filename' in kwargs:
        filename = kwargs['filename']
    else:
        filename = 'regions-fuzzymatcher-' + IdGenerator.uuid4().hex + '.html'
    with open(results_dir + filename, 'w+') as file:
        file.write(template)
    print(results_dir + filename)
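# Example invocation of get_zones(). The key, path, layer and epsilon values
# below are hypothetical placeholders, not values shipped with this project;
# pool_size > 1 selects the multiprocessing branch and filename overrides the
# generated UUID-based output name:
#
#   get_zones('YOUR_GOOGLE_MAPS_API_KEY', 'data/fuzzy-matches/', 'twitter',
#             epss=0.001, epst=3600, pool_size=4,
#             filename='zones-example.html')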
def get_region(df, columns):
    # work on a copy so the caller's frame is not mutated by the rename
    df = df[columns].copy()
    df.columns = ['lat', 'lon']
    df = Distribution.get_region(df)
    # serialise each row as a {"lat", "lng", "teta"} object ('teta' kept as
    # named in the source data) and join them into a JSON array
    df = ('{"lat": ' + df['lat'].map(str) + ', "lng": ' + df['lon'].map(str) +
          ', "teta": ' + df['teta'].map(str) + '}')
    return '[' + df.str.cat(sep=',') + ']'
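# A minimal, self-contained sketch of the row-serialisation idiom used by
# get_region() above: broadcasting string concatenation over Series turns each
# row into one object literal, and .str.cat() joins them into an array. The
# function name and sample coordinates here are illustrative only.
def _demo_row_serialisation():
    demo = pd.DataFrame({'lat': [10.1, 10.2], 'lon': [-3.4, -3.5]})
    objs = ('{"lat": ' + demo['lat'].map(str) + ', "lng": ' +
            demo['lon'].map(str) + '}')
    # -> '[{"lat": 10.1, "lng": -3.4},{"lat": 10.2, "lng": -3.5}]'
    return '[' + objs.str.cat(sep=',') + ']'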
def quantify_connected_components(key, path, layer, origin,
                                  distance_precision, time_precision,
                                  confidence=2, **kwargs):
    # load data; file_regex, when given, must be a pre-compiled pattern,
    # since .match() is called on it directly
    result = []
    counts = []
    if 'pool_size' in kwargs and int(kwargs['pool_size']) > 1:
        pool = mp.Pool(int(kwargs['pool_size']))
        counts = mp.Manager().list()
        result = pool.map(load_matches_csv, [
            (path, file, counts) for file in os.listdir(path)
            if 'file_regex' not in kwargs or kwargs['file_regex'].match(file)
        ])
        pool.close()
        pool.join()
    else:
        kwargs['pool_size'] = 1
        for file in os.listdir(path):
            if 'file_regex' not in kwargs or kwargs['file_regex'].match(file):
                result.append(load_matches_csv((path, file, counts)))
    frame = pd.concat(list(result))
    frame.reset_index(inplace=True)
    frame = frame[[
        layer + '_lat', layer + '_lon', layer + '_timestamp',
        'score_spatial', 'score_temporal'
    ]]
    frame.columns = [
        'lat', 'lon', 'timestamp', 'score_spatial', 'score_temporal'
    ]

    # bucketize, then keep only buckets whose match count is more than
    # `confidence` standard deviations above the mean
    frame = Bucketizer.bucketize_dataframe(frame, origin, distance_precision,
                                           time_precision)
    frame = frame.groupby(by=['lat_bucket', 'lon_bucket']).agg({
        'timestamp': 'count',
        'lat': 'mean',
        'lon': 'mean',
        'score_spatial': 'mean',
        'score_temporal': 'mean',
    })
    frame = frame.rename(columns={'timestamp': 'count'})
    frame['mark'] = frame['count'] > (
        frame['count'].mean() + confidence * frame['count'].std())
    frame = frame[frame['mark']].copy()
    frame['label'] = 0

    # label connected components in parallel: the frame is shared through a
    # Manager namespace and workers synchronise on a lock passed via the
    # pool initializer; close()/join() wait for the labeling to finish
    pool_size = int(kwargs['pool_size'])
    lock = mp.Lock()
    p = mp.Pool(pool_size, initargs=(lock, ), initializer=init_child)
    mgr = mp.Manager()
    ns = mgr.Namespace()
    ns.frame = frame
    args = [ns] * pool_size
    p.map_async(connected_compenents_labeling, args)
    p.close()
    p.join()

    fname = 'regions-fuzzymatcher-l%s-tp%d-dp%d-c%d' % (
        layer, time_precision, distance_precision, confidence)
    if 'filename' in kwargs:
        fname = kwargs['filename']
    frame = ns.frame.copy()
    frame.to_csv('data/results/%s.csv' % fname)
    frame = frame[frame['label'] != 0]
    frame = frame.groupby(by='label')

    # get metadata about clusters
    totalizer = frame[['score_spatial', 'score_temporal']].mean()
    totalizer['count'] = frame['count'].sum()
    totalizer = ('{"score_spatial": ' + totalizer['score_spatial'].map(str) +
                 ', "score_temporal": ' +
                 totalizer['score_temporal'].map(str) + ', "count": ' +
                 totalizer['count'].map(str) + '}')
    totalizer = totalizer.str.cat(sep=',')
    with open('data/results/%s-totalizer.json' % fname, 'w+') as file:
        file.write('[' + totalizer + ']')

    # draw regions, one per connected-component label
    result = []
    if pool_size > 1:
        pool = mp.Pool(pool_size)
        result = pool.map(Distribution.get_region,
                          [gdf for (name, gdf) in frame])
        pool.close()
        pool.join()
    else:
        for (name, gdf) in frame:
            result.append(Distribution.get_region(gdf))

    # create JSON for plotting on Google Maps
    print('INFO: creating plot object')
    regions = ''
    for region in result:
        df = ('{"lat": ' + region['lat'].map(str) + ', "lng": ' +
              region['lon'].map(str) + '}')
        region_json = '[' + df.str.cat(sep=',') + ']'
        regions = regions + region_json + ','

    # create HTML file with plot and finish
    with open('templates/google-shape.html', 'r') as file:
        template = file.read()
    template = template.replace('<?=LIST?>', regions).replace(
        '<?=KEY?>', key).replace('<?=DATA?>', totalizer)
    results_dir = 'data/results/'
    with open(results_dir + fname + '.html', 'w+') as file:
        file.write(template)
    return results_dir + fname
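# Example invocation of quantify_connected_components(). All values are
# hypothetical placeholders. file_regex must be a pre-compiled pattern because
# the function calls .match() on it directly; origin is assumed to be the
# reference point expected by Bucketizer.bucketize_dataframe:
#
#   quantify_connected_components(
#       'YOUR_GOOGLE_MAPS_API_KEY', 'data/fuzzy-matches/', 'twitter',
#       origin=(0.0, 0.0), distance_precision=100, time_precision=3600,
#       confidence=2, pool_size=4,
#       file_regex=re.compile(r'^matches-.*\.csv$'))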