import json

from matplotlib.path import Path  # inferred from the Path(coors) / contains_point usage
from pygeohash import decode      # assumed source of the bare decode() calls

# wgs_gcj (WGS-84 -> GCJ-02 conversion) and guangzhou_geohash5 (the list of
# 5-character geohashes covering Guangzhou) are project helpers defined elsewhere.


def get_gZone():
    err = []
    ghash_to_write = [[] for _ in range(11)]
    ghash_str = [[] for _ in range(11)]
    names = []
    gZ = {}
    gZone = []
    with open('../data/g_boundery.txt', 'r') as f:
        gG = json.load(f)
    for z in gG['features']:
        coors = z['geometry']['coordinates'][0][0]
        names.append(z['properties']['name'])
        gZone.append(Path(coors))
    for ghash in guangzhou_geohash5:
        # the geohash centre as (lon, lat), matching the boundary coordinates
        pos = decode(ghash)[::-1]
        # generate four probe points around the centre
        delta = 0.01
        probes = (
            (pos[0] + delta, pos[1]),
            (pos[0] - delta, pos[1]),
            (pos[0], pos[1] - delta),
            (pos[0], pos[1] + delta),
        )
        # count how many probe points fall inside each district polygon
        count = []
        for z in gZone:
            num = 0
            for p in probes:
                if z.contains_point(p):
                    num += 1
            count.append(num)
        max_count = max(count)
        if max_count:
            # assign the geohash to the district containing the most probe points
            ind = count.index(max_count)
            if names[ind] in gZ:
                gZ[names[ind]].append(ghash)
            else:
                gZ[names[ind]] = [ghash]
            ghash_to_write[ind].append(wgs_gcj(*decode(ghash)[::-1]))
            ghash_str[ind].append(ghash)
        else:
            err.append(wgs_gcj(*decode(ghash)[::-1]))
    # for i in range(11):
    #     with open(f'gZones{i}.json', 'w') as f:
    #         json.dump(ghash_to_write[i], f)
    #     with open(f'geoHase{i}.json', 'w') as f:
    #         json.dump(ghash_str[i], f)
    # lst = []
    # for i in ghash_to_write:
    #     lst += i
    # with open('gZones_all.json', 'w') as f:
    #     json.dump(lst, f)
    # with open('gZones_err.json', 'w') as f:
    #     json.dump(err, f)
    return gZ, gG
def __init__(self, input_data, fdid, state):
    # Assigning the parameters in the input data to the class
    for key in input_data.keys():
        setattr(self, key, input_data[key])
    self.fdid = fdid
    self.state = state
    # Getting long/lat coordinates out of the incident distribution
    self.incident_coords = np.zeros((len(self.incident_distribution), 2))
    for i, location in enumerate(self.incident_distribution):
        self.incident_coords[i, :] = np.flip(pgh.decode(location))
    # Getting jurisdictional boundary
    self.get_boundary()
    # Counting number of units currently available
    self.count_available()
    # Getting coverage polygons of currently available units
    self.get_unit_coverage_paths()
    # Getting station coverage polygons
    self.get_station_coverage_paths()
    # Assigning each incident to coverage polygons
    self.build_sets()
    # Finding the ideal set of stations
    self.max_coverage()
    # Calculating the optimal movement strategy
    self.balanced_assignment()
    # Generating json output
    self.output_recommendations()
def Get_LatLon(df):
    # pandas removed DataFrame.set_value; .at does the same scalar assignment
    for i in range(len(df)):
        lat, lon = pgh.decode(df.iloc[i]['geohash'])
        df.at[i, 'Latitude'] = lat
        df.at[i, 'Longitude'] = lon
    return df
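# A minimal vectorized alternative to Get_LatLon above -- a sketch assuming the
# same pygeohash import and a 'geohash' column; the helper name is hypothetical.
# It decodes each geohash once and unpacks the (lat, lon) tuples into columns.
import pygeohash as pgh

def get_lat_lon_vectorized(df):
    decoded = df['geohash'].apply(pgh.decode)  # Series of (lat, lon) tuples
    df['Latitude'] = decoded.str[0]
    df['Longitude'] = decoded.str[1]
    return df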
def get_lonlat_zt(total):
    new_total = []  # all of the multi-level geohash codes
    for bit in total:
        # keep only the first n characters of each code
        new_total.append(bit[:5])  # n = para - 1 => (para-1)-level partition
    new_total = list(set(new_total))  # de-duplicate the codes
    new_total_lonlat = []
    for block in new_total:
        lat_zt, lon_zt = pgh.decode(block)
        new_total_lonlat.append([lon_zt, lat_zt])
    return new_total_lonlat
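# A quick usage sketch for get_lonlat_zt; the 8-character codes below are
# made up for illustration.
import pygeohash as pgh

hashes = ['ws0e6k3f', 'ws0e6k9p', 'ws10b4xz']
centers = get_lonlat_zt(hashes)
print(centers)  # two [lon, lat] centres: the first two codes share a 5-char prefix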
def geohashes_to_polygon(array_of_hashes):
    # WKT expects "x y" pairs, i.e. longitude before latitude
    polygon = 'POLYGON (('
    for geohash in array_of_hashes:
        lat, lon = pgh.decode(geohash)
        polygon += f'{lon} {lat}, '
    polygon = polygon[:-2]
    polygon += '))'
    return polygon
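# Usage sketch: decode each hash and join the centres into a WKT ring. A valid
# WKT ring must be closed, so the first geohash is repeated at the end here.
import pygeohash as pgh

wkt = geohashes_to_polygon(['ezs42', 'ezs48', 'ezs49', 'ezs42'])
print(wkt)  # POLYGON ((-5.6 42.6, ..., -5.6 42.6))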
def initialize_military_view(self):
    # Getting all county geohashes
    self.client.switch_database('covid19')
    get_geohashes = "SHOW TAG VALUES with KEY=geohash"
    results = self.client.query(get_geohashes).get_points()
    geohashes = np.array([result['value'] for result in results])

    # Getting all base geohashes
    self.client.switch_database('bases')
    results = self.client.query(get_geohashes).get_points()
    bases_geohashes = np.array([result['value'] for result in results])

    # Mapping Name --> Geohash
    query = "SELECT * FROM bases"
    results = self.client.query(query).get_points()
    geohash_to_location = {}
    for r in results:
        geohash_to_location[r['geohash']] = r['location']

    # Calculating distances between all bases and counties
    rows, cols = len(bases_geohashes), len(geohashes)
    distance = [[0 for i in range(cols)] for j in range(rows)]
    for i in range(rows):
        x = pgh.decode(bases_geohashes[i])
        for j in range(cols):
            y = pgh.decode(geohashes[j])
            distance[i][j] = haversine(x, y, unit=Unit.MILES)
    distance = np.array(distance)

    # Creating a dictionary {base_name: [geohash_county_1, geohash_county_2, ...]}
    # of every county within 50 miles of each base
    geohash_map = {}
    for i in range(rows):
        indices = np.argwhere(distance[i] < 50).flatten()
        geohash_map[geohash_to_location[bases_geohashes[i]]] = geohashes[indices]
    return geohash_map
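# For reference: the haversine package used above takes two (lat, lon) tuples,
# which matches the order pgh.decode returns. A round-trip sketch with made-up
# round-trips through 12-character geohashes:
import pygeohash as pgh
from haversine import haversine, Unit

lyon = pgh.decode(pgh.encode(45.7597, 4.8422))
paris = pgh.decode(pgh.encode(48.8567, 2.3508))
print(haversine(lyon, paris, unit=Unit.MILES))  # roughly 243 miles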
def getNameFromGeohash(geo_area):
    lat, lon = pygeohash.decode(geo_area)
    name_results = reverseGeocode(float(lat), float(lon))
    if name_results['count'] > 0:
        first_result = name_results['results'][0]
        # prefer the city name, falling back to the generic place name
        place_name = first_result['city'] or first_result['name']
        country = first_result['country']
        return [place_name, country]
def get_yg(self):
    sql = 'SELECT id, lon, lat FROM stations ;'
    self.cursor.execute(sql)
    result = self.cursor.fetchall()
    for row in result:
        # convert the stored coordinates, then write the geohash back for
        # this station (parameterized query instead of string formatting,
        # which pymysql supports and which avoids SQL injection)
        lat, lon = conversion.get_lonlat(row['lat'], row['lon'])
        geocode = encode(lat, lon)
        self.cursor.execute(
            'UPDATE stations SET geohash = %s WHERE id = %s ;',
            (geocode, row['id']),
        )
    self.db.commit()
def calculate_match_range(geohash, radius=10):
    """Calculates lower and upper geohash boundaries for a given geohash
    and a radius in kilometers."""
    # Decode geohash
    latitude, longitude = pygeohash.decode(geohash)
    # Calculate lower boundaries
    lower_latitude = latitude - DEG_LATITUDE_PER_KM * radius
    lower_longitude = longitude - DEG_LONGITUDE_PER_KM * radius
    # Calculate upper boundaries
    upper_latitude = latitude + DEG_LATITUDE_PER_KM * radius
    upper_longitude = longitude + DEG_LONGITUDE_PER_KM * radius
    # Encode boundaries
    lower = pygeohash.encode(lower_latitude, lower_longitude)
    upper = pygeohash.encode(upper_latitude, upper_longitude)
    return lower, upper
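# Usage sketch for calculate_match_range. The two degree-per-km constants are
# defined outside the snippet, so the values below are assumptions (a degree
# of latitude is about 111 km; longitude shrinks toward the poles).
import pygeohash

DEG_LATITUDE_PER_KM = 1 / 111.0   # assumed
DEG_LONGITUDE_PER_KM = 1 / 85.0   # assumed, rough mid-latitude value

lower, upper = calculate_match_range(pygeohash.encode(42.6, -5.6), radius=10)
print(lower, upper)  # geohashes of the SW and NE corners of the ~10 km box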
def div_board(lat_list, lon_list):
    # get the extremes
    lat_max = np.max(lat_list)
    lat_min = np.min(lat_list)
    lon_max = np.max(lon_list)
    lon_min = np.min(lon_list)
    # enumerate the grid with step size `accu`
    accu = 0.1
    board_lat = []
    board_lon = []
    board_idx = []  # geohash codes of every grid cell
    for lat_i in np.arange(lat_min, lat_max, accu):
        for lon_i in np.arange(lon_min, lon_max, accu):
            c = pgh.encode(lat_i, lon_i)
            a, b = pgh.decode(c)
            board_lat.append(a)
            board_lon.append(b)
            board_idx.append(c)
    return board_lon, board_lat, board_idx
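# Usage sketch for div_board with a hypothetical bounding box.
import numpy as np
import pygeohash as pgh

lats = [23.0, 23.1, 23.3]     # made-up latitudes
lons = [113.1, 113.2, 113.5]  # made-up longitudes
board_lon, board_lat, board_idx = div_board(lats, lons)
print(len(board_idx))  # about 3 x 4 = 12 cells at the 0.1-degree step
print(board_idx[0])    # geohash of the south-west cell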
def preprocess_data(Data_dir, processed_file):
    if not os.path.exists(Data_dir):
        raise Exception("Data folder does not exist")
    all_data = pd.read_csv(os.getcwd() + '/Data/feed_data.tsv')
    m, _ = all_data.shape
    text_data = all_data[all_data.columns.to_list()[-1]].fillna(0)
    location = all_data["posted_location"].fillna(0)
    text_len = np.zeros(shape=(m, 1), dtype=float)
    hash_tags = np.zeros(shape=(m, 1), dtype=float)
    lat = np.zeros(shape=(m, 1), dtype=float)
    lng = np.zeros(shape=(m, 1), dtype=float)
    for i, x in enumerate(text_data):
        text_len[i] = len(str(x))
        hash_tags[i] = str(x).count('#')
        y = pg.decode(str(location.values[i]))
        lat[i] = y[0]
        lng[i] = y[1]
    cols = ['feed_id', 'uid', 'ptid', 'likes', 'comments', 'post_age',
            'textlength', 'hashtags', 'latitude', 'longitude', 'urls']
    pro_data = pd.DataFrame(columns=cols)
    pro_data["uid"] = all_data['postUserId']
    pro_data["ptid"] = all_data['feedObjectId']
    pro_data["feed_id"] = all_data['feed_id']
    pro_data["likes"] = all_data['likes']
    pro_data["comments"] = all_data['comments']
    pro_data["post_age"] = all_data[all_data.columns[7]]
    pro_data["textlength"] = text_len
    pro_data["hashtags"] = hash_tags
    pro_data["latitude"] = lat
    pro_data["longitude"] = lng
    pro_data["urls"] = all_data['numberOfMediaUrls']
    pro_data.to_csv(Data_dir + "{0}".format(processed_file), index=False)
    logging.info("Data processed! File {0} saved in {1}".format(
        processed_file, Data_dir))
def fetch_busiest_regions():
    items = es.search(index=INDEX_NAME, doc_type='call', body={
        "aggregations": {
            "towns": {
                "geohash_grid": {
                    "field": "location",
                    "precision": 5
                }
            }
        }
    })['aggregations']['towns']['buckets']
    results = defaultdict(int)
    # geopy requires an explicit user agent; the string here is arbitrary
    geolocator = Nominatim(user_agent='busiest-regions-script')
    for item in items:
        # each bucket key is a geohash; reverse-geocode its centre point
        lat, lon = pygeohash.decode(item['key'])
        location = geolocator.reverse('{}, {}'.format(lat, lon))
        results[location.address] += item['doc_count']
    return results
def get_station_coverage_paths(self):
    """
    Using the station location geohashes, creates a dictionary of
    "coverage polygons" for the stations. For each station, the coverage
    polygon is the set of locations within a self.covered_time travel
    time from the station. Also computes the centroid of the stations
    as a reference point.

    Attributes
    ----------
    self.station_coverage_paths: dict
        Dictionary of shapely polygons describing the coverage polygons
        for each station
    self.station_locs: list
        List of long/lat coordinates for each station
    self.station_list: list
        List of station ids
    """
    self.station_coverage_paths = {}
    self.station_coverage_polys = {}
    self.station_list = []  # A list of all station ids
    self.station_locs = []  # A list of all station locations
    for i, status in enumerate(tqdm(self.station_status)):
        lat, long = pgh.decode(status['location'])
        self.station_locs.append([long, lat])
        self.station_list.append(status['station_id'])
    polygons = self.drivetime_poly(self.station_locs, self.covered_time)
    for i, station in enumerate(self.station_list):
        self.station_coverage_paths[station] = polygons[i]
        self.station_coverage_polys[station] = Polygon(
            polygons[i].to_polygons()[0]).buffer(0)
    # Computing the centroid of all stations so we can convert distances to miles
    self.station_centroid = [
        np.mean(np.array(self.station_locs)[:, 0]),
        np.mean(np.array(self.station_locs)[:, 1]),
    ]
def get_unit_coverage_paths(self):
    """
    Using the locations of currently available units, get the
    corresponding coverage polygons and compute the fraction of incidents
    that are within the coverage polygon of a unit.

    Attributes
    ----------
    self.unit_coverage_paths: dict
        Dictionary of shapely polygons describing the coverage polygons
        for each currently available unit
    self.unit_locs: list
        List of long/lat coordinates for each available unit
    self.unit_list: list
        List of available unit ids
    """
    self.unit_coverage_paths = {}
    self.unit_coverage_polys = {}
    self.unit_list = []  # A list of all available unit ids
    self.unit_locs = []  # A list of all current locations of available units
    self.current_unionized_poly = Polygon()  # Unionized coverage polygon
    for i, status in enumerate(tqdm(self.unit_status)):
        if status['status'] == 'AVAILABLE':
            lat, long = pgh.decode(status['current_location'])
            self.unit_locs.append([long, lat])
            self.unit_list.append(status['unit_id'])
    polygons = self.drivetime_poly(self.unit_locs, self.covered_time)
    for i, unit in enumerate(self.unit_list):
        self.unit_coverage_paths[unit] = polygons[i]
        self.unit_coverage_polys[unit] = Polygon(
            polygons[i].to_polygons()[0]).buffer(0)
    self.current_unionized_poly = cascaded_union(
        self.unit_coverage_polys.values())
I_files = 'C:/Users/js0059/OneDrive - Coca-Cola Bottlers Japan/_MarkLogic/_Memos/VM_Tableau/Csv/xn76us_t_ccaa_dm_foot_traffic_par_p_201911191332_test.csv'
O_files = 'C:/Users/js0059/OneDrive - Coca-Cola Bottlers Japan/_MarkLogic/_Memos/VM_Tableau/Csv/Gro_transfor.csv'

f = open(I_files, 'r')
reader = csv.reader(f)
header = next(reader)  # skip the header row

fo = open(O_files, 'w')
writer = csv.writer(fo, lineterminator='\n')
for row in reader:
    # decode the geohash in the first column once and append lat/lon
    lat, lon = pgh.decode(row[0])
    row.append(lat)
    row.append(lon)
    writer.writerow(row)

f.close()
fo.close()
# Thanks to Jason Tanuwijaya for his input and suggestions, and in particular
# for his help debugging Python version problems in this script.
import pandas as pd
import pygeohash as pgh

x = pd.read_csv("training.csv")
# decode each geohash6 value into a (lat, lon) tuple
x['location'] = x.geohash6.apply(lambda gh: pgh.decode(gh))
x.to_csv('training_translated_gh.csv', index=False)
start_loc_geohash = []
end_loc_geohash = []
# read the start_loc column
for row_of_cell in start_loc_range:
    for cell in row_of_cell:
        start_loc_geohash.append(cell.value)
# read the end_loc column
for row_of_cell in end_loc_range:
    for cell in row_of_cell:
        end_loc_geohash.append(cell.value)

# step 2: convert the geohashes to coordinates
start_loc_coord = []
end_loc_coord = []
for gh in start_loc_geohash:
    start_loc_coord.append(pgh.decode(gh))
for gh in end_loc_geohash:
    end_loc_coord.append(pgh.decode(gh))

# step 3: draw the scatter plot
# figure size, title and axis labels:
plt.figure(figsize=(10, 6))
plt.title("Scatter Graph of Coordinates", fontsize=24)
plt.xlabel("Longitude", fontsize=14)
plt.ylabel("Latitude", fontsize=14)
# tick mark styling
plt.tick_params(axis='both', which='major', labelsize=14)
# plot the points; the parameter s sets the point size
for i in range(len(start_loc_coord)):
"""
@author: sbattersby """ import pygeohash as pgh import csv # open csv with geohash data r_file = open('d:\\Python\Tableau\geohash to lat lon\latlongInput.csv', mode = 'r') r_reader = csv.reader(r_file) # open output csv to hold geohash and lat/lon output geo_file = open('d:\\Python\Tableau\geohash to lat lon\geohashOutput.csv', mode = 'w', newline = '') geo_writer = csv.writer(geo_file, delimiter = ',', quotechar = '"') geo_writer.writerow(['geohash', 'long', 'lat']) # translate the geohash to lat/lon and write to new file row_count = 0 for row in r_reader: if row_count == 0: # header pass else: gh = row[2] # because the geohash coordinates were in the third column in my csv latlon = pgh.decode(row[2]) geo_writer.writerow([gh, latlon[1], latlon[0]]) row_count += 1 r_file.close() geo_file.close()
def test_decode(self):
    self.assertEqual(pgh.decode('ezs42'), (42.6, -5.6))
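# Note: pgh.decode rounds to a precision implied by the hash length, which is
# why the five-character 'ezs42' round-trips to exactly (42.6, -5.6). A sketch
# using pygeohash's decode_exactly, which also reports the cell's +/- error:
import pygeohash as pgh

lat, lon, lat_err, lon_err = pgh.decode_exactly('ezs42')
print(lat, lon)          # approximately 42.605, -5.603 (cell centre)
print(lat_err, lon_err)  # approximately 0.022, 0.022 (cell half-widths)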
# Output of Train.geoDist.value_counts() below -- the approximate geohash
# distance and the number of rows in each bucket:
#   625441.00   1674679
#   123264.00    357034
#      610.00    257686
#        0.60     60469
#      118.00     26897
#       19.00     14425
#        3.71      8596

pgh.encode(42.6, -5.6)
# >>> 'ezs42e44yx96'
pgh.encode(42.6, -5.6, precision=5)
# >>> 'ezs42'
pgh.decode('ezs42')
# >>> (42.6, -5.6)
pgh.geohash_approximate_distance('shi3u', 'sh83n')
# >>> 625441

# Train.to_csv(r'C:\Users\ds1\Downloads\HomeWork_Lyft\train_clean.csv', index=False)
Train = pd.read_csv(r'C:\Users\ds1\Downloads\HomeWork_Lyft\train_clean.csv')
# Test = pd.read_csv(r'C:\Users\ds1\Downloads\HomeWork_Lyft\test_clean.csv')
Train.head()
Train.geoDist.value_counts()
import csv
import pygeohash as pgh
import pandas as pd

# load the data
filename = 'test.csv'
df = pd.read_csv(filename)
geohash_start_loc = []
geohash_end_loc = []
for i in range(len(df.index)):
    geohash_start_loc.append(df.iloc[i][5])
    geohash_end_loc.append(df.iloc[i][6])
print("Loading complete.")

# convert to coordinates
coord_start_loc = []
coord_end_loc = []
for i in range(len(geohash_start_loc)):
    coord_start_loc.append(pgh.decode(geohash_start_loc[i]))
    coord_end_loc.append(pgh.decode(geohash_end_loc[i]))
print("Conversion complete.")

# overwrite the geohash columns with the coordinates
# (DataFrame.set_value was removed from pandas; .at does the same job)
for i in range(len(coord_start_loc)):
    df.at[i, 'geohashed_start_loc'] = str(coord_start_loc[i])
    df.at[i, 'geohashed_end_loc'] = str(coord_end_loc[i])
df.to_csv(filename)
print("Rewrite complete.")
bases_geohashes = [result['value'] for result in results]
bases_geohashes = np.array(bases_geohashes)

# Mapping Name --> Geohash
query = "SELECT * FROM bases"
results = client.query(query).get_points()
geohash_to_location = {}
for r in results:
    geohash, location = r['geohash'], r['location']
    geohash_to_location[geohash] = location

# Calculating distances between all bases and counties.
rows, cols = len(bases_geohashes), len(geohashes)
distance = [[0 for i in range(cols)] for j in range(rows)]
for i in range(rows):
    x = pgh.decode(bases_geohashes[i])
    for j in range(cols):
        y = pgh.decode(geohashes[j])
        distance[i][j] = haversine(x, y, unit=Unit.MILES)
distance = np.array(distance)

# Creating a dictionary {base_name: [geohash_county_1, gh_county_2, ...]}
geohash_map = {}
for i in range(rows):
    indices = np.argwhere(distance[i] < 50).flatten()
    geohash_map[geohash_to_location[bases_geohashes[i]]] = geohashes[indices]

start_storing = time.time()
# only use data for the next 15 days
today_date = (datetime.today().replace(
def get_lonlat(geo):
    # get the rough cell centre; pygeohash's decode returns (lat, lon),
    # so unpack in that order before returning (lon, lat)
    lat, lon = decode(geo)
    return lon, lat
import pymysql
path = 'D:\\Studia\\inz\\imgw\\'
paths = []
with os.scandir('D:\\Studia\\inz\\imgw') as entries:
    for entry in entries:
        if 'B00608S' in entry.name:  # 10-minute precipitation total
            paths.append(path + entry.name)
paths = paths[::-1]

data = []
for path in paths:
    i = 0
    with open(path) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=';')
        for row in csv_reader:
            if row[0] in station_codes:
                coord = pgh.decode(station_hash[row[0]])
                # WKT POINT is "x y", i.e. longitude then latitude,
                # while pgh.decode returns (lat, lon)
                table = [
                    row[0],
                    station_codes[row[0]],
                    row[1],
                    row[2],
                    row[3],
                    station_hash[row[0]],
                    f"POINT({coord[1]} {coord[0]})",
                ]
                data.append(table)
                i += 1
    print(i)  # rows collected from this file

start = time.time()
# -*- coding:utf-8 -*-