Ejemplo n.º 1
0
def read_data():
    """Load the operation train/test CSVs and append derived feature columns.

    Both files are read from the module-level ``path`` prefix and stacked
    into one frame. The derived columns cover the geohash (two-character
    prefix, decoded latitude/longitude), the hour taken from ``time``, the
    first token of ``device2``, the major part of ``version``, the ``os``
    column cast to ``str``, three flags marking specific identifier values,
    and an ``hour_bin`` lookup through the module-level ``hour_bin``.

    :return: the concatenated frame with the derived columns added
    """

    def skip_missing(transform):
        """Wrap ``transform`` so NaN cells are passed through unchanged."""

        def wrapped(value):
            # NaN is the only value that does not compare equal to itself.
            if value == value:
                return transform(value)
            return value

        return wrapped

    def equals_flag(target):
        """Build a 0/1 indicator for cells equal to ``target``."""
        return lambda value: 1 if value == target else 0

    frames = [
        pd.read_csv(path + 'operation_train_new.csv'),
        pd.read_csv(path + 'operation_round1_new.csv'),
    ]
    ope_data = pd.concat(frames)

    # Geohash-derived columns.
    ope_data['ope_appro_geo_code'] = ope_data['geo_code'].apply(
        skip_missing(lambda code: code[0:2]))
    ope_data['ope_latitude'] = ope_data['geo_code'].apply(
        skip_missing(lambda code: geohash.decode_exactly(code)[0]))
    ope_data['ope_longitude'] = ope_data['geo_code'].apply(
        skip_missing(lambda code: geohash.decode_exactly(code)[1]))

    # The first two characters of ``time`` are parsed as the hour.
    ope_data['ope_hour'] = ope_data['time'].apply(
        lambda stamp: int(stamp[0:2]))
    ope_data['ope_device_main_kind'] = ope_data['device2'].apply(
        skip_missing(lambda device: device.split(' ')[0]))
    # Unlike the other columns, a missing version becomes -1 rather than NaN.
    ope_data['main_version'] = ope_data['version'].apply(
        lambda version: version.split('.')[0] if version == version else -1)

    ope_data['os'] = ope_data['os'].astype(str)

    # Indicator columns for specific hard-coded identifier values.
    ope_data['is_strange_mac1'] = ope_data['mac1'].apply(
        equals_flag('a8dc52f65085212e'))
    ope_data['is_strange_mac2'] = ope_data['mac2'].apply(
        equals_flag('a8dc52f65085212e'))
    ope_data['is_strange_device_code3'] = ope_data['device_code3'].apply(
        equals_flag('14c09cc8ce23d46c'))
    ope_data['hour_bin'] = ope_data['ope_hour'].apply(
        lambda hour: hour_bin[hour])

    return ope_data
Ejemplo n.º 2
0
def geohash_shape(shape, precision, mode='intersect', threshold=None):
    """
    Collect the geohashes of a given precision that cover a shape.

    The shape's bounding box is walked cell by cell, starting from the
    geohash of its south-west corner, and every cell is tested against the
    shape according to ``mode``.

    :param shape: shape to cover
    :type shape: BaseGeometry
    :param precision: geohash precision
    :type precision: int
    :param mode: 'intersect' - all geohashes intersecting the shape;
                               use the 'threshold' option to require a
                               minimum covered fraction of each cell
                 'inside' - all geohashes inside the shape
                 'center' - all geohashes whose center is inside the shape
                 Any other value yields an empty list.
    :type mode: str
    :param threshold: minimum ratio of intersected area to cell area
    :type threshold: float
    :return: list of geohashes
    :rtype: list
    """
    min_lon, min_lat, max_lon, max_lat = shape.bounds

    sw_hash = geohash.encode(min_lat, min_lon, precision)
    ne_hash = geohash.encode(max_lat, max_lon, precision)

    sw_cell = geohash.decode_exactly(sw_hash)
    ne_cell = geohash.decode_exactly(ne_hash)

    # decode_exactly reports half-sizes; double them for the full cell size.
    cell_height = sw_cell[2] * 2
    cell_width = sw_cell[3] * 2

    lat_steps = int(round((ne_cell[0] - sw_cell[0]) / cell_height))
    lon_steps = int(round((ne_cell[1] - sw_cell[1]) / cell_width))

    covering = []

    for lat_index in range(lat_steps + 1):
        for lon_index in range(lon_steps + 1):
            candidate = neighbor(sw_hash, [lat_index, lon_index])

            if mode == 'center':
                center_lat, center_lon = decode(candidate)
                if shape.contains(Point(center_lon, center_lat)):
                    covering.append(candidate)
                continue

            bounds = geohash.bbox(candidate)
            cell_geom = box(bounds['w'], bounds['s'], bounds['e'],
                            bounds['n'])

            if mode == 'inside':
                keep = shape.contains(cell_geom)
            elif mode == 'intersect':
                if not shape.intersects(cell_geom):
                    keep = False
                elif threshold is None:
                    keep = True
                else:
                    overlap = shape.intersection(cell_geom).area
                    keep = (overlap / cell_geom.area) >= threshold
            else:
                keep = False

            if keep:
                covering.append(candidate)

    return covering
Ejemplo n.º 3
0
def data_process():  # data preprocessing
    """Load the train/test CSVs and derive time and start-location features.

    Reads ``train_path`` and ``test_path`` (module-level names), stacks the
    two frames and adds: ``day``, ``hour``, ``minute``, ``minute_from``
    (minutes since midnight), ``work_day`` (1 for a fixed list of days of
    the month), ``lon_start`` / ``lat_start`` (decoded from
    ``geohashed_start_loc`` and rounded to 7 decimals) and ``period`` (an
    hour bucket).

    :return: the combined frame with the derived columns added
    """
    train = pd.read_csv(train_path)
    test = pd.read_csv(test_path)
    # Placeholder so both frames carry the column before stacking.
    test["geohashed_end_loc"] = np.nan
    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` with its
    # defaults keeps the original row labels exactly as ``append`` did.
    # The frame is also no longer named ``all``, which shadowed the builtin.
    combined = pd.concat([train, test])

    combined["starttime"] = pd.to_datetime(combined["starttime"])
    combined["day"] = combined["starttime"].dt.day
    combined["hour"] = combined["starttime"].dt.hour
    combined["minute"] = combined["starttime"].dt.minute
    combined["minute_from"] = combined["hour"] * 60 + combined["minute"]
    combined["work_day"] = combined["day"].apply(
        lambda x: 1 if x in [13, 14, 20, 21, 28, 29, 30] else 0)
    combined["lon_start"] = combined["geohashed_start_loc"].map(
        lambda x: round(geohash.decode_exactly(x)[1], 7))
    combined["lat_start"] = combined["geohashed_start_loc"].map(
        lambda x: round(geohash.decode_exactly(x)[0], 7))

    def split(x):
        """Map an hour of day to a period bucket in 0..5."""
        if x < 7: return 0
        if x < 10: return 1
        # NOTE(review): only hour 12 maps to bucket 2; hours 10, 11 and
        # 13-16 fall through to bucket 3 - confirm this is intended.
        if x == 12: return 2
        if x < 17: return 3
        if x < 20: return 4
        return 5

    combined["period"] = combined["hour"].apply(split)
    return combined
Ejemplo n.º 4
0
def read_data():
    """Load the operation train/test CSVs, run the popularity helper, decode.

    Each file is read from the module-level ``path`` prefix with ``device1``
    and ``geo_code`` forced to ``str``, and gets a two-character geohash
    prefix plus the hour parsed from ``time``. ``get_ope_popular_degree`` is
    then invoked on both frames once per entry of ``pop_col_names`` before
    the frames are stacked and the geohash is decoded to latitude/longitude.

    :return: the concatenated frame
    """
    pop_col_names = [
        'day', 'mode', 'ope_hour', 'version', 'device_code1', 'device_code2',
        'device_code3', 'mac1', 'ip1', 'ip2', 'mac2', 'wifi',
        'ope_appro_geo_code', 'ip1_sub', 'ip2_sub'
    ]
    string_columns = {'device1': str, 'geo_code': str}

    def load(csv_name):
        """Read one CSV and add the geohash-prefix and hour columns."""
        frame = pd.read_csv(path + csv_name, dtype=string_columns)
        # NaN (the only value unequal to itself) is passed through as-is.
        frame['ope_appro_geo_code'] = frame['geo_code'].apply(
            lambda code: code[0:2] if code == code else code)
        frame['ope_hour'] = frame['time'].apply(
            lambda stamp: int(stamp[0:2]))
        return frame

    train_data = load('operation_train_new.csv')
    test_data = load('operation_round1_new.csv')

    # NOTE(review): the column name is never handed to the helper, so every
    # iteration makes the same two calls - confirm against the helper.
    for _ in pop_col_names:
        get_ope_popular_degree(ope_data=train_data)
        get_ope_popular_degree(ope_data=test_data)

    ope_data = pd.concat([train_data, test_data])

    def decoded_part(position):
        """Return a cell mapper picking one element of the decoded geohash."""
        return lambda code: (geohash.decode_exactly(code)[position]
                             if code == code else code)

    ope_data['ope_latitude'] = ope_data['geo_code'].apply(decoded_part(0))
    ope_data['ope_longitude'] = ope_data['geo_code'].apply(decoded_part(1))

    return ope_data
Ejemplo n.º 5
0
def read_data():
    """Load the transaction train/test CSVs and derive geo/time features.

    Both files are read from the module-level ``path`` prefix with
    ``device1`` and ``geo_code`` forced to ``str``.
    ``get_trans_popular_degree`` is invoked on both frames once per entry of
    ``pop_col_names``; the frames are then stacked and the geohash is decoded
    to latitude/longitude, with the hour parsed from ``time``.

    Fix: the two-character geohash prefix used to be computed twice for the
    train frame and never for the test frame, so every test row ended up
    with NaN in ``trans_appro_geo_code`` after concatenation. It is now
    computed for both frames.

    :return: the concatenated frame
    """
    pop_col_names = ['day', 'merchant', 'code1', 'code2', 'acc_id1', 'device_code1', 'device_code2', 'device_code3',
                     'mac1', 'ip1', 'acc_id2', 'acc_id3', 'market_code', 'ip1_sub']

    def geo_prefix(frame):
        """Return the first two geohash characters per row (NaN kept as-is)."""
        return frame['geo_code'].apply(lambda x: x[0:2] if (x == x) else x)

    train_data = pd.read_csv(path + 'transaction_train_new.csv',
                             dtype={
                                 'device1': str,
                                 'geo_code': str
                             })
    train_data['trans_appro_geo_code'] = geo_prefix(train_data)

    test_data = pd.read_csv(path + 'transaction_round1_new.csv',
                            dtype={
                                'device1': str,
                                'geo_code': str
                            })
    # NOTE(review): ``col`` is never passed to the helper, so each iteration
    # repeats the same two calls - confirm against the helper's signature.
    for col in pop_col_names:
        get_trans_popular_degree(trans_data=train_data)
        get_trans_popular_degree(trans_data=test_data)

    # The train column is recomputed here exactly as before; the test column
    # is the one that was missing.
    train_data['trans_appro_geo_code'] = geo_prefix(train_data)
    test_data['trans_appro_geo_code'] = geo_prefix(test_data)

    trans_data = pd.concat([train_data, test_data])

    trans_data['trans_latitude'] = trans_data['geo_code'].apply(lambda x: geohash.decode_exactly(x)[0] if (x == x) else x)
    trans_data['trans_longitude'] = trans_data['geo_code'].apply(lambda x: geohash.decode_exactly(x)[1] if (x == x) else x)
    trans_data['trans_hour'] = trans_data['time'].apply(lambda x: int(x[0:2]))

    return trans_data
Ejemplo n.º 6
0
def geohash_decode(hashed_code_l):
    """Decode each geohash to a ``[first, second]`` pair rounded to 4 places.

    The pair holds the first two values returned by ``gh.decode_exactly``.

    :param hashed_code_l: iterable of geohash strings
    :return: list of two-element lists, one per input geohash
    """
    decoded = (gh.decode_exactly(code) for code in hashed_code_l)
    # Keep 4 decimal places.
    return [[round(point[0], 4), round(point[1], 4)] for point in decoded]
Ejemplo n.º 7
0
def encdec():
    """Print sample results of geohash encode/decode calls.

    Uses single-argument ``print(...)`` calls so the function is valid on
    Python 3 (the former ``print`` statements were a SyntaxError there) while
    producing the same output on Python 2.
    """
    print('geohash.encode ' + geohash.encode(12.963787, 77.637789))
    print('geohash.encode with precision-5: ' + geohash.encode(
        12.963787, 77.637789, precision=5))
    print('geohash.decode ' + str(geohash.decode('tdr1wxype953')))
    print('geohash.decode exactly' + str(
        geohash.decode_exactly('tdr1wxype953')))
Ejemplo n.º 8
0
def bbox_query(extent, tree, precision):
    """Return the geohashes stored in ``tree`` that intersect ``extent``.

    ``extent`` is indexed as ``[0]``/``[1]`` for the two x bounds and
    ``[2]``/``[3]`` for the two y bounds. The four corners are geohashed,
    their common prefix is used to query ``tree``, and a hit is kept when
    its centroid lies within the extent widened by half the centroid spacing
    on each side.

    :param extent: sequence of four bounds as described above
    :param tree: object exposing ``prefix_query(prefix)``
    :param precision: geohash precision used for the corner hashes
    :return: list of matching geohashes without duplicates
    """
    west, east, south, north = extent[0], extent[1], extent[2], extent[3]

    # Corner order: top-left, top-right, bottom-right, bottom-left.
    corners = ((north, west), (north, east), (south, east), (south, west))
    corner_hashes = [
        geohash.encode(lat, lon, precision=precision) for lat, lon in corners
    ]

    candidates = tree.prefix_query(commonprefix(corner_hashes))
    # Centroids are stored as (x, y), i.e. the decoded pair reversed.
    centroids = [
        geohash.decode_exactly(code)[:2][::-1] for code in candidates
    ]

    half_x = x_spacing(centroids) * 0.5
    half_y = y_spacing(centroids) * 0.5

    valid_list = []
    for code, (center_x, center_y) in zip(candidates, centroids):
        inside_x = west - half_x < center_x < east + half_x
        inside_y = south - half_y < center_y < north + half_y
        if inside_x and inside_y:
            valid_list.append(code)
    return list(set(valid_list))
Ejemplo n.º 9
0
 def __init__(self, *args, **kwargs):
     """Build a point feature from a parcel bucket document.

     Reads the ``parcelBucketDocument`` and ``request`` keyword arguments,
     decodes the document's ``key`` as a geohash for the point geometry, and
     takes the bbox and properties from this class's private helpers.
     """
     document = kwargs.get('parcelBucketDocument', '')
     request = kwargs.get('request', None)
     lat, lng, _lat_err, _lng_err = geohash.decode_exactly(document['key'])
     bbox = self.__bbox(document)
     properties = self.__properties(document, bbox, request=request)
     # The class itself is renamed so it reports as 'Feature'.
     self.__class__.__name__ = 'Feature'
     point = geojson.Point((float(lng), float(lat)))
     geojson.Feature.__init__(self, geometry=point, properties=properties, bbox=bbox)
Ejemplo n.º 10
0
def get_neighbor_by_direction(hashcode, direction):
    """Return the geohash reached from ``hashcode`` by stepping ``direction``.

    :param hashcode: geohash string; the result has the same length
    :param direction: tuple of the form ``(y_step, x_step)``
    :return: geohash string of the neighbouring cell
    :raises TypeError: if ``direction`` is not a tuple
    """
    if isinstance(direction, tuple):
        center_lat, center_lon, lat_err, lon_err = geohash.decode_exactly(
            hashcode)
        step_lat, step_lon = direction
        shifted_lat = _get_neighbor(center_lat, lat_err, step_lat)
        shifted_lon = _get_neighbor(center_lon, lon_err, step_lon)
        return geohash.encode(shifted_lat, shifted_lon, len(hashcode))
    raise TypeError(
        "direction should be a tuple of form (y_step, x_step).")
Ejemplo n.º 11
0
def on_message(mosq, userdata, msg):
    """Message callback: look up cached data for the reported position.

    The payload is expected to be a JSON object containing a ``position``
    member, for example::

        "position": {
                "id": 18300,
                "attributes": {
                        "t": "I",
                        "ignition": false,
                        "distance": 0,
                        "totalDistance": 28212813.97,
                        "motion": false,
                        "hours": 55000
                },
                "deviceId": 7,
                "protocol": "owntracks",
                "serverTime": null,
                "deviceTime": "2018-09-10T07:47:45.000+0000",
                "fixTime": "2018-09-10T07:47:45.000+0000",
                "outdated": false,
                "valid": true,
                "latitude": 49.0156556,
                "longitude": 8.3975169,
                "altitude": 0,

    The position is geohashed with ``GEO_PREC``. If that hash is a key of
    ``cdb`` its entry is printed; otherwise the hashes produced by
    ``proximityhash.create_geohash`` are tried, and every one found in
    ``cdb`` is printed together with its haversine distance in metres.

    Payloads that are not JSON, are not a JSON object, or lack
    ``position`` / ``latitude`` / ``longitude`` are ignored. Previously a
    payload without ``position`` fell through to an unbound local and raised.

    :param mosq: client instance (unused)
    :param userdata: user data supplied by the client (unused)
    :param msg: message object exposing ``payload`` and ``topic``
    """
    try:
        d = json.loads(msg.payload)
    except (ValueError, TypeError):
        # Malformed or undecodable payload: nothing to look up.
        return

    if not isinstance(d, dict) or 'position' not in d:
        return

    p = d['position']
    if not isinstance(p, dict) or 'latitude' not in p or 'longitude' not in p:
        return

    # Keep the original coordinates; lat/lon are overwritten in the loop.
    olat = lat = float(p['latitude'])
    olon = lon = float(p['longitude'])

    print("lat=", lat, "lon=", lon)
    ghash = geohash.encode(lat, lon, GEO_PREC)
    t = datetime.datetime.now()
    R = ''

    if ghash in cdb:
        data = json.loads(cdb[ghash])
        print(t, "R=", R, msg.topic, ghash, json.dumps(data, indent=4))
    else:
        hash_list = proximityhash.create_geohash(lat, lon, 100, 7).split(',')
        for neighbor in hash_list:
            lat, lon, a, b = geohash.decode_exactly(neighbor)
            # haversine result is scaled by 1000 and printed as metres.
            R = haversine(olon, olat, lon, lat) * 1000.0
            if neighbor in cdb:
                data = json.loads(cdb[neighbor])
                print(t, "R=%6.1fm" % R, msg.topic, neighbor,
                      json.dumps(data, indent=4))
Ejemplo n.º 12
0
def points_from_geohash4(geohashlist):
    """Return the four corner points of every geohash cell as a DataFrame.

    For each geohash the decoded center is offset by the decoded half-sizes
    to give the corners in the order ne, sw, se, nw.

    :param geohashlist: iterable of geohash strings
    :return: DataFrame with columns ``GEOHASH``, ``LONG``, ``LAT`` and four
        rows per input geohash
    """
    corners = []
    for ghash in geohashlist:
        lat, lon, lat_err, lon_err = geohash.decode_exactly(ghash)
        east, west = lon + lon_err, lon - lon_err
        north, south = lat + lat_err, lat - lat_err
        corners.extend([
            [ghash, east, north],  # ne
            [ghash, west, south],  # sw
            [ghash, east, south],  # se
            [ghash, west, north],  # nw
        ])
    return pd.DataFrame(corners, columns=['GEOHASH', 'LONG', 'LAT'])
Ejemplo n.º 13
0
def points_from_geohash4(geohashlist):
    """Expand each geohash into its four corner points.

    :param geohashlist: iterable of geohash strings
    :return: DataFrame with columns ``GEOHASH``, ``LONG``, ``LAT``; the four
        rows per geohash are ordered ne, sw, se, nw
    """
    header = ['GEOHASH', 'LONG', 'LAT']
    # (x sign, y sign) applied to the half-sizes, in ne, sw, se, nw order.
    corner_signs = ((1, 1), (-1, -1), (1, -1), (-1, 1))

    records = []
    for code in geohashlist:
        y, x, yd, xd = geohash.decode_exactly(code)
        records += [[code, x + sx * xd, y + sy * yd]
                    for sx, sy in corner_signs]
    return pd.DataFrame(records, columns=header)
Ejemplo n.º 14
0
def get_position(ghash, xsign, ysign):
    """Return ``(x, y)`` of a geohash center shifted by its half-sizes.

    :param ghash: geohash string
    :param xsign: ``'+'`` or ``'-'`` to add or subtract the x half-size;
        any other value leaves x at the center
    :param ysign: same as ``xsign``, applied to y
    :return: tuple ``(x, y)``
    """
    center_y, center_x, half_y, half_x = geohash.decode_exactly(ghash)

    if xsign == '-':
        center_x -= half_x
    elif xsign == '+':
        center_x += half_x

    if ysign == '-':
        center_y -= half_y
    elif ysign == '+':
        center_y += half_y

    return center_x, center_y
Ejemplo n.º 15
0
def get_hashsize(ul, size):
    """Return the diagonal length of the geohash cell containing ``ul``.

    :param ul: point given as ``[x, y]``; it is encoded as ``(ul[1], ul[0])``
    :param size: geohash precision
    :return: square root of the summed squares of the cell's full height
        and full width, in the units returned by ``decode_exactly``
    """
    cell_hash = geohash.encode(ul[1], ul[0], size)
    _, _, half_height, half_width = geohash.decode_exactly(cell_hash)

    # decode_exactly gives half-sizes; double them for the full cell.
    height = half_height * 2.0
    width = half_width * 2.0

    return ((height ** 2) + (width ** 2)) ** .5
Ejemplo n.º 16
0
def read_data():
    """Load the transaction train/test CSVs and append derived columns.

    Both files are read from the module-level ``path`` prefix and stacked.
    Derived columns: two-character geohash prefix, decoded
    latitude/longitude, hour parsed from ``time``, first token of
    ``device2``, an ``hour_bin`` lookup through the module-level
    ``hour_bin``, and two indicator flags.

    :return: the concatenated frame with the derived columns added
    """

    def unless_nan(transform):
        """Wrap ``transform`` so NaN cells are returned unchanged."""
        return lambda cell: transform(cell) if cell == cell else cell

    sources = ('transaction_train_new.csv', 'transaction_round1_new.csv')
    trans_data = pd.concat([pd.read_csv(path + name) for name in sources])

    geo_codes = trans_data['geo_code']
    trans_data['trans_appro_geo_code'] = geo_codes.apply(
        unless_nan(lambda code: code[0:2]))
    trans_data['trans_latitude'] = geo_codes.apply(
        unless_nan(lambda code: geohash.decode_exactly(code)[0]))
    trans_data['trans_longitude'] = geo_codes.apply(
        unless_nan(lambda code: geohash.decode_exactly(code)[1]))
    # The first two characters of ``time`` are parsed as the hour.
    trans_data['trans_hour'] = trans_data['time'].apply(
        lambda stamp: int(stamp[0:2]))
    trans_data['trans_device_main_kind'] = trans_data['device2'].apply(
        unless_nan(lambda device: device.split(' ')[0]))

    trans_data['hour_bin'] = trans_data['trans_hour'].apply(
        lambda hour: hour_bin[hour])
    # Indicator columns for specific hard-coded values.
    trans_data['is_strange_mac1'] = trans_data['mac1'].apply(
        lambda mac: 1 if mac == 'a8dc52f65085212e' else 0)
    trans_data['is_strange_bal'] = trans_data['bal'].apply(
        lambda balance: 1 if balance == 100 else 0)
    return trans_data
Ejemplo n.º 17
0
def get_corner(hash, corner):
    """Return the geohash lying three half-sizes diagonally from ``hash``.

    :param hash: geohash string; the result has the same length
    :param corner: ``'ul'`` moves up and to the left, ``'lr'`` moves down
        and to the right
    :return: geohash string, or ``None`` for any other ``corner`` value
    """
    center_lat, center_lon, lat_err, lon_err = geohash.decode_exactly(hash)

    if corner not in ('ul', 'lr'):
        return None

    lat_shift = 3 * lat_err
    lon_shift = 3 * lon_err
    if corner == 'ul':
        target_lat = center_lat + lat_shift
        target_lon = center_lon - lon_shift
    else:
        target_lat = center_lat - lat_shift
        target_lon = center_lon + lon_shift
    return geohash.encode(target_lat, target_lon, len(hash))
def read_data():
    """Load the operation train and round-2 test CSVs and derive features.

    Each file is read with ``device1`` and ``geo_code`` forced to ``str`` and
    gets a two-character geohash prefix plus the hour parsed from ``time``.
    ``get_ope_popular_degree`` is then invoked on both frames once per entry
    of ``pop_col_names`` before the frames are stacked and the geohash is
    decoded to latitude/longitude.

    :return: the concatenated frame
    """
    pop_col_names = [
        'day', 'mode', 'ope_hour', 'version', 'device_code1', 'device_code2',
        'device_code3', 'mac1', 'ip1', 'ip2', 'mac2', 'wifi',
        'ope_appro_geo_code', 'ip1_sub', 'ip2_sub'
    ]

    def prepare(csv_path):
        """Read one CSV and add the geohash-prefix and hour columns."""
        frame = pd.read_csv(csv_path,
                            dtype={
                                'device1': str,
                                'geo_code': str
                            })
        # NaN (the only value unequal to itself) is passed through as-is.
        frame['ope_appro_geo_code'] = frame['geo_code'].apply(
            lambda code: code[0:2] if code == code else code)
        frame['ope_hour'] = frame['time'].apply(
            lambda stamp: int(stamp[0:2]))
        return frame

    train_data = prepare("../../data/train/operation_train_new.csv")
    test_data = prepare("../../data/test/test_operation_round2.csv")

    # NOTE(review): the column name is never handed to the helper, so every
    # iteration makes the same two calls - confirm against the helper.
    for _ in pop_col_names:
        get_ope_popular_degree(ope_data=train_data)
        get_ope_popular_degree(ope_data=test_data)

    # Down-sampling the test frame to the train size was tried and
    # abandoned: it did not help and the results dropped significantly.

    ope_data = pd.concat([train_data, test_data])
    for column, position in (('ope_latitude', 0), ('ope_longitude', 1)):
        ope_data[column] = ope_data['geo_code'].apply(
            lambda code, position=position:
            geohash.decode_exactly(code)[position] if code == code else code)

    return ope_data
Ejemplo n.º 19
0
def geohash_to_polygon(geo):
    """
    Build the rectangle covered by a geohash cell.

    :param geo: String that represents the geohash.
    :return: A Shapely Polygon whose vertices are ``(lng, lat)`` pairs.
    """
    lat, lng, half_height, half_width = geohash.decode_exactly(geo)

    south, north = lat - half_height, lat + half_height
    west, east = lng - half_width, lng + half_width

    # Closed ring: sw -> se -> ne -> nw -> sw.
    ring = [(west, south), (east, south), (east, north), (west, north)]
    ring.append(ring[0])
    return geometry.Polygon(ring)
Ejemplo n.º 20
0
def augment_data(raw_data):
    """Convert raw input data (as read from the csv file) into train data.

    Combines, per geohash and time step: cluster assignments, Fourier
    features, the decoded geohash location and previous demand values.

    Two fixes over the earlier version:

    * the decoded locations are materialised in a list; on Python 3 ``map``
      returns a one-shot iterator, so the second pass over it was empty;
    * ``lat`` / ``lon`` are taken from elements 0 / 1 of
      ``geohash.decode_exactly`` (which returns latitude first) instead of
      being swapped.

    :param raw_data: raw data provided from the csv file
    :return: tuple ``(base, by_t)`` - the training frame and the pivot table
        of demand by geohash and time step
    """
    temp, _n_day, _n_time = get_14_days_data(raw_data)
    temp_norm = prepare_data_for_cluster(temp)
    clustered = cluster(temp_norm)

    # get demand for each unique time "t"
    by_t = temp.pivot_table(index="geohash6", columns="t",
                            values="demand").fillna(0)

    # get fourier values
    top_5_freqs, top_5_amplis, agg_fouriers = get_fourier(by_t)

    # get latlon for each geohash
    decoded = [geohash.decode_exactly(code) for code in top_5_amplis.index]
    loc = pd.DataFrame(
        {
            'lat': [point[0] for point in decoded],
            'lon': [point[1] for point in decoded]
        },
        index=top_5_amplis.index)

    # merge fourier and location
    var_add = pd.concat([top_5_freqs, top_5_amplis, loc], axis=1)

    # get previous demand values
    selected_periods = [1, 2, 3, 4, 5, 6, 7, 8, 96, 192]
    ds = get_n_previous_demand(by_t, selected_periods)

    # create training data
    base = temp[[
        "geohash6", "t", "demand", "dayofweek", "hour", "minutes", "timestamp",
        "day"
    ]]
    # rows without a full set of previous-demand values are dropped
    base = pd.merge(ds.reset_index(), base, how="right", on=["geohash6",
                                                             "t"]).dropna()
    base = pd.merge(base, clustered, on=["geohash6", "dayofweek"], how="left")
    base = pd.merge(
        base,
        var_add.reset_index().rename(columns={'index': 'geohash6'}),
        how="left",
        on="geohash6")
    base = base.sort_values(["geohash6", "t"])
    base.index = range(len(base))

    # return training data and table of all previous demands
    return base, by_t
Ejemplo n.º 21
0
def fill_geohashs(data, size):
    """Densify a point table so consecutive points are closely spaced.

    The table is first passed through ``first_last``. The diagonal of the
    geohash cell at the table's upper-left extremum gives a reference size;
    whenever two consecutive points are further apart than a fifth of it,
    intermediate points from ``generate_points`` are inserted. The result is
    passed through ``map_table``.

    :param data: DataFrame with one column whose name contains "lat" and one
        whose name contains "long" (case-insensitive)
    :param size: geohash precision
    :return: whatever ``map_table(..., map_only=True)`` returns
    """
    # make sure the first and last point agree (appends to the table if not)
    data = first_last(data)

    extrema = get_extrema(data)

    # upper-left and lower-right points as [x, y]
    upper_left = [extrema['w'], extrema['n']]
    lower_right = [extrema['e'], extrema['s']]

    upper_left_hash = geohash.encode(upper_left[1], upper_left[0], size)
    # The lower-right hash is computed but its value is not used.
    geohash.encode(lower_right[1], lower_right[0], size)

    _, _, half_height, half_width = geohash.decode_exactly(upper_left_hash)
    cell_height, cell_width = half_height * 2.0, half_width * 2.0
    hashsize = ((cell_height ** 2) + (cell_width ** 2)) ** .5

    # Later matching columns win over earlier ones.
    for column in data.columns.values.tolist():
        lowered = str(column).lower()
        if 'lat' in lowered:
            latheader = column
        elif 'long' in lowered:
            longheader = column

    densified = []
    previous = None
    points = data[[longheader, latheader]].values.tolist()
    for position, point in enumerate(points):
        # The very first point only seeds ``previous``.
        if position > 0:
            gap = distance(previous, point)
            if gap > hashsize / 5.0:
                pieces = int((gap / hashsize) * 5.0)
                densified += generate_points(pieces, previous, point)[1:]
            else:
                densified.append(point)
        previous = point

    table = pd.DataFrame(densified, columns=['LONG', 'LAT'])
    return map_table(table, size, map_only=True)
Ejemplo n.º 22
0
def neighbor(geo_hash, direction):
    """
    Return the geohash a number of cells away from the given one.

    :param geo_hash: geohash string
    :type geo_hash: str
    :param direction: two-element array of cell steps, i.e. [1,0] means
        north, [1,1] means northeast
    :type direction: list
    :return: geohash string of the same length as ``geo_hash``
    :rtype: str
    """
    center_lat, center_lon, lat_err, lon_err = geohash.decode_exactly(
        geo_hash)
    lat_steps, lon_steps = direction[0], direction[1]
    # One step is a full cell, i.e. twice the decoded half-size.
    target_lat = center_lat + lat_steps * lat_err * 2
    target_lon = center_lon + lon_steps * lon_err * 2
    return geohash.encode(target_lat, target_lon, len(geo_hash))
Ejemplo n.º 23
0
 def __init__(self, *args, **kwargs):
     """Initialise the feature from a parcel bucket document.

     Keyword arguments read: ``parcelBucketDocument`` (its ``key`` is
     decoded as a geohash to place the point) and ``request`` (forwarded to
     the private properties helper).
     """
     bucket = kwargs.get('parcelBucketDocument', '')
     incoming_request = kwargs.get('request', None)

     decoded = geohash.decode_exactly(bucket['key'])
     center_lat, center_lng, _lat_err, _lng_err = decoded

     feature_bbox = self.__bbox(bucket)
     feature_properties = self.__properties(bucket,
                                            feature_bbox,
                                            request=incoming_request)

     # The class itself is renamed so it reports as 'Feature'.
     self.__class__.__name__ = 'Feature'
     geojson.Feature.__init__(
         self,
         geometry=geojson.Point((float(center_lng), float(center_lat))),
         properties=feature_properties,
         bbox=feature_bbox)
Ejemplo n.º 24
0
 def getImages(self, request):
     """Return every stored DropletImage as an ``imageData`` message.

     Each entity's ``geoHash`` is decoded and its first two values are sent
     as the ``latitude`` / ``longitude`` strings. ``score`` is always 1.

     :param request: incoming request (not read here)
     :return: ``responseMessage`` wrapping the list of image messages
     """
     imageSet = db.GqlQuery("SELECT * FROM DropletImage")
     logging.info("Image count from database" + str(imageSet.count()))

     payload = []
     for entity in imageSet:
         latitude, longitude = geohash.decode_exactly(entity.geoHash)[:2]
         logging.info("image")
         payload.append(
             imageData(imageID=entity.imageID,
                       url=images.get_serving_url(entity.blobKey),
                       flag=entity.flag,
                       like=entity.like,
                       score=1,
                       latitude=str(latitude),
                       longitude=str(longitude)))

     logging.info("Image count for response" + str(len(payload)))
     return responseMessage(images=payload)
Ejemplo n.º 25
0
def get_alignment_geohash(hash):
    """Return the closed ring of corner points of a geohash cell.

    :param hash: geohash string
    :return: list of five ``[lat, long]`` points ordered south-east,
        south-west, north-west, north-east, with the first point repeated
        at the end
    """
    lat, lon, lat_err, lon_err = geohash.decode_exactly(hash)

    south, north = lat - lat_err, lat + lat_err
    west, east = lon - lon_err, lon + lon_err

    start = [south, east]
    return [start, [south, west], [north, west], [north, east], start]
Ejemplo n.º 26
0
def get_alignment_geohash(hash):
    """Compute the four corners of a geohash cell as a closed path.

    :param hash: geohash string
    :return: list ``[p1, p2, p3, p4, p1]`` of ``[lat, long]`` pairs, where
        p1..p4 are the south-east, south-west, north-west and north-east
        corners
    """
    decoded = geohash.decode_exactly(hash)
    center_lat, center_long = decoded[0], decoded[1]
    half_lat, half_long = decoded[2], decoded[3]

    # (lat sign, long sign) of each corner relative to the center.
    def corner(lat_sign, long_sign):
        corner_lat = (center_lat + half_lat
                      if lat_sign > 0 else center_lat - half_lat)
        corner_long = (center_long + half_long
                       if long_sign > 0 else center_long - half_long)
        return [corner_lat, corner_long]

    path = [corner(-1, 1), corner(-1, -1), corner(1, -1), corner(1, 1)]
    path.append(path[0])
    return path
Ejemplo n.º 27
0
def get_data(file_path):
    """Load a demand CSV and add time-of-day and geohash location features.

    Time features: ``h`` / ``m`` (split from ``timestamp``), ``mins``
    (minutes since midnight), ``mins_norm`` (``mins`` / 1440) and ``dow``
    (``day`` modulo 7).

    Location features, computed once per distinct ``geohash6`` and merged
    back: ``lat``, ``long``, ``lat_err``, ``long_err`` from
    ``geohash.decode_exactly`` and the unit-sphere coordinates ``x``, ``y``,
    ``z``.

    Fix: the decoded latitude/longitude are in degrees and are now converted
    to radians before being passed to ``cos`` / ``sin``; previously the
    degree values were fed to the trigonometric functions directly.

    :param file_path: path of the CSV, opened through ``file_io.FileIO``
    :return: the input frame with the added columns
    """
    from math import radians

    print("Loading data..", file_path)
    with file_io.FileIO(file_path, mode='rb') as input_f:
        df = pd.read_csv(input_f)
    df[['h', 'm']] = df['timestamp'].str.split(':', expand=True)
    df['h'] = df['h'].astype('int64')
    df['m'] = df['m'].astype('int64')
    df['mins'] = (df['h'] * 60) + df['m']
    df['mins_norm'] = df['mins'] / 1440
    df['dow'] = df['day'] % 7

    # Resolve GeoCodes: one row per distinct geohash, most frequent first.
    geohashes_df = df.groupby('geohash6', as_index=False).agg({
        'day': 'count'
    }).rename(columns={
        'day': 'count'
    }).sort_values(by='count', ascending=False)
    geohashes_df['lat'] = None
    geohashes_df['lat_err'] = None
    geohashes_df['long'] = None
    geohashes_df['long_err'] = None
    geohashes_df['x'] = None
    geohashes_df['y'] = None
    geohashes_df['z'] = None

    # Sorting keeps the 0..n-1 row labels, so label-based access by ``i``
    # still reaches every row exactly once.
    for i in range(len(geohashes_df)):
        geo_decoded = geohash.decode_exactly(geohashes_df.loc[i, 'geohash6'])
        geohashes_df.loc[i, 'lat'] = geo_decoded[0]
        geohashes_df.loc[i, 'long'] = geo_decoded[1]
        geohashes_df.loc[i, 'lat_err'] = geo_decoded[2]
        geohashes_df.loc[i, 'long_err'] = geo_decoded[3]

        lat_rad = radians(geo_decoded[0])
        lon_rad = radians(geo_decoded[1])

        # https://datascience.stackexchange.com/a/13575
        geohashes_df.loc[i, 'x'] = cos(lat_rad) * cos(
            lon_rad)  # cos(lat) * cos(lon)
        geohashes_df.loc[i, 'y'] = cos(lat_rad) * sin(
            lon_rad)  # cos(lat) * sin(lon)
        geohashes_df.loc[i, 'z'] = sin(lat_rad)  # sin(lat)

    df = df.merge(geohashes_df.drop(columns=['count']),
                  on='geohash6',
                  how='inner')

    return df
Ejemplo n.º 28
0
def make_points_geohash(c1, c2, size):
    """Build the grid of cell centers between two geohashes.

    The grid runs from the center of ``c1`` to the center of ``c2`` with one
    point per cell of ``c1``'s size along each axis, and is then handed to
    ``map_table_new``.

    Fix: ``np.linspace`` requires an integer sample count; the counts were
    previously floats, which current NumPy rejects with a TypeError. They
    are now rounded to the nearest whole number of cells, so small floating
    point noise in the decoded centers cannot drop a row or column.

    :param c1: geohash of the first corner; its cell size sets the spacing
    :param c2: geohash of the opposite corner
    :param size: precision forwarded to ``map_table_new``
    :return: whatever ``map_table_new`` returns for the grid table, whose
        columns are ``LAT``, ``LONG`` and ``X`` (the column index)
    """
    lat1, long1, latdelta, longdelta = geohash.decode_exactly(c1)
    lat2, long2 = geohash.decode(c2)

    # decode_exactly gives half-sizes; double them for the full cell.
    latdelta, longdelta = latdelta * 2, longdelta * 2

    # creating lats and longs
    long_count = int(round(abs(long2 - long1) / longdelta)) + 1
    lat_count = int(round(abs(lat2 - lat1) / latdelta)) + 1
    longs = np.linspace(long1, long2, long_count)
    lats = np.linspace(lat1, lat2, lat_count)

    total = []
    for column, longitude in enumerate(longs):
        total += zip(lats, [longitude] * len(lats), [column] * len(lats))
    total = pd.DataFrame(total, columns=['LAT', 'LONG', 'X'])

    return map_table_new(total, precision=size)
Ejemplo n.º 29
0
    def geohash_to_polygon(self, geo: str, Geometry: bool = True, sequence: bool = True) -> [list, BaseGeometry]:
        """
        Convert a geohash string into its rectangle.

        Parameters
        ----------
        geo : str
            The geohash string to convert.
        Geometry : boolean, optional
            Selects the return format, Polygon by default.
            True returns a shapely.geometry.Polygon, False returns the list
            of points.
        sequence : bool, optional
            Selects the point layout, [lon, lat] by default.
            True gives [lon, lat], False gives [lat, lon].

        Returns
        ----------
        list or shapely.geometry.base.BaseGeometry
            The rectangle corresponding to the geohash.

        Example
        ---------
        >>> G = GeohashOperator()
        >>> G.geohash_to_polygon('wx4ervz', True, True)
        POLYGON ((116.3658142089844 39.97787475585938, 116.3671875 39.97787475585938, 116.3671875 39.979248046875,
        116.3658142089844 39.979248046875, 116.3658142089844 39.97787475585938))

        Computes the rectangle for a single geohash string.
        """
        lat, lng, half_height, half_width = geohash.decode_exactly(geo)
        south, north = lat - half_height, lat + half_height
        west, east = lng - half_width, lng + half_width

        # Ring order: sw -> se -> ne -> nw, closed by repeating the first.
        if sequence:
            ring = [(west, south), (east, south), (east, north), (west, north)]
        else:
            ring = [(south, west), (south, east), (north, east), (north, west)]
        ring.append(ring[0])

        return Polygon(ring) if Geometry else ring
Ejemplo n.º 30
0
def make_squares(data, presicion):
    """Write a CSV of geohash cell corners with occurrence counts.

    The last element of every row after the first is treated as a geohash.
    Its four corners are computed, identical rows are counted, and the
    result - sorted by count, highest first - is written to
    ``squares<presicion>.csv``.

    :param data: sequence of rows; the first row is skipped
    :param presicion: value appended to the output file name
    :return: ``data``, unchanged
    """
    # Imports kept local (numpy itself is not used in the body).
    import numpy as np
    import pandas as pd

    columns = ['GEOHASH', 'LAT1', 'LONG1', 'LAT2', 'LONG2', 'LAT3', 'LONG3',
               'LAT4', 'LONG4']

    records = []
    for row in data[1:]:
        ghash = row[-1]
        lat, lon, lat_err, lon_err = geohash.decode_exactly(ghash)

        south, north = lat - lat_err, lat + lat_err
        east, west = lon + lon_err, lon - lon_err

        # Corner order: se, sw, ne, nw - each as (lat, long).
        records.append(
            [ghash, south, east, south, west, north, east, north, west])

    boxes = pd.DataFrame(records, columns=columns)
    boxes['COUNT'] = 1
    counted = boxes.groupby(columns, sort=True).sum()
    counted = counted.sort_values(by=['COUNT'], ascending=False)
    counted.to_csv('squares' + str(presicion) + '.csv')

    return data
Ejemplo n.º 31
0
def ind_dec_points(alignmentdf):
    """Add each point's fractional position inside its geohash cell.

    Columns are located by name (case-insensitive): one containing "lat",
    one containing "long" and one containing "geohash". For every row the
    offset from the cell's upper-left corner is divided by the cell size and
    stored in the new ``x`` and ``y`` columns.

    :param alignmentdf: DataFrame with string column names; modified in place
    :return: the same DataFrame
    """
    # Later matching columns win over earlier ones.
    for position, name in enumerate(alignmentdf.columns.values.tolist()):
        lowered = name.lower()
        if 'lat' in lowered:
            latpos = position
        elif 'long' in lowered:
            longpos = position
        elif 'geohash' in lowered:
            hashpos = position

    xs, ys = [], []
    for record in alignmentdf.values.tolist():
        point_lat = record[latpos]
        point_long = record[longpos]
        ghash = record[hashpos]

        mid_lat, mid_long, half_lat, half_long = geohash.decode_exactly(ghash)
        corner_lat = mid_lat + half_lat
        corner_long = mid_long - half_long

        xs.append(abs(corner_long - point_long) / (half_long * 2))
        ys.append(abs(corner_lat - point_lat) / (half_lat * 2))

    alignmentdf['x'] = xs
    alignmentdf['y'] = ys

    return alignmentdf
Ejemplo n.º 32
0
def ind_dec_points(alignmentdf):
    """Compute where each point sits within its geohash cell.

    The latitude, longitude and geohash columns are found by searching the
    lower-cased column names for "lat", "long" and "geohash". The distance
    of each point from its cell's upper-left corner, as a fraction of the
    cell's width and height, is written to ``x`` and ``y``.

    :param alignmentdf: DataFrame with string column names; modified in place
    :return: the same DataFrame
    """
    header = alignmentdf.columns.values.tolist()

    # Later matching columns win over earlier ones.
    for index in range(len(header)):
        label = header[index].lower()
        if 'lat' in label:
            latpos = index
        elif 'long' in label:
            longpos = index
        elif 'geohash' in label:
            hashpos = index

    def cell_fraction(record):
        """Return the (x, y) fractions for one row."""
        lat = record[latpos]
        lon = record[longpos]
        ghash = record[hashpos]

        midlat, midlong, latdelta, longdelta = geohash.decode_exactly(ghash)
        upper = midlat + latdelta
        left = midlong - longdelta

        frac_x = abs(left - lon) / (longdelta * 2)
        frac_y = abs(upper - lat) / (latdelta * 2)
        return frac_x, frac_y

    fractions = [cell_fraction(record)
                 for record in alignmentdf.values.tolist()]

    alignmentdf['x'] = [pair[0] for pair in fractions]
    alignmentdf['y'] = [pair[1] for pair in fractions]

    return alignmentdf
Ejemplo n.º 33
0
def get_lon(geohash):
    """Return the second value of ``geo.decode_exactly`` for a geohash.

    :param geohash: geohash string
    :return: second element of the decoded four-tuple
    """
    _first, second, _first_err, _second_err = geo.decode_exactly(geohash)
    return second
Ejemplo n.º 34
0
def get_aggregates():
    """Render a Google Maps heatmap page for one aggregation time.

    The ``datetime`` request argument is parsed with the module-level
    ``fmt``. Documents of ``collection`` with that ``aggtime`` and
    ``hashlen`` 7 are turned into weighted ``LatLng`` entries (weight is the
    document's ``count`` times 20) and substituted into the page template.

    :return: the HTML page as a string
    """
    start = datetime.strptime(request.args.get('datetime'), fmt)
    points = collection.find({"$and": [{"aggtime": start}, {"hashlen": 7}]})

    entries = []
    for p in points:
        loc = gh.decode_exactly(p['geohash7'])
        entry = "location: new google.maps.LatLng({1}, {2}), weight: {0}".format(
            p['count'] * 20, loc[0], loc[1])
        entries.append("{" + entry + "},")
    heatdata = "".join(entries)

    # NOTE(review): the Maps API key is embedded in the template below.
    page_template = """
    <!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <title>Heatmaps</title>
    <style>
      #map {
        height: 100%;
      }
      html, body {
        height: 100%;
        margin: 0;
        padding: 0;
      }
    </style>
  </head>

  <body>
    
    <div id="map"></div>
    <script>

      var map, heatmap;

      function initMap() {
        map = new google.maps.Map(document.getElementById('map'), {
          zoom: 13,
          center: {lat: 37.775, lng: -122.434},
          mapTypeId: 'satellite'
        });

        heatmap = new google.maps.visualization.HeatmapLayer({
          data: getPoints(),
          map: map,
          opacity: 1
        });
      }


      function getPoints() {
        return [
          #HEATDATA#
        ];
      }
    </script>
    <script async defer
        src="https://maps.googleapis.com/maps/api/js?key=AIzaSyDA4bPlz1-ity0j9rUXInpx4nvUxJVpQ28&libraries=visualization&callback=initMap">
    </script>
  </body>
</html>
    """
    return page_template.replace("#HEATDATA#", heatdata)
Ejemplo n.º 35
0
def Tucker(datain, _x, _y, _z):
    """Run a non-negative Tucker decomposition on gridded order start counts.

    The CSV at ``datain`` must provide the columns ``geohashed_start_loc``,
    ``starttime`` and ``orderid``.  Start locations are decoded, clipped to a
    fixed bounding box, binned into grid cells and counted per
    (cell, hour, date).  The counts form a tensor of shape
    (cells, 24, 14) that is decomposed with ranks ``[_x, _y, _z]``.

    Side effects: saves ``pic/M1.png`` and ``pic/M2.png``, shows both plots,
    writes ``space.csv`` and calls ``generateShp('space.csv')``.

    Returns:
        tuple: ``(factors[1], factors[2])`` - the factor matrices of the
        hour mode and the date mode.
    """
    # cell size and data range of coordinates
    xmax, xmin, ymax, ymin = 117.073, 115.668, 40.465, 39.465
    # number of grid columns / rows at 200 cells per coordinate unit
    xsize, ysize = round((xmax - xmin) * 200), round((ymax - ymin) * 200)

    # import data and decode geohashed location
    df = pd.read_csv(datain)  #,date_parser='starttime')
    loc = [geohash.decode_exactly(i) for i in df['geohashed_start_loc']]
    # decode_exactly yields latitude first, so x takes index 1 and y index 0
    df['start_x'], df['start_y'] = [i[1] for i in loc], [i[0] for i in loc]

    ## prepare a new dataset for group calculation
    df['datetime'] = pd.to_datetime(df['starttime'])
    dt = pd.DataFrame({
        'ts': df['datetime'],
        'id': df['orderid'],
        'x': df['start_x'],
        'y': df['start_y']
    })
    #dt=dt.query("ts >= '{}' and ts <= '{}'".format('2018-03-08 00:00:00', '2018-04-10 00:00:00'))
    dt = dt.set_index(['ts'])

    # NOTE(review): presumably a day offset counted from May 10 with 31-day
    # months - confirm against the date range of the input data.
    dt['date'] = [(month - 5) * 31 + day - 10
                  for month, day in zip(dt.index.month, dt.index.day)]
    dt['hour'] = dt.index.hour

    # keep only the points inside the bounding box
    dt = dt[dt['x'] <= xmax]
    dt = dt[dt['x'] >= xmin]
    dt = dt[dt['y'] <= ymax]
    dt = dt[dt['y'] >= ymin]

    # NOTE(review): x is scaled by ysize rather than xsize; ysize evaluates
    # to 200 here, the same factor used to derive both sizes - confirm that
    # this is intended before changing the bounding box.
    x = ((np.array(dt['x']) - xmin) * ysize) // 1
    y = ((np.array(dt['y']) - ymin) * ysize) // 1
    dt['loc'] = y * xsize + x  # transform x,y to cell id

    # join data
    # every (hour, date) pair that must appear in the tensor; date 7 is not
    # listed, leaving 14 date values per hour
    idx = [(x, y) for x in range(24)
           for y in [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14]]
    dt_join = pd.DataFrame({'idx': idx})
    dt_new = dt.groupby(['loc', 'hour', 'date']).count()['id']

    # one column per cell, one row per observed (hour, date) pair
    group_count = dt_new.unstack(['loc'])
    group_count['idx'] = group_count.index
    # left join onto the full (hour, date) grid; missing pairs become 0
    group_count = pd.merge(dt_join, group_count, how='left', on='idx')
    group_count = group_count.set_index(['idx'])
    group_count = group_count.fillna(0)

    # generate tensor to decomposition
    X = np.array(group_count)
    #max_cell=np.max(b,axis=0)
    #m=np.tile(max_cell,(15*24,1))
    #b=b/m
    X = X.reshape(24, 14, -1)  # (hour, date, cell); original note: 15327
    X = X.transpose([2, 0, 1])  # tensor transpose to (cell, hour, date)

    # tensor decomposition
    X = tl.tensor(X)
    core, factors = non_negative_tucker(X, rank=[_x, _y, _z])  #non_negative_

    # plot
    # one curve per component of the hour-mode factor
    for i in range(factors[1].shape[1]):
        plt.plot(factors[1][:, i])
    plt.ylabel('Mode Value')
    plt.xlabel('Hour')
    plt.savefig('pic/M1.png', dpi=300)
    plt.show()

    # one curve per component of the date-mode factor
    for i in range(factors[2].shape[1]):
        plt.plot(factors[2][:, i])
    plt.ylabel('Mode Value')
    plt.xlabel('Day')
    plt.savefig('pic/M2.png', dpi=300)
    plt.show()

    # cell-mode factor, indexed by cell id, exported for generateShp
    space_out = pd.DataFrame(factors[0], index=group_count.columns)
    space_out['id'] = space_out.index
    space_out.to_csv('space.csv', index=False)

    generateShp('space.csv')
    return factors[1], factors[2]
Ejemplo n.º 36
0
    else:
        return int(i)


filt_in = "test_geostart.txt"
file_output = 'test_geostart_latlng.txt'
radar_echos = []

i = 0
# For every input line, take the text before the first comma as a geohash and
# write "<geohash>,<v0>,<v1>,<v2>,<v3>" where v0..v3 are the four values
# returned by decode_exactly.  Both files are owned by the ``with`` statement
# so the output is flushed and closed even when decoding a line raises.
# The output file is opened first, matching the original open order.
with open(file_output, "w") as outputfile, open(filt_in, "r") as infile:
    for line in infile:
        context = line.split(",")
        print(context[0])
        print(geohash.decode(context[0]))
        geolatlng = geohash.decode_exactly(context[0])
        fields = [context[0]] + [str(geolatlng[k]) for k in range(4)]
        outputfile.write(','.join(fields) + '\n')
Ejemplo n.º 37
0
def encdec():
    """Print sample geohash encode/decode results for one fixed coordinate.

    Uses the parenthesised single-argument ``print`` form so the function
    runs unchanged on both Python 2 and Python 3.
    """
    print('geohash.encode ' + geohash.encode(12.963787, 77.637789))
    print('geohash.encode with precision-5: '
          + geohash.encode(12.963787, 77.637789, precision=5))
    print('geohash.decode ' + str(geohash.decode('tdr1wxype953')))
    print('geohash.decode exactly' + str(geohash.decode_exactly('tdr1wxype953')))
Ejemplo n.º 38
0
def map_points(point):
    """Decode the geohash ``point`` and return its first two values as "a,b".

    ``geohash.decode_exactly`` is expected to yield exactly four values
    (latitude, longitude and two deltas); the deltas are discarded.
    """
    latitude, longitude, _lat_delta, _lon_delta = geohash.decode_exactly(point)
    return ','.join((str(latitude), str(longitude)))
Ejemplo n.º 39
0
def make_line_index(data,h5filename,multiprocessing=False,split=1,processes=1,appendsize=5000,gidheader='gid'):
	"""Build the geohash line index for a dataframe of line geometries.

	Parameters
	----------
	data : pandas.DataFrame
		Must contain a column whose lower-cased name is ``coords``.  A column
		named ``gidheader`` is renamed to ``gid``.  A column whose name
		contains ``maxdistance`` is used when present, otherwise a
		``maxdistance`` column is computed and added.  Columns whose names
		contain north/south/east/west are used to derive the extrema.
		The frame is modified in place (column rename, added columns).
	h5filename : str or False
		HDF5 output path.  An existing file is removed first.  ``False``
		skips writing the store in the single-process path.
	multiprocessing : bool
		Marks the call as a worker of a multi-process run.  When ``True``
		the single-process bookkeeping at the end (mask, metadata, HDF5
		output) is skipped.
	split : int
		Number embedded in the ``process<split>a.csv`` / ``process<split>n.csv``
		file names and forwarded to ``make_csvs``.
	processes : int
		Number of processes.  With the defaults (``processes=1`` and
		``multiprocessing=False``) it is replaced by ``cpu_count()``; any
		value other than 1 then delegates the work to ``make_processes``.
	appendsize : int
		Number of rows processed between two ``make_csvs`` flushes.
	gidheader : str
		Name of the id column in ``data``.

	Returns
	-------
	list or None
		``[]`` when the work was handed to ``make_processes``, otherwise
		``None``.
	"""
	# renaming columns
	gidbool = False
	newlist = []

	# checking to see if gid exists
	for row in data.columns:
		if row == gidheader:
			gidbool = True
			newlist.append('gid')
		else:
			newlist.append(row)

	# creating new column list
	data.columns = newlist

	# making the default number of processing for the platform
	if processes == 1 and multiprocessing == False:
		processes = cpu_count()

	if not processes == 1:
		multiprocessing = True

		# logic for making gid non comma separated
		data['BOOL'] = data.gid.astype(str).str.contains('[',regex=False)
		if not len(data[data['BOOL'] == True]) == 0:
			newlist = []
			for i in data['gid'].values:
				if '[' in i:
					# "[a, b]" -> "a|b"
					i = i[1:-1]
					i = i.replace(' ','')
					i = str.split(i,',')
					i = '|'.join(i)
				newlist.append(i)
			data['gid'] = newlist

		# removing processfiles
		remove_process_files()

		print('Creating multiple processes.')
		make_processes(data,h5filename,processes,appendsize)

		return []

	csvfilenamealignment = 'process%sa.csv' % split
	csvfilenameneighbor = 'process%sn.csv' % split

	# start both per-process csv files with their header row
	with open(csvfilenamealignment,'wb') as b:
		b.write(b'"GEOHASH","TEXT"')

	with open(csvfilenameneighbor,'wb') as b:
		b.write(b'"GEOHASH","TEXT"')


	# Best-effort removal of a previous output file.  OSError covers a
	# missing file; TypeError covers h5filename=False, which is a supported
	# value (see the check before the HDFStore is written).
	try:
		os.remove(h5filename)
	except (OSError, TypeError):
		pass


	coordbool = False
	# locating the coords column regardless of its capitalisation
	for row in data.columns:
		if row.lower() == 'coords':
			coordbool = True
			coordheader = row


	# getting maxdistance if applicable
	maxdistance = False
	maxdistancebool = False
	for i in data.columns:
		if 'maxdistance' in str(i).lower():
			maxdistanceheader = i
			maxdistancebool = True

	# adding the correct maxdistance field
	if maxdistancebool == False:
		newlist = []
		# use the detected coords column (not a hard-coded 'coords') so that
		# differently capitalised headers work like in the rest of the function
		for i in data[coordheader].values:
			newlist.append(get_max_distance(get_cords_json(i)))
		data['maxdistance'] = newlist
		maxdistancebool = True
		maxdistanceheader = 'maxdistance'

	# logic for zipping together right obects
	if gidbool == True:
		iterdata = data[['gid',coordheader,maxdistanceheader]].values
	else:
		# no gid column: fall back to the positional row number as id
		iterdata = zip(range(len(data)),data[coordheader],data[maxdistanceheader])


	# retriving the sise of 1 geohash
	# (diagonal of a precision-9 cell at the first point of the first line)
	firstpoint = get_cords_json(data[coordheader].values[0])[0]
	lat,long,latdelta,longdelta = geohash.decode_exactly(geohash.encode(firstpoint[1],firstpoint[0],9))
	ghashsize = ((latdelta*2)**2 + (longdelta*2)**2) ** .5

	# checking for cardinal coords and getting extrema if applicable
	ind = 0
	for row in data.columns:
		if 'north' in str(row).lower():
			nhead = row
			ind += 1
		if 'south' in str(row).lower():
			shead  = row
			ind += 1
		if 'east' in str(row).lower():
			ehead = row
			ind += 1
		if 'west' in str(row).lower():
			whead  = row
			ind += 1

	# logic for getting the extrema
	if ind == 4:
		extrema = { 'n':data[nhead].max(),
		  's':data[shead].min(),
		  'w':data[whead].min(),
		  'e':data[ehead].max()}
	else:
		extrema = []


	size = len(data)
	addgeohashs = []
	total = 0
	count = 0
	msgsize = 0

	# accumulate geohashes and flush them to the csv files every appendsize rows
	for gid,coords,maxdistance in iterdata:
		coords = get_cords_json(coords)
		addgeohashs += fill_geohashs(coords,gid,9,maxdistance,ghashsize)
		count += 1
		if count == appendsize:
			count = 0
			total += appendsize
			make_csvs(addgeohashs,csvfilenamealignment,csvfilenameneighbor,split,total,size)
			addgeohashs = []

	# appending add geohashs that are left over from the last append size
	if not count == 0:
		make_csvs(addgeohashs,csvfilenamealignment,csvfilenameneighbor,split,size,size)

	if multiprocessing == False:
		# making lines mask for no reason
		linemask1,linemask2 = make_line_mask(data)

		# make line index metadata
		metadata = make_meta_lines(8,9,len(data),extrema)
		df = pd.DataFrame(['stringdf',json.dumps(linemask2),json.dumps(metadata)],index=['ultindex','areamask','metadata'])

		# writing output to h5 file
		if not h5filename == False:
			with pd.HDFStore(h5filename) as out:
				out['combined'] = df
				out['alignmentdf'] = data


		print('Made output h5 file containing datastructures:')
		print('\t- alignmentdf (type: pd.DataFrame)')
		print('\t- areamask (type: dict)')
		print('\t- ultindex (type: dict)')
		print('\t- metadata (type: dict)')
Ejemplo n.º 40
0
def geohash_decode(hashed_code):
    """Return the first two values of ``gh.decode_exactly(hashed_code)`` as a list."""
    decoded = gh.decode_exactly(hashed_code)
    return [decoded[position] for position in (0, 1)]
Ejemplo n.º 41
0
import geohash
a = [
    'dre9', 'drft', 'dr8g', 'drfw', 'dr8j', 'dr8k', 'drfz', 'dre2',
    'dr8n', 'dre7', 'dr8s', 'drdm', 'dr8v', 'drdk', 'dr8t', 'dr8u',
    'dr95', 'dr8y', 'dr7b', 'dr9h', 'drk0', 'drhp', 'dre3', 'dpxf',
    'dr7r', 'dr7k', 'dr7j', 'dr7h', 'dr7m', 'drk8', 'dr5x', 'dr5z',
    'dr7w', 'dpxu', 'dpxv', 'dr7t', 'dr5q', 'dr5r', 'dr9g', 'dr5w',
    'dre0', 'dr8h', 'drgw', 'drf5', 'dr9k', 'drd9', 'dr9e', 'dres',
    'dreh', 'drek', 'dr8e', 'dr85', 'dr9t', 'dr9v', 'dree', 'dred',
    'dr9s', 'dr6g', 'drfh', 'dpxg', 'dr72', 'dr77', 'dr75', 'dr78',
]


def _cell_edges(code):
    """Return the (south, west, north, east) edges of the geohash cell ``code``."""
    lat, lon, lat_delta, lon_delta = geohash.decode_exactly(code)
    return lat - lat_delta, lon - lon_delta, lat + lat_delta, lon + lon_delta


# Print the bounding box of all cells in ``a``: north, east, south, west.
# print() is used with a single argument so the script runs on Python 2 and 3.
y1 = max(a, key=lambda x: _cell_edges(x)[2])
print(_cell_edges(y1)[2])
y1 = max(a, key=lambda x: _cell_edges(x)[3])
print(_cell_edges(y1)[3])
y1 = min(a, key=lambda x: _cell_edges(x)[0])
print(_cell_edges(y1)[0])
y1 = min(a, key=lambda x: _cell_edges(x)[1])
# The western edge is centre minus delta; the original printed centre plus
# delta here, which did not match the key used to select the cell.
print(_cell_edges(y1)[1])