コード例 #1
0
ファイル: other.py プロジェクト: lsp140510/Mobike
def get_latlon(result, end=True):
    """Add decoded coordinate columns to ``result`` in place and return it.

    The start geohash column is always decoded into ``sloc_lat`` and
    ``sloc_lon``.  When ``end`` is true the end geohash column is decoded
    into ``eloc_lat`` / ``eloc_lon`` as well, and the end-minus-start
    differences are stored in ``eloc_sloc_lat_sub`` / ``eloc_sloc_lon_sub``.

    Args:
        result: DataFrame with a ``geohashed_start_loc`` column (and a
            ``geohashed_end_loc`` column when ``end`` is true).
        end: Whether the end location should be decoded too.

    Returns:
        The same ``result`` object, with the new columns added.
    """
    def _decode(code):
        return geohash.decode_exactly(code)

    def _first(decoded):
        return float(decoded[0])

    def _second(decoded):
        return float(decoded[1])

    if end:
        end_points = result['geohashed_end_loc'].apply(_decode)
        result['eloc_lat'] = end_points.apply(_first)
        result['eloc_lon'] = end_points.apply(_second)

    start_points = result['geohashed_start_loc'].apply(_decode)
    result['sloc_lat'] = start_points.apply(_first)
    result['sloc_lon'] = start_points.apply(_second)

    if not end:
        return result

    # Offsets between the end point and the start point.
    result['eloc_sloc_lat_sub'] = result['eloc_lat'] - result['sloc_lat']
    result['eloc_sloc_lon_sub'] = result['eloc_lon'] - result['sloc_lon']
    return result
コード例 #2
0
def get_latlon(result):
    """Add decoded start/end coordinate columns to ``result`` in place.

    Both geohash columns are decoded with ``Geohash.decode_exactly``; the
    first two elements of each decoded value are stored as floats in
    ``eloc_lat`` / ``eloc_lon`` and ``sloc_lat`` / ``sloc_lon``.  The
    end-minus-start differences go into ``eloc_sloc_lat_sub`` and
    ``eloc_sloc_lon_sub``.

    Args:
        result: DataFrame with ``geohashed_start_loc`` and
            ``geohashed_end_loc`` columns.

    Returns:
        The same ``result`` object, with the new columns added.
    """
    def _decode(code):
        return Geohash.decode_exactly(code)

    def _first(decoded):
        return float(decoded[0])

    def _second(decoded):
        return float(decoded[1])

    end_points = result['geohashed_end_loc'].apply(_decode)
    result['eloc_lat'] = end_points.apply(_first)
    result['eloc_lon'] = end_points.apply(_second)

    start_points = result['geohashed_start_loc'].apply(_decode)
    result['sloc_lat'] = start_points.apply(_first)
    result['sloc_lon'] = start_points.apply(_second)

    lat_offset = result['eloc_lat'] - result['sloc_lat']
    result['eloc_sloc_lat_sub'] = lat_offset
    lon_offset = result['eloc_lon'] - result['sloc_lon']
    result['eloc_sloc_lon_sub'] = lon_offset
    return result
コード例 #3
0
ファイル: other.py プロジェクト: zhilangtaosha/Mobike-1
def get_distance(result):
    """Add ``distance`` and ``manhattan`` columns to ``result`` in place.

    Every distinct geohash found in the start/end columns is decoded once
    and cached; each row is then scored with ``cal_distance`` and
    ``manhattan`` on the decoded start and end coordinates.  Rows where
    either geohash is missing get NaN in both output columns.

    Args:
        result: DataFrame with ``geohashed_start_loc`` and
            ``geohashed_end_loc`` columns.

    Returns:
        The same ``result`` object, with the two columns added.
    """
    def _is_missing(value):
        # NaN is the only value that differs from itself.  Testing
        # ``value is np.nan`` only recognises the np.nan singleton and lets
        # every other NaN object (e.g. values read out of a float column)
        # fall through to the decoder / dictionary lookup.
        return value is None or value != value

    unique_locs = (
        set(result['geohashed_start_loc']) | set(result['geohashed_end_loc']))

    # Decode each distinct geohash once; only the first two decoded fields
    # are needed, converted to float up front.
    loc_dict = {}
    for loc in unique_locs:
        if _is_missing(loc):
            continue
        lat, lon = geohash.decode_exactly(loc)[:2]
        loc_dict[loc] = (float(lat), float(lon))

    distance = []
    manhattan_distance = []
    pairs = result[['geohashed_start_loc', 'geohashed_end_loc']].values
    for start, end in pairs:
        if _is_missing(start) or _is_missing(end):
            distance.append(np.nan)
            manhattan_distance.append(np.nan)
            continue
        lat1, lon1 = loc_dict[start]
        lat2, lon2 = loc_dict[end]
        distance.append(cal_distance(lat1, lon1, lat2, lon2))
        manhattan_distance.append(manhattan(lat1, lon1, lat2, lon2))

    result.loc[:, 'distance'] = distance
    result.loc[:, 'manhattan'] = manhattan_distance
    return result
コード例 #4
0
ファイル: bulk_event.py プロジェクト: bkj/jinsta
def indexEvents(x):
    """Index one Elasticsearch document per event of a geohash cell.

    Args:
        x: Mapping with a ``'geohash'`` string and a ``'data'`` iterable of
            event mappings.  Each event must provide ``'count'``,
            ``'event'``, ``'likes'`` and a ``'tags'`` mapping of tag name to
            count.

    Each event is printed and then sent to the
    ``instagram_events_j_final`` index through the module-level ``es``
    client.  Nothing is returned.
    """
    geoh = x['geohash']
    # Decode once instead of once per coordinate.
    decoded = Geohash.decode_exactly(geoh)
    lat = decoded[0]
    lon = decoded[1]
    for e in x['data']:
        rec = {
            'geoloc': {'lat': lat, 'lon': lon},
            'geohash': geoh,
            'tags': [],
            'count': e['count'],
            'datetime': e['event'],
            'images': e['likes'],
        }
        # Single-argument call form prints the same on Python 2 and 3.
        print(e)
        for tag, tag_count in e['tags'].items():
            rec['tags'].append({"name": tag, "count": tag_count})
        # One document per event.  The original line was tab-indented among
        # space-indented lines, which only placed it inside this loop under
        # Python 2's tab rules and is a TabError on Python 3.
        es.index(index='instagram_events_j_final', doc_type='dc', body=rec)
コード例 #5
0
 def hotspots(self, family, name):
     """Return decoded positions for the first three hotspot buckets.

     Builds a search on index ``family`` using ``self.client``, optionally
     restricted to documents whose ``name`` term equals ``name``, and to
     documents with ``timestamp`` >= ``self.lookback``.  A ``geohash_grid``
     aggregation (precision 7) over the ``location`` field is attached under
     the name ``hotspot``.

     Returns:
         A list with at most three entries; each is the first two elements
         of ``Geohash.decode_exactly`` applied to a bucket key.
     """
     search = Search(using=self.client, index=family)
     if name:
         search = search.filter('term', name=name)
     search = search.filter('range', timestamp={'gte': self.lookback})
     search.aggs.bucket('hotspot', 'geohash_grid', field='location', precision=7)

     response = search[0].execute()
     top_buckets = response.aggregations['hotspot']['buckets'][:3]

     positions = []
     for bucket in top_buckets:
         positions.append(Geohash.decode_exactly(bucket['key'])[:2])
     return positions
コード例 #6
0
	def geogrid(self, query='*', region='*', min_published_on=None, max_published_on=None, weight=False):
		"""Run a geohash-grid aggregation and annotate buckets with lat/lon.

		The payload requests up to 1000 hits, a ``query_string`` match over
		``title`` and ``description``, a ``published_on`` range filter and a
		``geohash_grid`` aggregation (precision 6) on the ``geo`` field.

		Args:
			query: Passed to ``self._add_theme``.  The ``query_string``
				clause itself always uses the literal ``"*"``.
			region: Passed to ``self._add_region``.
			min_published_on: Lower bound of the date range; falls back to
				``self._default_min_published_on`` when falsy.
			max_published_on: Upper bound of the date range; falls back to
				``self._default_max_published_on`` when falsy.
			weight: When true, a ``sum`` sub-aggregation over ``geoweight``
				is added to each grid bucket.

		Returns:
			The response of ``self.request(payload)``, with ``lat`` and
			``lon`` keys added to every ``geogrid`` bucket.
		"""
		text_clause = {
			"query_string": {
				"fields": ["title", "description"],
				"query": "*",
				"analyze_wildcard": True
			}
		}
		published_range = {
			"range": {
				"published_on": {
					"format": "epoch_millis",
					"gte": min_published_on or self._default_min_published_on,
					"lte": max_published_on or self._default_max_published_on
				}
			}
		}
		grid_aggregation = {
			"geohash_grid": {
				"field": "geo",
				"precision": 6
			}
		}
		if weight:
			grid_aggregation['aggs'] = {
				"weight": {
					"sum": {
						"field": "geoweight"
					}
				}
			}

		payload = {
			"size": 1000,
			"query": {
				"bool": {
					"must": text_clause,
					"filter": {
						"bool": {
							"must": [published_range]
						}
					}
				}
			},
			"aggs": {
				"geogrid": grid_aggregation
			}
		}

		self._add_theme(payload, query)
		self._add_region(payload, region)
		response = self.request(payload)

		for bucket in response['aggregations']['geogrid']['buckets']:
			lat, lon, _, _ = Geohash.decode_exactly(bucket['key'])
			bucket['lat'] = lat
			bucket['lon'] = lon
		return response
コード例 #7
0
def get_geohash():
    """Print statistics about the decoded geohashes of the train/test data.

    Reads ``train.csv`` and ``test.csv``, gathers every start/end geohash,
    decodes the distinct ones with ``gh.decode_exactly`` and prints the
    sorted first and second decoded components, the differences between
    neighbouring sorted values, and the mean of those differences.
    Returns nothing.
    """
    train_frame = pd.read_csv("train.csv")
    test_frame = pd.read_csv("test.csv")

    all_hashes = (
        list(train_frame["geohashed_start_loc"])
        + list(train_frame["geohashed_end_loc"])
        + list(test_frame["geohashed_start_loc"])
    )
    print(len(all_hashes))

    unique_hashes = list(set(all_hashes))
    print("hash:", len(unique_hashes))

    # Problem encountered here: a single lat/lon pair turned out to map to
    # several geohash values.  Keyed by lat/lon the dictionary had 5890
    # entries, while keyed by geohash it had roughly 110,000.
    # The cause is that the decoded precision was not high enough, i.e. the
    # higher-precision decode_exactly should have been used from the very
    # start of the data processing.  It is not necessary to change that now,
    # but it would be needed to improve precision further.
    # TODO: reprocess the dataset with gh.decode_exactly for higher
    #       precision; this does not work for the raw data because it is
    #       used for Bayesian classification afterwards.
    # TODO: so the recommended approach is to convert only start loc and
    #       end loc to the higher-precision values for training.
    decoded = [gh.decode_exactly(code) for code in unique_hashes]
    x = sorted(point[0] for point in decoded)
    y = sorted(point[1] for point in decoded)

    print(x[0:100])
    print(y[0:100])

    # Subtract neighbouring high-precision values to find the smallest
    # spacing; codes are later looked up within that spacing around a
    # predicted value.
    between_x = [current - following for current, following in zip(x, x[1:])]
    between_y = [current - following for current, following in zip(y, y[1:])]
    print(between_x[0:100])
    print(between_y[0:100])
    print(np.mean(np.array(between_x)))
    print(np.mean(np.array(between_y)))
コード例 #8
0
def get_loc_dict():
    """Return a mapping from geohash to its first two decoded components.

    The mapping is cached as a pickle at ``cache_path + 'loc_dict.pkl'``.
    When that file exists it is loaded and returned; otherwise the mapping
    is built from every start/end geohash in the train CSV and every start
    geohash in the test CSV, written to the cache and returned.

    Returns:
        dict mapping each geohash to ``Geohash.decode_exactly(geohash)[:2]``.
    """
    dump_path = cache_path + 'loc_dict.pkl'
    if os.path.exists(dump_path):
        # NOTE: unpickling runs arbitrary code from the file; only point
        # cache_path at a location this program itself writes to.
        # Read-only mode and a context manager: the handle is closed
        # deterministically and no write permission is needed for reading.
        with open(dump_path, 'rb') as cache_file:
            return pickle.load(cache_file)

    train = pd.read_csv(train_path)
    test = pd.read_csv(test_path)
    locs = (
        set(train['geohashed_start_loc'])
        | set(train['geohashed_end_loc'])
        | set(test['geohashed_start_loc'])
    )
    loc_dict = {loc: Geohash.decode_exactly(loc)[:2] for loc in locs}

    # Closing the file guarantees the cache is flushed to disk before return.
    with open(dump_path, 'wb') as cache_file:
        pickle.dump(loc_dict, cache_file)
    return loc_dict
コード例 #9
0
def get_hash(filename, col_name):
    """Decode every geohash in one CSV column.

    Args:
        filename: Path of the CSV file to read.
        col_name: Name of the column holding the geohash strings.

    Returns:
        Two lists ``(x, y)`` holding the first and second element of
        ``gh.decode_exactly`` for each row, in row order.  Both lists are
        also printed.
    """
    frame = pd.read_csv(filename)
    codes = frame[col_name]

    x, y = [], []
    # tqdm.trange provides the row labels and shows progress while decoding.
    for row in tqdm.trange(len(codes)):
        decoded = gh.decode_exactly(codes[row])
        x.append(decoded[0])
        y.append(decoded[1])

    print(x, y)
    return x, y
コード例 #10
0
ファイル: other.py プロジェクト: lsp140510/Mobike
def get_distance(result):
    """Add ``distance`` and ``manhattan`` columns to ``result`` in place.

    Every distinct geohash found in the start/end columns is decoded once
    and cached; each row is then scored with ``cal_distance`` and
    ``manhattan`` on the decoded start and end coordinates.  Rows where
    either geohash is missing get NaN in both output columns.

    Args:
        result: DataFrame with ``geohashed_start_loc`` and
            ``geohashed_end_loc`` columns.

    Returns:
        The same ``result`` object, with the two columns added.
    """
    def _is_missing(value):
        # NaN is the only value that differs from itself.  Testing
        # ``value is np.nan`` only recognises the np.nan singleton and lets
        # every other NaN object (e.g. values read out of a float column)
        # fall through to the decoder / dictionary lookup.
        return value is None or value != value

    unique_locs = set(result['geohashed_start_loc']) | set(result['geohashed_end_loc'])

    # Decode each distinct geohash once; only the first two decoded fields
    # are needed, converted to float up front.
    loc_dict = {}
    for loc in unique_locs:
        if _is_missing(loc):
            continue
        lat, lon = geohash.decode_exactly(loc)[:2]
        loc_dict[loc] = (float(lat), float(lon))

    distance = []
    manhattan_distance = []
    for start, end in result[['geohashed_start_loc', 'geohashed_end_loc']].values:
        if _is_missing(start) or _is_missing(end):
            distance.append(np.nan)
            manhattan_distance.append(np.nan)
            continue
        lat1, lon1 = loc_dict[start]
        lat2, lon2 = loc_dict[end]
        distance.append(cal_distance(lat1, lon1, lat2, lon2))
        manhattan_distance.append(manhattan(lat1, lon1, lat2, lon2))

    result.loc[:, 'distance'] = distance
    result.loc[:, 'manhattan'] = manhattan_distance
    return result
コード例 #11
0
ファイル: latlon.py プロジェクト: lsp140510/Mobike
def get_eloc_latlon(result):
    """Add ``eloc_lat`` / ``eloc_lon`` columns decoded from the end geohash.

    Args:
        result: DataFrame with a ``geohashed_end_loc`` column.

    Returns:
        The same ``result`` object, with the two float columns added.
    """
    def _leading_pair(code):
        # Only the first two decoded fields are used.
        return geohash.decode_exactly(code)[:2]

    end_points = result['geohashed_end_loc'].apply(_leading_pair)
    result['eloc_lat'] = end_points.apply(lambda pair: float(pair[0]))
    result['eloc_lon'] = end_points.apply(lambda pair: float(pair[1]))
    return result
コード例 #12
0
ファイル: 0.244规则.py プロジェクト: fangcaotank/mobike
def get_x(pos):
    """Return the first of the four values decoded from geohash ``pos``.

    ``pos`` is converted with ``str()`` before being passed to
    ``geo.decode_exactly``.
    """
    first, _second, _third, _fourth = geo.decode_exactly(str(pos))
    return first
コード例 #13
0
# First, convert the date/time column
import datetime

# Replace the column by plain assignment instead of ``train.loc[:, ...]``.
# Since pandas 2.0 a ``.loc[:, col] = values`` assignment writes into the
# existing column in place, so a string (object) column would stay object
# dtype and the ``.dt`` accessors below would fail.
train['starttime'] = pd.to_datetime(train.starttime)
# Calendar features derived from the parsed start time.
train['weekday_time'] = train.starttime.dt.weekday
train['hour_time'] = train.starttime.dt.hour
train['minute_time'] = train.starttime.dt.minute

# In[25]:

# Decode the geohashes into latitude/longitude
import Geohash

# Target columns for the four values Geohash.decode_exactly yields per code.
start_columns = ['start_lat', 'start_lon', 'start_lat_exactly',
                 'start_lon_exactly']
end_columns = ['end_lat', 'end_lon', 'end_lat_exactly', 'end_lon_exactly']

start = list(train.geohashed_start_loc)
start_jw = [Geohash.decode_exactly(code) for code in start]
end = list(train.geohashed_end_loc)
end_jw = [Geohash.decode_exactly(code) for code in end]

# Build the helper frames on train's own index and with the same labels as
# the target columns.  The decoded rows are in train's row order, so this
# keeps the assignment correct even when train does not carry a default
# 0..n-1 index, and regardless of whether pandas aligns by label or position.
train[start_columns] = pd.DataFrame(start_jw,
                                    columns=start_columns,
                                    index=train.index)
train[end_columns] = pd.DataFrame(end_jw,
                                  columns=end_columns,
                                  index=train.index)

# In[26]:
コード例 #14
0
import Geohash

# Encode a coordinate pair with precision 5 and show the resulting geohash.
geo_hash_str = Geohash.encode(39.92324, 116.3906, 5)
print(geo_hash_str)

# Geohash used for both decoding examples below.
sample_hash = 'wx4g0'

# Look up the corner: plain decode of the sample geohash.
corne_list = Geohash.decode(sample_hash)
print(corne_list)

# Exact point information for the same geohash.
exactly_point = Geohash.decode_exactly(sample_hash)
print(exactly_point)
コード例 #15
0
ファイル: latlon.py プロジェクト: zhilangtaosha/Mobike-1
def get_eloc_latlon(result):
    """Add ``eloc_lat`` / ``eloc_lon`` columns decoded from the end geohash.

    Args:
        result: DataFrame with a ``geohashed_end_loc`` column.

    Returns:
        The same ``result`` object, with the two float columns added.
    """
    def _leading_pair(code):
        # Only the first two decoded fields are used.
        return geohash.decode_exactly(code)[:2]

    end_points = result['geohashed_end_loc'].apply(_leading_pair)
    result['eloc_lat'] = end_points.apply(lambda pair: float(pair[0]))
    result['eloc_lon'] = end_points.apply(lambda pair: float(pair[1]))
    return result
コード例 #16
0
            }
        }
    }
}

# Run the aggregation request `body` against `fromIndex` and take the
# buckets of the "mygrid" aggregation from the response.
res = es.search(index=fromIndex,
                search_type="count",
                body=body,
                request_timeout=3600)
buckets = res["aggregations"]["mygrid"]["buckets"]

# prettyPrint(buckets)

# Starts empty; nothing in the visible lines appends to it.
bulkActions = []
for geobucket in buckets:
    # The bucket key is decoded as a geohash; geo[0] and geo[1] are
    # presumably latitude and longitude -- TODO confirm.
    geo = Geohash.decode_exactly(geobucket['key'])
    print geobucket['key'], geo[0], geo[1]
    # One document per bucket of the nested "ingridhist" aggregation.
    for bucket in geobucket['ingridhist']['buckets']:
        # `m` is not used in the visible lines.
        m = moment.unix(bucket['key']).date  ##.add(hours=4).date
        doc = {
            "@timestamp": bucket['key_as_string'],
            # Stored as [geo[1], geo[0]], i.e. swapped relative to the
            # decoded order.
            "startLocation": [geo[1], geo[0]]
        }
        # Copy the optional metric values when the bucket provides them.
        if ("the_count" in bucket):
            doc["the_count"] = bucket["the_count"]["value"]
        if ("prediction" in bucket):
            doc["prediction"] = bucket["prediction"]["value"]
        # surprise = 10 * (the_count - prediction) / prediction, floored at 0.
        # NOTE(review): raises ZeroDivisionError when prediction is 0.
        if ('prediction' in doc and 'the_count' in doc):
            doc['surprise'] = max(
                0, 10.0 * (doc["the_count"] - doc["prediction"]) /
                doc["prediction"])
コード例 #17
0
            }
          }
        }
      }
    }
  }
}

# Run the aggregation request `body` against `fromIndex` and take the
# buckets of the "mygrid" aggregation from the response.
res = es.search(index=fromIndex, search_type="count",body=body, request_timeout=3600)
buckets = res["aggregations"]["mygrid"]["buckets"]

# prettyPrint(buckets)

# Starts empty; nothing in the visible lines appends to it.
bulkActions = []
for geobucket in buckets:
	# The bucket key is decoded as a geohash; geo[0] and geo[1] are
	# presumably latitude and longitude -- TODO confirm.
	geo = Geohash.decode_exactly(geobucket['key'])
	print geobucket['key'], geo[0], geo[1]
	# One document per bucket of the nested "ingridhist" aggregation.
	for bucket in geobucket['ingridhist']['buckets']:
		# `m` is not used in the visible lines.
		m = moment.unix(bucket['key']).date ##.add(hours=4).date
		doc = {
			"@timestamp": bucket['key_as_string'],
			# Stored as [geo[1], geo[0]], i.e. swapped relative to the
			# decoded order.
			"startLocation": [geo[1], geo[0]]
		}
		# Copy the optional metric values when the bucket provides them.
		if("the_count" in bucket):
			doc["the_count"] = bucket["the_count"]["value"]
		if("prediction" in bucket):
			doc["prediction"] = bucket["prediction"]["value"]
		# surprise = 10 * (the_count - prediction) / prediction, floored at 0.
		# NOTE(review): raises ZeroDivisionError when prediction is 0.
		if('prediction' in doc and 'the_count' in doc):
			doc['surprise'] = max(0, 10.0 * (doc["the_count"] - doc["prediction"]) / doc["prediction"])
		action = {
			"_index": toIndex,
コード例 #18
0
ファイル: latlon.py プロジェクト: zhilangtaosha/Mobike-1
def get_sloc_latlon(result):
    """Add ``sloc_lat`` / ``sloc_lon`` columns decoded from the start geohash.

    Args:
        result: DataFrame with a ``geohashed_start_loc`` column.

    Returns:
        The same ``result`` object, with the two float columns added.
    """
    def _leading_pair(code):
        # Only the first two decoded fields are used.
        return geohash.decode_exactly(code)[:2]

    start_points = result['geohashed_start_loc'].apply(_leading_pair)
    result['sloc_lat'] = start_points.apply(lambda pair: float(pair[0]))
    result['sloc_lon'] = start_points.apply(lambda pair: float(pair[1]))
    return result
コード例 #19
0
ファイル: latlon.py プロジェクト: lsp140510/Mobike
def get_sloc_latlon(result):
    """Add ``sloc_lat`` / ``sloc_lon`` columns decoded from the start geohash.

    Args:
        result: DataFrame with a ``geohashed_start_loc`` column.

    Returns:
        The same ``result`` object, with the two float columns added.
    """
    def _leading_pair(code):
        # Only the first two decoded fields are used.
        return geohash.decode_exactly(code)[:2]

    start_points = result['geohashed_start_loc'].apply(_leading_pair)
    result['sloc_lat'] = start_points.apply(lambda pair: float(pair[0]))
    result['sloc_lon'] = start_points.apply(lambda pair: float(pair[1]))
    return result
コード例 #20
0
import Geohash
#works fine

#print(Geohash.encode(45.0602750,7.6548340))
# Decode a 12-character geohash and print the second and third fields of
# the decoded result.
# NOTE(review): indices 1 and 2 are printed, not 0 and 1 -- confirm this is
# the intended pair of values.
arr = Geohash.decode_exactly("u0j2q4yp4s1f")
second_field = arr[1]
third_field = arr[2]
print(second_field, " ", third_field)