def build_matrix_in_hour(hour):
    """Build a (num_locations, 7) matrix of values for one hour of the day.

    Reads rows for ``hour`` from the 'poi_model_result' collection, groups
    them by "lng,lat" position key, and lays the per-day values out along
    axis 1 (column index produced by ``to_loc(row['day'])``; missing days
    are filled with 0).

    Returns:
        (matrix, location): ``matrix`` is an np.ndarray of shape (n, 7);
        ``location`` maps each "lng,lat" key to its row index in ``matrix``.
    """
    dao = Dao()
    # "lng,lat" key -> {day slot -> value}
    # (fixed comment: the key is built lng-first, not (lat, lng))
    location_array = {}
    for row in dao.read_data_from_target_database('poi_model_result', {'hour': hour}):
        key = str(row['lng_gcj02']) + ',' + str(row['lat_gcj02'])
        loc = to_loc(row['day'])
        # setdefault replaces the verbose "if key in ... else ..." insertion
        location_array.setdefault(key, {})[loc] = row['value']
    print("load data finished!")

    location = {}
    matrix = np.zeros((len(location_array), 7))
    # enumerate replaces the hand-maintained counter named `id`,
    # which shadowed the builtin; dict insertion order is preserved.
    for row_idx, (key, cur_map) in enumerate(location_array.items()):
        # fill the 7 day slots, defaulting missing ones to 0
        matrix[row_idx] = [cur_map.get(i, 0) for i in range(7)]
        location[key] = row_idx
    dao.close()
    return matrix, location
def build_position_to_name(file_name=None):
    """Return a {"lng,lat": name} map, cached on disk with pickle.

    If the cache file exists it is loaded and returned directly; otherwise
    the map is rebuilt from the database and dumped to the cache file so
    the next call is fast.

    Args:
        file_name: path of the pickle cache; defaults to DUMP_FILE.

    Returns:
        dict mapping "lng,lat" position keys to POI names.
    """
    if file_name is None:
        file_name = DUMP_FILE
    print(os.path.abspath(file_name))
    if os.path.exists(file_name):
        with open(file_name, 'rb') as file:
            return pickle.load(file)
    print("we don't have dump data. build from script, please wait...")
    dao = Dao()
    position_name_map = {}
    count = 0
    for row in dao.read_data():
        key = str(row['lng_gcj02']) + ',' + str(row['lat_gcj02'])
        position_name_map[key] = row['name']
        count += 1
        if count % 1000 == 0:
            print(count)
    # BUG FIX: dump to the requested file_name, not unconditionally to
    # DUMP_FILE — otherwise a custom cache path was never written and the
    # map was rebuilt from the database on every call.
    with open(file_name, 'wb') as file:
        pickle.dump(position_name_map, file)
    dao.close()
    return position_name_map
def build_matrix_all_hours():
    """Build a (num_locations, 175) matrix covering 35 days x 5 hours.

    Groups every row by its "lng,lat" key; ``to_loc(month, day, hour)``
    gives the column index in [0, 35*5); slots with no data stay 0.

    Returns:
        (matrix, location): ``matrix`` is an np.ndarray of shape (n, 175);
        ``location`` is the list of "lng,lat" keys in matrix row order.
    """
    dao = Dao()
    # "lng,lat" key -> {time slot -> value}
    location_array = {}
    for row in dao.read_data():
        key = str(row['lng_gcj02']) + ',' + str(row['lat_gcj02'])
        loc = to_loc(row['month'], row['day'], row['hour'])
        location_array.setdefault(key, {})[loc] = row['value']
    print("load data finished!")
    num_slots = 35 * 5
    location = list(location_array)  # keys in insertion order
    matrix = np.zeros((len(location), num_slots))
    for i, key in enumerate(location):
        cur_map = location_array[key]
        # fill every slot, defaulting missing ones to 0
        matrix[i] = [cur_map.get(j, 0) for j in range(num_slots)]
    # BUG FIX: the Dao connection was never closed in this function,
    # unlike the sibling matrix builders — close it before returning.
    dao.close()
    return matrix, location
def find_top_position(top_count):
    """Copy rows for the ``top_count`` busiest positions into the view DB.

    First pass sums 'value' per "lng,lat" position over all source rows,
    then the top ``top_count`` positions by total are selected, and every
    matching row of INTEGRATION_DATABASE is copied into VIEW_DATABASE in
    batches of 100.
    """
    dao = Dao()
    # read all data to build the per-position people-count totals
    res_map = {}
    count = 0
    for row in dao.read_data():
        count += 1
        key = str(row['lng_gcj02']) + ',' + str(row['lat_gcj02'])
        res_map[key] = res_map.get(key, 0) + row['value']
        if count % 10000 == 0:
            print(count)
    # rank positions by total value (stable sort over insertion order,
    # same result as the old manual list build) and keep the top keys
    res_list = sorted(res_map.items(), key=lambda x: x[1], reverse=True)
    final_res = res_list[:top_count]
    print(final_res)
    final_set = {row[0] for row in final_res}
    print('write to view database')
    cache = []
    count = 0
    for row in dao.read_data_from_target_database(INTEGRATION_DATABASE):
        key = str(row['lng_gcj02']) + ',' + str(row['lat_gcj02'])
        if key in final_set:
            cache.append({
                'month': row['month'],
                'day': row['day'],
                'hour': row['hour'],
                'lng_gcj02': round(float(row['lng_gcj02']), 3),
                'lat_gcj02': round(float(row['lat_gcj02']), 3),
                'name': row['name'],
                'value': int(row['value'])
            })
            # flush to the view database in batches of 100
            if len(cache) == 100:
                count += 100
                dao.insert_many(VIEW_DATABASE, cache)
                cache.clear()
                if count % 10000 == 0:
                    print(count)
    if cache:
        dao.insert_many(VIEW_DATABASE, cache)
    dao.close()
def upload_data(matrix, map):
    """Upload correlation-model predictions into 'correlation_model_result'.

    Predictions are written for 2019-09-24..30 at hours 7/12/15/20/21,
    inserted in batches of 100; the target collection is cleared first.

    Args:
        matrix: per-hour sequence of 2-D arrays; ``matrix[i][r][k]`` is the
            value at day offset ``k`` for the position whose row index ``r``
            comes from ``map[i]``.
        map: per-hour sequence of {"lng,lat" key -> row index in matrix[i]}.
            NOTE: the parameter name shadows the builtin ``map``; kept
            unchanged for backward compatibility with existing callers.
    """
    DATABASE = 'correlation_model_result'
    dao = Dao()
    cache = []
    count = 0
    dao.clear_database(DATABASE)
    hour_list = [7, 12, 15, 20, 21]
    # loop-invariant prediction start date, hoisted out of the loops
    base_month = 9
    base_day = 24
    for i in range(len(matrix)):
        time_matrix = matrix[i]
        time_map = map[i]
        hour = hour_list[i]
        for j in time_map:
            lng, lat = str(j).split(',')
            for k in range(7):
                cache.append({
                    'year': 2019,
                    'month': int(base_month),
                    'day': int(base_day + k),
                    'hour': hour,
                    'lng_gcj02': round(float(lng), 3),
                    'lat_gcj02': round(float(lat), 3),
                    'value': int(time_matrix[time_map[j]][k])
                })
                if len(cache) == 100:
                    count += 100
                    # BUG FIX: was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit; catch Exception only,
                    # keeping the original dump-and-abort behaviour.
                    try:
                        dao.insert_many(DATABASE, cache)
                    except Exception:
                        print(cache)
                        exit(-1)
                    cache.clear()
                    if count % 1000 == 0:
                        print(count)
    if cache:
        dao.insert_many(DATABASE, cache)
    dao.close()
def model_integration(ratio=(0.333, 0.333, 0.333)):
    """Blend POI, ARIMA and correlation model outputs into one collection.

    Each source database contributes ``ratio[i]`` of its value per
    "lng,lat,day,hour" key; blended rows (month 9, day > 23) replace any
    previously integrated prediction rows in INTEGRATION_DATABASE.

    Args:
        ratio: 3-tuple of weights for (POI, ARIMA, correlation) models.
    """
    print('model integration')
    dao = Dao()
    # drop previously integrated prediction rows (September, day > 23)
    count = dao.clear_database(INTEGRATION_DATABASE, {
        'month': 9,
        'day': {'$gt': 23}
    })
    print('clear count: ', count)
    # BUG FIX: position_name_map is used below, but its construction was
    # commented out, raising NameError unless a module-level map happened
    # to exist. build_position_to_name() is cached on disk, so this is
    # cheap after the first run.
    position_name_map = build_position_to_name()
    res_map = {}
    # accumulate weighted values from each model's result database
    read_data_from_database(dao, ratio[0], res_map, POI_DATABASE)
    read_data_from_database(dao, ratio[1], res_map, ARIMA_DATABASE)
    read_data_from_database(dao, ratio[2], res_map, CORRELATION_DATABSE)
    # insert into database
    print('inserting into final database')
    cache = []
    count = 0
    for key in res_map:
        # key layout: "lng,lat,day,hour" — presumably produced by
        # read_data_from_database; confirm if that helper changes.
        col = key.split(',')
        cache.append({
            'month': 9,
            'day': int(float(col[2])),
            'hour': int(float(col[3])),
            'lng_gcj02': round(float(col[0]), 3),
            'lat_gcj02': round(float(col[1]), 3),
            'name': position_name_map[col[0] + ',' + col[1]],
            'value': int(res_map[key])
        })
        if len(cache) == 100:
            count += 100
            dao.insert_many(INTEGRATION_DATABASE, cache)
            cache.clear()
            if count % 10000 == 0:
                print(count)
    if cache:
        dao.insert_many(INTEGRATION_DATABASE, cache)
    dao.close()
def current_data_for_district_hour():
    """Aggregate current rows per (month, day, typecode, adname, hour)
    and write the summed totals into DISTRICT_DATABASE_HOUR in batches
    of 100."""
    dao = Dao()
    print('transfer current data into district database')
    # (month, day, typecode, adname, hour) -> aggregated record
    district_map = {}
    read_count = 0
    for row in dao.read_data():
        read_count += 1
        key = (row['month'], row['day'], row['typecode'],
               row['adname'], row['hour'])
        entry = district_map.get(key)
        if entry is None:
            district_map[key] = {
                'cityname': row['cityname'],
                'type': row['type'],
                'value': row['value']
            }
        else:
            entry['value'] += row['value']
        if read_count % 10000 == 0:
            print('read: ', read_count)
    batch = []
    written = 0
    for (month, day, typecode, adname, hour), agg in district_map.items():
        batch.append({
            'month': month,
            'day': day,
            'hour': hour,
            'cityname': agg['cityname'],
            'adname': adname,
            'type': agg['type'],
            'typecode': typecode,
            'value': agg['value']
        })
        if len(batch) == 100:
            written += 100
            dao.insert_many(DISTRICT_DATABASE_HOUR, batch)
            batch.clear()
            if written % 10000 == 0:
                print('write: ', written)
    if batch:
        dao.insert_many(DISTRICT_DATABASE_HOUR, batch)
    dao.close()
def build_data_poi_feature():
    """Extract (x, y) training data for the POI model.

    Each feature row is [weekday, hour, lng, lat, 6-digit typecode prefix]
    and the target is the row's 'value'. The arrays are also saved to
    ../data/poi_model/feature.npz under keys 'x' and 'y'.

    Returns:
        (x, y) as np.ndarray.
    """
    dao = Dao()
    features = []
    targets = []
    for count, row in enumerate(dao.read_data(), start=1):
        features.append([
            day_to_weekday(row['year'], row['month'], row['day']),
            row['hour'],
            row['lng_gcj02'],
            row['lat_gcj02'],
            int(row['typecode'][0:6]),
        ])
        targets.append(row['value'])
        if count % 10000 == 0:
            print(count)
    dao.close()
    np.savez('../data/poi_model/feature', x=features, y=targets)
    return np.array(features), np.array(targets)
def build_matrix_in_hour(hour):
    """Build a (num_locations, 35) day-series matrix for one hour of day.

    Rows are ordered by sorted "lng,lat" key; ``to_loc(month, day)`` gives
    the column index in [0, 35); slots with no data stay 0.

    Returns:
        (matrix, location): np.ndarray of shape (n, 35) and the sorted
        list of "lng,lat" keys, aligned with matrix rows.
    """
    dao = Dao()
    # "lng,lat" key -> {day slot -> value}
    per_location = {}
    for row in dao.read_data({'hour': hour}):
        key = str(row['lng_gcj02']) + ',' + str(row['lat_gcj02'])
        slot = to_loc(row['month'], row['day'])
        if key not in per_location:
            per_location[key] = {}
        per_location[key][slot] = row['value']
    print("load data finished!")
    # sort keys so rows come out in a deterministic order
    location = sorted(per_location)
    matrix = np.zeros((len(location), 35))
    for i, key in enumerate(location):
        series = per_location[key]
        # fill the 35 day slots, defaulting missing ones to 0
        matrix[i] = [series.get(slot, 0) for slot in range(35)]
    dao.close()
    return matrix, location
def build_predict_data():
    """Build the prediction feature grid for the POI model.

    Collects every distinct (lng, lat, 6-digit typecode prefix) triple seen
    in the data, then crosses each with the seven weekday codes and the
    five modelled hours. The grid is saved to ../data/poi_model/predict.npz
    under key 'x'.

    Returns:
        np.ndarray of rows [day, hour, lng_gcj02, lat_gcj02, typecode].
    """
    dao = Dao()
    distinct_points = set()
    seen = 0
    for row in dao.read_data():
        distinct_points.add(
            (row['lng_gcj02'], row['lat_gcj02'], int(row['typecode'][0:6])))
        seen += 1
        if seen % 10000 == 0:
            print(seen)
    # weekday codes in the order the model expects — presumably the output
    # range of day_to_weekday; confirm the ordering against that helper
    weekdays = (1, 2, 3, 4, 5, 6, 0)
    hours = [7, 12, 15, 20, 21]
    x = np.array([
        [day, hour, lng, lat, code]
        for (lng, lat, code) in distinct_points
        for day in weekdays
        for hour in hours
    ])
    np.savez('../data/poi_model/predict', x=x)
    return x
if __name__ == '__main__': print("reading data from training set...") if os.path.exists("../data/arima_model/matrix" + ".npz"): zip_file = np.load("../data/arima_model/matrix" + ".npz") matrix = zip_file['matrix'] location = zip_file['location'] else: print("No dump file! Reading from original file! Please wait... ") matrix, location = build_matrix() print("reading complete!") res = [] dao = Dao() dao.clear_database(DATABASE) count = 0 start_time = time.time() for i in range(matrix.shape[0]): lng_gcj02 = round(float(location[i].split(",")[0]), 3) lat_gcj02 = round(float(location[i].split(",")[1]), 3) try: predict_data = arima_predict(matrix[i], 2, 1, 5)[0] for j in range(forecast_step): res.append({ 'year': 2019, 'month': 9, 'day': 24 + j // 5, 'hour': hour_map[j % 5], 'lng_gcj02': lng_gcj02,
# Status codes returned by check_is_valid for a requested date.
INVALID = 0
CURRENT = 1
PREDICT = 2
# Aggregation granularity codes -- NOTE(review): names suggest per-hour /
# per-day / per-week aggregation; confirm against the aggregate callers.
HOUR = 0
DAY = 1
WEEK = 2
# Target collection names.
INTEGRATION_DATABASE = 'integrated_result'
DISTRICT_DATABASE = 'district_result'
DISTRICT_DATABASE_HOUR = 'district_result_hour'
# Pickle cache location for the position -> name map.
DUMP_DATA_FILE = './peoplePredict/model/data/dao_service/position_name_map.pkl'
# env: module-level DAO handle and the cached "lng,lat" -> name map.
dao = Dao()
position_name = build_position_to_name(DUMP_DATA_FILE)
# Hour-of-day -> index map -- NOTE(review): its use is not visible in this
# chunk; presumably the 1-based slot of each modelled hour. Verify.
pos_map = {7: 1, 12: 2, 15: 3, 20: 4, 21: 5}


# interface
def get_map_data(month, day, hour, aggregate):
    # Validate the requested date first and answer with an error response
    # when it is outside the served range.
    # NOTE(review): only the INVALID branch is visible in this chunk; the
    # success path presumably continues elsewhere -- confirm before relying
    # on the return value for valid dates.
    status = check_is_valid(month, day, hour)
    if status == INVALID:
        return build_error_resp(
            'Invalid date param. Month: ' + str(month) +
            ' day: ' + str(day) + ' hour: ' + str(hour))
def current_data_integration():
    """Copy every current row into INTEGRATION_DATABASE in batches of 100,
    clearing the target collection first."""
    dao = Dao()
    # start from an empty target collection
    dao.clear_database(INTEGRATION_DATABASE)
    print('transfer current data into final database')
    batch = []
    inserted = 0
    for row in dao.read_data():
        batch.append({
            'month': row['month'],
            'day': row['day'],
            'hour': row['hour'],
            'lng_gcj02': round(float(row['lng_gcj02']), 3),
            'lat_gcj02': round(float(row['lat_gcj02']), 3),
            'name': row['name'],
            'value': int(row['value'])
        })
        if len(batch) == 100:
            inserted += 100
            dao.insert_many(INTEGRATION_DATABASE, batch)
            batch.clear()
            if inserted % 10000 == 0:
                print(inserted)
    if batch:
        dao.insert_many(INTEGRATION_DATABASE, batch)
    dao.close()