import datetime
import os
import time

import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as Data
from sklearn.metrics import mean_absolute_error
from torch_geometric.data import Data as geoData

# Assumed to be provided elsewhere in this repo: Model, read_graph_csv,
# writer (a TensorBoard SummaryWriter), and the hyperparameters batch_size,
# LEARNING_RATE, WEIGHT_DECAY, epochs, ALL_DATA, DEBUG.


def main():
    dataset_path = './dataset/h_train'
    val = [24, 25]  # days 24 and 25 are held out for validation
    datas = []
    noises = [1, 5, 6, 12, 13, 19, 20]  # noisy days, excluded from training
    for i in range(25):
        fpath = os.path.join(dataset_path, '{}.npy'.format(i + 1))
        data = np.load(fpath)  # 144 * 81 * 2
        data = torch.from_numpy(data)
        data = torch.einsum("ijk->jik", data)  # 81 * 144 * 2
        # Wasteful of memory, but memory is cheap anyway: append each
        # station's id as a third channel.
        ids = torch.from_numpy(
            np.array([np.ones(144) * i for i in range(81)])).double()
        ids = ids.view(81, 144, 1)
        data = torch.cat([data, ids], dim=2)
        assert data.shape == (81, 144, 3)
        datas.append(data)

    # Build (day, next day) training pairs, skipping pairs touching a noisy day.
    X = []
    for i in range(24 - 1):
        if i + 1 in noises:
            continue
        if i + 2 in noises:
            continue
        a = datas[i]
        b = datas[i + 1]
        c = torch.cat((a, b), dim=2)
        X.append(c)

    graph = read_graph_csv()
    rows, cols = graph.nonzero()
    rows = rows.reshape(-1)
    cols = cols.reshape(-1)
    # edge_index holds node indices, so it must be torch.long
    # (the original used dtype=torch.float, which PyTorch Geometric rejects).
    edge_index = torch.tensor([rows, cols], dtype=torch.long).cuda()
    edge_weight = torch.from_numpy(graph[rows, cols]).float().cuda()
    x = torch.tensor([i for i in range(81)], dtype=torch.float).cuda()
    graph_data = geoData(x, edge_index=edge_index, edge_attr=edge_weight)

    all_data = torch.cat(X, dim=0).float().cuda()
    a = datas[val[0] - 1]
    b = datas[val[1] - 1]
    val_data = torch.cat((a, b), dim=2).float().cuda()

    torch_dataset = Data.TensorDataset(all_data[:, :, :3], all_data[:, :, 3:5])
    loader = Data.DataLoader(dataset=torch_dataset, batch_size=batch_size,
                             shuffle=True, num_workers=0)

    model = Model(graph_data)
    model.cuda()
    criterion = nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE,
                                 weight_decay=WEIGHT_DECAY)

    for epoch in range(epochs):
        total_loss = []
        time1 = time.time()
        model.train()
        for step, (batch_x, batch_y) in enumerate(loader):
            optimizer.zero_grad()
            X = batch_x
            y = batch_y
            pred_y = model(X)
            loss = criterion(pred_y, y)
            total_loss.append(loss.data.cpu().numpy())
            loss.backward()
            optimizer.step()

        model.eval()
        val_X = val_data[:, :, :3]
        val_y = val_data[:, :, 3:5]
        pred_y = model(val_X)
        val_loss = criterion(pred_y, val_y)
        a = pred_y.data.cpu().numpy().reshape(1, -1)
        b = val_y.data.cpu().numpy().reshape(1, -1)
        val_loss_sklearn = mean_absolute_error(a, b)  # cross-check with sklearn
        print("Epoch", epoch, 'train loss:', np.mean(total_loss))
        print("Epoch", epoch, 'validation loss:', val_loss.data.cpu().numpy().mean())
        print(time.time() - time1)
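# The project-local read_graph_csv helper is not shown in this file. From its
# call sites (no-argument calls here, read_graph_csv('./maps/graph_floyd.csv')
# below) it loads an 81 x 81 station adjacency/weight matrix. A minimal
# stand-in could look like the sketch below; the default path and the plain
# comma-separated layout are assumptions, not the repo's actual format:
#
# def read_graph_csv(path='./maps/graph.csv'):
#     """Load an 81 x 81 adjacency/weight matrix as a numpy array."""
#     return np.loadtxt(path, delimiter=',')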

def main():  # NOTE: redefines the main() above; in Python the last definition wins
    dataset_path = './dataset/h_train'
    val = [24, 25]
    datas = []
    noises = [1, 5, 6, 12, 13, 19, 20]
    noises2 = [5, 6, 12, 13, 19, 20]  # unused leftover
    for i in range(25):
        fpath = os.path.join(dataset_path, '{}.npy'.format(i + 1))
        data = np.load(fpath)  # 144 * 81 * 2
        data = torch.from_numpy(data)
        data = torch.einsum("ijk->jik", data)  # 81 * 144 * 2
        # Wasteful of memory, but memory is cheap anyway.
        ids = torch.from_numpy(
            np.array([np.ones(144) * i for i in range(81)])).double()
        ids = ids.view(81, 144, 1)
        data = torch.cat([data, ids], dim=2)
        assert data.shape == (81, 144, 3)
        datas.append(data)

    # Use the per-station mean over clean days as the node attributes.
    avg = torch.zeros(81, 144, 2, dtype=torch.float)
    counts = 0
    for i in range(ALL_DATA - 1):
        if i + 1 in noises:
            continue
        item = datas[i]
        avg += item[:, :, 0:2].float()
        counts += 1
    avg /= counts
    print(counts)

    X = []
    for i in range(ALL_DATA - 1):
        if i + 1 in noises:
            continue
        if i + 2 in noises:
            continue
        a = datas[i]
        b = datas[i + 1]
        c = torch.cat((a, b), dim=2)
        X.append(c)
        # Extra augmentation: also pair day i with the day 8 days ahead.
        # for j in range(2, 8):
        j = 8
        if i + j < 23 and (i + j + 1) not in noises:
            a = datas[i]
            b = datas[i + j]
            c = torch.cat((a, b), dim=2)
            X.append(c)

    graph = read_graph_csv()
    # graph_floyd = read_graph_csv('./maps/graph_floyd.csv')
    graph_floyd = np.load('./hjj_maps/avg_25.npy')
    # Earlier experiments (kept for reference): use graph_floyd directly as the
    # adjacency, zero its diagonal, or row-normalise it and threshold at 0.01.
    rows, cols = graph.nonzero()
    rows = rows.reshape(-1)
    cols = cols.reshape(-1)
    # edge_index holds node indices, so torch.long (not the original torch.float).
    edge_index = torch.tensor([rows, cols], dtype=torch.long).cuda()
    edge_weight = torch.from_numpy(graph_floyd[rows, cols]).float().cuda()
    # The node features change in this version: per-station averages, not ids.
    # x = torch.tensor([i for i in range(81)], dtype=torch.float).cuda()
    x = avg.cuda()
    graph_data = geoData(x, edge_index=edge_index, edge_attr=edge_weight)

    all_data = torch.cat(X, dim=0).float().cuda()
    a = datas[val[0] - 1]
    b = datas[val[1] - 1]
    val_data = torch.cat((a, b), dim=2).float().cuda()

    torch_dataset = Data.TensorDataset(all_data[:, :, :3], all_data[:, :, 3:5])
    loader = Data.DataLoader(dataset=torch_dataset, batch_size=batch_size,
                             shuffle=True, num_workers=0)

    model = Model(graph_data)
    model.cuda()
    criterion = nn.L1Loss()
    # criterion = nn.MSELoss()  # alternative loss
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE,
                                 weight_decay=WEIGHT_DECAY)

    for epoch in range(epochs):
        total_loss = []
        time1 = time.time()
        model.train()
        for step, (batch_x, batch_y) in enumerate(loader):
            optimizer.zero_grad()
            X = batch_x
            y = batch_y
            pred_y = model(X)
            loss = criterion(pred_y, y)
            total_loss.append(loss.data.cpu().numpy())
            loss.backward()
            optimizer.step()

        model.eval()
        val_X = val_data[:, :, :3]
        val_y = val_data[:, :, 3:5]
        pred_y = model(val_X)
        val_loss = criterion(pred_y, val_y)
        a = pred_y.data.cpu().numpy().reshape(1, -1)
        b = val_y.data.cpu().numpy().reshape(1, -1)
        avg_c = avg.numpy().reshape(1, -1)
        val_loss_sklearn = mean_absolute_error(a, b)
        avg_loss = mean_absolute_error(avg_c, b)  # baseline: predict the average day
        train_loss = np.mean(total_loss)
        val_loss = val_loss.data.cpu().numpy().mean()
        print("Epoch", epoch, 'train loss:', train_loss)
        print("Epoch", epoch, 'validation loss:', val_loss_sklearn)
        print("Epoch", epoch, 'avg loss:', np.mean(avg_loss))
        print(time.time() - time1)
        writer.add_scalars("scalar/loss", {'train_loss': train_loss}, epoch)
        writer.add_scalars("scalar/loss", {'val_loss': val_loss}, epoch)
        writer.add_scalars("scalar/loss", {'avg_loss': avg_loss}, epoch)
        # Constant reference lines, handy for eyeballing progress in TensorBoard.
        writer.add_scalars("scalar/loss", {'13.5': 13.5}, epoch)
        writer.add_scalars("scalar/loss", {'13.3': 13.3}, epoch)
        writer.add_scalars("scalar/loss", {'13.1': 13.1}, epoch)
        writer.add_scalars("scalar/loss", {'12': 12}, epoch)
        writer.add_scalars("scalar/loss", {'11': 11}, epoch)
        writer.add_scalars("scalar/loss", {'10': 10}, epoch)

        # Periodically dump test-set predictions for day 28 to a submission CSV.
        if epoch % 20 == 0 and not DEBUG:
            fpath = os.path.join(dataset_path, '28.npy')
            test_data = np.load(fpath)
            test_data = torch.from_numpy(test_data)
            test_data = torch.einsum("ijk->jik", test_data)  # 81 * 144 * 2
            ids = torch.from_numpy(
                np.array([np.ones(144) * i for i in range(81)])).double()
            ids = ids.view(81, 144, 1)
            test_data = torch.cat([test_data, ids], dim=2).float().cuda()
            res = model(test_data)
            res = torch.einsum('ijk->jik', res)  # back to 144 * 81 * 2
            res = res.data.cpu().numpy()

            def time2str(id, date):
                # Map a 10-minute slot index to its [start, end) timestamps.
                dt = datetime.datetime.strptime(date, "%Y-%m-%d")
                t1 = time.mktime(dt.timetuple()) + int(id) * 10 * 60
                t2 = t1 + 10 * 60
                t1_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t1))
                t2_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t2))
                return t1_str, t2_str

            date = '2019-01-29'
            with open('./results/hjj_gcn_lstm_crazy/{}-{}.csv'.format(
                    date, epoch), 'w') as f:
                title = 'stationID,startTime,endTime,inNums,outNums'
                print(title, file=f)
                x, y, z = res.shape
                print(res[0][0])  # quick sanity check
                for j in range(y):
                    for i in range(x):
                        t1, t2 = time2str(i, date)
                        out_num, in_num = res[i][j]  # channel 0 = out, channel 1 = in
                        print(j, t1, t2, in_num, out_num, sep=',', file=f)
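# The real `Model` is defined elsewhere in this repo; the loops above only
# assume it maps a (stations, 144, 3) tensor to (stations, 144, 2) and takes
# the fixed graph at construction time. Purely as an illustration of that
# interface, here is a minimal GCN + LSTM sketch. The layer sizes, the
# GCNConv/LSTM combination, and the name ModelSketch are assumptions, not the
# repo's actual architecture.
from torch_geometric.nn import GCNConv


class ModelSketch(nn.Module):
    def __init__(self, graph_data, hidden=64):
        super().__init__()
        self.graph_data = graph_data
        # Node features may be plain ids (81,) or per-day averages
        # (81, 144, 2); flatten them to a 2-D (81, F) matrix for the GCN.
        feats = graph_data.x.reshape(graph_data.x.shape[0], -1).float()
        self.register_buffer('node_feats', feats)
        self.gcn = GCNConv(feats.shape[1], hidden)
        self.lstm = nn.LSTM(3 + hidden, hidden, batch_first=True)
        self.head = nn.Linear(hidden, 2)  # per-slot (in, out) prediction

    def forward(self, X):
        # X: (stations, 144, 3); channel 2 holds the station id.
        g = self.graph_data
        node_emb = self.gcn(self.node_feats, g.edge_index, g.edge_attr)  # (81, hidden)
        sid = X[:, 0, 2].long()  # station id of each row in the batch
        emb = node_emb[sid].unsqueeze(1).expand(-1, X.size(1), -1)
        out, _ = self.lstm(torch.cat([X, emb], dim=2))
        return self.head(out)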

def main():  # NOTE: third variant; this is the definition that survives
    dataset_path = './dataset/h_train'
    val = [19, 20]
    datas = []
    noises = [1, 5, 6, 12, 13, 19, 20, 26]
    for i in range(26):
        fpath = os.path.join(dataset_path, '{}.npy'.format(i + 1))
        data = np.load(fpath)  # 144 * 81 * 2
        data = torch.from_numpy(data)
        data = torch.einsum("ijk->jik", data)  # 81 * 144 * 2
        # Wasteful of memory, but memory is cheap anyway.
        ids = torch.from_numpy(
            np.array([np.ones(144) * i for i in range(81)])).double()
        ids = ids.view(81, 144, 1)
        data = torch.cat([data, ids], dim=2)
        assert data.shape == (81, 144, 3)
        datas.append(data)

    # Node attributes: the mean over all 26 days plus test day 28
    # (the noise filter is deliberately disabled here).
    avg = torch.zeros(81, 144, 2, dtype=torch.float)
    counts = 0
    for i in range(26):
        # if i + 1 in noises:
        #     continue
        item = datas[i]
        avg += item[:, :, 0:2].float()
        counts += 1
    day_28 = np.load(os.path.join(dataset_path, '28.npy'))
    day_28 = torch.from_numpy(day_28).float()
    day_28 = torch.einsum("ijk->jik", day_28)
    avg += day_28
    counts += 1
    avg /= counts
    print(counts)

    # noises holds day-of-month values, so the tests below use i + 1 / i + 2.
    X = []
    for i in range(25):
        if i + 1 in noises:
            # If the next day is noisy too, keep the noisy -> noisy pair.
            if i + 2 in noises:
                print(i, i + 1)
                a = datas[i]
                b = datas[i + 1]
                c = torch.cat((a, b), dim=2)
                X.append(c)
            continue
        if i + 2 in noises:
            continue
        more = 1
        # if i > 20:
        #     more = 2
        for _ in range(more):
            a = datas[i]
            b = datas[i + 1]
            c = torch.cat((a, b), dim=2)
            X.append(c)
        # The day + 8 augmentation from the previous version is disabled here:
        # j = 8
        # if i + j < 23 and (i + j + 1) not in noises:
        #     X.append(torch.cat((datas[i], datas[i + j]), dim=2))

    graph = read_graph_csv()
    # Leftover loads; graph_floyd is not used in this version.
    graph_floyd = read_graph_csv('./maps/graph_floyd.csv')
    graph_floyd = np.load('./hjj_maps/avg_25.npy')
    rows, cols = graph.nonzero()
    rows = rows.reshape(-1)
    cols = cols.reshape(-1)
    # edge_index holds node indices, so torch.long (not the original torch.float).
    edge_index = torch.tensor([rows, cols], dtype=torch.long).cuda()
    # Node features are the averages computed above.
    x = avg.cuda()
    graph_data = geoData(x, edge_index=edge_index)

    all_data = torch.cat(X, dim=0).float().cuda()
    a = datas[val[0] - 1]
    b = datas[val[1] - 1]
    val_data = torch.cat((a, b), dim=2).float().cuda()

    model = Model(graph_data, graph)
    print(model.eval())  # print the model structure
    model.cuda()
    criterion = nn.L1Loss()
    criterion2 = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE,
                                 weight_decay=WEIGHT_DECAY)

    assert all_data.shape[0] % 81 == 0, '81 stations per day'
    data_number = all_data.shape[0] // 81
    print(data_number)

    for epoch in range(epochs):
        total_loss = []
        time1 = time.time()
        # Manual batching: shuffle whole days, one day (81 stations) per step.
        ids = np.arange(data_number)
        np.random.shuffle(ids)
        model.train()
        for i in ids:
            optimizer.zero_grad()
            batch_x = all_data[i * 81:(i + 1) * 81, :, :3]
            batch_y = all_data[i * 81:(i + 1) * 81, :, 3:]
            X = batch_x
            y = batch_y[:, :, :2]
            assert batch_x[-1, 1, 2] == 80
            assert batch_y[-1, 1, 2] == 80
            pred_y = model(X)
            # Log the L1 loss but backprop through the MSE loss.
            loss = criterion(pred_y, y)
            loss1 = criterion2(pred_y, y)
            total_loss.append(loss.data.cpu().numpy())
            loss1.backward()
            optimizer.step()

        model.eval()
        val_X = val_data[:, :, :3]
        val_y = val_data[:, :, 3:5]
        pred_y = model(val_X)
        val_loss = criterion(pred_y, val_y)
        a = pred_y.data.cpu().numpy().reshape(1, -1)
        b = val_y.data.cpu().numpy().reshape(1, -1)
        avg_c = avg.numpy().reshape(1, -1)
        val_loss_sklearn = mean_absolute_error(a, b)
        avg_loss = mean_absolute_error(avg_c, b)  # baseline: predict the average day
        train_loss = np.mean(total_loss)
        val_loss = val_loss.data.cpu().numpy().mean()
        print("Epoch", epoch, 'train loss:', train_loss)
        print("Epoch", epoch, 'validation loss:', val_loss)
        print("Epoch", epoch, 'avg loss:', np.mean(avg_loss))
        print(time.time() - time1)
        writer.add_scalars("scalar/loss", {'train_loss': train_loss}, epoch)
        writer.add_scalars("scalar/loss", {'val_loss': val_loss}, epoch)
        writer.add_scalars("scalar/loss", {'avg_loss': avg_loss}, epoch)
        # Constant reference lines for eyeballing progress in TensorBoard.
        writer.add_scalars("scalar/loss", {'13.5': 13.5}, epoch)
        writer.add_scalars("scalar/loss", {'13.3': 13.3}, epoch)
        writer.add_scalars("scalar/loss", {'13.1': 13.1}, epoch)

        # Periodically dump predictions for day 26 to a submission CSV.
        if epoch % 100 == 0 and not DEBUG:
            fpath = os.path.join(dataset_path, '26.npy')
            test_data = np.load(fpath)
            test_data = torch.from_numpy(test_data)
            test_data = torch.einsum("ijk->jik", test_data)  # 81 * 144 * 2
            ids = torch.from_numpy(
                np.array([np.ones(144) * i for i in range(81)])).double()
            ids = ids.view(81, 144, 1)
            test_data = torch.cat([test_data, ids], dim=2).float().cuda()
            res = model(test_data)
            res = torch.einsum('ijk->jik', res)  # back to 144 * 81 * 2
            res = res.data.cpu().numpy()

            def time2str(id, date):
                # Map a 10-minute slot index to its [start, end) timestamps.
                dt = datetime.datetime.strptime(date, "%Y-%m-%d")
                t1 = time.mktime(dt.timetuple()) + int(id) * 10 * 60
                t2 = t1 + 10 * 60
                t1_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t1))
                t2_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t2))
                return t1_str, t2_str

            date = '2019-01-27'
            # basename() keeps this safe when __file__ carries a directory path.
            filename = os.path.basename(__file__).replace('.py', '')
            if not os.path.exists('./results/hjj_map_all'):
                os.mkdir('./results/hjj_map_all')
            with open('./results/hjj_map_all/{}-{}.csv'.format(
                    filename, epoch), 'w') as f:
                title = 'stationID,startTime,endTime,inNums,outNums'
                print(title, file=f)
                x, y, z = res.shape
                print(res[0][0])  # quick sanity check
                for j in range(y):
                    for i in range(x):
                        t1, t2 = time2str(i, date)
                        out_num, in_num = res[i][j]  # channel 0 = out, channel 1 = in
                        print(j, t1, t2, in_num, out_num, sep=',', file=f)
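# All three training variants above share the name `main`, so at import time
# only the last definition is bound; the earlier two remain for reference. A
# minimal entry point (an assumption -- the repo may launch this differently):
if __name__ == '__main__':
    main()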