# Imports and module-level globals assumed by the functions below.
# Repo-internal names (ct, TrafficDataset, Basic_Model, Integrated_Model,
# generate_samples, load_best_model, test_model, masked_mae_np,
# compute_mutual_information, compute_mi) come from this project; their
# import paths are project-specific and omitted here.
import os
import os.path as osp
import sys
import logging
from datetime import datetime

import numpy as np
import networkx as nx
import torch
import torch.optim as optim
import torch.nn.functional as func
from torch_geometric.loader import DataLoader  # data.batch is used below, so the PyG loader is assumed
from torch_geometric.utils import to_dense_batch
# import nni  # only needed when args.nni is set

pin_memory = True  # assumed module-level DataLoader settings
n_work = 4         # (values are placeholders)
result = {}        # per-year / per-horizon metrics, filled by test_model


def init(args):
    # Read the conf file and merge it into the argparse namespace.
    conf_path = args.conf  # os.path.join with a single argument is a no-op
    info = ct.load_json_file(conf_path)
    # note: ':' in this timestamp breaks Windows paths; the second variant
    # below uses '-' throughout
    info["time"] = datetime.now().strftime("%Y-%m-%d-%H:%M:%S.%f")
    path = osp.join(info["model_path"], info["logname"] + info["time"])
    ct.mkdirs(path)
    info["log_dir"] = path
    update(vars(args), info)
    del info
def init(args):
    # Read the conf file and initialize (second variant: stores the run
    # directory on the namespace as args.path).
    conf_path = args.conf
    info = ct.load_json_file(conf_path)
    info["time"] = datetime.now().strftime("%Y-%m-%d-%H-%M-%S.%f")
    update(vars(args), info)
    vars(args)["path"] = osp.join(args.model_path, args.logname + args.time)
    ct.mkdirs(args.path)
    del info
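# Both init variants lean on a small helper surface that is not shown here:
# ct.load_json_file / ct.mkdirs from the repo's common-tools module, and a
# free function update() that merges the JSON conf into vars(args). A minimal
# sketch consistent with how they are called (names and behavior are
# assumptions, not the repo's actual code; e.g. saved as common_tools.py and
# imported as `import common_tools as ct`):

import json

def load_json_file(path):
    # read a JSON conf file into a plain dict
    with open(path, "r") as f:
        return json.load(f)

def mkdirs(path):
    # create the directory tree, tolerating dirs that already exist
    os.makedirs(path, exist_ok=True)

def update(dst, src):
    # merge conf entries into vars(args) so they read as args attributes
    for key, value in src.items():
        dst[key] = value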
def main(args):
    # seed_set()
    logger = init_log(args)
    logger.info("params : %s", vars(args))
    ct.mkdirs(args.save_data_path)
    for year in range(args.begin_year, args.end_year + 1):
        vars(args)["year"] = year
        # `graph` was undefined in the original; load the per-year adjacency
        # (file layout assumed, mirroring the commented-out load in the second
        # main variant below)
        graph = nx.from_numpy_matrix(
            np.load(osp.join(args.graph_path, str(year) + "_adj.npz"))["x"])
        if args.data_process:
            inputs = generate_samples(
                osp.join(args.save_data_path, str(year)),
                np.load(osp.join(args.raw_data_path, str(year) + ".npz"))["x"],
                graph)
        else:
            inputs = np.load(osp.join(args.save_data_path, str(year) + ".npz"))
        train(inputs, args)
def init_log(args):
    log_dir, log_filename = args.log_dir, args.logname
    logger = logging.getLogger(__name__)
    ct.mkdirs(log_dir)
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler(os.path.join(log_dir, log_filename + ".log"))
    fh.setLevel(logging.INFO)
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(ch)
    logger.info("logger name:%s", os.path.join(log_dir, log_filename + ".log"))
    vars(args)["logger"] = logger
    return logger
def main(args):
    year = 2012
    logger = init_log(args)
    logger.info("params : %s", vars(args))
    ct.mkdirs(args.save_data_path)
    # graph = nx.from_numpy_matrix(np.load(osp.join(args.graph_path, str(year)+"_adj.npz"))["x"])
    # note: nx.from_numpy_matrix was removed in networkx 3.0 (use
    # nx.from_numpy_array there)
    graph = nx.from_numpy_matrix(np.load(args.graph_path))
    vars(args)["graph_size"] = graph.number_of_nodes()
    vars(args)["year"] = year
    # If data_process is set, preprocess the raw data; otherwise load the
    # cached samples directly.
    # inputs = generate_samples(31, osp.join(args.save_data_path, str(year)+'_30day'), np.load(osp.join(args.raw_data_path, str(year)+".npz"))["x"], graph, val_test_mix=True)
    if args.data_process:
        inputs = generate_samples(10, args.save_data_path,
                                  np.load(args.raw_data_path)["data"],
                                  graph, val_test_mix=True)
    else:
        inputs = np.load(args.save_data_path, allow_pickle=True)
    # args.logger.info("[*] Year {} load from {}_30day.npz".format(args.year, osp.join(args.save_data_path, str(year))))

    # Load and row-normalize the adjacency matrix.
    adj = np.load(args.graph_path)
    adj = adj / (np.sum(adj, 1, keepdims=True) + 1e-6)
    vars(args)["adj"] = torch.from_numpy(adj).to(torch.float).to(args.device)

    # Train or test.
    if args.train:
        train(inputs, args)
    else:
        model, _ = load_best_model(args)
        test_loader = DataLoader(TrafficDataset(inputs, "test"),
                                 batch_size=args.batch_size, shuffle=False,
                                 pin_memory=pin_memory, num_workers=n_work)
        test_model(model, args, test_loader, pin_memory=True)

    # Horizons 3, 6, 12 correspond to predictions over 15/30/60 min; the
    # metrics are mae, rmse, and mape. (The original header logged the stale
    # loop variable j; the metric names are written out instead.)
    for i in [3, 6, 12]:
        info = ""
        for j in ["mae", "rmse", "mape"]:
            info += "{:.2f}\t".format(result[i][j][year])
        logger.info("horizon {}\tmae\trmse\tmape\t".format(i) + info)
    info = "year\t{}\ttotal_time\t{}\taverage_time\t{}\tepoch\t{}".format(
        year, result[year]["total_time"], result[year]["average_time"],
        result[year]["epoch_num"])
    logger.info(info)
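# The metric tables above are filled by test_model through masked numpy
# metrics; the second train variant below calls masked_mae_np(y, pred, 0)
# with 0 as the null value. A minimal sketch of that masking convention (the
# repo's real implementation may differ, e.g. DCRNN-style NaN handling):

def masked_mae_np(y_true, y_pred, null_val):
    # mask out null entries, then renormalize so masked cells do not
    # shrink the mean
    mask = (y_true != null_val).astype("float32")
    mask /= max(mask.mean(), 1e-8)
    return float(np.mean(np.abs(y_pred - y_true) * mask))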
def init_log(args):
    # Initialize logging (second variant: the run directory comes from
    # args.path and only timestamp and message are formatted).
    log_dir, log_filename = args.path, args.logname
    logger = logging.getLogger(__name__)
    ct.mkdirs(log_dir)
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler(osp.join(log_dir, log_filename + ".log"))
    fh.setLevel(logging.INFO)
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter("%(asctime)s - %(message)s")
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(ch)
    logger.info("logger name:%s", osp.join(log_dir, log_filename + ".log"))
    vars(args)["logger"] = logger
    return logger
def train(inputs, args):
    # model setting
    path = os.path.join(args.model_path, args.logname + args.time, str(args.year))
    ct.mkdirs(path)
    # writer = SummaryWriter(os.path.join(path, "tsboard"))
    model = Basic_Model(args).to(args.device)
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    if args.scheduler == "cos":
        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=10, T_mult=2, eta_min=0.001)
    elif args.scheduler == "epo":
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.99)
    if args.loss == "mse":
        lossfunc = func.mse_loss
    elif args.loss == "huber":
        lossfunc = func.smooth_l1_loss
    total_time = 0.0

    #### dataset definition
    train_loader = DataLoader(TrafficDataset(inputs, "train"), batch_size=args.batch_size,
                              shuffle=True, pin_memory=pin_memory, num_workers=n_work)
    val_loader = DataLoader(TrafficDataset(inputs, "val"), batch_size=args.batch_size,
                            shuffle=False, pin_memory=pin_memory, num_workers=n_work)
    test_loader = DataLoader(TrafficDataset(inputs, "test"), batch_size=args.batch_size,
                             shuffle=False, pin_memory=pin_memory, num_workers=n_work)
    args.logger.info("[*] Dataset load!")
    args.logger.info("[*] start")
    # note: len(train_loader) is already the batch count, so the extra
    # division by batch_size in the original was a bug (the variable is unused)
    global_train_steps = len(train_loader)
    iters = len(train_loader)
    lowest_validation_loss = 1e7
    counter = 0
    patience = 10
    for epoch in range(args.epoch):
        training_loss = 0.0
        start_time = datetime.now()
        # train model
        model.train()
        cn = 0
        for batch_idx, data in enumerate(train_loader):
            if args.scheduler == "cos":
                scheduler.step(epoch + batch_idx / iters)
            data = data.to(args.device, non_blocking=pin_memory)  # was an undefined global `device`
            optimizer.zero_grad()
            pred = model(data)
            if args.strategy == "incremental_only" and args.year != args.begin_year:
                # only back-propagate on the incremental nodes
                pred, _ = to_dense_batch(pred, batch=data.batch)
                data.y, _ = to_dense_batch(data.y, batch=data.batch)
                pred = pred[:, args.node_list, :]
                data.y = data.y[:, args.node_list, :]
            loss = lossfunc(data.y, pred, reduction="mean")
            training_loss += float(loss)
            loss.backward()
            optimizer.step()
            cn += 1
        total_time += (datetime.now() - start_time).total_seconds()
        training_loss = training_loss / cn

        # validate model
        validation_loss = 0.0
        cn = 0
        model.eval()  # freeze dropout/batch-norm statistics during validation
        with torch.no_grad():
            for batch_idx, data in enumerate(val_loader):
                data = data.to(args.device, non_blocking=pin_memory)
                pred = model(data)
                if args.strategy == "incremental_only" and args.year != args.begin_year:
                    pred, _ = to_dense_batch(pred, batch=data.batch)
                    data.y, _ = to_dense_batch(data.y, batch=data.batch)
                    pred = pred[:, args.node_list, :]
                    data.y = data.y[:, args.node_list, :]
                loss = lossfunc(data.y, pred)
                validation_loss += float(loss)
                cn += 1
        validation_loss = validation_loss / cn

        if args.scheduler == "epo":
            scheduler.step()
        args.logger.info(
            f"epoch:{epoch}, training loss:{training_loss:.4f} validation loss:{validation_loss:.4f}")
        if args.nni:
            nni.report_intermediate_result(validation_loss)

        # early stopping
        if validation_loss <= lowest_validation_loss:
            counter = 0
            lowest_validation_loss = round(validation_loss, 4)
            torch.save(model, os.path.join(path, str(round(validation_loss, 4)) + ".pkl"))
        else:
            counter += 1
            if counter > patience:
                break

    best_model_path = os.path.join(path, str(lowest_validation_loss) + ".pkl")
    best_model = torch.load(best_model_path, map_location=args.device)

    # test model (the best checkpoint, not the last training state, which is
    # what the original passed here)
    test_model(best_model, args, test_loader, pin_memory)
    args.logger.info("Finished optimization, total time:{:.2f} s, best model:{}".format(
        total_time, best_model_path))
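# load_best_model (used by both main and the second train variant) is a repo
# helper that is not shown. Checkpoints are saved as "<val_loss>.pkl", so one
# plausible sketch picks the numerically smallest filename; the directory
# layout and return signature here are assumptions:

def load_best_model(args):
    ckpt_dir = osp.join(args.model_path, args.logname + args.time, str(args.year))
    pkls = [f for f in os.listdir(ckpt_dir) if f.endswith(".pkl")]
    # filenames encode the validation loss, so the minimum is the best model
    best = min(pkls, key=lambda f: float(f[:-len(".pkl")]))
    model = torch.load(osp.join(ckpt_dir, best), map_location=args.device)
    return model, float(best[:-len(".pkl")])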
def train(inputs, args):
    # Model Setting
    global result
    path = args.path
    ct.mkdirs(path)
    if args.loss == "mse":
        lossfunc = func.mse_loss
    elif args.loss == "huber":
        lossfunc = func.smooth_l1_loss

    # Dataset Definition
    train_loader = DataLoader(TrafficDataset(inputs, "train"), batch_size=args.batch_size,
                              shuffle=True, pin_memory=pin_memory, num_workers=n_work)
    val_loader = DataLoader(TrafficDataset(inputs, "val"), batch_size=args.batch_size,
                            shuffle=False, pin_memory=pin_memory, num_workers=n_work)
    vars(args)["sub_adj"] = vars(args)["adj"]
    test_loader = DataLoader(TrafficDataset(inputs, "test"), batch_size=args.batch_size,
                             shuffle=False, pin_memory=pin_memory, num_workers=n_work)
    args.logger.info("[*] Dataset load!")

    # Model Definition
    if args.init:
        gnn_model, _ = load_best_model(args)
    else:
        gnn_model = Integrated_Model(args).to(args.device)
    model = gnn_model

    # Model Optimizer
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    args.logger.info("[*] Training start")
    global_train_steps = len(train_loader)  # already the batch count; the original // batch_size was a bug (unused)
    iters = len(train_loader)
    lowest_validation_loss = 1e7
    counter = 0
    patience = 5
    use_time = []
    total_time = 0.0  # accumulate from epoch 0 instead of branching on epoch == 0
    for epoch in range(args.epoch):
        training_loss = 0.0
        start_time = datetime.now()

        # Train Model
        model.train()
        cn = 0
        for batch_idx, data in enumerate(train_loader):
            if epoch == 0 and batch_idx == 0:
                args.logger.info("node number {}".format(data.x.shape))
            data = data.to(args.device, non_blocking=pin_memory)  # was an undefined global `device`
            pred, high_flow, eigen_flow = model(data, args.sub_adj)
            # MINE-style regularizers: reward MI between the input and the
            # high-frequency flow, penalize MI with the eigen flow
            mine_net_high = compute_mutual_information(data.x, high_flow.detach(), args.device)
            mine_net_eigen = compute_mutual_information(data.x, eigen_flow.detach(), args.device)
            loss = lossfunc(data.y, pred, reduction="mean") \
                + compute_mi(mine_net_high, data.x, high_flow) \
                - compute_mi(mine_net_eigen, data.x, eigen_flow)
            optimizer.zero_grad()
            training_loss += float(loss)
            loss.backward()
            optimizer.step()
            cn += 1
        epoch_time = (datetime.now() - start_time).total_seconds()
        total_time += epoch_time
        use_time.append(epoch_time)
        training_loss = training_loss / cn

        # Validate Model
        validation_loss = 0.0
        cn = 0
        model.eval()  # freeze dropout/batch-norm statistics during validation
        with torch.no_grad():
            for batch_idx, data in enumerate(val_loader):
                data = data.to(args.device, non_blocking=pin_memory)
                pred, _, _ = model(data, args.sub_adj)
                loss = masked_mae_np(data.y.cpu().data.numpy(), pred.cpu().data.numpy(), 0)
                validation_loss += float(loss)
                cn += 1
        validation_loss = float(validation_loss / cn)

        args.logger.info(
            f"epoch:{epoch}, training loss:{training_loss:.4f} validation loss:{validation_loss:.4f}")

        # Early Stop
        if validation_loss <= lowest_validation_loss:
            counter = 0
            lowest_validation_loss = round(validation_loss, 4)
            torch.save({"model_state_dict": gnn_model.state_dict()},
                       osp.join(path, str(round(validation_loss, 4)) + ".pkl"))
        else:
            counter += 1
            if counter > patience:
                break

    best_model_path = osp.join(path, str(lowest_validation_loss) + ".pkl")
    best_model = Integrated_Model(args)
    best_model.load_state_dict(
        torch.load(best_model_path, map_location=args.device)["model_state_dict"])
    best_model = best_model.to(args.device)

    # Test Model
    test_model(best_model, args, test_loader, pin_memory)
    result[args.year] = {"total_time": total_time,
                         "average_time": sum(use_time) / len(use_time),
                         "epoch_num": epoch + 1}
    args.logger.info("Finished optimization, total time:{:.2f} s, best model:{}".format(
        total_time, best_model_path))
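# compute_mutual_information / compute_mi are repo helpers that are not shown.
# The loss above adds an MI estimate between the input and high_flow and
# subtracts one for eigen_flow, which is consistent with a MINE-style
# Donsker-Varadhan estimator (Belghazi et al., 2018). A minimal sketch under
# that assumption: compute_mutual_information would fit a statistics network
# like MineNet on the detached activations, and compute_mi then evaluates the
# bound. Names, shapes, and architecture here are illustrative only.

import math
import torch.nn as nn

class MineNet(nn.Module):
    # statistics network T(x, y) for the Donsker-Varadhan lower bound
    def __init__(self, x_dim, y_dim, hidden=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(x_dim + y_dim, hidden), nn.ReLU(), nn.Linear(hidden, 1))

    def forward(self, x, y):
        return self.net(torch.cat([x, y], dim=-1))

def compute_mi(mine_net, x, y):
    # I(X;Y) >= E_p(x,y)[T(x,y)] - log E_p(x)p(y)[exp(T(x,y))]
    joint = mine_net(x, y).mean()
    # shuffle y across the batch to break the pairing and sample the marginal
    y_perm = y[torch.randperm(y.size(0), device=y.device)]
    marginal = torch.logsumexp(mine_net(x, y_perm), dim=0) - math.log(y.size(0))
    return joint - marginal.squeeze()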