# Build the reader with paddle.io.DataLoader; a DistributedBatchSampler is needed to split the data across multiple cards.
train_dataset = MnistDataset(mode='train')
train_sampler = paddle.io.DistributedBatchSampler(train_dataset,
                                                  batch_size=BATCH_SIZE,
                                                  drop_last=True)
train_reader = paddle.io.DataLoader(train_dataset, batch_sampler=train_sampler)

for epoch in range(epoch_num):
    for batch_id, data in enumerate(train_reader):
        img = data[0]
        label = data[1]
        label.stop_gradient = True

        # Forward pass through the network
        pred, acc = mnist(img, label)

        # Compute the loss
        loss = paddle.nn.functional.cross_entropy(pred, label)
        avg_loss = paddle.mean(loss)
        avg_loss.backward()

        # Update the parameters
        adam.step()
        # Clear the gradients computed in this step before the next iteration
        adam.clear_grad()

        # Print the loss for the current epoch and batch_id
        if batch_id % 100 == 0 and batch_id != 0:
            print("Epoch {} step {}, Loss = {}, Accuracy = {}".format(
                epoch, batch_id, avg_loss.numpy(), acc))
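The loop above only splits the data per card; to actually run it on several GPUs the model also has to take part in gradient synchronization and the script has to be launched once per card. A minimal sketch of that extra setup is shown below; it assumes the surrounding script is named train.py and that `mnist` and `adam` are the model and optimizer from the snippet above, so treat it as an illustration rather than the original author's code.

# Hedged sketch (not part of the original snippet): multi-card setup.
# Launch with: python -m paddle.distributed.launch --gpus "0,1" train.py
import paddle
import paddle.distributed as dist

dist.init_parallel_env()                 # set up communication between the cards
mnist = paddle.DataParallel(mnist)       # gradients are all-reduced across cards
adam = paddle.optimizer.Adam(learning_rate=0.001,
                             parameters=mnist.parameters())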
def main(args):
    """Train the model, evaluate it after every epoch, and keep the checkpoint
    with the best Pearson correlation coefficient (pcc)."""
    paddle.set_device('gpu:{}'.format(args.device) if args.use_cuda else 'cpu')

    logging.info('Load data ...')
    dataset = InMemoryDataset(npz_data_path=args.data_path)
    train_ds = Dataset(dataset[1])
    test_ds = Dataset(dataset[0])
    train_loader = train_ds.get_data_loader(batch_size=args.batch_size,
                                            collate_fn=collate_fn)
    test_loader = test_ds.get_data_loader(batch_size=args.batch_size,
                                          collate_fn=collate_fn)
    logging.info("Data loaded.")

    model = CDRModel(args)
    optim = Adam(learning_rate=args.lr, parameters=model.parameters())
    criterion = nn.MSELoss()

    global_step = 0
    best_pcc = 0.0
    os.makedirs(args.output_path, exist_ok=True)
    best_model = os.path.join(args.output_path, 'best_model.pdparams')

    for epoch in range(1, args.epoch_num + 1):
        model.train()
        for idx, batch_data in enumerate(train_loader):
            graphs, mut, gexpr, met, label = batch_data
            g = pgl.Graph.batch(graphs).tensor()
            mut = paddle.to_tensor(mut)
            gexpr = paddle.to_tensor(gexpr)
            met = paddle.to_tensor(met)
            label = paddle.to_tensor(label)

            pred = model([g, mut, gexpr, met])
            # RMSE: square root of the MSE loss
            train_loss = paddle.pow(criterion(pred[:, 0], label)[0], 0.5)
            train_loss.backward()
            train_pcc = pearsonr(pred[:, 0].numpy(), label.numpy())[0]
            optim.step()
            optim.clear_grad()

            global_step += 1
            if global_step % 500 == 0:
                message = "train: epoch %d | step %d | " % (epoch, global_step)
                message += "loss %.6f | pcc %.4f" % (train_loss, train_pcc)
                log.info(message)

        result = evaluate(model, test_loader, criterion)
        message = "eval: epoch %d | step %d " % (epoch, global_step)
        for key, value in result.items():
            message += "| %s %.6f" % (key, value)
        log.info(message)

        if best_pcc < result['pcc']:
            best_pcc = result['pcc']
            paddle.save(model.state_dict(), best_model)

    log.info("best evaluating accuracy: %.6f" % best_pcc)
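The training loop above calls an `evaluate` helper that is not shown in this snippet. The sketch below is a hypothetical version of it, written only to make the example self-contained: it assumes the test loader yields the same five-tuple as the training loader and that "pcc" and "rmse" are the metrics of interest, computed with scipy's pearsonr.

# Hypothetical sketch of the `evaluate` helper (the original implementation is not shown here).
import numpy as np
import paddle
import pgl
from scipy.stats import pearsonr

@paddle.no_grad()
def evaluate(model, loader, criterion):
    model.eval()
    preds, labels = [], []
    for graphs, mut, gexpr, met, label in loader:
        g = pgl.Graph.batch(graphs).tensor()
        pred = model([g, paddle.to_tensor(mut),
                      paddle.to_tensor(gexpr), paddle.to_tensor(met)])
        preds.append(pred[:, 0].numpy())
        labels.append(np.asarray(label))
    preds = np.concatenate(preds)
    labels = np.concatenate(labels)
    rmse = float(np.sqrt(np.mean((preds - labels) ** 2)))
    return {"rmse": rmse, "pcc": float(pearsonr(preds, labels)[0])}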
def main(args):
    ds = GINDataset(args.data_path,
                    args.dataset_name,
                    self_loop=not args.train_eps,
                    degree_as_nlabel=True)
    args.feat_size = ds.dim_nfeats

    train_ds, test_ds = fold10_split(ds, fold_idx=args.fold_idx, seed=args.seed)

    train_loader = Dataloader(train_ds,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=1,
                              collate_fn=collate_fn)
    test_loader = Dataloader(test_ds,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=1,
                             collate_fn=collate_fn)

    model = GINModel(args, ds.gclasses)

    epoch_step = len(train_loader)
    boundaries = [
        i for i in range(50 * epoch_step, args.epochs * epoch_step, epoch_step * 50)
    ]
    values = [args.lr * 0.5**i for i in range(0, len(boundaries) + 1)]
    scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=boundaries,
                                                   values=values,
                                                   verbose=False)
    optim = Adam(learning_rate=scheduler, parameters=model.parameters())
    criterion = nn.loss.CrossEntropyLoss()

    global_step = 0
    best_acc = 0.0

    for epoch in range(1, args.epochs + 1):
        model.train()
        for idx, batch_data in enumerate(train_loader):
            graphs, labels = batch_data
            g = pgl.Graph.batch(graphs).tensor()
            labels = paddle.to_tensor(labels)

            pred = model(g)
            train_loss = criterion(pred, labels)
            train_loss.backward()
            train_acc = paddle.metric.accuracy(input=pred, label=labels, k=1)
            optim.step()
            optim.clear_grad()
            scheduler.step()

            global_step += 1
            if global_step % 10 == 0:
                message = "train: epoch %d | step %d | " % (epoch, global_step)
                message += "loss %.6f | acc %.4f" % (train_loss, train_acc)
                log.info(message)

        result = evaluate(model, test_loader, criterion)
        message = "eval: epoch %d | step %d | " % (epoch, global_step)
        for key, value in result.items():
            message += " | %s %.6f" % (key, value)
        log.info(message)

        if best_acc < result['acc']:
            best_acc = result['acc']

    log.info("best evaluating accuracy: %.6f" % best_acc)
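This `main` reads a number of fields from `args` that the snippet never defines. A hypothetical command-line parser covering those fields is sketched below; the flag names follow the code, but every default value is illustrative only, and GINModel will typically need additional model hyper-parameters (hidden size, number of layers, and so on) that are not listed here.

# Hypothetical argparse setup matching the fields used by `main` above;
# defaults are illustrative, not taken from the original project.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description="GIN graph classification")
    parser.add_argument("--data_path", type=str, default="./gin_data")
    parser.add_argument("--dataset_name", type=str, default="MUTAG")
    parser.add_argument("--fold_idx", type=int, default=0)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--train_eps", action="store_true")
    parser.add_argument("--lr", type=float, default=0.01)
    parser.add_argument("--epochs", type=int, default=350)
    return parser.parse_args()

if __name__ == "__main__":
    main(parse_args())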
def main(config):
    if dist.get_world_size() > 1:
        dist.init_parallel_env()

    if dist.get_rank() == 0:
        timestamp = datetime.now().strftime("%Hh%Mm%Ss")
        log_path = os.path.join(config.log_dir, "tensorboard_log_%s" % timestamp)
        writer = SummaryWriter(log_path)

    log.info("loading data")
    raw_dataset = GraphPropPredDataset(name=config.dataset_name)
    config.num_class = raw_dataset.num_tasks
    config.eval_metric = raw_dataset.eval_metric
    config.task_type = raw_dataset.task_type

    mol_dataset = MolDataset(config, raw_dataset, transform=make_multihop_edges)
    splitted_index = raw_dataset.get_idx_split()
    train_ds = Subset(mol_dataset, splitted_index['train'], mode='train')
    valid_ds = Subset(mol_dataset, splitted_index['valid'], mode="valid")
    test_ds = Subset(mol_dataset, splitted_index['test'], mode="test")

    log.info("Train Examples: %s" % len(train_ds))
    log.info("Val Examples: %s" % len(valid_ds))
    log.info("Test Examples: %s" % len(test_ds))

    fn = CollateFn(config)

    train_loader = Dataloader(train_ds,
                              batch_size=config.batch_size,
                              shuffle=True,
                              num_workers=config.num_workers,
                              collate_fn=fn)
    valid_loader = Dataloader(valid_ds,
                              batch_size=config.batch_size,
                              num_workers=config.num_workers,
                              collate_fn=fn)
    test_loader = Dataloader(test_ds,
                             batch_size=config.batch_size,
                             num_workers=config.num_workers,
                             collate_fn=fn)

    model = ClassifierNetwork(config.hidden_size, config.out_dim,
                              config.num_layers, config.dropout_prob,
                              config.virt_node, config.K, config.conv_type,
                              config.appnp_hop, config.alpha)
    model = paddle.DataParallel(model)

    optim = Adam(learning_rate=config.lr, parameters=model.parameters())
    criterion = nn.loss.BCEWithLogitsLoss()
    evaluator = Evaluator(config.dataset_name)

    best_valid = 0
    global_step = 0
    for epoch in range(1, config.epochs + 1):
        model.train()
        for idx, batch_data in enumerate(train_loader):
            g, mh_graphs, labels, unmask = batch_data
            g = g.tensor()
            multihop_graphs = []
            for item in mh_graphs:
                multihop_graphs.append(item.tensor())
            g.multi_hop_graphs = multihop_graphs
            labels = paddle.to_tensor(labels)
            unmask = paddle.to_tensor(unmask)

            pred = model(g)
            pred = paddle.masked_select(pred, unmask)
            labels = paddle.masked_select(labels, unmask)
            train_loss = criterion(pred, labels)
            train_loss.backward()
            optim.step()
            optim.clear_grad()

            if global_step % 80 == 0:
                message = "train: epoch %d | step %d | " % (epoch, global_step)
                message += "loss %.6f" % (train_loss.numpy())
                log.info(message)
                if dist.get_rank() == 0:
                    writer.add_scalar("loss", train_loss.numpy(), global_step)
            global_step += 1

        valid_result = evaluate(model, valid_loader, criterion, evaluator)
        message = "valid: epoch %d | step %d | " % (epoch, global_step)
        for key, value in valid_result.items():
            message += " | %s %.6f" % (key, value)
            if dist.get_rank() == 0:
                writer.add_scalar("valid_%s" % key, value, global_step)
        log.info(message)

        test_result = evaluate(model, test_loader, criterion, evaluator)
        message = "test: epoch %d | step %d | " % (epoch, global_step)
        for key, value in test_result.items():
            message += " | %s %.6f" % (key, value)
            if dist.get_rank() == 0:
                writer.add_scalar("test_%s" % key, value, global_step)
        log.info(message)

        if best_valid < valid_result[config.metrics]:
            best_valid = valid_result[config.metrics]
            best_valid_result = valid_result
            best_test_result = test_result

        message = "best result: epoch %d | " % (epoch)
        message += "valid %s: %.6f | " % (config.metrics,
                                          best_valid_result[config.metrics])
        message += "test %s: %.6f | " % (config.metrics,
                                         best_test_result[config.metrics])
        log.info(message)

    message = "final eval best result:%.6f" % best_valid_result[config.metrics]
    log.info(message)
    message = "final test best result:%.6f" % best_test_result[config.metrics]
    log.info(message)
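As in the earlier examples, the `evaluate` helper used here is not part of the snippet. The sketch below is a hypothetical version written against the public OGB Evaluator interface (a dict with "y_true" and "y_pred"); it mirrors the batch handling of the training loop above and adds the masked loss to the returned metrics, but it should be read as an assumption about the helper, not the original code.

# Hypothetical sketch of the `evaluate` helper used above (not shown in the original snippet).
import numpy as np
import paddle

@paddle.no_grad()
def evaluate(model, loader, criterion, evaluator):
    model.eval()
    total_loss, y_true, y_pred = [], [], []
    for g, mh_graphs, labels, unmask in loader:
        g = g.tensor()
        g.multi_hop_graphs = [item.tensor() for item in mh_graphs]
        labels = paddle.to_tensor(labels)
        unmask = paddle.to_tensor(unmask)

        pred = model(g)
        loss = criterion(paddle.masked_select(pred, unmask),
                         paddle.masked_select(labels, unmask))
        total_loss.append(float(loss))
        y_true.append(labels.numpy())
        y_pred.append(pred.numpy())

    # OGB evaluators expect stacked (num_graphs, num_tasks) arrays.
    result = evaluator.eval({"y_true": np.concatenate(y_true),
                             "y_pred": np.concatenate(y_pred)})
    result["loss"] = float(np.mean(total_loss))
    model.train()
    return result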