def train(model):
    adam = Adam(learning_rate=0.001, parameters=model.parameters())
    epoch_num = 1
    for epoch in range(epoch_num):
        model.train()
        for batch_id, data in enumerate(train_reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array(
                [x[1] for x in data]).astype('int64').reshape(-1, 1)

            img = paddle.to_tensor(x_data)
            label = paddle.to_tensor(y_data)

            out = model(img)
            acc = paddle.metric.accuracy(out, label, k=1)
            loss = nn.functional.loss.cross_entropy(out, label)
            avg_loss = paddle.mean(loss)
            avg_loss.backward()
            adam.minimize(avg_loss)
            model.clear_gradients()
            if batch_id % 50 == 0:
                _logger.info(
                    "Train | At epoch {} step {}: loss = {:}, acc= {:}".format(
                        epoch, batch_id, avg_loss.numpy(), acc.numpy()))
                break
def main(args):
    if not args.use_cuda:
        paddle.set_device("cpu")
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    graph = load(args.dataset)

    model = SkipGramModel(
        graph.num_nodes,
        args.embed_size,
        args.neg_num,
        sparse=not args.use_cuda)
    model = paddle.DataParallel(model)

    train_steps = int(graph.num_nodes / args.batch_size) * args.epoch
    scheduler = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.learning_rate,
        decay_steps=train_steps,
        end_lr=0.0001)

    optim = Adam(learning_rate=scheduler, parameters=model.parameters())

    train_ds = ShardedDataset(graph.nodes)
    collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                               args.neg_num, args.neg_sample_type)
    data_loader = Dataloader(
        train_ds,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.sample_workers,
        collate_fn=collate_fn)

    for epoch in tqdm.tqdm(range(args.epoch)):
        train_loss = train(model, data_loader, optim)
        log.info("Running epoch:%s\t train_loss:%.6f", epoch, train_loss)
    paddle.save(model.state_dict(), "model.pdparams")
def main(args):
    dataset = load(args.dataset, args.feature_pre_normalize)

    graph = dataset.graph
    train_index = dataset.train_index
    train_label = dataset.train_label
    val_index = dataset.val_index
    val_label = dataset.val_label
    test_index = dataset.test_index
    test_label = dataset.test_label

    criterion = paddle.nn.loss.CrossEntropyLoss()

    dur = []
    best_test = []

    for run in range(args.runs):
        cal_val_acc = []
        cal_test_acc = []
        cal_val_loss = []
        cal_test_loss = []

        gnn_model = GCN(input_size=graph.node_feat["words"].shape[1],
                        num_class=dataset.num_classes,
                        num_layers=1,
                        dropout=0.5,
                        hidden_size=16)

        optim = Adam(
            learning_rate=0.01,
            parameters=gnn_model.parameters(),
            weight_decay=0.0005)

        for epoch in tqdm.tqdm(range(200)):
            if epoch >= 3:
                start = time.time()
            train_loss, train_acc = train(train_index, train_label, gnn_model,
                                          graph, criterion, optim)
            if epoch >= 3:
                end = time.time()
                dur.append(end - start)

            val_loss, val_acc = eval(val_index, val_label, gnn_model, graph,
                                     criterion)
            cal_val_acc.append(val_acc.numpy())
            cal_val_loss.append(val_loss.numpy())

            test_loss, test_acc = eval(test_index, test_label, gnn_model,
                                       graph, criterion)
            cal_test_acc.append(test_acc.numpy())
            cal_test_loss.append(test_loss.numpy())

        log.info("Runs %s: Model: GCN Best Test Accuracy: %f" %
                 (run, cal_test_acc[np.argmin(cal_val_loss)]))
        best_test.append(cal_test_acc[np.argmin(cal_val_loss)])

    log.info("Average Speed %s sec/ epoch" % (np.mean(dur)))
    log.info("Dataset: %s Best Test Accuracy: %f ( stddev: %f )" %
             (args.dataset, np.mean(best_test), np.std(best_test)))
def main(args):
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    graph = load(args.dataset)

    model = SkipGramModel(
        graph.num_nodes,
        args.embed_size,
        args.neg_num,
        sparse=not args.use_cuda)
    model = paddle.DataParallel(model)

    optim = Adam(
        learning_rate=args.learning_rate,
        parameters=model.parameters(),
        weight_decay=args.weight_decay)

    train_ds = ShardedDataset(graph.nodes)
    collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                               args.neg_num, args.neg_sample_type)
    data_loader = Dataloader(
        train_ds,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.sample_workers,
        collate_fn=collate_fn)

    for epoch in tqdm.tqdm(range(args.epoch)):
        train_loss = train(model, data_loader, optim)
        log.info("Running epoch:%s\t train_loss:%.6f", epoch, train_loss)
def main(args, config):
    dataset = load(args.dataset, args.feature_pre_normalize)

    graph = dataset.graph
    train_index = dataset.train_index
    train_label = dataset.train_label
    val_index = dataset.val_index
    val_label = dataset.val_label
    test_index = dataset.test_index
    test_label = dataset.test_label

    GraphModel = getattr(model, config.model_name)
    criterion = paddle.nn.loss.CrossEntropyLoss()

    dur = []
    best_test = []

    for run in range(args.runs):
        cal_val_acc = []
        cal_test_acc = []
        cal_val_loss = []
        cal_test_loss = []

        gnn_model = GraphModel(input_size=graph.node_feat["words"].shape[1],
                               num_class=dataset.num_classes,
                               **config)

        optim = Adam(learning_rate=config.learning_rate,
                     parameters=gnn_model.parameters(),
                     weight_decay=config.weight_decay)

        for epoch in tqdm.tqdm(range(args.epoch)):
            train_loss, train_acc = train(train_index, train_label, gnn_model,
                                          graph, criterion, optim)
            val_loss, val_acc = eval(val_index, val_label, gnn_model, graph,
                                     criterion)
            cal_val_acc.append(val_acc.numpy())
            cal_val_loss.append(val_loss.numpy())

            test_loss, test_acc = eval(test_index, test_label, gnn_model,
                                       graph, criterion)
            cal_test_acc.append(test_acc.numpy())
            cal_test_loss.append(test_loss.numpy())

        log.info(
            "Runs %s: Model: %s Best Test Accuracy: %f" %
            (run, config.model_name, cal_test_acc[np.argmin(cal_val_loss)]))
        best_test.append(cal_test_acc[np.argmin(cal_val_loss)])

    log.info("Dataset: %s Best Test Accuracy: %f ( stddev: %f )" %
             (args.dataset, np.mean(best_test), np.std(best_test)))
def setup_model(self):
    config = self.config
    model = SpeakerEncoder(
        config.data.n_mel, config.model.num_layers,
        config.model.hidden_size, config.model.embedding_size)
    optimizer = Adam(
        config.training.learning_rate_init,
        parameters=model.parameters(),
        grad_clip=ClipGradByGlobalNorm(3))

    self.model = DataParallel(model) if self.parallel else model
    self.model_core = model
    self.optimizer = optimizer
def main(args):
    if not args.use_cuda:
        paddle.set_device("cpu")
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    dataset = load(args.dataset)
    graph = dataset.graph

    model = Model(graph.num_nodes, args.embed_size, dataset.num_groups)
    model = paddle.DataParallel(model)

    batch_size = len(dataset.train_index)

    train_steps = int(len(dataset.train_index) / batch_size) * args.epoch
    scheduler = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.multiclass_learning_rate,
        decay_steps=train_steps,
        end_lr=0.0001)
    optim = Adam(learning_rate=scheduler, parameters=model.parameters())

    if args.load_from_static:
        model.set_state_dict(load_from_files("./model"))
    else:
        model.set_state_dict(paddle.load("model.pdparams"))

    train_data_loader = node_classify_generator(
        graph, dataset.train_index, batch_size=batch_size, epoch=1)
    test_data_loader = node_classify_generator(
        graph, dataset.test_index, batch_size=batch_size, epoch=1)

    best_test_macro_f1 = -1
    for epoch in tqdm.tqdm(range(args.epoch)):
        train_loss = train(model, train_data_loader(), optim)
        test_loss, test_macro_f1, test_micro_f1 = test(model,
                                                       test_data_loader())
        best_test_macro_f1 = max(best_test_macro_f1, test_macro_f1)
    log.info("Best test macro f1 is %s." % best_test_macro_f1)
def main(args):
    if not args.use_cuda:
        paddle.set_device("cpu")
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    if args.edge_file:
        graph = load_from_file(args.edge_file)
    else:
        graph = load(args.dataset)

    edges = np.load("./edges.npy")
    edges = np.concatenate([edges, edges[:, [1, 0]]])
    graph = pgl.Graph(edges)

    model = SkipGramModel(
        graph.num_nodes,
        args.embed_size,
        args.neg_num,
        sparse=not args.use_cuda)
    model = paddle.DataParallel(model)

    train_ds = ShardedDataset(graph.nodes, repeat=args.epoch)

    train_steps = int(len(train_ds) // args.batch_size)
    log.info("train_steps: %s" % train_steps)
    scheduler = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.learning_rate,
        decay_steps=train_steps,
        end_lr=0.0001)

    optim = Adam(learning_rate=scheduler, parameters=model.parameters())

    collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                               args.neg_num, args.neg_sample_type)
    data_loader = Dataloader(
        train_ds,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.sample_workers,
        collate_fn=collate_fn)

    train_loss = train(model, data_loader, optim)
    paddle.save(model.state_dict(), "model.pdparams")
def main(config, ip_list_file):
    ds = TrainPairDataset(config, ip_list_file)
    loader = Dataloader(
        ds,
        batch_size=config.batch_pair_size,
        num_workers=config.num_workers,
        stream_shuffle_size=config.pair_stream_shuffle_size,
        collate_fn=CollateFn())

    model = SkipGramModel(config)
    if config.warm_start_from:
        log.info("warm start from %s" % config.warm_start_from)
        model.set_state_dict(paddle.load(config.warm_start_from))

    optim = Adam(
        learning_rate=config.lr,
        parameters=model.parameters(),
        lazy_mode=config.lazy_mode)

    log.info("starting training...")
    train(config, model, loader, optim)
# define loss
bce_loss = F.binary_cross_entropy_with_logits
loss_fn = MixUpLoss(bce_loss)

warm_steps = c['warm_steps']
lrs = np.linspace(1e-10, c['start_lr'], warm_steps)

# restore checkpoint
if args.restore != -1:
    model = ModelClass(pretrained=False,
                       num_classes=c['num_classes'],
                       dropout=c['dropout'])
    model_dict, optim_dict = load_checkpoint(c['model_dir'], args.restore,
                                             prefix)
    model.load_dict(model_dict)
    optimizer = Adam(learning_rate=c['start_lr'],
                     parameters=model.parameters())
    optimizer.set_state_dict(optim_dict)
    start_epoch = args.restore
else:
    # use imagenet pretrained
    model = ModelClass(pretrained=True,
                       num_classes=c['num_classes'],
                       dropout=c['dropout'])
    optimizer = Adam(learning_rate=c['start_lr'],
                     parameters=model.parameters())
    start_epoch = 0

# for name, p in list(model.named_parameters())[:-2]:
#     print(name, p.stop_gradient)
#     p.stop_gradient = True
def testSetNumpyBeforeTrain(self):
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4
    batch_num = 200

    with fluid.dygraph.guard():
        paddle.manual_seed(seed)
        paddle.framework.random._manual_program_seed(seed)
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel(hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale)

        bd = []
        lr_arr = [0.0]
        # this a fake lr decay strategy
        for i in range(1, 10):
            bd.append(100 * i)
            # set lr to 0.0, not update parameter
            new_lr = 0.0
            lr_arr.append(new_lr)

        place = fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)
        scheduler = paddle.optimizer.PiecewiseLR(boundaries=bd,
                                                 values=lr_arr)
        adam = Adam(learning_rate=scheduler,
                    beta1=0.8,
                    beta2=0.6,
                    parameters=ptb_model.parameters())
        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None

        np_opti_dict = {}
        np_state_dict = {}

        for k, v in self.opti_dict.items():
            if isinstance(v, core.VarBase):
                np_opti_dict[v.name] = v.numpy()
            else:
                np_opti_dict[k] = v

        for k, v in self.state_dict.items():
            np_state_dict[k] = v.numpy()

        adam.set_state_dict(np_opti_dict)
        ptb_model.set_dict(np_state_dict)

        for i in range(1):
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            y_data = y_data.reshape((-1, 1))

            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')

            x = to_variable(x_data)
            y = to_variable(y_data)
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            dy_loss, last_hidden, last_cell = ptb_model(
                x, y, init_hidden, init_cell)

            dy_loss.backward()
            scheduler.step()
            adam.minimize(dy_loss)
            ptb_model.clear_gradients()

            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                if k == "LR_Scheduler":
                    self.assertTrue(
                        np.array_equal(v['last_epoch'],
                                       self.base_opti[k]['last_epoch'] + 1))

                if k.find("beta1_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta1))
                if k.find("beta2_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta2))

            # check parameter
            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()
                base_t = self.model_base[k]
                self.assertTrue(np.array_equal(new_t, base_t))
def testSetVariableBeforeTrain(self):
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4
    batch_num = 200

    with fluid.dygraph.guard():
        paddle.manual_seed(seed)
        paddle.framework.random._manual_program_seed(seed)
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel(hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale)

        place = fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)
        adam = Adam(learning_rate=0.0,
                    beta1=0.8,
                    beta2=0.6,
                    parameters=ptb_model.parameters())
        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None

        adam.set_state_dict(self.opti_dict)
        ptb_model.set_dict(self.state_dict)

        for i in range(1):
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            y_data = y_data.reshape((-1, 1))

            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')

            x = to_variable(x_data)
            y = to_variable(y_data)
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            dy_loss, last_hidden, last_cell = ptb_model(
                x, y, init_hidden, init_cell)

            dy_loss.backward()
            adam.minimize(dy_loss)
            ptb_model.clear_gradients()

            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                if k == "global_step":
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] + 1))

                if k.find("beta1_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta1))
                if k.find("beta2_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta2))

            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()
                base_t = self.model_base[k]
                self.assertTrue(np.array_equal(new_t, base_t))
def testSetNumpy(self):
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4
    batch_num = 200

    with fluid.dygraph.guard():
        paddle.manual_seed(seed)
        paddle.framework.random._manual_program_seed(seed)
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel(hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale)

        bd = []
        lr_arr = [1.0]
        # this a fake lr decay strategy
        for i in range(1, 10):
            bd.append(100 * i)
            new_lr = 1.0
            lr_arr.append(new_lr)

        place = fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)
        scheduler = paddle.optimizer.PiecewiseLR(boundaries=bd,
                                                 values=lr_arr)
        adam = Adam(learning_rate=scheduler,
                    parameters=ptb_model.parameters())
        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None

        for i in range(batch_num):
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            y_data = y_data.reshape((-1, 1))

            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')

            x = to_variable(x_data)
            y = to_variable(y_data)
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            dy_loss, last_hidden, last_cell = ptb_model(
                x, y, init_hidden, init_cell)
            if i == 0:
                for param in ptb_model.parameters():
                    dy_param_init[param.name] = param.numpy()
            dy_loss.backward()
            adam.minimize(dy_loss)
            scheduler.step()
            ptb_model.clear_gradients()
            if i == batch_num - 1:
                for param in ptb_model.parameters():
                    dy_param_updated[param.name] = param.numpy()

        # check optimizer
        opti_dict = adam.state_dict()
        np_opti_dict = {}
        # set to zero
        for k, v in opti_dict.items():
            if isinstance(v, core.VarBase):
                np_t = v.numpy()
                np_opti_dict[v.name] = np_t
                var = v.value().get_tensor()
                var.set(np.zeros_like(np_t), place)

                self.assertTrue(np.sum(np.abs(v.numpy())) == 0)
            else:
                np_opti_dict[k] = v

        if isinstance(adam._learning_rate, LearningRateDecay):
            adam._learning_rate.step_num = 0

        adam.set_state_dict(np_opti_dict)

        opti_dict = adam.state_dict()
        for k, v in opti_dict.items():
            if isinstance(v, core.VarBase):
                self.assertTrue(
                    np.array_equal(v.numpy(), self.base_opti[v.name]))
            else:
                self.assertEqual(v, self.base_opti[k])

        # check parameter
        state_dict = ptb_model.state_dict()
        np_state_dict = {}
        for k, v in state_dict.items():
            np_t = v.numpy()
            np_state_dict[k] = np_t
            var = v.value().get_tensor()
            var.set(np.zeros_like(np_t), place)

        ptb_model.set_dict(np_state_dict)

        state_dict = ptb_model.state_dict()

        for k, v in state_dict.items():
            new_t = v.numpy()
            base_t = self.model_base[k]
            self.assertTrue(np.array_equal(new_t, base_t))
        self.accuracy.reset()
        correct = self.accuracy.compute(x, label)
        self.accuracy.update(correct)
        acc = self.accuracy.accumulate()
        return x, acc
    else:
        return x


# Prepare the multi-card (data-parallel) environment
dist.init_parallel_env()

epoch_num = 5
BATCH_SIZE = 64
mnist = MNIST()
adam = Adam(learning_rate=0.001, parameters=mnist.parameters())

# Wrap the model for data parallelism
mnist = paddle.DataParallel(mnist)

# Build the reader via paddle.io.DataLoader; DistributedBatchSampler is
# required here to shard the data across the multiple cards
train_sampler = paddle.io.DistributedBatchSampler(
    MnistDataset(mode='train'), batch_size=BATCH_SIZE, drop_last=True)
train_reader = paddle.io.DataLoader(
    MnistDataset(mode='train'), batch_sampler=train_sampler)

for epoch in range(epoch_num):
    for batch_id, data in enumerate(train_reader()):
        img = data[0]
        label = data[1]
        label.stop_gradient = True
if __name__ == '__main__':
    in_features = 2
    out_features = 1
    N = 48  # Length of the time-series

    # Input feature is a sine and a cosine wave
    data_x = np.stack(
        [np.sin(np.linspace(0, 3 * np.pi, N)),
         np.cos(np.linspace(0, 3 * np.pi, N))],
        axis=1)
    data_x = np.expand_dims(data_x, axis=0).astype(
        np.float32)  # Add batch dimension

    # Target output is a sine with double the frequency of the input signal
    data_y = np.sin(np.linspace(0, 6 * np.pi, N)
                    ).reshape([1, N, 1]).astype(np.float32)
    data_x = paddle.to_tensor(data_x)
    data_y = paddle.to_tensor(data_y)
    print("data_y.shape: ", str(data_y.shape))

    wiring = kncp.wirings.FullyConnected(
        8, out_features)  # 8 units, out_features motor neurons
    ltc_cell = LTCCell(wiring, in_features)
    dataloader = DataLoader(
        TensorDataset([data_x, data_y]),
        batch_size=1,
        shuffle=True,
        num_workers=4)

    ltc_sequence = RNNSequence(ltc_cell)
    learn = SequenceLearner(ltc_sequence)
    opt = Adam(learning_rate=0.01, parameters=ltc_sequence.parameters())
    loss = nn.MSELoss()
    learn.prepare(opt, loss)
    learn.fit(dataloader, epochs=400, verbose=1)
    results = learn.evaluate(dataloader)
def main(args):
    ds = GINDataset(
        args.data_path,
        args.dataset_name,
        self_loop=not args.train_eps,
        degree_as_nlabel=True)
    args.feat_size = ds.dim_nfeats

    train_ds, test_ds = fold10_split(
        ds, fold_idx=args.fold_idx, seed=args.seed)

    train_loader = Dataloader(
        train_ds,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=1,
        collate_fn=collate_fn)
    test_loader = Dataloader(
        test_ds,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=1,
        collate_fn=collate_fn)

    model = GINModel(args, ds.gclasses)

    epoch_step = len(train_loader)
    boundaries = [
        i for i in range(50 * epoch_step, args.epochs * epoch_step,
                         epoch_step * 50)
    ]
    values = [args.lr * 0.5**i for i in range(0, len(boundaries) + 1)]
    scheduler = paddle.optimizer.lr.PiecewiseDecay(
        boundaries=boundaries, values=values, verbose=False)
    optim = Adam(learning_rate=scheduler, parameters=model.parameters())

    criterion = nn.loss.CrossEntropyLoss()

    global_step = 0
    best_acc = 0.0
    for epoch in range(1, args.epochs + 1):
        model.train()
        for idx, batch_data in enumerate(train_loader):
            graphs, labels = batch_data
            g = pgl.Graph.batch(graphs).tensor()
            labels = paddle.to_tensor(labels)

            pred = model(g)
            train_loss = criterion(pred, labels)
            train_loss.backward()
            train_acc = paddle.metric.accuracy(input=pred, label=labels, k=1)
            optim.step()
            optim.clear_grad()
            scheduler.step()

            global_step += 1
            if global_step % 10 == 0:
                message = "train: epoch %d | step %d | " % (epoch, global_step)
                message += "loss %.6f | acc %.4f" % (train_loss, train_acc)
                log.info(message)

        result = evaluate(model, test_loader, criterion)
        message = "eval: epoch %d | step %d | " % (epoch, global_step)
        for key, value in result.items():
            message += " | %s %.6f" % (key, value)
        log.info(message)

        if best_acc < result['acc']:
            best_acc = result['acc']

    log.info("best evaluating accuracy: %.6f" % best_acc)
def main(config):
    if dist.get_world_size() > 1:
        dist.init_parallel_env()

    if dist.get_rank() == 0:
        timestamp = datetime.now().strftime("%Hh%Mm%Ss")
        log_path = os.path.join(config.log_dir,
                                "tensorboard_log_%s" % timestamp)
        writer = SummaryWriter(log_path)

    log.info("loading data")
    raw_dataset = GraphPropPredDataset(name=config.dataset_name)
    config.num_class = raw_dataset.num_tasks
    config.eval_metric = raw_dataset.eval_metric
    config.task_type = raw_dataset.task_type

    mol_dataset = MolDataset(
        config, raw_dataset, transform=make_multihop_edges)
    splitted_index = raw_dataset.get_idx_split()
    train_ds = Subset(mol_dataset, splitted_index['train'], mode='train')
    valid_ds = Subset(mol_dataset, splitted_index['valid'], mode="valid")
    test_ds = Subset(mol_dataset, splitted_index['test'], mode="test")

    log.info("Train Examples: %s" % len(train_ds))
    log.info("Val Examples: %s" % len(valid_ds))
    log.info("Test Examples: %s" % len(test_ds))

    fn = CollateFn(config)

    train_loader = Dataloader(
        train_ds,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.num_workers,
        collate_fn=fn)
    valid_loader = Dataloader(
        valid_ds,
        batch_size=config.batch_size,
        num_workers=config.num_workers,
        collate_fn=fn)
    test_loader = Dataloader(
        test_ds,
        batch_size=config.batch_size,
        num_workers=config.num_workers,
        collate_fn=fn)

    model = ClassifierNetwork(config.hidden_size, config.out_dim,
                              config.num_layers, config.dropout_prob,
                              config.virt_node, config.K, config.conv_type,
                              config.appnp_hop, config.alpha)
    model = paddle.DataParallel(model)

    optim = Adam(learning_rate=config.lr, parameters=model.parameters())
    criterion = nn.loss.BCEWithLogitsLoss()

    evaluator = Evaluator(config.dataset_name)

    best_valid = 0
    global_step = 0
    for epoch in range(1, config.epochs + 1):
        model.train()
        for idx, batch_data in enumerate(train_loader):
            g, mh_graphs, labels, unmask = batch_data
            g = g.tensor()
            multihop_graphs = []
            for item in mh_graphs:
                multihop_graphs.append(item.tensor())
            g.multi_hop_graphs = multihop_graphs
            labels = paddle.to_tensor(labels)
            unmask = paddle.to_tensor(unmask)

            pred = model(g)
            pred = paddle.masked_select(pred, unmask)
            labels = paddle.masked_select(labels, unmask)
            train_loss = criterion(pred, labels)
            train_loss.backward()
            optim.step()
            optim.clear_grad()

            if global_step % 80 == 0:
                message = "train: epoch %d | step %d | " % (epoch, global_step)
                message += "loss %.6f" % (train_loss.numpy())
                log.info(message)
                if dist.get_rank() == 0:
                    writer.add_scalar("loss", train_loss.numpy(), global_step)
            global_step += 1

        valid_result = evaluate(model, valid_loader, criterion, evaluator)
        message = "valid: epoch %d | step %d | " % (epoch, global_step)
        for key, value in valid_result.items():
            message += " | %s %.6f" % (key, value)
            if dist.get_rank() == 0:
                writer.add_scalar("valid_%s" % key, value, global_step)
        log.info(message)

        test_result = evaluate(model, test_loader, criterion, evaluator)
        message = "test: epoch %d | step %d | " % (epoch, global_step)
        for key, value in test_result.items():
            message += " | %s %.6f" % (key, value)
            if dist.get_rank() == 0:
                writer.add_scalar("test_%s" % key, value, global_step)
        log.info(message)

        if best_valid < valid_result[config.metrics]:
            best_valid = valid_result[config.metrics]
            best_valid_result = valid_result
            best_test_result = test_result

        message = "best result: epoch %d | " % (epoch)
        message += "valid %s: %.6f | " % (config.metrics,
                                          best_valid_result[config.metrics])
        message += "test %s: %.6f | " % (config.metrics,
                                         best_test_result[config.metrics])
        log.info(message)

    message = "final eval best result:%.6f" % best_valid_result[config.metrics]
    log.info(message)
    message = "final test best result:%.6f" % best_test_result[config.metrics]
    log.info(message)
def func_quant_aware_training(self):
    imperative_qat = self.imperative_qat
    seed = 1
    np.random.seed(seed)
    paddle.static.default_main_program().random_seed = seed
    paddle.static.default_startup_program().random_seed = seed

    lenet = ImperativeLenet()
    fixed_state = {}
    param_init_map = {}
    for name, param in lenet.named_parameters():
        p_shape = param.numpy().shape
        p_value = param.numpy()
        if name.endswith("bias"):
            value = np.zeros_like(p_value).astype('float32')
        else:
            value = np.random.normal(
                loc=0.0, scale=0.01,
                size=np.product(p_shape)).reshape(p_shape).astype('float32')
        fixed_state[name] = value
        param_init_map[param.name] = value
    lenet.set_dict(fixed_state)

    imperative_qat.quantize(lenet)
    adam = Adam(learning_rate=0.001, parameters=lenet.parameters())
    dynamic_loss_rec = []

    # for CI coverage
    conv_transpose = ModelForConv2dT()
    imperative_qat.quantize(conv_transpose)
    x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1., max=1.)
    conv_transpose(x_var)

    def train(model):
        adam = Adam(learning_rate=0.001, parameters=model.parameters())
        epoch_num = 1
        for epoch in range(epoch_num):
            model.train()
            for batch_id, data in enumerate(train_reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)

                img = paddle.to_tensor(x_data)
                label = paddle.to_tensor(y_data)
                out = model(img)
                acc = paddle.metric.accuracy(out, label, k=1)
                loss = nn.functional.loss.cross_entropy(out, label)
                avg_loss = paddle.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                model.clear_gradients()
                if batch_id % 50 == 0:
                    _logger.info(
                        "Train | At epoch {} step {}: loss = {:}, acc= {:}".
                        format(epoch, batch_id, avg_loss.numpy(),
                               acc.numpy()))
                    break

    def test(model):
        model.eval()
        avg_acc = [[], []]
        for batch_id, data in enumerate(test_reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array(
                [x[1] for x in data]).astype('int64').reshape(-1, 1)

            img = paddle.to_tensor(x_data)
            label = paddle.to_tensor(y_data)

            out = model(img)
            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
            avg_acc[0].append(acc_top1.numpy())
            avg_acc[1].append(acc_top5.numpy())
            if batch_id % 100 == 0:
                _logger.info("Test | step {}: acc1 = {:}, acc5 = {:}".format(
                    batch_id, acc_top1.numpy(), acc_top5.numpy()))

    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=512, drop_last=True)
    test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=512)
    train(lenet)
    test(lenet)
def func_setUp(self):
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4
    batch_num = 200

    with fluid.dygraph.guard():
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel(hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale)

        bd = []
        lr_arr = [1.0]
        # this a fake lr decay strategy
        for i in range(1, 10):
            bd.append(100 * i)
            new_lr = 1.0
            lr_arr.append(new_lr)

        place = fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)
        scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd,
                                                       values=lr_arr)
        adam = Adam(learning_rate=scheduler,
                    parameters=ptb_model.parameters())
        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None

        for i in range(batch_num):
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            y_data = y_data.reshape((-1, 1))

            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')

            x = to_variable(x_data)
            y = to_variable(y_data)
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            dy_loss, last_hidden, last_cell = ptb_model(
                x, y, init_hidden, init_cell)
            if i == 0:
                for param in ptb_model.parameters():
                    dy_param_init[param.name] = param.numpy()
            dy_loss.backward()
            adam.minimize(dy_loss)
            scheduler.step()
            ptb_model.clear_gradients()
            if i == batch_num - 1:
                for param in ptb_model.parameters():
                    dy_param_updated[param.name] = param.numpy()

        # check optimizer
        self.opti_dict = adam.state_dict()
        self.base_opti = {}
        for k, v in self.opti_dict.items():
            if isinstance(v, (core.VarBase, core.eager.Tensor)):
                self.base_opti[v.name] = v.numpy()
                self.assertTrue(np.sum(np.abs(v.numpy())) != 0)
            else:
                self.base_opti[k] = v

        paddle.save(self.opti_dict, "./test_dy_v2.pdopt")

        self.state_dict = ptb_model.state_dict()

        self.model_base = {}
        for k, v in self.state_dict.items():
            np_t = v.numpy()
            self.model_base[k] = np_t

        paddle.save(self.state_dict, "./test_dy_v2.pdparams")
def main(args):
    """Train the model one epoch at a time, logging the average loss and
    evaluating the Pearson correlation coefficient (pcc) on the test split."""
    paddle.set_device('gpu:{}'.format(args.device)
                      if args.use_cuda else 'cpu')

    logging.info('Load data ...')
    dataset = InMemoryDataset(npz_data_path=args.data_path)
    train_ds = Dataset(dataset[1])
    test_ds = Dataset(dataset[0])
    train_loader = train_ds.get_data_loader(
        batch_size=args.batch_size, collate_fn=collate_fn)
    test_loader = test_ds.get_data_loader(
        batch_size=args.batch_size, collate_fn=collate_fn)

    logging.info("Data loaded.")
    model = CDRModel(args)

    optim = Adam(learning_rate=args.lr, parameters=model.parameters())
    criterion = nn.MSELoss()

    global_step = 0
    best_pcc = 0.0
    os.makedirs(args.output_path, exist_ok=True)
    best_model = os.path.join(args.output_path, 'best_model.pdparams')
    for epoch in range(1, args.epoch_num + 1):
        model.train()
        for idx, batch_data in enumerate(train_loader):
            graphs, mut, gexpr, met, label = batch_data
            g = pgl.Graph.batch(graphs).tensor()
            mut = paddle.to_tensor(mut)
            gexpr = paddle.to_tensor(gexpr)
            met = paddle.to_tensor(met)
            label = paddle.to_tensor(label)

            pred = model([g, mut, gexpr, met])
            train_loss = paddle.pow(criterion(pred[:, 0], label)[0], 0.5)
            train_loss.backward()
            train_pcc = pearsonr(pred[:, 0].numpy(), label.numpy())[0]
            optim.step()
            optim.clear_grad()

            global_step += 1
            if global_step % 500 == 0:
                message = "train: epoch %d | step %d | " % (epoch, global_step)
                message += "loss %.6f | pcc %.4f" % (train_loss, train_pcc)
                log.info(message)

        result = evaluate(model, test_loader, criterion)
        message = "eval: epoch %d | step %d " % (epoch, global_step)
        for key, value in result.items():
            message += "| %s %.6f" % (key, value)
        log.info(message)

        if best_pcc < result['pcc']:
            best_pcc = result['pcc']
            paddle.save(model.state_dict(), best_model)

    log.info("best evaluating accuracy: %.6f" % best_pcc)
freq_masking = RandomMasking(
    max_mask_count=config['max_freq_mask'],
    max_mask_width=config['max_freq_mask_width'],
    axis=-2)
mel_augments = RandomApply([freq_masking, time_masking], p=0.25)
transforms += [mel_augments]
transforms = Compose(transforms)

if args.restore != -1:
    logger.info(f'restoring from checkpoint {args.restore}')
    fn = os.path.join(config['model_dir'],
                      f'{prefix}_checkpoint_epoch{args.restore}.tar')
    ckpt = paddle.load(fn)
    model.load_dict(ckpt['model'])
    optimizer = Adam(learning_rate=config['max_lr'], parameters=params)
    opti_state_dict = ckpt['opti']
    try:
        optimizer.set_state_dict(opti_state_dict)
    except:
        logger.error('failed to load state dict for optimizers')
    try:
        loss_fn.load_dict(ckpt['loss'])
    except:
        logger.error('failed to load state dict for loss')
    start_epoch = args.restore + 1
else:
    start_epoch = 0
    optimizer = Adam(learning_rate=config['max_lr'], parameters=params)