Example #1
        def train(model):
            adam = Adam(learning_rate=0.001, parameters=model.parameters())
            epoch_num = 1
            for epoch in range(epoch_num):
                model.train()
                for batch_id, data in enumerate(train_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array([x[1] for x in data
                                       ]).astype('int64').reshape(-1, 1)

                    img = paddle.to_tensor(x_data)
                    label = paddle.to_tensor(y_data)
                    out = model(img)
                    acc = paddle.metric.accuracy(out, label, k=1)
                    loss = nn.functional.loss.cross_entropy(out, label)
                    avg_loss = paddle.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    model.clear_gradients()
                    if batch_id % 50 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}"
                            .format(epoch, batch_id, avg_loss.numpy(),
                                    acc.numpy()))
                        break
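All of these snippets are excerpts and leave their imports out. A minimal header along the following lines (the exact modules are an assumption, not part of the original source) makes the Adam-based examples self-contained:

import numpy as np
import paddle
import paddle.nn as nn
from paddle.optimizer import Adam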
Example #2
def main(args):
    if not args.use_cuda:
        paddle.set_device("cpu")
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    graph = load(args.dataset)

    model = SkipGramModel(graph.num_nodes,
                          args.embed_size,
                          args.neg_num,
                          sparse=not args.use_cuda)
    model = paddle.DataParallel(model)

    train_steps = int(graph.num_nodes / args.batch_size) * args.epoch
    scheduler = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.learning_rate,
        decay_steps=train_steps,
        end_lr=0.0001)

    optim = Adam(learning_rate=scheduler, parameters=model.parameters())

    train_ds = ShardedDataset(graph.nodes)
    collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                               args.neg_num, args.neg_sample_type)
    data_loader = Dataloader(train_ds,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=args.sample_workers,
                             collate_fn=collate_fn)

    for epoch in tqdm.tqdm(range(args.epoch)):
        train_loss = train(model, data_loader, optim)
        log.info("Runing epoch:%s\t train_loss:%.6f", epoch, train_loss)
    paddle.save(model.state_dict(), "model.pdparams")
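The train helper called above is not part of this excerpt. A plausible minimal version, assuming the SkipGramModel forward pass consumes one batch from the loader and returns its negative-sampling loss directly, might look like this:

def train(model, data_loader, optim):
    model.train()
    total_loss, steps = 0.0, 0
    for batch in data_loader:
        loss = model(*batch)  # assumption: the model returns its own loss
        loss.backward()
        optim.step()
        optim.clear_grad()
        total_loss += float(loss)
        steps += 1
    return total_loss / max(steps, 1)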
Example #3
def main(args):
    dataset = load(args.dataset, args.feature_pre_normalize)

    graph = dataset.graph
    train_index = dataset.train_index
    train_label = dataset.train_label

    val_index = dataset.val_index
    val_label = dataset.val_label

    test_index = dataset.test_index
    test_label = dataset.test_label
    criterion = paddle.nn.loss.CrossEntropyLoss()

    dur = []

    best_test = []

    for run in range(args.runs):
        cal_val_acc = []
        cal_test_acc = []
        cal_val_loss = []
        cal_test_loss = []
        gnn_model = GCN(input_size=graph.node_feat["words"].shape[1],
                        num_class=dataset.num_classes,
                        num_layers=1,
                        dropout=0.5,
                        hidden_size=16)

        optim = Adam(
            learning_rate=0.01,
            parameters=gnn_model.parameters(),
            weight_decay=0.0005)

        for epoch in tqdm.tqdm(range(200)):
            if epoch >= 3:
                start = time.time()
            train_loss, train_acc = train(train_index, train_label, gnn_model,
                                          graph, criterion, optim)
            if epoch >= 3:
                end = time.time()
                dur.append(end - start)
            val_loss, val_acc = eval(val_index, val_label, gnn_model, graph,
                                     criterion)
            cal_val_acc.append(val_acc.numpy())
            cal_val_loss.append(val_loss.numpy())

            test_loss, test_acc = eval(test_index, test_label, gnn_model,
                                       graph, criterion)
            cal_test_acc.append(test_acc.numpy())
            cal_test_loss.append(test_loss.numpy())

        log.info("Runs %s: Model: GCN Best Test Accuracy: %f" %
                 (run, cal_test_acc[np.argmin(cal_val_loss)]))

        best_test.append(cal_test_acc[np.argmin(cal_val_loss)])

    log.info("Average Speed %s sec/ epoch" % (np.mean(dur)))
    log.info("Dataset: %s Best Test Accuracy: %f ( stddev: %f )" %
             (args.dataset, np.mean(best_test), np.std(best_test)))
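The train and eval helpers are likewise not shown. An eval compatible with the call sites above, assuming the GCN forward pass takes the graph plus its node features and that labels arrive as int64 tensors, could be sketched as:

@paddle.no_grad()
def eval(index, label, model, graph, criterion):
    model.eval()
    logits = model(graph, graph.node_feat["words"])
    pred = paddle.gather(logits, index)  # restrict to the evaluated split
    loss = criterion(pred, label)
    acc = paddle.metric.accuracy(pred, label.reshape([-1, 1]))
    return loss, acc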
Example #4
def main(args):
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    graph = load(args.dataset)

    model = SkipGramModel(
        graph.num_nodes,
        args.embed_size,
        args.neg_num,
        sparse=not args.use_cuda)
    model = paddle.DataParallel(model)

    optim = Adam(
        learning_rate=args.learning_rate,
        parameters=model.parameters(),
        weight_decay=args.weight_decay)

    train_ds = ShardedDataset(graph.nodes)
    collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                               args.neg_num, args.neg_sample_type)
    data_loader = Dataloader(
        train_ds,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.sample_workers,
        collate_fn=collate_fn)

    for epoch in tqdm.tqdm(range(args.epoch)):
        train_loss = train(model, data_loader, optim)
        log.info("Runing epoch:%s\t train_loss:%.6f", epoch, train_loss)
Example #5
def main(args, config):
    dataset = load(args.dataset, args.feature_pre_normalize)

    graph = dataset.graph
    train_index = dataset.train_index
    train_label = dataset.train_label

    val_index = dataset.val_index
    val_label = dataset.val_label

    test_index = dataset.test_index
    test_label = dataset.test_label
    GraphModel = getattr(model, config.model_name)
    criterion = paddle.nn.loss.CrossEntropyLoss()

    dur = []

    best_test = []

    for run in range(args.runs):
        cal_val_acc = []
        cal_test_acc = []
        cal_val_loss = []
        cal_test_loss = []

        gnn_model = GraphModel(input_size=graph.node_feat["words"].shape[1],
                               num_class=dataset.num_classes,
                               **config)

        optim = Adam(learning_rate=config.learning_rate,
                     parameters=gnn_model.parameters(),
                     weight_decay=config.weight_decay)

        for epoch in tqdm.tqdm(range(args.epoch)):
            train_loss, train_acc = train(train_index, train_label, gnn_model,
                                          graph, criterion, optim)
            val_loss, val_acc = eval(val_index, val_label, gnn_model, graph,
                                     criterion)
            cal_val_acc.append(val_acc.numpy())
            cal_val_loss.append(val_loss.numpy())

            test_loss, test_acc = eval(test_index, test_label, gnn_model,
                                       graph, criterion)
            cal_test_acc.append(test_acc.numpy())
            cal_test_loss.append(test_loss.numpy())

        log.info(
            "Runs %s: Model: %s Best Test Accuracy: %f" %
            (run, config.model_name, cal_test_acc[np.argmin(cal_val_loss)]))

        best_test.append(cal_test_acc[np.argmin(cal_val_loss)])

    log.info("Dataset: %s Best Test Accuracy: %f ( stddev: %f )" %
             (args.dataset, np.mean(best_test), np.std(best_test)))
Example #6
    def setup_model(self):
        config = self.config
        model = SpeakerEncoder(
            config.data.n_mel,
            config.model.num_layers,
            config.model.hidden_size,
            config.model.embedding_size)
        optimizer = Adam(
            config.training.learning_rate_init,
            parameters=model.parameters(),
            grad_clip=ClipGradByGlobalNorm(3))
        self.model = DataParallel(model) if self.parallel else model
        self.model_core = model
        self.optimizer = optimizer
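Note that gradient clipping is attached when the optimizer is constructed rather than applied by hand each step: paddle.nn.ClipGradByGlobalNorm rescales all gradients together whenever their global norm exceeds the threshold. A standalone illustration:

from paddle.nn import ClipGradByGlobalNorm

clip = ClipGradByGlobalNorm(clip_norm=3.0)
optimizer = Adam(learning_rate=1e-3,
                 parameters=model.parameters(),
                 grad_clip=clip)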
Example #7
def main(args):
    if not args.use_cuda:
        paddle.set_device("cpu")
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    dataset = load(args.dataset)
    graph = dataset.graph

    model = Model(graph.num_nodes, args.embed_size, dataset.num_groups)
    model = paddle.DataParallel(model)

    batch_size = len(dataset.train_index)

    train_steps = int(len(dataset.train_index) / batch_size) * args.epoch
    scheduler = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.multiclass_learning_rate,
        decay_steps=train_steps,
        end_lr=0.0001)

    optim = Adam(learning_rate=scheduler, parameters=model.parameters())

    if args.load_from_static:
        model.set_state_dict(load_from_files("./model"))
    else:
        model.set_state_dict(paddle.load("model.pdparams"))

    train_data_loader = node_classify_generator(graph,
                                                dataset.train_index,
                                                batch_size=batch_size,
                                                epoch=1)
    test_data_loader = node_classify_generator(graph,
                                               dataset.test_index,
                                               batch_size=batch_size,
                                               epoch=1)

    best_test_macro_f1 = -1
    for epoch in tqdm.tqdm(range(args.epoch)):
        train_loss = train(model, train_data_loader(), optim)
        test_loss, test_macro_f1, test_micro_f1 = test(model,
                                                       test_data_loader())
        best_test_macro_f1 = max(best_test_macro_f1, test_macro_f1)
    log.info("Best test macro f1 is %s." % best_test_macro_f1)
Example #8
def main(args):
    if not args.use_cuda:
        paddle.set_device("cpu")
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    if args.edge_file:
        graph = load_from_file(args.edge_file)
    else:
        graph = load(args.dataset)

    edges = np.load("./edges.npy")
    edges = np.concatenate([edges, edges[:, [1, 0]]])
    graph = pgl.Graph(edges)

    model = SkipGramModel(graph.num_nodes,
                          args.embed_size,
                          args.neg_num,
                          sparse=not args.use_cuda)
    model = paddle.DataParallel(model)

    train_ds = ShardedDataset(graph.nodes, repeat=args.epoch)

    train_steps = int(len(train_ds) // args.batch_size)
    log.info("train_steps: %s" % train_steps)
    scheduler = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.learning_rate,
        decay_steps=train_steps,
        end_lr=0.0001)

    optim = Adam(learning_rate=scheduler, parameters=model.parameters())

    collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                               args.neg_num, args.neg_sample_type)
    data_loader = Dataloader(train_ds,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=args.sample_workers,
                             collate_fn=collate_fn)

    train_loss = train(model, data_loader, optim)
    paddle.save(model.state_dict(), "model.pdparams")
Example #9
def main(config, ip_list_file):
    ds = TrainPairDataset(config, ip_list_file)
    loader = Dataloader(
        ds,
        batch_size=config.batch_pair_size,
        num_workers=config.num_workers,
        stream_shuffle_size=config.pair_stream_shuffle_size,
        collate_fn=CollateFn())

    model = SkipGramModel(config)

    if config.warm_start_from:
        log.info("warm start from %s" % config.warm_start_from)
        model.set_state_dict(paddle.load(config.warm_start_from))

    optim = Adam(
        learning_rate=config.lr,
        parameters=model.parameters(),
        lazy_mode=config.lazy_mode)

    log.info("starting training...")
    train(config, model, loader, optim)
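The lazy_mode flag matters for embedding-heavy models like this skip-gram: with lazy_mode=True, Adam updates only the moment rows whose parameters received gradients in the current mini-batch instead of every row. It is purely a constructor flag:

optim = Adam(learning_rate=config.lr,
             parameters=model.parameters(),
             lazy_mode=True)  # skip rows without gradients this step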
Example #10
    # define loss
    bce_loss = F.binary_cross_entropy_with_logits
    loss_fn = MixUpLoss(bce_loss)

    warm_steps = c['warm_steps']
    lrs = np.linspace(1e-10, c['start_lr'], warm_steps)

    # restore checkpoint
    if args.restore != -1:
        model = ModelClass(pretrained=False,
                           num_classes=c['num_classes'],
                           dropout=c['dropout'])
        model_dict, optim_dict = load_checkpoint(c['model_dir'], args.restore,
                                                 prefix)
        model.load_dict(model_dict)
        optimizer = Adam(learning_rate=c['start_lr'],
                         parameters=model.parameters())
        optimizer.set_state_dict(optim_dict)
        start_epoch = args.restore

    else:
        model = ModelClass(pretrained=True,
                           num_classes=c['num_classes'],
                           dropout=c['dropout'])  # use imagenet pretrained
        optimizer = Adam(learning_rate=c['start_lr'],
                         parameters=model.parameters())
        start_epoch = 0

    # for name, p in list(model.named_parameters())[:-2]:
    #     print(name, p.stop_gradient)
    #     p.stop_gradient = True
Example #11
    def testSetNumpyBeforeTrain(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            bd = []
            lr_arr = [0.0]
            # this is a fake lr decay strategy
            for i in range(1, 10):
                bd.append(100 * i)
                # set lr to 0.0, not update parameter
                new_lr = 0.0
                lr_arr.append(new_lr)

            place = (fluid.CPUPlace() if not core.is_compiled_with_cuda()
                     else fluid.CUDAPlace(0))
            scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd,
                                                           values=lr_arr)
            adam = Adam(learning_rate=scheduler,
                        beta1=0.8,
                        beta2=0.6,
                        parameters=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            np_opti_dict = {}
            np_state_dict = {}

            for k, v in self.opti_dict.items():
                if isinstance(v, core.VarBase):
                    np_opti_dict[v.name] = v.numpy()
                else:
                    np_opti_dict[k] = v

            for k, v in self.state_dict.items():
                np_state_dict[k] = v.numpy()

            adam.set_state_dict(np_opti_dict)
            ptb_model.set_dict(np_state_dict)
            for i in range(1):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                dy_loss.backward()
                scheduler.step()
                adam.minimize(dy_loss)
                ptb_model.clear_gradients()

            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                if k == "LR_Scheduler":
                    self.assertTrue(
                        np.array_equal(v['last_epoch'],
                                       self.base_opti[k]['last_epoch'] + 1))

                if k.find("beta1_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta1))
                if k.find("beta2_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta2))

            # check parameter

            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()

                base_t = self.model_base[k]
                self.assertTrue(np.array_equal(new_t, base_t))
Example #12
    def testSetVariableBeforeTrain(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            place = (fluid.CPUPlace() if not core.is_compiled_with_cuda()
                     else fluid.CUDAPlace(0))
            adam = Adam(learning_rate=0.0,
                        beta1=0.8,
                        beta2=0.6,
                        parameters=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            adam.set_state_dict(self.opti_dict)
            ptb_model.set_dict(self.state_dict)

            for i in range(1):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                dy_loss.backward()
                adam.minimize(dy_loss)
                ptb_model.clear_gradients()

            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                if k == "global_step":
                    self.assertTrue(
                        np.array_equal(v.numpy(), self.base_opti[v.name] + 1))

                if k.find("beta1_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta1))
                if k.find("beta2_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta2))

            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()

                base_t = self.model_base[k]
                self.assertTrue(np.array_equal(new_t, base_t))
Example #13
    def testSetNumpy(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            bd = []
            lr_arr = [1.0]
            # this is a fake lr decay strategy
            for i in range(1, 10):
                bd.append(100 * i)
                new_lr = 1.0
                lr_arr.append(new_lr)

            place = (fluid.CPUPlace() if not core.is_compiled_with_cuda()
                     else fluid.CUDAPlace(0))
            scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd,
                                                           values=lr_arr)
            adam = Adam(learning_rate=scheduler,
                        parameters=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)
                if i == 0:
                    for param in ptb_model.parameters():
                        dy_param_init[param.name] = param.numpy()
                dy_loss.backward()
                adam.minimize(dy_loss)
                scheduler.step()
                ptb_model.clear_gradients()
                if i == batch_num - 1:
                    for param in ptb_model.parameters():
                        dy_param_updated[param.name] = param.numpy()

            # check optimizer
            opti_dict = adam.state_dict()
            np_opti_dict = {}
            # set to zero
            for k, v in opti_dict.items():
                if isinstance(v, core.VarBase):
                    np_t = v.numpy()
                    np_opti_dict[v.name] = np_t
                    var = v.value().get_tensor()
                    var.set(np.zeros_like(np_t), place)
                    self.assertTrue(np.sum(np.abs(v.numpy())) == 0)
                else:
                    np_opti_dict[k] = v

            if isinstance(adam._learning_rate, LearningRateDecay):
                adam._learning_rate.step_num = 0

            adam.set_state_dict(np_opti_dict)

            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                if isinstance(v, core.VarBase):
                    self.assertTrue(
                        np.array_equal(v.numpy(), self.base_opti[v.name]))
                else:
                    self.assertEqual(v, self.base_opti[k])

            # check parameter
            state_dict = ptb_model.state_dict()
            np_state_dict = {}
            for k, v in state_dict.items():
                np_t = v.numpy()
                np_state_dict[k] = np_t
                var = v.value().get_tensor()

                var.set(np.zeros_like(np_t), place)

            ptb_model.set_dict(np_state_dict)

            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()

                base_t = self.model_base[k]

                self.assertTrue(np.array_equal(new_t, base_t))
Example #14
            self.accuracy.reset()
            correct = self.accuracy.compute(x, label)
            self.accuracy.update(correct)
            acc = self.accuracy.accumulate()
            return x, acc
        else:
            return x


# Prepare the multi-GPU (multi-card) environment
dist.init_parallel_env()

epoch_num = 5
BATCH_SIZE = 64
mnist = MNIST()
adam = Adam(learning_rate=0.001, parameters=mnist.parameters())
# Data-parallel wrapper
mnist = paddle.DataParallel(mnist)

# Build the reader with paddle.io.DataLoader; a DistributedBatchSampler is
# needed here to shard the data across the multiple cards
train_dataset = MnistDataset(mode='train')
train_sampler = paddle.io.DistributedBatchSampler(train_dataset,
                                                  batch_size=BATCH_SIZE,
                                                  drop_last=True)
train_reader = paddle.io.DataLoader(train_dataset,
                                    batch_sampler=train_sampler)

for epoch in range(epoch_num):
    for batch_id, data in enumerate(train_reader()):
        img = data[0]
        label = data[1]
        label.stop_gradient = True
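The excerpt breaks off inside the batch loop. Following the pattern of the other MNIST examples on this page, the body would typically continue roughly as below (a sketch, not the original code):

        out = mnist(img)
        loss = paddle.nn.functional.cross_entropy(out, label)
        avg_loss = paddle.mean(loss)
        avg_loss.backward()
        adam.step()
        adam.clear_grad()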
Example #15
if __name__ == '__main__':
    in_features = 2
    out_features = 1
    N = 48  # Length of the time-series
    # Input feature is a sine and a cosine wave
    data_x = np.stack(
        [np.sin(np.linspace(0, 3 * np.pi, N)), np.cos(np.linspace(0, 3 * np.pi, N))], axis=1
    )
    data_x = np.expand_dims(data_x, axis=0).astype(
        np.float32)  # Add batch dimension
    # Target output is a sine with double the frequency of the input signal
    data_y = np.sin(np.linspace(0, 6 * np.pi, N)
                    ).reshape([1, N, 1]).astype(np.float32)
    data_x = paddle.to_tensor(data_x)
    data_y = paddle.to_tensor(data_y)
    print("data_y.shape: ", str(data_y.shape))

    wiring = kncp.wirings.FullyConnected(
        8, out_features)  # 8 units wired to out_features motor neurons
    ltc_cell = LTCCell(wiring, in_features)
    dataloader = DataLoader(TensorDataset(
        [data_x, data_y]), batch_size=1, shuffle=True, num_workers=4)

    ltc_sequence = RNNSequence(ltc_cell)
    learn = SequenceLearner(ltc_sequence)
    opt = Adam(learning_rate=0.01, parameters=ltc_sequence.parameters())
    loss = nn.MSELoss()
    learn.prepare(opt, loss)
    learn.fit(dataloader, epochs=400, verbose=1)
    results = learn.evaluate(dataloader)
Example #16
def main(args):
    ds = GINDataset(args.data_path,
                    args.dataset_name,
                    self_loop=not args.train_eps,
                    degree_as_nlabel=True)
    args.feat_size = ds.dim_nfeats

    train_ds, test_ds = fold10_split(ds,
                                     fold_idx=args.fold_idx,
                                     seed=args.seed)

    train_loader = Dataloader(train_ds,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=1,
                              collate_fn=collate_fn)
    test_loader = Dataloader(test_ds,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=1,
                             collate_fn=collate_fn)

    model = GINModel(args, ds.gclasses)

    epoch_step = len(train_loader)
    boundaries = list(
        range(50 * epoch_step, args.epochs * epoch_step, 50 * epoch_step))
    values = [args.lr * 0.5**i for i in range(0, len(boundaries) + 1)]
    scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=boundaries,
                                                   values=values,
                                                   verbose=False)
    optim = Adam(learning_rate=scheduler, parameters=model.parameters())
    criterion = nn.loss.CrossEntropyLoss()

    global_step = 0
    best_acc = 0.0
    for epoch in range(1, args.epochs + 1):
        model.train()
        for idx, batch_data in enumerate(train_loader):
            graphs, labels = batch_data
            g = pgl.Graph.batch(graphs).tensor()
            labels = paddle.to_tensor(labels)

            pred = model(g)
            train_loss = criterion(pred, labels)
            train_loss.backward()
            train_acc = paddle.metric.accuracy(input=pred, label=labels, k=1)
            optim.step()
            optim.clear_grad()
            scheduler.step()

            global_step += 1
            if global_step % 10 == 0:
                message = "train: epoch %d | step %d | " % (epoch, global_step)
                message += "loss %.6f | acc %.4f" % (train_loss, train_acc)
                log.info(message)

        result = evaluate(model, test_loader, criterion)
        message = "eval: epoch %d | step %d | " % (epoch, global_step)
        for key, value in result.items():
            message += " | %s %.6f" % (key, value)
        log.info(message)

        if best_acc < result['acc']:
            best_acc = result['acc']

    log.info("best evaluating accuracy: %.6f" % best_acc)
Example #17
def main(config):
    if dist.get_world_size() > 1:
        dist.init_parallel_env()

    if dist.get_rank() == 0:
        timestamp = datetime.now().strftime("%Hh%Mm%Ss")
        log_path = os.path.join(config.log_dir,
                                "tensorboard_log_%s" % timestamp)
        writer = SummaryWriter(log_path)

    log.info("loading data")
    raw_dataset = GraphPropPredDataset(name=config.dataset_name)
    config.num_class = raw_dataset.num_tasks
    config.eval_metric = raw_dataset.eval_metric
    config.task_type = raw_dataset.task_type

    mol_dataset = MolDataset(config,
                             raw_dataset,
                             transform=make_multihop_edges)
    splitted_index = raw_dataset.get_idx_split()
    train_ds = Subset(mol_dataset, splitted_index['train'], mode='train')
    valid_ds = Subset(mol_dataset, splitted_index['valid'], mode="valid")
    test_ds = Subset(mol_dataset, splitted_index['test'], mode="test")

    log.info("Train Examples: %s" % len(train_ds))
    log.info("Val Examples: %s" % len(valid_ds))
    log.info("Test Examples: %s" % len(test_ds))

    fn = CollateFn(config)

    train_loader = Dataloader(train_ds,
                              batch_size=config.batch_size,
                              shuffle=True,
                              num_workers=config.num_workers,
                              collate_fn=fn)

    valid_loader = Dataloader(valid_ds,
                              batch_size=config.batch_size,
                              num_workers=config.num_workers,
                              collate_fn=fn)

    test_loader = Dataloader(test_ds,
                             batch_size=config.batch_size,
                             num_workers=config.num_workers,
                             collate_fn=fn)

    model = ClassifierNetwork(config.hidden_size, config.out_dim,
                              config.num_layers, config.dropout_prob,
                              config.virt_node, config.K, config.conv_type,
                              config.appnp_hop, config.alpha)
    model = paddle.DataParallel(model)

    optim = Adam(learning_rate=config.lr, parameters=model.parameters())
    criterion = nn.loss.BCEWithLogitsLoss()

    evaluator = Evaluator(config.dataset_name)

    best_valid = 0

    global_step = 0
    for epoch in range(1, config.epochs + 1):
        model.train()
        for idx, batch_data in enumerate(train_loader):
            g, mh_graphs, labels, unmask = batch_data
            g = g.tensor()
            multihop_graphs = []
            for item in mh_graphs:
                multihop_graphs.append(item.tensor())
            g.multi_hop_graphs = multihop_graphs
            labels = paddle.to_tensor(labels)
            unmask = paddle.to_tensor(unmask)

            pred = model(g)
            pred = paddle.masked_select(pred, unmask)
            labels = paddle.masked_select(labels, unmask)
            train_loss = criterion(pred, labels)
            train_loss.backward()
            optim.step()
            optim.clear_grad()

            if global_step % 80 == 0:
                message = "train: epoch %d | step %d | " % (epoch, global_step)
                message += "loss %.6f" % (train_loss.numpy())
                log.info(message)
                if dist.get_rank() == 0:
                    writer.add_scalar("loss", train_loss.numpy(), global_step)
            global_step += 1

        valid_result = evaluate(model, valid_loader, criterion, evaluator)
        message = "valid: epoch %d | step %d | " % (epoch, global_step)
        for key, value in valid_result.items():
            message += " | %s %.6f" % (key, value)
            if dist.get_rank() == 0:
                writer.add_scalar("valid_%s" % key, value, global_step)
        log.info(message)

        test_result = evaluate(model, test_loader, criterion, evaluator)
        message = "test: epoch %d | step %d | " % (epoch, global_step)
        for key, value in test_result.items():
            message += " | %s %.6f" % (key, value)
            if dist.get_rank() == 0:
                writer.add_scalar("test_%s" % key, value, global_step)
        log.info(message)

        if best_valid < valid_result[config.metrics]:
            best_valid = valid_result[config.metrics]
            best_valid_result = valid_result
            best_test_result = test_result

        message = "best result: epoch %d | " % (epoch)
        message += "valid %s: %.6f | " % (config.metrics,
                                          best_valid_result[config.metrics])
        message += "test %s: %.6f | " % (config.metrics,
                                         best_test_result[config.metrics])
        log.info(message)

    message = "final eval best result:%.6f" % best_valid_result[config.metrics]
    log.info(message)
    message = "final test best result:%.6f" % best_test_result[config.metrics]
    log.info(message)
Example #18
    def func_quant_aware_training(self):
        imperative_qat = self.imperative_qat
        seed = 1
        np.random.seed(seed)
        paddle.static.default_main_program().random_seed = seed
        paddle.static.default_startup_program().random_seed = seed
        lenet = ImperativeLenet()
        fixed_state = {}
        param_init_map = {}
        for name, param in lenet.named_parameters():
            p_shape = param.numpy().shape
            p_value = param.numpy()
            if name.endswith("bias"):
                value = np.zeros_like(p_value).astype('float32')
            else:
                value = np.random.normal(loc=0.0,
                                         scale=0.01,
                                         size=np.prod(p_shape)).reshape(
                                             p_shape).astype('float32')
            fixed_state[name] = value
            param_init_map[param.name] = value
        lenet.set_dict(fixed_state)

        imperative_qat.quantize(lenet)
        adam = Adam(learning_rate=0.001, parameters=lenet.parameters())
        dynamic_loss_rec = []
        # for CI coverage
        conv_transpose = ModelForConv2dT()
        imperative_qat.quantize(conv_transpose)
        x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1., max=1.)
        conv_transpose(x_var)

        def train(model):
            adam = Adam(learning_rate=0.001, parameters=model.parameters())
            epoch_num = 1
            for epoch in range(epoch_num):
                model.train()
                for batch_id, data in enumerate(train_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array([x[1] for x in data
                                       ]).astype('int64').reshape(-1, 1)

                    img = paddle.to_tensor(x_data)
                    label = paddle.to_tensor(y_data)
                    out = model(img)
                    acc = paddle.metric.accuracy(out, label, k=1)
                    loss = nn.functional.loss.cross_entropy(out, label)
                    avg_loss = paddle.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    model.clear_gradients()
                    if batch_id % 50 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}"
                            .format(epoch, batch_id, avg_loss.numpy(),
                                    acc.numpy()))
                        break

        def test(model):
            model.eval()
            avg_acc = [[], []]
            for batch_id, data in enumerate(test_reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = paddle.to_tensor(x_data)
                label = paddle.to_tensor(y_data)

                out = model(img)
                acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
                acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
                avg_acc[0].append(acc_top1.numpy())
                avg_acc[1].append(acc_top5.numpy())
                if batch_id % 100 == 0:
                    _logger.info(
                        "Test | step {}: acc1 = {:}, acc5 = {:}".format(
                            batch_id, acc_top1.numpy(), acc_top5.numpy()))

        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=512,
                                    drop_last=True)
        test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=512)
        train(lenet)
        test(lenet)
Example #19
    def func_setUp(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            bd = []
            lr_arr = [1.0]
            # this is a fake lr decay strategy
            for i in range(1, 10):
                bd.append(100 * i)
                new_lr = 1.0
                lr_arr.append(new_lr)

            place = (fluid.CPUPlace() if not core.is_compiled_with_cuda()
                     else fluid.CUDAPlace(0))
            scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd,
                                                           values=lr_arr)
            adam = Adam(learning_rate=scheduler,
                        parameters=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)
                if i == 0:
                    for param in ptb_model.parameters():
                        dy_param_init[param.name] = param.numpy()
                dy_loss.backward()
                adam.minimize(dy_loss)
                scheduler.step()
                ptb_model.clear_gradients()

                if i == batch_num - 1:
                    for param in ptb_model.parameters():
                        dy_param_updated[param.name] = param.numpy()

            # check optimizer
            self.opti_dict = adam.state_dict()
            self.base_opti = {}
            for k, v in self.opti_dict.items():
                if isinstance(v, (core.VarBase, core.eager.Tensor)):
                    self.base_opti[v.name] = v.numpy()
                    self.assertTrue(np.sum(np.abs(v.numpy())) != 0)
                else:
                    self.base_opti[k] = v

            paddle.save(self.opti_dict, "./test_dy_v2.pdopt")

            self.state_dict = ptb_model.state_dict()

            self.model_base = {}
            for k, v in self.state_dict.items():
                np_t = v.numpy()
                self.model_base[k] = np_t

            paddle.save(self.state_dict, "./test_dy_v2.pdparams")
Example #20
def main(args):
    """
    Model training for one epoch and return the average loss and model evaluating to monitor pcc.
    """
    paddle.set_device('gpu:{}'.format(args.device) if args.use_cuda else 'cpu')

    logging.info('Load data ...')
    dataset = InMemoryDataset(npz_data_path=args.data_path)

    train_ds = Dataset(dataset[1])
    test_ds = Dataset(dataset[0])
    train_loader = train_ds.get_data_loader(batch_size=args.batch_size,
                                            collate_fn=collate_fn)
    test_loader = test_ds.get_data_loader(batch_size=args.batch_size,
                                          collate_fn=collate_fn)

    logging.info("Data loaded.")

    model = CDRModel(args)

    optim = Adam(learning_rate=args.lr, parameters=model.parameters())
    criterion = nn.MSELoss()

    global_step = 0
    best_pcc = 0.0
    os.makedirs(args.output_path, exist_ok=True)
    best_model = os.path.join(args.output_path, 'best_model.pdparams')

    for epoch in range(1, args.epoch_num + 1):
        model.train()
        for idx, batch_data in enumerate(train_loader):
            graphs, mut, gexpr, met, label = batch_data
            g = pgl.Graph.batch(graphs).tensor()
            mut = paddle.to_tensor(mut)
            gexpr = paddle.to_tensor(gexpr)
            met = paddle.to_tensor(met)
            label = paddle.to_tensor(label)

            pred = model([g, mut, gexpr, met])
            train_loss = paddle.pow(criterion(pred[:, 0], label)[0], 0.5)
            train_loss.backward()
            train_pcc = pearsonr(pred[:, 0].numpy(), label.numpy())[0]
            optim.step()
            optim.clear_grad()

            global_step += 1
            if global_step % 500 == 0:
                message = "train: epoch %d | step %d | " % (epoch, global_step)
                message += "loss %.6f | pcc %.4f" % (train_loss, train_pcc)
                log.info(message)

        result = evaluate(model, test_loader, criterion)
        message = "eval: epoch %d | step %d " % (epoch, global_step)
        for key, value in result.items():
            message += "| %s %.6f" % (key, value)
        log.info(message)

        if best_pcc < result['pcc']:
            best_pcc = result['pcc']
            paddle.save(model.state_dict(), best_model)

    log.info("best evaluating accuracy: %.6f" % best_pcc)
Example #21
        freq_masking = RandomMasking(
            max_mask_count=config['max_freq_mask'],
            max_mask_width=config['max_freq_mask_width'],
            axis=-2)

        mel_augments = RandomApply([freq_masking, time_masking], p=0.25)
        transforms += [mel_augments]
    transforms = Compose(transforms)

    if args.restore != -1:
        logger.info(f'restoring from checkpoint {args.restore}')
        fn = os.path.join(config['model_dir'],
                          f'{prefix}_checkpoint_epoch{args.restore}.tar')
        ckpt = paddle.load(fn)
        model.load_dict(ckpt['model'])
        optimizer = Adam(learning_rate=config['max_lr'], parameters=params)
        opti_state_dict = ckpt['opti']
        try:
            optimizer.set_state_dict(opti_state_dict)
        except Exception:
            logger.error('failed to load state dict for optimizers')
        try:
            loss_fn.load_dict(ckpt['loss'])
        except Exception:
            logger.error('failed to load state dict for loss')

        start_epoch = args.restore + 1
    else:
        start_epoch = 0
        optimizer = Adam(learning_rate=config['max_lr'], parameters=params)
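The checkpoint read here bundles model, optimizer, and loss state under the keys 'model', 'opti', and 'loss'. A matching save step (hypothetical, mirroring those keys and assuming a Paddle version whose paddle.save accepts a nested dict) would be:

ckpt = {'model': model.state_dict(),
        'opti': optimizer.state_dict(),
        'loss': loss_fn.state_dict()}
paddle.save(ckpt, os.path.join(config['model_dir'],
                               f'{prefix}_checkpoint_epoch{epoch}.tar'))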