Code example #1
File: main.py  Project: ramonemiliani93/uncertainty
def run(model,
        train_loader,
        val_loader,
        optimizer,
        epochs,
        log_interval,
        log_dir,
        val=False,
        log=True):
    writer = create_summary_writer(log_dir)
    device = 'cpu'

    if torch.cuda.is_available():
        device = 'cuda'
    loss_metric = Average()
    trainer = create_train_engine(model, optimizer, device=device)
    evaluator = create_supervised_evaluator(model,
                                            metrics={'loss': loss_metric},
                                            device=device)

    @trainer.on(Events.ITERATION_COMPLETED(every=log_interval))
    def log_training_loss(engine):
        # print("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
        #      "".format(engine.state.epoch, engine.state.iteration, len(train_loader), engine.state.output))
        writer.add_scalar("training/loss", engine.state.output,
                          engine.state.iteration)

    if log:

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_training_results(engine):
            evaluator.run(train_loader)
            metrics = evaluator.state.metrics
            avg_mse = metrics['loss']
            print("Training Results - Epoch: {}   Avg loss: {:.2f}".format(
                engine.state.epoch, avg_mse))
            writer.add_scalar("training/avg_loss", avg_mse, engine.state.epoch)

    if val:

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_validation_results(engine):
            evaluator.run(val_loader)
            metrics = evaluator.state.metrics
            avg_mse = metrics['loss']
            # print("Validation Results - Epoch: {}  Avg loss: {:.2f}"
            #      .format(engine.state.epoch, avg_mse))
            writer.add_scalar("validation/avg_loss", avg_mse,
                              engine.state.epoch)

    # kick everything off
    trainer.run(train_loader, max_epochs=epochs)

    writer.close()
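
The snippet above only defines the training entry point. Below is a minimal sketch of how it might be called; the model, data loaders and optimizer are hypothetical stand-ins, and only the call signature of run() is taken from the code above.

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# Hypothetical model and data; any regression-style model/loaders would do.
model = nn.Sequential(nn.Linear(10, 64), nn.ReLU(), nn.Linear(64, 1))
train_loader = DataLoader(TensorDataset(torch.randn(256, 10), torch.randn(256, 1)),
                          batch_size=32, shuffle=True)
val_loader = DataLoader(TensorDataset(torch.randn(64, 10), torch.randn(64, 1)),
                        batch_size=32)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Train for 10 epochs, logging the batch loss every 50 iterations.
run(model, train_loader, val_loader, optimizer,
    epochs=10, log_interval=50, log_dir="runs/example", val=True)
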
Code example #2
def stochastic_gradient_descent(X, Y, model, learning_rate=0.01,
                                mini_batch_fraction=0.01, epoch=10000, tol=1.e-6):
    """
    利用随机梯度下降法训练模型。

    参数
    ----
    X : np.array, 自变量数据

    Y : np.array, 因变量数据

    model : dict, 里面包含模型的参数,损失函数,自变量,应变量
    """
    # Set up the optimization algorithm
    method = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    optimizer = method.minimize(model["loss_function"])
    # Add logging
    tf.summary.scalar("loss_function", model["loss_function"])
    tf.summary.histogram("params", model["model_params"])
    tf.summary.scalar("first_param", tf.reduce_mean(model["model_params"][0]))
    tf.summary.scalar("last_param", tf.reduce_mean(model["model_params"][-1]))
    summary = tf.summary.merge_all()
    # After the run finishes, inspect the logs with the following command:
    # tensorboard --logdir logs/
    # The log path separator on Windows differs from Linux
    if os.name == "nt":
        summary_writer = create_summary_writer("logs\\stochastic_gradient_descent")
    else:
        summary_writer = create_summary_writer("logs/stochastic_gradient_descent")
    # Start the TensorFlow session
    sess = tf.Session()
    # Create the initial parameters
    init = tf.global_variables_initializer()
    # Initialize the model with the parameters created above
    sess.run(init)
    # Run the iterative gradient descent updates
    step = 0
    batch_size = int(X.shape[0] * mini_batch_fraction)
    batch_num = int(math.ceil(1 / mini_batch_fraction))
    prev_loss = np.inf
    diff = np.inf
    # Stop iterating once the change in the loss drops below the threshold or the maximum number of epochs is reached
    while (step < epoch) & (diff > tol):
        for i in range(batch_num):
            # Select the mini-batch of training data
            batch_x = X[i * batch_size: (i + 1) * batch_size]
            batch_y = Y[i * batch_size: (i + 1) * batch_size]
            # Update the model parameters
            sess.run([optimizer],
                     feed_dict={model["independent_variable"]: batch_x,
                                model["dependent_variable"]: batch_y})
            # Compute the loss and write it to the log
            summary_str, loss = sess.run(
                [summary, model["loss_function"]],
                feed_dict={model["independent_variable"]: X,
                           model["dependent_variable"]: Y})
            # Write the run details to the log directory
            summary_writer.add_summary(summary_str, step * batch_num + i)
            # Compute the change in the loss
            diff = abs(prev_loss - loss)
            prev_loss = loss
            if diff <= tol:
                break
        step += 1
    summary_writer.close()
    # Print the final results
    print("Model parameters:\n%s" % sess.run(model["model_params"]))
    print("Training epochs: %s" % step)
    print("Loss value: %s" % loss)
Code example #3
def train_loop(model,
               params,
               ds,
               min_y,
               base_data,
               model_id,
               device,
               batch_size,
               max_epochs=2):
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y

    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            #             return ans, y
            return l.item()

        trainer = Engine(train_step)

        #         acc_metric.attach(trainer, "accuracy")
        #         loss_metric.attach(trainer, 'loss')

        def train_eval_step(engine, batch):
            model.eval()
            with torch.no_grad():
                x, y = batch
                x = x.to(device)
                y = y.to(device) - min_y_train
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            with torch.no_grad():
                x, y = batch
                x = x.to(device)
                y = y.to(device) - min_y_val
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=100))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            # metrics = engine.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 100) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, iter, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

#         @trainer.on(Events.EPOCH_COMPLETED)
#         def log_training_results(engine):
#             train_evaluator.run(ds_train)
#             metrics = train_evaluator.state.metrics
#             # metrics = engine.state.metrics
#             avg_accuracy = metrics['accuracy']
#             avg_nll = metrics['loss']
#             print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
#                   .format(engine.state.epoch, avg_accuracy, avg_nll))
#             writer.add_scalar("training/avg_loss", avg_nll, engine.state.epoch)
#             writer.add_scalar("training/avg_accuracy",
#                               avg_accuracy, engine.state.epoch)
#             writer.add_scalar("training/avg_error", 1. -
#                               avg_accuracy, engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def validation_value(engine):
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 5),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
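
For reference, a rough sketch of one way train_loop might be invoked. Only the params keys ('lr', 'momentum', 'l2_wd') and the (ds, min_y) tuple structure are taken from the function itself; the model, data and paths are hypothetical placeholders.

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# Hypothetical three-class toy data with labels 5..7; min_y offsets them to 0..2.
model = nn.Sequential(nn.Linear(10, 3))
labels = torch.randint(5, 8, (256,))
ds_train = DataLoader(TensorDataset(torch.randn(256, 10), labels),
                      batch_size=64, shuffle=True)
ds_valid = DataLoader(TensorDataset(torch.randn(64, 10), labels[:64]),
                      batch_size=64)
params = {'lr': 0.1, 'momentum': 0.9, 'l2_wd': 5e-4}

train_loop(model, params,
           ds=(ds_train, ds_valid),
           min_y=(5, 5),  # label offsets subtracted in each train/eval step
           base_data='checkpoints', model_id='baseline',
           device='cuda' if torch.cuda.is_available() else 'cpu',
           batch_size=64, max_epochs=2)
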
Code example #4
File: train_ops.py  Project: mguarin0/advrbtddos
def run_trainer(data_loader: dict,
                model: models,
                optimizer: optim,
                lr_scheduler: optim.lr_scheduler,
                criterion: nn,
                train_epochs: int,
                log_training_progress_every: int,
                log_val_progress_every: int,
                checkpoint_every: int,
                tb_summaries_dir: str,
                chkpt_dir: str,
                resume_from: str,
                to_device: object,
                to_cpu: object,
                attackers: object = None,
                train_adv_periodic_ops: int = None,
                *args,
                **kwargs):
    def mk_lr_step(loss):
        lr_scheduler.step(loss)

    def train_step(engine, batch):
        model.train()
        optimizer.zero_grad()
        x, y = map(lambda _: to_device(_), batch)
        if (train_adv_periodic_ops is not None) and (
                engine.state.iteration % train_adv_periodic_ops == 0):
            random_attacker = random.choice(list(attackers))
            x = attackers[random_attacker].perturb(x, y)
        y_pred = model(x)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        return loss.item()

    def eval_step(engine, batch):
        model.eval()
        with torch.no_grad():
            x, y = map(lambda _: to_device(_), batch)
            if random.choice(range(2)) % 2 == 0:
                random_attacker = random.choice(list(attackers))
                x = attackers[random_attacker].perturb(x, y)
            y_pred = model(x)
            return y_pred, y

    def chkpt_score_func(engine):
        val_eval.run(data_loader['val'])
        y_pred, y = val_eval.state.output
        loss = criterion(y_pred, y)
        return np.mean(to_cpu(loss, convert_to_np=True))

    # set up ignite engines
    trainer = Engine(train_step)
    train_eval = Engine(eval_step)
    val_eval = Engine(eval_step)

    @trainer.on(Events.ITERATION_COMPLETED(every=log_training_progress_every))
    def log_training_results(engine):
        step = True
        run_type = 'train'
        train_eval.run(data_loader['train'])
        y_pred, y = train_eval.state.output
        loss = criterion(y_pred, y)
        log_results(to_cpu(y_pred, convert_to_np=True),
                    to_cpu(y, convert_to_np=True),
                    to_cpu(loss, convert_to_np=True), run_type, step,
                    engine.state.iteration, total_train_steps, writer)

    @trainer.on(Events.ITERATION_COMPLETED(every=log_val_progress_every))
    def log_val_results(engine):
        step = True
        run_type = 'val'
        val_eval.run(data_loader['val'])
        y_pred, y = val_eval.state.output
        loss = criterion(y_pred, y)
        mk_lr_step(loss)
        log_results(to_cpu(y_pred, convert_to_np=True),
                    to_cpu(y, convert_to_np=True),
                    to_cpu(loss, convert_to_np=True), run_type, step,
                    engine.state.iteration, total_train_steps, writer)

    # set up vars
    total_train_steps = len(data_loader['train']) * train_epochs

    # reporter to identify memory usage
    # bottlenecks throughout network
    reporter = MemReporter()
    print_model(model, reporter)

    # set up tensorboard summary writer
    writer = create_summary_writer(model, data_loader['train'],
                                   tb_summaries_dir)
    # move model to device
    model = to_device(model)

    # set up progress bar
    RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')
    pbar = ProgressBar(persist=True, bar_format="")
    pbar.attach(trainer, ['loss'])

    # set up checkpoint
    objects_to_checkpoint = {
        'trainer': trainer,
        'model': model,
        'optimizer': optimizer,
        'lr_scheduler': lr_scheduler
    }
    training_checkpoint = Checkpoint(to_save=objects_to_checkpoint,
                                     save_handler=DiskSaver(
                                         chkpt_dir, require_empty=False),
                                     n_saved=3,
                                     filename_prefix='best',
                                     score_function=chkpt_score_func,
                                     score_name='val_loss')

    # register events
    trainer.add_event_handler(
        Events.ITERATION_COMPLETED(every=checkpoint_every),
        training_checkpoint)

    # if resuming
    if resume_from and os.path.exists(resume_from):
        print(f'resume model from: {resume_from}')
        checkpoint = torch.load(resume_from)
        Checkpoint.load_objects(to_load=objects_to_checkpoint,
                                checkpoint=checkpoint)

    # fire training engine
    trainer.run(data_loader['train'], max_epochs=train_epochs)
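
As a rough sketch, the dictionaries run_trainer expects could be assembled as below. Only the 'train'/'val' keys and the perturb(x, y) interface come from the function itself; the data and model here are hypothetical stand-ins.

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# Hypothetical two-class data.
train_ds = TensorDataset(torch.randn(256, 10), torch.randint(0, 2, (256,)))
val_ds = TensorDataset(torch.randn(64, 10), torch.randint(0, 2, (64,)))
data_loader = {
    'train': DataLoader(train_ds, batch_size=32, shuffle=True),
    'val': DataLoader(val_ds, batch_size=32),
}
# `attackers`, if used, maps a name to any object exposing perturb(x, y),
# e.g. the advertorch attacks shown in code example #5 below.
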
Code example #5
def adv_train_loop(model,
                   params,
                   ds,
                   min_y,
                   base_data,
                   model_id,
                   attack_type,
                   device,
                   batch_size,
                   max_epochs=5):
    print('training adversarial:', attack_type)
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    original_model = copy.deepcopy(
        model)  # used to generate adv images for the trained model
    original_model.eval()
    model = copy.deepcopy(
        model)  # making a copy so that original model is not changed
    model = model.to(device)
    model_id = f'{model_id}_{attack_type}'

    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        classifier = PyTorchClassifier(
            model=original_model,
            clip_values=(0, 1),
            loss=nn.CrossEntropyLoss(),
            optimizer=optimizer,
            input_shape=(3, 64, 64),
            nb_classes=200,
        )

        attack = None

        #         if attack_type == "fgsm":
        #             attack = FastGradientMethod(estimator=classifier, eps=0.2)
        #         elif attack_type == "bim":
        #             attack = BasicIterativeMethod(estimator=classifier, eps=0.2)
        #         elif attack_type == "carlini":
        #             attack = CarliniLInfMethod(classifier=classifier)
        #         elif attack_type == "deepfool":
        #             attack = DeepFool(classifier=classifier)
        if attack_type == "fgsm":
            attack = GradientSignAttack(model, loss_fn=loss, eps=0.2)
        elif attack_type == "ffa":
            attack = FastFeatureAttack(model, loss_fn=loss, eps=0.3)
        elif attack_type == "carlini":
            attack = CarliniWagnerL2Attack(model, 200, max_iterations=1000)
        elif attack_type == "lbfgs":
            attack = DeepFool(classifier=classifier)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with ctx_noparamgrad_and_eval(model):
                x_adv = attack.perturb(x, y)
            optimizer.zero_grad()
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            #             return ans, y
            return l.item()

        trainer = Engine(train_step)

        #         acc_metric.attach(trainer, "accuracy")
        #         loss_metric.attach(trainer, 'loss')

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=50))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            # metrics = engine.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, iter, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

#         @trainer.on(Events.EPOCH_COMPLETED)
#         def log_training_results(engine):
#             train_evaluator.run(ds_train)
#             metrics = train_evaluator.state.metrics
#             # metrics = engine.state.metrics
#             avg_accuracy = metrics['accuracy']
#             avg_nll = metrics['loss']
#             print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
#                   .format(engine.state.epoch, avg_accuracy, avg_nll))
#             writer.add_scalar("training/avg_loss", avg_nll, engine.state.epoch)
#             writer.add_scalar("training/avg_accuracy",
#                               avg_accuracy, engine.state.epoch)
#             writer.add_scalar("training/avg_error", 1. -
#                               avg_accuracy, engine.state.epoch)

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def validation_value(engine):
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
Code example #6
def prune_train_loop(model,
                     params,
                     ds,
                     min_y,
                     base_data,
                     model_id,
                     prune_type,
                     device,
                     batch_size,
                     max_epochs=5):
    assert prune_type in ['global_unstructured', 'structured']
    total_prune_amount = 0.3 if prune_type == 'global_unstructured' else 0.1
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    model_id = f'{model_id}_{prune_type}_pruning'

    conv_layers = [model.conv1]
    for sequential in [model.layer1, model.layer2, model.layer3, model.layer4]:
        for bottleneck in sequential:
            conv_layers.extend(
                [bottleneck.conv1, bottleneck.conv2, bottleneck.conv3])

    def prune_model(model):
        remove_amount = total_prune_amount / (max_epochs * 10)
        print(f'pruned model by {remove_amount}')
        if prune_type == 'global_unstructured':
            parameters_to_prune = [(layer, 'weight') for layer in conv_layers]
            prune.global_unstructured(
                parameters_to_prune,
                pruning_method=prune.L1Unstructured,
                amount=remove_amount,
            )
        else:
            for layer in conv_layers:
                prune.ln_structured(layer,
                                    name='weight',
                                    amount=remove_amount,
                                    n=1,
                                    dim=0)

    prune_model(model)

    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            optimizer.zero_grad()
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            with torch.no_grad():
                for layer in conv_layers:
                    layer.weight *= layer.weight_mask  # make sure pruned weights stay 0
            return l.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

            prune_model(model)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=50))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, iter, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def validation_value(engine):
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
Code example #7
def gradient_descent(X,
                     Y,
                     model,
                     learning_rate=0.01,
                     max_iter=10000,
                     tol=1.e-6):
    """
    利用梯度下降法训练模型。

    参数
    ----
    X : np.array, 自变量数据

    Y : np.array, 因变量数据

    model : dict, 里面包含模型的参数,损失函数,自变量,应变量。
    """
    # Set up the optimization algorithm
    method = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    optimizer = method.minimize(model["loss_function"])
    # Add logging
    tf.summary.scalar("loss_function", model["loss_function"])
    tf.summary.histogram("params", model["model_params"])
    tf.summary.scalar("first_param", tf.reduce_mean(model["model_params"][0]))
    tf.summary.scalar("last_param", tf.reduce_mean(model["model_params"][-1]))
    summary = tf.summary.merge_all()
    # After the run finishes, inspect the logs with the following command:
    # tensorboard --logdir logs/
    # The log path separator on Windows differs from Linux
    if os.name == "nt":
        summary_writer = create_summary_writer("logs\\gradient_descent")
    else:
        summary_writer = create_summary_writer("logs/gradient_descent")
    # Start the TensorFlow session
    sess = tf.Session()
    # Create the initial parameters
    init = tf.global_variables_initializer()
    # Initialize the model with the parameters created above
    sess.run(init)
    # Run the iterative gradient descent updates
    step = 0
    prev_loss = np.inf
    diff = np.inf
    # Stop iterating once the change in the loss drops below the threshold or the maximum number of iterations is reached
    while (step < max_iter) & (diff > tol):
        _, summary_str, loss = sess.run(
            [optimizer, summary, model["loss_function"]],
            feed_dict={
                model["independent_variable"]: X,
                model["dependent_variable"]: Y
            })
        # Write the run details to the log directory
        summary_writer.add_summary(summary_str, step)
        # Compute the change in the loss
        diff = abs(prev_loss - loss)
        prev_loss = loss
        step += 1
    summary_writer.close()
    # Print the final results
    print("Model parameters:\n%s" % sess.run(model["model_params"]))
    print("Iterations: %s" % step)
    print("Loss value: %s" % loss)