Example 1
class Trainer(object):
    def __init__(self, model=None, criterion=None, optim=None):
        self.model = model
        self.criterion = criterion
        self.optim = optim
        self.engine = Engine()

        # These parameters should be defined in subclass.
        self.meters = []
        self.batch_size = NotImplemented
        self.batch_workers = NotImplemented

    def set_up(self):
        raise NotImplementedError

    def _print_information(self, prefix):
        raise NotImplementedError

    def get_loss_and_output(self, sample):
        raise NotImplementedError

    def reset_meters(self):
        for meter in self.meters:
            meter.reset()

    def get_iterator(self, is_train):
        raise NotImplementedError

    @staticmethod
    def on_sample(state):
        # state['sample'].append(state['train'])
        pass

    def on_forward(self, state):
        raise NotImplementedError

    def on_start_epoch(self, state):
        # self.reset_meters()
        pass
        # state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(self, state):
        raise NotImplementedError

    def on_update(self, state):
        raise NotImplementedError

    def run(self, epochs):
        self.engine.hooks['on_sample'] = self.on_sample
        self.engine.hooks['on_forward'] = self.on_forward
        self.engine.hooks['on_start_epoch'] = self.on_start_epoch
        self.engine.hooks['on_end_epoch'] = self.on_end_epoch
        self.engine.train(self.get_loss_and_output,
                          self.get_iterator(True),
                          maxepoch=epochs,
                          optimizer=self.optim)
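
A minimal subclass sketch for the abstract trainer above, assuming torchvision's MNIST dataset, an nn.Module passed in by the caller, and a single loss meter; these choices are illustrative assumptions, not part of the original example.

# Sketch only: dataset, batch size, and meter choice are assumptions.
import torchnet as tnt
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST

class MNISTTrainer(Trainer):
    def __init__(self, model, criterion, optim):
        super(MNISTTrainer, self).__init__(model, criterion, optim)
        self.meters = [tnt.meter.AverageValueMeter()]
        self.batch_size = 128
        self.batch_workers = 4

    def get_iterator(self, is_train):
        dataset = MNIST('./data', train=is_train, download=True,
                        transform=transforms.ToTensor())
        return DataLoader(dataset, batch_size=self.batch_size,
                          shuffle=is_train, num_workers=self.batch_workers)

    def get_loss_and_output(self, sample):
        inputs, targets = sample[0], sample[1]
        outputs = self.model(inputs)
        return self.criterion(outputs, targets), outputs

    def on_forward(self, state):
        # Track the running loss (PyTorch >= 0.4 tensor API).
        self.meters[0].add(state['loss'].item())

    def on_end_epoch(self, state):
        print('epoch %d: loss %.4f' % (state['epoch'], self.meters[0].value()[0]))

# e.g. MNISTTrainer(model, criterion, optim).run(epochs=5)
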
Example 2
    def __init__(self, model=None, criterion=None, optim=None):
        self.model = model
        self.criterion = criterion
        self.optim = optim
        self.engine = Engine()

        # These parameters should be defined in subclass.
        self.meters = []
        self.batch_size = NotImplemented
        self.batch_workers = NotImplemented
Example 3
def main():
    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)

    params = {
        'conv0.weight': conv_init(1, 50, 5),
        'conv0.bias': torch.zeros(50),
        'conv1.weight': conv_init(50, 50, 5),
        'conv1.bias': torch.zeros(50),
        'linear2.weight': linear_init(800, 512),
        'linear2.bias': torch.zeros(512),
        'linear3.weight': linear_init(512, 10),
        'linear3.bias': torch.zeros(10),
    }

    for k, v in params.items():
        params[k] = Variable(v, requires_grad=True)

    def h(sample):
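        # sample is (images, labels, train_flag); the flag is appended by the on_sample hook below.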
        inputs = Variable(sample[0].float() / 255.0)
        targets = Variable(torch.LongTensor(sample[1]))
        o = f(params, inputs, sample[2])
        return F.cross_entropy(o, targets), o

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classerr.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start_epoch(state):
        classerr.reset()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        print(classerr.value())

    optimizer = torch.optim.SGD(params.values(),
                                lr=0.01,
                                momentum=0.9,
                                weight_decay=0.0005)

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, get_iterator(True), 10, optimizer)
    engine.test(h, get_iterator(False))
Example 4
def main():
    params = {
        'conv0.weight': conv_init(1, 50, 5),
        'conv0.bias': torch.zeros(50),
        'conv1.weight': conv_init(50, 50, 5),
        'conv1.bias': torch.zeros(50),
        'linear2.weight': linear_init(800, 512),
        'linear2.bias': torch.zeros(512),
        'linear3.weight': linear_init(512, 10),
        'linear3.bias': torch.zeros(10),
    }
    params = {k: Variable(v, requires_grad=True) for k, v in params.items()}

    optimizer = torch.optim.SGD(params.values(),
                                lr=0.01,
                                momentum=0.9,
                                weight_decay=0.0005)

    engine = Engine()

    mlog = MeterLogger(nclass=10, title="mnist_meterlogger")

    def h(sample):
        inputs = Variable(sample[0].float() / 255.0)
        targets = Variable(torch.LongTensor(sample[1]))
        o = f(params, inputs, sample[2])
        return F.cross_entropy(o, targets), o

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        loss = state['loss']
        output = state['output']
        target = state['sample'][1]
        # online plotter
        mlog.update_loss(loss, meter='loss')
        mlog.update_meter(output,
                          target,
                          meters={'accuracy', 'map', 'confusion'})

    def on_start_epoch(state):
        mlog.timer.reset()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        mlog.print_meter(mode="Train", iepoch=state['epoch'])
        mlog.reset_meter(mode="Train", iepoch=state['epoch'])

        # do validation at the end of each epoch
        engine.test(h, get_iterator(False))
        mlog.print_meter(mode="Test", iepoch=state['epoch'])
        mlog.reset_meter(mode="Test", iepoch=state['epoch'])

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, get_iterator(True), maxepoch=10, optimizer=optimizer)
Example 5
def main():
    params = {
        "conv0.weight": conv_init(1, 50, 5),
        "conv0.bias": torch.zeros(50),
        "conv1.weight": conv_init(50, 50, 5),
        "conv1.bias": torch.zeros(50),
        "linear2.weight": linear_init(800, 512),
        "linear2.bias": torch.zeros(512),
        "linear3.weight": linear_init(512, 10),
        "linear3.bias": torch.zeros(10),
    }
    params = {k: Variable(v, requires_grad=True) for k, v in params.items()}

    optimizer = torch.optim.SGD(params.values(),
                                lr=0.01,
                                momentum=0.9,
                                weight_decay=0.0005)

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)

    def h(sample):
        inputs = Variable(sample[0].float() / 255.0)
        targets = Variable(torch.LongTensor(sample[1]))
        o = f(params, inputs, sample[2])
        return F.cross_entropy(o, targets), o

    def reset_meters():
        classerr.reset()
        meter_loss.reset()

    def on_sample(state):
        state["sample"].append(state["train"])

    def on_forward(state):
        classerr.add(state["output"].data,
                     torch.LongTensor(state["sample"][1]))
        meter_loss.add(state["loss"].data[0])

    def on_start_epoch(state):
        reset_meters()
        state["iterator"] = tqdm(state["iterator"])

    def on_end_epoch(state):
        print("Training loss: %.4f, accuracy: %.2f%%" %
              (meter_loss.value()[0], classerr.value()[0]))
        # do validation at the end of each epoch
        reset_meters()
        engine.test(h, get_iterator(False))
        print("Testing loss: %.4f, accuracy: %.2f%%" %
              (meter_loss.value()[0], classerr.value()[0]))

    engine.hooks["on_sample"] = on_sample
    engine.hooks["on_forward"] = on_forward
    engine.hooks["on_start_epoch"] = on_start_epoch
    engine.hooks["on_end_epoch"] = on_end_epoch
    engine.train(h, get_iterator(True), maxepoch=10, optimizer=optimizer)
Example 6
def train_and_track(train_dataloader, test_dataloader):
    criterion = nn.CrossEntropyLoss()

    model = resnext18()
    learning_rate = 0.001
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    engine = Engine()

    port = 8097
    train_loss_logger = VisdomPlotLogger(
        'line', port=port, opts={'title': 'Train CrossEntropyLoss'})
    train_err_logger = VisdomPlotLogger('line',
                                        port=port,
                                        opts={'title': 'Train Class Accuracy'})
    test_loss_logger = VisdomPlotLogger(
        'line', port=port, opts={'title': 'Test CrossEntropyLoss'})
    test_err_logger = VisdomPlotLogger('line',
                                       port=port,
                                       opts={'title': 'Test Class Accuracy'})

    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)

    def run_model(sample):
        images, labels = sample
        outputs = model(images)
        loss = criterion(outputs, labels)
        return loss, outputs

    def reset_meters():
        classerr.reset()
        meter_loss.reset()

    def on_forward(state):
        classerr.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data.item())

    def on_start_epoch(state):
        reset_meters()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        train_loss_logger.log(state['epoch'], meter_loss.value()[0])
        train_err_logger.log(state['epoch'], classerr.value()[0])

        # Check accuracy on test after each epoch.
        reset_meters()
        engine.test(run_model, test_dataloader)
        test_loss_logger.log(state['epoch'], meter_loss.value()[0])
        test_err_logger.log(state['epoch'], classerr.value()[0])

    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(run_model, train_dataloader, maxepoch=10, optimizer=optimizer)
Example 7
def test(model, testloader, loss_function, device):
    model.eval()
    model.to(device)

    engine = Engine()

    def compute_loss(data):
        inputs = data[0]
        labels = data[1]
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        return loss_function(outputs, labels), outputs

    def on_start(state):
        print("Running inference ...")
        state['iterator'] = tqdm(state['iterator'], leave=False)

    class Accuracy():
        _accuracy = 0.
        _sample_size = 0.

    def on_forward(state):
        batch_size = state['sample'][1].shape[0]
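        # Weight each batch by its size so the final value is a per-sample average.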
        Accuracy._sample_size += batch_size
        Accuracy._accuracy += batch_size * get_accuracy(state['output'].cpu(), state['sample'][1].cpu())

    engine.hooks['on_start'] = on_start
    engine.hooks['on_forward'] = on_forward

    engine.test(compute_loss, testloader)

    return Accuracy._accuracy / Accuracy._sample_size
Example 8
def train(batch_size=512, epochs=100):
    from torch.autograd import Variable
    import torchnet as tnt
    from torchnet.engine import Engine
    from tensorboardX import SummaryWriter
    from autoencoders.models.sampling import sample_vae
    import autoencoders.data.mnist as mnist
    from autoencoders.utils.tensorboard import run_path

    use_gpu = torch.cuda.is_available()

    writer = SummaryWriter(run_path('conv_vae'))

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()

    model = ConvolutionalVariationalAutoencoder()
    optimizer = torch.optim.Adam(model.parameters(), 3e-4)

    dataloader = mnist(batch_size=batch_size)

    if use_gpu:
        model.cuda()
        vae_loss.cuda()

    def h(sample):
        inputs, _ = sample

        inputs = Variable(inputs)
        if use_gpu:
            inputs = inputs.cuda()

        output, mu, logvar = model(inputs)
        loss = vae_loss(output, inputs, mu, logvar)

        return loss, output

    def on_forward(state):
        meter_loss.add(state['loss'].data[0])

    def on_start_epoch(state):
        meter_loss.reset()

    def on_end_epoch(state):
        writer.add_scalar('loss', meter_loss.value()[0], state['epoch'])
        writer.add_image('image', sample_vae(
            model, dataloader), state['epoch'])

        meter_loss.reset()

    # engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, dataloader, maxepoch=epochs, optimizer=optimizer)
Example 9
 def __init__(self):
     self._engine: Engine = Engine()
     # All possible hooks that are called by engine.train and engine.test
     self._hooks: Dict[str, Callable] = dict({
                 "on_start": self.on_start,              # start of the procedure
                 "on_start_epoch": self.on_start_epoch,  # train exclusive
                 "on_sample": self.on_sample,            # get data point from the data-loader
                 "on_forward": self.on_forward,          # the only phase that both train/test applies
                 "on_update": self.on_update,            # train exclusive, after the "step" of backward updating
                 "on_end_epoch": self.on_end_epoch,      # train exclusive - usually test is invoked here
                 "on_end": self.on_end,                  # end of the procedure
     })
     self._engine.hooks = self._hooks
Example 10
def calibrate(model, testloader, loss_function, device):
    """Calibrates the weight and activation quantization parameters.

    Executes forward passes using the input data from `testloader`.
    For every forward pass, collects the statistics and calibrates
    the quantization parameters for all the weight and activation
    quant modules.

    Arguments:
        model (:class:`QuantizedNet`): nn model to train
        testloader (:class:`torch.utils.data.DataLoader`): dataloader to
            iterate through the sample data for calibration
        loss_function (:class:`torch.nn._Loss`): function to compute loss
        device (:class:`torch.device`): the device to run calibration on

    Returns:
        Module: calibrated quantized model
    """
    model.eval()
    model.to(device)

    engine = Engine()

    # Enable profiling to collect statistics and calibrate quant params
    model._profile()

    # Quantize weights
    model._quantize_weights()

    def compute_loss(data):
        """Computes the loss from a given nn model."""
        inputs = data[0]
        labels = data[1]
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        return loss_function(outputs, labels), outputs

    def on_start(state):
        print("Calibrating quantized network ...")
        state['iterator'] = tqdm(state['iterator'], leave=False)

    def on_forward(state):
        loss = state['loss'].item()
        accuracy = get_accuracy(state['output'].cpu(),
                                state['sample'][1].cpu())
        state['iterator'].write('batch %d loss %.3f accuracy %.3f ' %
                                (state['t'], loss, accuracy),
                                end='\n')

    engine.hooks['on_start'] = on_start
    engine.hooks['on_forward'] = on_forward

    engine.test(compute_loss, testloader)

    return model
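
A hypothetical call site for the calibration routine above; the dataset and batch size are illustrative assumptions (QuantizedNet is the model class named in the docstring).

# Hypothetical usage sketch; test_dataset is assumed to exist.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = QuantizedNet()
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=128)
model = calibrate(model, testloader, torch.nn.CrossEntropyLoss(), device)
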
Example 11
def main():
    params = {
            'conv0.weight': conv_init(1, 50, 5),  'conv0.bias': torch.zeros(50),
            'conv1.weight': conv_init(50, 50, 5), 'conv1.bias': torch.zeros(50),
            'linear2.weight': linear_init(800, 512), 'linear2.bias': torch.zeros(512),
            'linear3.weight': linear_init(512, 10),  'linear3.bias': torch.zeros(10),
            }
    params = {k: Variable(v, requires_grad=True) for k, v in params.items()}

    optimizer = torch.optim.SGD(params.values(), lr=0.01, momentum=0.9, weight_decay=0.0005)

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)

    def h(sample):
        inputs = Variable(sample[0].float() / 255.0)
        targets = Variable(torch.LongTensor(sample[1]))
        o = f(params, inputs, sample[2])
        return F.cross_entropy(o, targets), o

    def reset_meters():
        classerr.reset()
        meter_loss.reset()

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classerr.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start_epoch(state):
        reset_meters()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        print('Training loss: %.4f, accuracy: %.2f%%' % (meter_loss.value()[0], classerr.value()[0]))
        # do validation at the end of each epoch
        reset_meters()
        engine.test(h, get_iterator(False))
        print('Testing loss: %.4f, accuracy: %.2f%%' % (meter_loss.value()[0], classerr.value()[0]))

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, get_iterator(True), maxepoch=10, optimizer=optimizer)
Example 12
def test(model, testloader, loss_function, device):
    r"""Computes the accuracy and loss of the model for a given datatset.

    Arguments:
        model (:class:`torch.nn.Module`): nn model
        testloader (:class:`torch.utils.data.DataLoader`): dataloader to
            iterate through the data
        loss_function (:class:`torch.nn._Loss`): function to compute loss
        device (:class:`torch.device`): the device to run inference on

    Returns:
        accuracy (float): accuracy of the network on given dataset
    """
    model.eval()
    model.to(device)

    engine = Engine()

    def compute_loss(data):
        """Computes the loss from a given nn model."""
        inputs = data[0]
        labels = data[1]
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        return loss_function(outputs, labels), outputs

    def on_start(state):
        print("Running inference ...")
        state['iterator'] = tqdm(state['iterator'], leave=False)

    class Accuracy():
        _accuracy = 0.
        _sample_size = 0.

    def on_forward(state):
        batch_size = state['sample'][1].shape[0]
        Accuracy._sample_size += batch_size
        Accuracy._accuracy += batch_size * get_accuracy(state['output'].cpu(), state['sample'][1].cpu())

    engine.hooks['on_start'] = on_start
    engine.hooks['on_forward'] = on_forward

    engine.test(compute_loss, testloader)

    return Accuracy._accuracy / Accuracy._sample_size
Example 13
def main():

    opt = parser.parse_args()
    print 'parsed options:', vars(opt)

    # convert json data to python object
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id  # 0
    # to prevent opencv from initializing CUDA in workers
    torch.randn(8).cuda()
    os.environ['CUDA_VISIBLE_DEVICES'] = ''  # set it to empty string

    def create_iterator(mode):
        ds = create_dataset(opt, mode)
        return ds.parallel(batch_size=opt.batchSize,
                           shuffle=mode,
                           num_workers=opt.nthread,
                           pin_memory=True)

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    # deal with student first
    f_s, params_s, stats_s = resnet(opt.depth, opt.width, num_classes)

    # deal with teacher
    if opt.teacher_id != '':
        with open(os.path.join('logs', opt.teacher_id, 'log.txt'), 'r') as ff:
            line = ff.readline()
            r = line.find('json_stats')
            info = json.loads(line[r + 12:])
        f_t = resnet(info['depth'], info['width'], num_classes)[0]
        model_data = torch.load(
            os.path.join('logs', opt.teacher_id, 'model.pt7'))
        params_t = model_data['params']
        stats_t = model_data['stats']

        # merge teacher and student params and stats
        params = {'student.' + k: v for k, v in params_s.iteritems()}
        for k, v in params_t.iteritems():
            v.requires_grad = False
            params['teacher.' + k] = v
        stats = {'student.' + k: v for k, v in stats_s.iteritems()}
        stats.update({'teacher.' + k: v for k, v in stats_t.iteritems()})

        def f(inputs, params, stats, mode):
            y_s, g_s = f_s(inputs, params, stats, mode, 'student.')
            y_t, g_t = f_t(inputs, params, stats, False, 'teacher.')
            return y_s, y_t, [at_loss(x, y) for x, y in zip(g_s, g_t)]
    else:
        f, params, stats = f_s, params_s, stats_s

    optimizable = [v for v in params.itervalues() if v.requires_grad]

    def create_optimizer(opt, lr):
        print 'creating optimizer with lr = ', lr
        if opt.optim_method == 'SGD':
            return torch.optim.SGD(optimizable,
                                   lr,
                                   0.9,
                                   weight_decay=opt.weightDecay)
        elif opt.optim_method == 'Adam':
            return torch.optim.Adam(optimizable, lr)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params, stats = state_dict['params'], state_dict['stats']
        optimizer.load_state_dict(state_dict['optimizer'])

    print '\nParameters:'
    print pd.DataFrame([(key, v.size(), torch.typename(v.data))
                        for key, v in params.items()])
    print '\nAdditional buffers:'
    print pd.DataFrame([(key, v.size(), torch.typename(v))
                        for key, v in stats.items()])

    n_parameters = sum(
        [p.numel() for p in params_s.values() + stats_s.values()])
    print '\nTotal number of parameters:', n_parameters

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')
    meters_at = [tnt.meter.AverageValueMeter() for i in range(3)]

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        inputs = Variable(cast(sample[0], opt.dtype))
        targets = Variable(cast(sample[1], 'long'))
        if opt.teacher_id != '':  # if there is teacher id
            y_s, y_t, loss_groups = data_parallel(f, inputs, params,
                                                  stats, sample[2],
                                                  np.arange(opt.ngpu))
            loss_groups = [v.sum() for v in loss_groups]
            [m.add(v.data[0]) for m, v in zip(meters_at, loss_groups)]
            return distillation(y_s, y_t, targets, opt.temperature, opt.alpha) \
                    + opt.beta * sum(loss_groups), y_s
        else:
            y = data_parallel(f, inputs, params, stats, sample[2],
                              np.arange(opt.ngpu))[0]
            return F.cross_entropy(y, targets), y

    def log(t):
        torch.save(
            dict(params=params,
                 stats=stats,
                 optimizer=optimizer.state_dict(),
                 epoch=t['epoch']),
            open(os.path.join(opt.save, 'model.pt7'), 'w'))
        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print z

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classacc.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        [meter.reset() for meter in meters_at]
        state['iterator'] = tqdm(train_loader)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, test_loader)

        test_acc = classacc.value()[0]
        print log({
            "train_loss": train_loss[0],
            "train_acc": train_acc[0],
            "test_loss": meter_loss.value()[0],
            "test_acc": test_acc,
            "epoch": state['epoch'],
            "num_classes": num_classes,
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
            "at_losses": [m.value() for m in meters_at],
        })
        print '==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' % \
                       (opt.save, state['epoch'], opt.epochs, test_acc)

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
Example 14
def main():
    global CONST_STEP_FLAG
    CONST_STEP_FLAG = 0
    # opt = parser.parse_args()
    # option note
    assert not (
        opt.beta and opt.gamma
    ), "Can't support attention-transfer and rocket-launching together"

    print 'parsed options:', vars(opt)
    epoch_step = json.loads(opt.epoch_step)
    sigma_refine_step = json.loads(opt.sigma_refine_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id
    # to prevent opencv from initializing CUDA in workers
    torch.randn(8).cuda()
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

    def create_iterator(mode):
        ds = create_dataset(opt, mode)
        return ds.parallel(batch_size=opt.batchSize,
                           shuffle=mode,
                           num_workers=opt.nthread,
                           pin_memory=True)

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    # deal with student first
    f_s, params_s, stats_s = resnet(opt.depth, opt.width, num_classes,
                                    opt.student_depth)

    # deal with teacher
    if opt.teacher_id != '':
        with open(os.path.join('logs', opt.teacher_id, 'log.txt'), 'r') as ff:
            line = ff.readline()
            r = line.find('json_stats')
            info = json.loads(line[r + 12:])
        f_t = resnet(info['depth'], info['width'], num_classes)[0]
        model_data = torch.load(
            os.path.join('logs', opt.teacher_id, 'model.pt7'))
        params_t = model_data['params']
        stats_t = model_data['stats']

        # merge teacher and student params and stats
        params = {'student.' + k: v for k, v in params_s.iteritems()}
        for k, v in params_t.iteritems():
            params['teacher.' + k] = Variable(v)
        stats = {'student.' + k: v for k, v in stats_s.iteritems()}
        stats.update({'teacher.' + k: v for k, v in stats_t.iteritems()})

        def f(inputs, params, stats, mode):
            if opt.gamma:
                y_s, y_t_auto, g_s = f_s(inputs, params, stats, mode,
                                         'student.')
                y_t, g_t = f_t(inputs, params, stats, False, 'teacher.')
                return y_s, y_t_auto, y_t
            else:
                y_s, g_s = f_s(inputs, params, stats, mode, 'student.')
                y_t, g_t = f_t(inputs, params, stats, False, 'teacher.')
                return y_s, y_t, [at_loss(x, y) for x, y in zip(g_s, g_t)]

    else:
        f, params, stats = f_s, params_s, stats_s

    optimizable = [v for v in params.itervalues() if v.requires_grad]

    def create_optimizer(opt, lr):
        print 'creating optimizer with lr = ', lr
        if opt.optim_method == 'SGD':
            return torch.optim.SGD(optimizable,
                                   lr,
                                   0.9,
                                   weight_decay=opt.weightDecay)
        elif opt.optim_method == 'Adam':
            return torch.optim.Adam(optimizable, lr)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors, stats = state_dict['params'], state_dict['stats']
        for k, v in params.iteritems():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print '\nParameters:'
    print pd.DataFrame([(key, v.size(), torch.typename(v.data))
                        for key, v in params.items()])
    print '\nAdditional buffers:'
    print pd.DataFrame([(key, v.size(), torch.typename(v))
                        for key, v in stats.items()])

    n_parameters = sum(p.numel() for p in params_s.values())
    print '\nTotal number of parameters:', n_parameters

    if opt.gamma:
        meter_loss_s = tnt.meter.AverageValueMeter()
        meter_loss_t = tnt.meter.AverageValueMeter()
        meter_loss_c = tnt.meter.AverageValueMeter()
        meter_loss_d = tnt.meter.AverageValueMeter()
        classacc_s = tnt.meter.ClassErrorMeter(accuracy=True)
        classacc_t = tnt.meter.ClassErrorMeter(accuracy=True)
    else:
        classacc = tnt.meter.ClassErrorMeter(accuracy=True)

    meter_loss = tnt.meter.AverageValueMeter()
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')
    meters_at = [tnt.meter.AverageValueMeter() for i in range(3)]

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        inputs = Variable(cast(sample[0], opt.dtype))
        targets = Variable(cast(sample[1], 'long'))
        if opt.teacher_id != '':
            if opt.gamma:
                ys, y_t_auto, y_t = data_parallel(f, inputs, params,
                                                  stats, sample[2],
                                                  np.arange(opt.ngpu))[:3]
                loss_l2 = torch.nn.MSELoss()
                T = 4
                loss_student = F.cross_entropy(ys, targets)
                loss_teacher = F.cross_entropy(y_t_auto, targets)
                loss_course = opt.beta * \
                    ((y_t_auto - ys) * (y_t_auto - ys)).sum() / opt.batchSize
                y_tech_temp = torch.autograd.Variable(y_t_auto.data,
                                                      requires_grad=False)
                log_kd = rocket_distillation(ys, y_t, targets, opt.temperature,
                                             opt.alpha)
                return rocket_distillation(ys, y_t, targets, opt.temperature, opt.alpha) \
                    + F.cross_entropy(y_t_auto, targets) + F.cross_entropy(ys, targets) + opt.beta * ((y_tech_temp - ys) * (
                        y_tech_temp - ys)).sum() / opt.batchSize, (ys, y_t_auto, loss_student, loss_teacher, loss_course, log_kd)
            else:
                y_s, y_t, loss_groups = data_parallel(f, inputs, params, stats,
                                                      sample[2],
                                                      np.arange(opt.ngpu))
                loss_groups = [v.sum() for v in loss_groups]
                [m.add(v.data[0]) for m, v in zip(meters_at, loss_groups)]
                return distillation(y_s, y_t, targets, opt.temperature, opt.alpha) \
                    + opt.beta * sum(loss_groups), y_s
        else:
            if opt.gamma:
                ys, y = data_parallel(f, inputs, params, stats, sample[2],
                                      np.arange(opt.ngpu))[:2]
                loss_l2 = torch.nn.MSELoss()
                T = 4
                loss_student = F.cross_entropy(ys, targets)
                loss_teacher = F.cross_entropy(y, targets)
                loss_course = opt.beta * \
                    ((y - ys) * (y - ys)).sum() / opt.batchSize
                if opt.grad_block:
                    y_course = torch.autograd.Variable(y.data,
                                                       requires_grad=False)
                else:
                    y_course = y
                return F.cross_entropy(y, targets) + F.cross_entropy(
                    ys, targets) + opt.beta * (
                        (y_course - ys) *
                        (y_course - ys)).sum() / opt.batchSize, (ys, y,
                                                                 loss_student,
                                                                 loss_teacher,
                                                                 loss_course)
            else:
                y = data_parallel(f, inputs, params, stats, sample[2],
                                  np.arange(opt.ngpu))[0]
                return F.cross_entropy(y, targets), y

    def log(t, state):
        torch.save(
            dict(params={k: v.data
                         for k, v in params.iteritems()},
                 stats=stats,
                 optimizer=state['optimizer'].state_dict(),
                 epoch=t['epoch']),
            open(os.path.join(opt.save, 'model.pt7'), 'w'))
        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print z

    def on_sample(state):
        state['sample'].append(state['train'])

    if opt.gamma:

        def on_forward(state):
            classacc_s.add(state['output'][0].data,
                           torch.LongTensor(state['sample'][1]))
            classacc_t.add(state['output'][1].data,
                           torch.LongTensor(state['sample'][1]))
            meter_loss.add(state['loss'].data[0])
            meter_loss_s.add(state['output'][2].data[0])
            meter_loss_t.add(state['output'][3].data[0])
            meter_loss_c.add(state['output'][4].data[0])

        def on_start_epoch(state):
            classacc_s.reset()
            classacc_t.reset()
            meter_loss.reset()
            meter_loss_s.reset()
            meter_loss_t.reset()
            meter_loss_c.reset()
            timer_train.reset()
            [meter.reset() for meter in meters_at]
            # state['iterator'] = tqdm(train_loader)

            epoch = state['epoch'] + 1
            if epoch in sigma_refine_step:
                opt.running_sigma += opt.beta
            if epoch in epoch_step:
                lr = state['optimizer'].param_groups[0]['lr']
                state['optimizer'] = create_optimizer(opt,
                                                      lr * opt.lr_decay_ratio)

        def on_end_epoch(state):
            train_loss = meter_loss.value()
            train_loss_s = meter_loss_s.value()
            train_loss_t = meter_loss_t.value()
            train_loss_c = meter_loss_c.value()
            train_acc_s = classacc_s.value()
            train_acc_t = classacc_t.value()
            train_time = timer_train.value()
            meter_loss.reset()
            meter_loss_s.reset()
            meter_loss_t.reset()
            meter_loss_c.reset()
            classacc_s.reset()
            classacc_t.reset()
            timer_test.reset()

            engine.test(h, test_loader)

            test_acc_s = classacc_s.value()[0]
            test_acc_t = classacc_t.value()[0]
            print log(
                {
                    "train_loss": train_loss[0],
                    "train_acc_student": train_acc_s[0],
                    "train_acc_teacher": train_acc_t[0],
                    "test_loss": meter_loss.value()[0],
                    "test_loss_student": meter_loss_s.value()[0],
                    "test_loss_teacher": meter_loss_t.value()[0],
                    "test_loss_course": meter_loss_c.value()[0],
                    "test_acc_student": test_acc_s,
                    "test_acc_teacher": test_acc_t,
                    "epoch": state['epoch'],
                    "num_classes": num_classes,
                    "n_parameters": n_parameters,
                    "train_time": train_time,
                    "test_time": timer_test.value(),
                    "at_losses": [m.value() for m in meters_at],
                }, state)
            print '==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' % \
                (opt.save, state['epoch'], opt.epochs, test_acc_s)
    else:

        def on_forward(state):
            classacc.add(state['output'].data,
                         torch.LongTensor(state['sample'][1]))
            meter_loss.add(state['loss'].data[0])

        def on_start_epoch(state):
            classacc.reset()
            meter_loss.reset()
            timer_train.reset()
            [meter.reset() for meter in meters_at]
            # state['iterator'] = tqdm(train_loader)

            epoch = state['epoch'] + 1
            if epoch in epoch_step:
                lr = state['optimizer'].param_groups[0]['lr']
                state['optimizer'] = create_optimizer(opt,
                                                      lr * opt.lr_decay_ratio)

        def on_end_epoch(state):
            train_loss = meter_loss.value()
            train_acc = classacc.value()
            train_time = timer_train.value()
            meter_loss.reset()
            classacc.reset()
            timer_test.reset()

            engine.test(h, test_loader)

            test_acc = classacc.value()[0]
            print log(
                {
                    "train_loss": train_loss[0],
                    "train_acc": train_acc[0],
                    "test_loss": meter_loss.value()[0],
                    "test_acc": test_acc,
                    "epoch": state['epoch'],
                    "num_classes": num_classes,
                    "n_parameters": n_parameters,
                    "train_time": train_time,
                    "test_time": timer_test.value(),
                    "at_losses": [m.value() for m in meters_at],
                }, state)
            print '==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' % \
                (opt.save, state['epoch'], opt.epochs, test_acc)

    def on_start(state):
        state['epoch'] = epoch

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
Example 15

if __name__ == "__main__":
    from torch.autograd import Variable
    from torch.optim import Adam
    from torchnet.engine import Engine
    from torchvision.utils import make_grid
    from torchvision.datasets.mnist import MNIST
    from tqdm import tqdm
    import torchnet as tnt
    import h5py
    import os
    from collections import OrderedDict

    model = CapsuleNet()
    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    mymeter = Mymeter(NUM_CLASSES)
    loss_func = F.binary_cross_entropy

    train_path = '/home/LAB/penghao/mars/metadata/test'
    train_dir = os.listdir(train_path)
    train_num = len(train_dir)
    index = 0

    def get_iterator(mode):
        if mode:
            train_path = '/home/LAB/penghao/mars/metadata/train'
            dir = os.listdir(train_path)
            data = None
            labels = None
Example 16
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(train):
        return DataLoader(create_dataset(opt, train),
                          batch_size=opt.batch_size,
                          shuffle=train,
                          num_workers=opt.nthread,
                          pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    f, params, stats = resnet(opt.depth, opt.width, num_classes)

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD(params.values(), lr, 0.9, weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors, stats = state_dict['params'], state_dict['stats']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print_tensor_dict(params)
    print('\nAdditional buffers:')
    print_tensor_dict(stats)

    n_parameters = sum(p.numel() for p in params.values())
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        inputs = Variable(cast(sample[0], opt.dtype))
        targets = Variable(cast(sample[1], 'long'))
        y = data_parallel(f, inputs, params, stats, sample[2],
                          list(range(opt.ngpu)))
        return F.cross_entropy(y, targets), y

    def log(t, state):
        torch.save(
            dict(params={k: v.data
                         for k, v in params.items()},
                 stats=stats,
                 optimizer=state['optimizer'].state_dict(),
                 epoch=t['epoch']),
            open(os.path.join(opt.save, 'model.pt7'), 'wb'))
        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classacc.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        state['iterator'] = tqdm(train_loader)

        epoch = state['epoch'] + 1
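        # If this epoch is a decay step, rebuild the optimizer with the decayed learning rate.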
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, test_loader)

        test_acc = classacc.value()[0]
        print(
            log(
                {
                    "train_loss": train_loss[0],
                    "train_acc": train_acc[0],
                    "test_loss": meter_loss.value()[0],
                    "test_acc": test_acc,
                    "epoch": state['epoch'],
                    "num_classes": num_classes,
                    "n_parameters": n_parameters,
                    "train_time": train_time,
                    "test_time": timer_test.value(),
                }, state))
        print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' % \
              (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
Example 17
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  ###multiple gpu
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(mode):
        return DataLoader(create_dataset(opt, mode),
                          opt.batch_size,
                          shuffle=mode,
                          num_workers=opt.nthread,
                          pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    # deal with student first
    f_s, params_s = resnet(opt.depth, opt.width, num_classes)

    # deal with teacher
    if opt.teacher_id:
        with open(os.path.join('logs', opt.teacher_id, 'log.txt'), 'r') as ff:
            line = ff.readline()
            r = line.find('json_stats')
            info = json.loads(line[r + 12:])
        f_t = resnet(info['depth'], info['width'], num_classes)[0]
        model_data = torch.load(
            os.path.join('logs', opt.teacher_id, 'model.pt7'))
        params_t = model_data['params']

        # merge teacher and student params
        params = {'student.' + k: v for k, v in params_s.items()}
        for k, v in params_t.items():
            if not (k.startswith("teacher")):
                k = k.replace("student.", "")
                params['teacher.' + k] = v.detach().requires_grad_(False)

        def f(inputs, params, mode):
            y_s, g_s = f_s(inputs, params, mode, 'student.')
            with torch.no_grad():
                y_t, g_t = f_t(inputs, params, False, 'teacher.')
            return y_s, y_t, [utils.at_loss(x, y) for x, y in zip(g_s, g_t)]
    else:
        f, params = f_s, params_s

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD((v for v in params.values() if v.requires_grad),
                   lr,
                   momentum=0.9,
                   weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors = state_dict['params']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    utils.print_tensor_dict(params)

    n_parameters = sum(p.numel() for p in list(params_s.values()))
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')
    meters_at = [tnt.meter.AverageValueMeter() for i in range(3)]

    opt.save = opt.save + "_" + opt.dataset + "_epochs_" + str(opt.epochs)
    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    writer = SummaryWriter(opt.save)

    def h(sample):
        inputs = utils.cast(sample[0], opt.dtype).detach()
        targets = utils.cast(sample[1], 'long')
        if opt.teacher_id != '':
            y_s, y_t, loss_groups = utils.data_parallel(
                f, inputs, params, sample[2], range(opt.ngpu))
            loss_groups = [v.sum() for v in loss_groups]
            [m.add(v.item()) for m, v in zip(meters_at, loss_groups)]
            return utils.distillation(y_s, y_t, targets, opt.temperature, opt.alpha) \
                   + opt.beta * sum(loss_groups), y_s
        else:
            y = utils.data_parallel(f, inputs, params, sample[2],
                                    range(opt.ngpu))[0]
            return F.cross_entropy(y, targets), y

    def log(t, state):
        torch.save(
            dict(params={k: v.data
                         for k, v in params.items()},
                 optimizer=state['optimizer'].state_dict(),
                 epoch=t['epoch']),
            os.path.join(opt.save, 'model.pt7'))  # save the model to the given path
        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])  # append the train flag to the sample

    def on_forward(state):
        classacc.add(state['output'].data, state['sample'][1])
        meter_loss.add(state['loss'].item())

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        [meter.reset() for meter in meters_at]
        state['iterator'] = tqdm(train_loader)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.mean
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, test_loader)  #upward

        test_acc = classacc.value()[0]

        writer.add_scalar('loss/train', train_loss, state['epoch'])
        writer.add_scalar('acc/train', train_acc[0], state['epoch'])
        writer.add_scalar('loss/test', meter_loss.mean, state['epoch'])
        writer.add_scalar('acc/test', test_acc, state['epoch'])

        print(
            log(
                {
                    "train_loss": train_loss,
                    "train_acc": train_acc[0],
                    "test_loss": meter_loss.mean,
                    "test_acc": test_acc,
                    "epoch": state['epoch'],
                    "num_classes": num_classes,
                    "n_parameters": n_parameters,
                    "train_time": train_time,
                    "test_time": timer_test.value(),
                    "at_losses": [m.value() for m in meters_at],
                }, state))
        print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' % \
              (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)

    writer.close()
Example 18
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100
    log_step = 1
    assert opt.subset_size in [100, 500, 1000, -1], \
        'subset size should be 100, 500, 1000 or -1'
    assert opt.subset_id in [1, 2, 3, 4, 5, -1], \
        'subset id should be 1-5 or -1'
    if opt.subset_size in [100, 500, 1000]:
        log_step = 10000 // opt.subset_size

    torch.manual_seed(opt.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(mode):
        shuffle = mode and (opt.subset_size == -1 or opt.subset_id == -1)
        sampler = None
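        # For a fixed training subset, draw indices from the saved list instead of shuffling the whole set.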
        if mode and not shuffle:
            ind = np.loadtxt('subsets/subset_' + str(opt.subset_size) + '_' +
                             str(opt.subset_id) + '.txt',
                             dtype=np.int64)
            sampler = SubsetRandomSampler(ind)

        return DataLoader(create_dataset(opt, mode),
                          opt.batch_size,
                          sampler=sampler,
                          shuffle=shuffle,
                          num_workers=opt.nthread,
                          pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    kwargs = {}
    if opt.level is not None:
        kwargs.update({'level': opt.level})
    f, params = resnet(opt.depth, opt.width, num_classes, opt.dropout,
                       **kwargs)

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD([v for v in params.values() if v.requires_grad],
                   lr,
                   momentum=0.9,
                   weight_decay=opt.weight_decay,
                   nesterov=opt.nesterov)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors = state_dict['params']
        for k, v in params.items():
            if k in params_tensors:
                v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print_tensor_dict(params)

    n_parameters = sum(p.numel() for p in params.values() if p.requires_grad)
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        inputs = cast(sample[0], opt.dtype)
        targets = cast(sample[1], 'long')
        y = data_parallel(f, inputs, params, sample[2],
                          list(range(opt.ngpu))).float()
        return F.cross_entropy(y, targets), y

    def log(t, state):
        torch.save(
            dict(params={
                k: v
                for k, v in params.items() if k.find('dct') == -1
            },
                 epoch=t['epoch'],
                 optimizer=state['optimizer'].state_dict()),
            os.path.join(opt.save, 'model.pt7'))
        z = vars(opt).copy()
        z.update(t)
        with open(os.path.join(opt.save, 'log.txt'), 'a') as flog:
            flog.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        loss = float(state['loss'])
        classacc.add(state['output'].data, state['sample'][1])
        meter_loss.add(loss)
        if state['train']:
            state['iterator'].set_postfix(loss=loss)

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        state['iterator'] = tqdm(train_loader, dynamic_ncols=True)
        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        if state['epoch'] % log_step == 0:
            train_loss = meter_loss.value()
            train_acc = classacc.value()
            train_time = timer_train.value()
            meter_loss.reset()
            classacc.reset()
            timer_test.reset()

            with torch.no_grad():
                engine.test(h, test_loader)

            test_acc = classacc.value()[0]
            print(
                log(
                    {
                        "train_loss": train_loss[0],
                        "train_acc": train_acc[0],
                        "test_loss": meter_loss.value()[0],
                        "test_acc": test_acc,
                        "epoch": state['epoch'],
                        "num_classes": num_classes,
                        "n_parameters": n_parameters,
                        "train_time": train_time,
                        "test_time": timer_test.value(),
                    }, state))
            print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
                  (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
Esempio n. 19
0
def main(cfg, cuda=torch.cuda.is_available()):
    ### flush cfg to output log file:
    tqdm.write(str(cfg), file=cfg['logfile'])
    tqdm.write('-' * 80, file=cfg['logfile'])

    ### define dataloader factory:
    def get_iterator():
        # set up dataloader config:
        datasets = cfg['data_paths']
        pin_mem = cuda
        nworkers = cfg['num_workers']

        # (possibly) concatenate datasets together:
        ds = SeqTensorDataset(torch.load(datasets[0][0]),
                              torch.load(datasets[0][1]),
                              torch.load(datasets[0][2]),
                              torch.load(datasets[0][3]))
        for dataset in datasets[1:]:
            ds += SeqTensorDataset(torch.load(dataset[0]),
                                   torch.load(dataset[1]),
                                   torch.load(dataset[2]),
                                   torch.load(dataset[3]))

        # return a dataloader iterating over datasets; pagelock memory location if GPU detected:
        return DataLoader(ds,
                          batch_size=cfg['batch_size'],
                          shuffle=True,
                          num_workers=nworkers,
                          collate_fn=sequence_collate_fn,
                          pin_memory=pin_mem)

    ### build RawCTCNet model:
    in_dim = 1
    layers = [(256, 256, d, 3)
              for d in [1, 2, 4, 8, 16, 32, 64]] * cfg['num_stacks']
    num_labels = 5
    out_dim = 512
    network = RawCTCNet(in_dim,
                        num_labels,
                        layers,
                        out_dim,
                        input_kw=1,
                        input_dil=1,
                        positions=True,
                        softmax=False,
                        causal=False,
                        batch_norm=True)
    print("Constructed network.")
    if cuda:
        print("CUDA detected; placed network on GPU.")
        network.cuda()
    if cfg['model'] is not None:
        print("Loading model file...")
        try:
            network.load_state_dict(torch.load(cfg['model']))
        except:
            print(
                "ERR: could not restore model. Check model datatype/dimensions."
            )

    ### build CTC loss function and model evaluation function:
    ctc_loss_fn = CTCLoss()
    print("Constructed CTC loss function.")
    maybe_gpu = lambda tsr, has_cuda: tsr if not has_cuda else tsr.cuda()

    def model_loss(sample):
        # unpack inputs and wrap in Variables:
        signals_, signal_lengths_, sequences_, sequence_lengths_ = sample
        signals = Variable(maybe_gpu(signals_.permute(0, 2, 1), cuda),
                           volatile=True)  # BxTxD => BxDxT
        signal_lengths = Variable(signal_lengths_, volatile=True)
        sequences = Variable(concat_labels(sequences_, sequence_lengths_),
                             volatile=True)
        sequence_lengths = Variable(sequence_lengths_, volatile=True)
        # compute predicted labels:
        transcriptions = network(signals).permute(2, 0,
                                                  1)  # Permute: BxDxT => TxBxD
        # compute CTC loss and return:
        loss = ctc_loss_fn(transcriptions, sequences.int(),
                           signal_lengths.int(), sequence_lengths.int())
        return loss, transcriptions

    ### build beam search decoder:
    beam_labels = [' ', 'A', 'G', 'C', 'T']
    beam_blank_id = 0
    beam_decoder = CTCBeamDecoder(beam_labels,
                                  beam_width=100,
                                  blank_id=beam_blank_id,
                                  num_processes=cfg['num_workers'])
    print("Constructed CTC beam search decoder.")

    ### build engine, meters, and hooks:
    engine = Engine()

    # Wrap a tqdm meter around the losses:
    def on_start(state):
        network.eval()
        state['iterator'] = tqdm(state['iterator'])

    # (Currently don't do anything w/r/t the sample.)
    def on_sample(state):
        pass

    # occasionally log the loss value and perform beam search decoding:
    def on_forward(state):
        if (state['t'] % cfg['print_every'] == 0):
            # log the ctc loss:
            tqdm.write("Step {0} | Loss: {1}".format(state['t'],
                                                     state['loss'].data[0],
                                                     file=cfg['logfile']))
            # beam search decoding:
            _, logit_lengths_t, seq_t, seq_lengths_t = state['sample']
            scores = mask_padding(state['output'].permute(1, 0, 2),
                                  logit_lengths_t,
                                  fill_logit_idx=0)
            logits = F.softmax(scores, dim=2)
            _nt_dict_ = {0: ' ', 1: 'A', 2: 'G', 3: 'C', 4: 'T'}

            def convert_to_string(toks, voc, num):
                try:
                    nt = ''.join([voc[t] for t in toks[0:num]])
                except:
                    nt = ''
                return nt

            try:
                true_nts = labels2strings(seq_t, lookup=_nt_dict_)
                amax_nts = labels2strings(argmax_decode(logits),
                                          lookup=_nt_dict_)
                beam_result, beam_scores, beam_times, beam_lengths = beam_decoder.decode(
                    logits.data)
                pred_nts = [
                    convert_to_string(beam_result[k][0], _nt_dict_,
                                      beam_lengths[k][0])
                    for k in range(len(beam_result))
                ]
                for i in range(min(len(true_nts), len(pred_nts))):
                    tqdm.write("True Seq: {0}".format(true_nts[i]),
                               file=cfg['logfile'])
                    tqdm.write("Beam Seq: {0}".format(pred_nts[i]),
                               file=cfg['logfile'])
                    tqdm.write("Amax Seq: {0}".format(amax_nts[i]),
                               file=cfg['logfile'])
                    tqdm.write(
                        ("- " * 10 + "Local Beam Alignment" + " -" * 10),
                        file=cfg['logfile'])
                    tqdm.write(ssw(true_nts[i], pred_nts[i]),
                               file=cfg['logfile'])
                    tqdm.write("= " * 40, file=cfg['logfile'])
            except:
                tqdm.write("(WARN: Could not parse batch; skipping...)",
                           file=cfg['logfile'])

    # (Currently don't do anything at end of epoch.)
    def on_end(state):
        pass

    print("Constructed engine. Running validation loop...")

    ### run validation loop:
    engine.hooks['on_start'] = on_start
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_end'] = on_end
    engine.test(model_loss, get_iterator())
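For comparison with the beam search above, a minimal greedy (argmax) CTC collapse of the kind argmax_decode is presumably performing: take the per-step argmax, merge consecutive repeats, and drop the blank label (index 0 here). This is an illustrative sketch, not the project's own implementation.

import torch

def greedy_ctc_decode(probs, blank_id=0):
    # probs: (batch, time, classes) tensor of per-step scores or probabilities
    best = probs.argmax(dim=2)  # most likely label at each time step
    decoded = []
    for seq in best:
        out, prev = [], None
        for label in seq.tolist():
            if label != prev and label != blank_id:  # merge repeats, skip blanks
                out.append(label)
            prev = label
        decoded.append(out)
    return decoded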
Esempio n. 20
0
    from torchnet.engine import Engine
    from torchnet.logger import VisdomPlotLogger, VisdomLogger
    from torchvision.utils import make_grid
    from torchvision.datasets.mnist import MNIST
    from tqdm import tqdm
    import torchnet as tnt

    model = CapsuleNet()
    # model.load_state_dict(torch.load('epochs/epoch_327.pt'))
    model.cuda()

    print("# parameters:", sum(param.numel() for param in model.parameters()))

    optimizer = Adam(model.parameters())

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    meter_accuracy = tnt.meter.ClassErrorMeter(accuracy=True)
    confusion_meter = tnt.meter.ConfusionMeter(NUM_CLASSES, normalized=True)

    train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'})
    train_error_logger = VisdomPlotLogger('line', opts={'title': 'Train Accuracy'})
    test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'})
    test_accuracy_logger = VisdomPlotLogger('line', opts={'title': 'Test Accuracy'})
    confusion_logger = VisdomLogger('heatmap', opts={'title': 'Confusion matrix',
                                                     'columnnames': list(range(NUM_CLASSES)),
                                                     'rownames': list(range(NUM_CLASSES))})
    ground_truth_logger = VisdomLogger('image', opts={'title': 'Ground Truth'})
    reconstruction_logger = VisdomLogger('image', opts={'title': 'Reconstruction'})

    capsule_loss = CapsuleLoss()
Esempio n. 21
0
def main(timestamp, batch_size, mini_batch_size=128):
    logging.debug('\tLoading %s data iterators' % CONFIG['dataset'].name)

    # Data loading
    dataset_name = CONFIG['dataset'].name
    dataroot = CONFIG['dataset'].datadir

    use_gpu = CONFIG['general'].use_gpu
    num_gpus = len(CONFIG['general'].gpus.split(',')) if use_gpu else 0
    num_classes = 10 if dataset_name == 'CIFAR10' else 100  # TODO make dataset-dependent lol
    train_loader, test_loader = create_data_iterators(
        dataroot,  # TODO add dataset_name
        batch_size,
        num_classes,
        num_gpus)

    # Model construction
    model_name = CONFIG['model'].name
    model, model_params = create_model(model_name, num_classes)

    # Create optimizers
    logging.debug('\tCreating optimizers...')
    lr = CONFIG['training'].learning_rate
    momentum = CONFIG['training'].momentum
    weight_decay = CONFIG['training'].weight_decay
    optimizer_type = CONFIG['training'].optimizer

    # TODO clean this up so that other_params are extracted automatically
    if optimizer_type == 'SGD':
        other_params = None
    elif optimizer_type == 'NoisySGD':
        other_params = {"noise_factor": CONFIG['noisysgd'].noise_factor}
    elif optimizer_type == 'ReservoirSGD':
        if num_gpus == 0:
            raise ValueError('Need at least one GPU for now for ReservoirSGD!')
        other_params = {
            'scale': CONFIG['reservoir'].scale,
            'max_reservoir_size': CONFIG['reservoir'].max_reservoir_size,
            'num_gradients_to_sample':
            CONFIG['reservoir'].num_gradients_to_sample,
            'distributed': CONFIG['reservoir'].distributed
        }
    elif optimizer_type == 'HessianVecSGD':
        other_params = {"noise_factor": CONFIG['hessian_vec'].noise_factor}
    else:
        raise ValueError('Unsupported optimizer: %s' % optimizer_type)

    create_optimizer = create_optimizer_fn(model_params, momentum,
                                           weight_decay, optimizer_type,
                                           mini_batch_size, other_params)
    optimizer = create_optimizer(lr)

    epoch = 0
    iteration = 0
    # Load previous model checkpoint if it exists
    checkpoint = CONFIG['model'].checkpoint
    if checkpoint is not None:
        logging.info('\tLoading train state from checkpoint: %s' % checkpoint)
        model_params, optimizer = load_checkpoint(checkpoint, model_params,
                                                  optimizer)

    # Calculate number of parameters in model
    num_parameters = sum(p.numel() for p in model_params.values())
    logging.debug('\tNumber of parameters in model: %d' % int(num_parameters))

    # Set up telemetry things
    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')

    # Create log directory for checkpoints
    save_dir = create_log_dirs(CONFIG['logging'].save_dir, batch_size,
                               timestamp, other_params)
    logging.info('\tLogging to: {}'.format(save_dir))

    # Create log/test functions that we use in torchnet engine hooks
    cross_entropy = create_cross_entropy_fn(model, model_name, model_params,
                                            num_gpus)
    log = create_log_fn(model_params, save_dir)

    # Create torchnet engine hooks
    engine = Engine(
        create_graph=CONFIG['training'].optimizer == 'HessianVecSGD',
        mini_batch_size=mini_batch_size)
    epoch_step_orig = CONFIG['training'].epoch_step
    if isinstance(epoch_step_orig, int):
        epoch_step = [epoch_step_orig]
    else:
        epoch_step = list(map(int, CONFIG['training'].epoch_step.split(',')))

    # CALCULATE NUMBER OF EPOCHS BASED ON ITERATIONS
    if hasattr(CONFIG['training'], 'iterations'):
        logging.debug('\tUsing iterations: {}'.format(
            CONFIG['training'].iterations))
        iters_per_epoch = TRAINING_SIZE // batch_size
        batch_period = 1  # batch_size // mini_batch_size
        epochs = batch_period * CONFIG['training'].iterations // iters_per_epoch
        # run at least this many epochs
        epochs = max(epochs, CONFIG['training'].epochs)
    else:
        epochs = CONFIG['training'].epochs
    lr_decay_ratio = CONFIG['training'].lr_decay_ratio

    logging.info('\tRUNNING FOR {} EPOCHS'.format(epochs))

    # on_sample = create_on_sample_fn()
    on_forward = create_on_forward_fn(classacc, meter_loss)
    on_start = create_on_start_fn(epoch, iteration)
    on_start_epoch = create_on_start_epoch_fn(classacc, meter_loss,
                                              timer_train, train_loader,
                                              epoch_step, lr_decay_ratio,
                                              create_optimizer)
    on_update = create_on_update_fn(engine,
                                    cross_entropy,
                                    train_loader,
                                    test_loader,
                                    batch_size,
                                    log,
                                    classacc,
                                    meter_loss,
                                    timer_train,
                                    timer_test,
                                    save_dir,
                                    period=CONFIG['logging'].evaluation_iters,
                                    iterations=CONFIG['training'].iterations)

    # Hook the torchnet engine up
    # engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_start'] = on_start
    engine.hooks['on_update'] = on_update
    # Start the training process!
    engine.train(cross_entropy, train_loader, epochs, optimizer)
Esempio n. 22
0
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id
    torch.randn(8).cuda()
    os.environ['CUDA_VISIBLE_DEVICES'] = ''
    epoch_step = json.loads(opt.epoch_step)

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    f_s, params_s, stats_s = define_student(opt.depth, opt.width)
    f_t, params_t = define_teacher(opt.teacher_params)
    params = {'student.' + k: v for k, v in params_s.items()}
    stats = {'student.' + k: v for k, v in stats_s.items()}
    params.update({'teacher.' + k: v for k, v in params_t.items()})

    optimizable = [v for v in params.values() if v.requires_grad]

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return torch.optim.SGD(optimizable,
                               lr,
                               0.9,
                               weight_decay=opt.weightDecay)

    optimizer = create_optimizer(opt, opt.lr)

    iter_train = get_iterator(opt, True)
    iter_test = get_iterator(opt, False)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors, stats = state_dict['params'], state_dict['stats']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print(
        pd.DataFrame([(key, v.size(), torch.typename(v.data))
                      for key, v in list(params.items())]))
    print('\nAdditional buffers:')
    print(
        pd.DataFrame([(key, v.size(), torch.typename(v))
                      for key, v in list(stats.items())]))

    n_parameters = sum([p.numel() for p in optimizable + list(stats.values())])
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(topk=[1, 5], accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')
    meters_at = [tnt.meter.AverageValueMeter() for i in range(4)]

    def f(inputs, params, stats, mode):
        y_s, g_s = f_s(inputs, params, stats, mode, 'student.')
        y_t, g_t = f_t(inputs, params, 'teacher.')
        return y_s, y_t, [at_loss(x, y) for x, y in zip(g_s, g_t)]

    def h(sample):
        inputs = Variable(sample[0].cuda())
        targets = Variable(sample[1].cuda().long())
        y_s, y_t, loss_groups = data_parallel(f, inputs, params, stats,
                                              sample[2], np.arange(opt.ngpu))
        loss_groups = [v.sum() for v in loss_groups]
        [m.add(v.data[0]) for m, v in zip(meters_at, loss_groups)]
        return distillation(y_s, y_t, targets, opt.temperature, opt.alpha) \
                + opt.beta * sum(loss_groups), y_s

    def log(t, state):
        torch.save(
            dict(params={k: v.data
                         for k, v in params.items()},
                 stats=stats,
                 optimizer=state['optimizer'].state_dict(),
                 epoch=t['epoch']), os.path.join(opt.save, 'model.pt7'))
        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classacc.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        [meter.reset() for meter in meters_at]
        state['iterator'] = tqdm(iter_train)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, iter_test)

        print(
            log(
                {
                    "train_loss": train_loss[0],
                    "train_acc": train_acc,
                    "test_loss": meter_loss.value()[0],
                    "test_acc": classacc.value(),
                    "epoch": state['epoch'],
                    "n_parameters": n_parameters,
                    "train_time": train_time,
                    "test_time": timer_test.value(),
                    "at_losses": [m.value() for m in meters_at],
                }, state))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, iter_train, opt.epochs, optimizer)
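The at_loss terms above compare intermediate student and teacher activations. A common formulation (the one used by the attention-transfer reference code) is the mean squared distance between L2-normalised spatial attention maps; the sketch below assumes that formulation and is not necessarily the exact at_loss imported here.

import torch.nn.functional as F

def at(x):
    # spatial attention map: mean of squared activations over channels,
    # flattened per sample and L2-normalised
    return F.normalize(x.pow(2).mean(1).view(x.size(0), -1))

def at_loss(x, y):
    # mean squared distance between student (x) and teacher (y) attention maps
    return (at(x) - at(y)).pow(2).mean()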
Esempio n. 23
0
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    torch.manual_seed(opt.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(mode):
        return DataLoader(create_dataset(opt, mode), opt.batch_size, shuffle=mode,
                          num_workers=opt.nthread, pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)
     
    if opt.activation_dropout:
        print('[*********] Using activation dropout')
        
    f, params = resnet(opt.depth, opt.width, num_classes, opt.dropout_prob, opt.activation_dropout)

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD([v for v in params.values() if v.requires_grad], lr, momentum=0.9, weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors = state_dict['params']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print_tensor_dict(params)

    n_parameters = sum(p.numel() for p in params.values() if p.requires_grad)
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        inputs = cast(sample[0], opt.dtype)
        targets = cast(sample[1], 'long')
        y = data_parallel(f, inputs, params, sample[2], list(range(opt.ngpu))).float()
        return F.cross_entropy(y, targets), y

    def log(t, state):
        torch.save(dict(params=params, epoch=t['epoch'], optimizer=state['optimizer'].state_dict()),
                   os.path.join(opt.save, 'model.pt7'))
        z = {**vars(opt), **t}
        with open(os.path.join(opt.save, 'log.txt'), 'a') as flog:
            flog.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        loss = float(state['loss'])
        classacc.add(state['output'].data, state['sample'][1])
        meter_loss.add(loss)
        if state['train']:
            state['iterator'].set_postfix(loss=loss)

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        state['iterator'] = tqdm(train_loader, dynamic_ncols=True)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        with torch.no_grad():
            engine.test(h, test_loader)

        test_acc = classacc.value()[0]
        print(log({
            "train_loss": train_loss[0],
            "train_acc": train_acc[0],
            "test_loss": meter_loss.value()[0],
            "test_acc": test_acc,
            "epoch": state['epoch'],
            "num_classes": num_classes,
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
        }, state))
        print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
              (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
Esempio n. 24
0
def main():

    ###Initialization
    device = torch.device(args.device)

    My_transform = transforms.Compose([
        transforms.ToTensor(),  # default : range [0, 255] -> [0.0,1.0]
    ])

    Train_data = FashionMnistread(True, transform=My_transform)
    Test_data = FashionMnistread(False, transform=My_transform)

    Train_dataloader = DataLoader(dataset=Train_data,
                                  batch_size=args.n_batches,
                                  shuffle=False)
    Test_dataloader = DataLoader(dataset=Test_data,
                                 batch_size=args.n_batches,
                                 shuffle=False)

    def get_iterator(mode):
        if mode is True:
            return Train_dataloader
        elif mode is False:
            return Test_dataloader

    from torchsummary import summary
    _model = My_Model(num_of_class=args.n_classes)
    _model.to(device)
    summary(_model, input_size=(1, 28, 28))

    optimizer = torch.optim.SGD(_model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    criterion = nn.CrossEntropyLoss()

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)
    confusion_meter = tnt.meter.ConfusionMeter(args.n_classes, normalized=True)

    plotLogger = Visualier(num_classes=args.n_classes)
    writelogger = Customized_Logger(file_name=args.log_file)

    ###End Initialization

    def h(sample):
        data, classes, training = sample

        _model.train() if training else _model.eval()

        labels = torch.LongTensor(classes).to(device)
        data = data.to(device).float()

        f_class = _model(data)
        loss = criterion(f_class, labels)

        p_class = F.softmax(f_class, dim=1)
        return loss, p_class

    def reset_meters():
        classerr.reset()
        meter_loss.reset()
        confusion_meter.reset()

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classerr.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        confusion_meter.add(state['output'].data,
                            torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].item())

    def on_start_epoch(state):
        reset_meters()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):

        train_acc = classerr.value()[0]
        train_err = meter_loss.value()[0]

        # do validation at the end of each epoch
        reset_meters()
        engine.test(h, get_iterator(False))

        val_acc = classerr.value()[0]
        val_err = meter_loss.value()[0]
        plotLogger.plot(train_acc=train_acc,
                        train_err=train_err,
                        val_acc=val_acc,
                        val_err=val_err,
                        confusion=confusion_meter.value(),
                        epoch=state['epoch'])
        writelogger.update(train_acc=train_acc,
                           train_err=train_err,
                           val_acc=val_acc,
                           val_err=val_err,
                           epoch=state['epoch'],
                           model=_model)

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h,
                 get_iterator(True),
                 maxepoch=args.n_epoches,
                 optimizer=optimizer)
Esempio n. 25
0
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(train):
        return DataLoader(create_dataset(opt, train), batch_size=opt.batch_size, shuffle=train,
                          num_workers=opt.nthread, pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    f, params, stats = resnet(opt.depth, opt.width, num_classes)

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD(params.values(), lr, 0.9, weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors, stats = state_dict['params'], state_dict['stats']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print_tensor_dict(params)
    print('\nAdditional buffers:')
    print_tensor_dict(stats)

    n_parameters = sum(p.numel() for p in params.values())
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        inputs = Variable(cast(sample[0], opt.dtype))
        targets = Variable(cast(sample[1], 'long'))
        y = data_parallel(f, inputs, params, stats, sample[2], list(range(opt.ngpu)))
        return F.cross_entropy(y, targets), y

    def log(t, state):
        torch.save(dict(params={k: v.data for k, v in params.items()},
                        stats=stats,
                        optimizer=state['optimizer'].state_dict(),
                        epoch=t['epoch']),
                   open(os.path.join(opt.save, 'model.pt7'), 'wb'))
        z = vars(opt).copy(); z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classacc.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        state['iterator'] = tqdm(train_loader)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, test_loader)

        test_acc = classacc.value()[0]
        print(log({
            "train_loss": train_loss[0],
            "train_acc": train_acc[0],
            "test_loss": meter_loss.value()[0],
            "test_acc": test_acc,
            "epoch": state['epoch'],
            "num_classes": num_classes,
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
        }, state))
        print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' % \
              (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
Esempio n. 26
0
def main():
    args = get_args()

    device = torch.device("cuda:1")
    # device = torch.device("cpu")
    model = SequenceEncoder(3, 2, device)

    n_data = 10
    data = get_toydata(n_data, device)
    teacher = [reverse_tensor(seq, device) for seq in data]
    training_data = (data, teacher)

    optim_params = {
        "params": model.parameters(),
        "weight_decay": args.weight_decay,
        "lr": args.lr,
    }
    optimizer = torch.optim.Adam(**optim_params)

    meter_loss = torchnet.meter.AverageValueMeter()
    port = 8097
    train_loss_logger = VisdomPlotLogger(
        'line', port=port, opts={'title': 'encoder_toy - train loss'})

    def network(sample):
        x = sample[0]  # sequence
        t = sample[1]  # target sequence
        y, mu, logvar = model(x)
        loss = get_loss(y, t, mu, logvar)
        o = y, mu, logvar
        return loss, o

    def reset_meters():
        meter_loss.reset()

    def on_sample(state):
        state['sample'] = list(state['sample'])
        state['sample'].append(state['train'])
        model.zero_grad()
        model.init_hidden()

    def on_forward(state):
        loss_value = state['loss'].data
        meter_loss.add(state['loss'].data)

    def on_start_epoch(state):
        reset_meters()
        if 'dataset' not in state:
            dataset = state['iterator']
            state['dataset'] = dataset
        dataset = state['dataset']
        state['iterator'] = tqdm(zip(*dataset))

    def on_end_epoch(state):
        loss_value = meter_loss.value()[0]
        epoch = state['epoch']
        print(f'loss[{epoch}]: {loss_value:.4f}')
        train_loss_logger.log(epoch, loss_value)
        dataset = state['dataset']
        state['iterator'] = tqdm(zip(*dataset))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch

    engine.train(network,
                 training_data,
                 maxepoch=args.epochs,
                 optimizer=optimizer)
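A note on why on_start_epoch and on_end_epoch above rebuild state['iterator'] from zip(*dataset): zip() returns a one-shot iterator, so it is exhausted after a single pass and has to be recreated for every epoch, as this small example illustrates.

pairs = zip([1, 2, 3], ['a', 'b', 'c'])
print(list(pairs))  # [(1, 'a'), (2, 'b'), (3, 'c')]
print(list(pairs))  # [] -- already exhausted, hence tqdm(zip(*dataset)) each epoch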
Esempio n. 27
0
        else:
            state_dict = checkpoint
        model.load_state_dict(state_dict=state_dict, strict=False)
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, args.start_epoch - 1))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))
print(args)

if len(args.gpus) > 0:
    model.cuda()
    cudnn.benchmark = True
    if len(args.gpus) > 1:
        model = nn.DataParallel(model, device_ids=args.gpus).cuda()

engine = Engine()
meter_loss = tnt.meter.AverageValueMeter()
topk = [1, 5]
classerr = tnt.meter.ClassErrorMeter(topk=topk,
                                     accuracy=False)  # default is also False
confusion_meter = tnt.meter.ConfusionMeter(num_classes[args.dataset],
                                           normalized=True)

if args.visdom:
    if args.log_name == '':
        args.log_name = args.build_type

    train_loss_logger = VisdomPlotLogger(
        'line', opts={'title': '[{}] Train Loss'.format(args.log_name)})
    train_err_logger = VisdomPlotLogger(
        'line', opts={'title': '[{}] Train Class Error'.format(args.log_name)})
Esempio n. 28
0
    from torchnet.engine import Engine
    from torchnet.logger import VisdomPlotLogger, VisdomLogger
    from torchvision.utils import make_grid
    from torchvision.datasets.mnist import MNIST
    from tqdm import tqdm
    import torchnet as tnt

    model = CapsuleNet()
    # model.load_state_dict(torch.load('epochs/epoch_327.pt'))
    model.cuda()

    print("# parameters:", sum(param.numel() for param in model.parameters()))

    optimizer = Adam(model.parameters())

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    meter_accuracy = tnt.meter.ClassErrorMeter(accuracy=True)
    confusion_meter = tnt.meter.ConfusionMeter(NUM_CLASSES, normalized=True)

    train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'})
    train_error_logger = VisdomPlotLogger('line',
                                          opts={'title': 'Train Accuracy'})
    test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'})
    test_accuracy_logger = VisdomPlotLogger('line',
                                            opts={'title': 'Test Accuracy'})
    confusion_logger = VisdomLogger('heatmap',
                                    opts={
                                        'title': 'Confusion matrix',
                                        'columnnames':
                                        list(range(NUM_CLASSES)),
Esempio n. 29
0
def main():
    """Train a simple Hybrid Scattering + CNN model on MNIST.

    Scattering features are normalized by batch normalization.
    The model achieves 99.6% testing accuracy after 10 epochs.
    """
    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)

    scat = Scattering(M=28, N=28, J=2).cuda()
    K = 81

    params = {
        'conv1.weight':     conv_init(K, 64, 1),
        'conv1.bias':       torch.zeros(64),
        'bn.weight':        torch.Tensor(K).uniform_(),
        'bn.bias':          torch.zeros(K),
        'linear2.weight':   linear_init(64*7*7, 512),
        'linear2.bias':     torch.zeros(512),
        'linear3.weight':   linear_init(512, 10),
        'linear3.bias':     torch.zeros(10),
    }

    stats = {'bn.running_mean': torch.zeros(K).cuda(),
             'bn.running_var': torch.ones(K).cuda()}

    for k, v in params.items():
        params[k] = Variable(v.cuda(), requires_grad=True)

    def h(sample):
        x = scat(sample[0].float().cuda().unsqueeze(1) / 255.0).squeeze(1)
        inputs = Variable(x)
        targets = Variable(torch.LongTensor(sample[1]).cuda())
        o = f(inputs, params, stats, sample[2])
        return F.cross_entropy(o, targets), o

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classerr.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start_epoch(state):
        classerr.reset()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        print('Training accuracy:', classerr.value())

    def on_end(state):
        print('Training' if state['train'] else 'Testing', 'accuracy')
        print(classerr.value())

    optimizer = torch.optim.SGD(params.values(), lr=0.01, momentum=0.9,
                                weight_decay=0.0005)

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_end'] = on_end
    print('Training:')
    engine.train(h, get_iterator(True), 10, optimizer)
    print('Testing:')
    engine.test(h, get_iterator(False))
Esempio n. 30
0
                            num_workers=12,
                            batch_size=64,
                            shuffle=False)

    model = Net(upscale_factor=UPSCALE_FACTOR)
    criterion = AdjacentFrameLoss()
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()

    print('# parameters:', sum(param.numel() for param in model.parameters()))

    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    scheduler = MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1)

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    meter_psnr = PSNRMeter()

    train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'})
    train_psnr_logger = VisdomPlotLogger('line', opts={'title': 'Train PSNR'})
    val_loss_logger = VisdomPlotLogger('line', opts={'title': 'Val Loss'})
    val_psnr_logger = VisdomPlotLogger('line', opts={'title': 'Val PSNR'})

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch

    engine.train(processor,
                 train_loader,
Esempio n. 31
0
def main(n):
    viz = Visdom()
    params = {
        'conv0.weight': conv_init(1, 50, 5),
        'conv0.bias': torch.zeros(50),
        'conv1.weight': conv_init(50, 50, 5),
        'conv1.bias': torch.zeros(50),
        'linear2.weight': linear_init(800, 512),
        'linear2.bias': torch.zeros(512),
        'linear3.weight': linear_init(512, 10),
        'linear3.bias': torch.zeros(10),
    }  # build the parameter dict; conv_init and linear_init use He initialization
    params = {k: Variable(v, requires_grad=True) for k, v in params.items()}
    # torch.autograd.Variable: wrap each Tensor in a Variable
    if n == 1:
        optimizer = torch.optim.SGD(params.values(),
                                    lr=0.01,
                                    momentum=0.9,
                                    weight_decay=0.0005)
    if n == 2:
        optimizer = torch.optim.Adam(params.values(),
                                     lr=0.001,
                                     betas=(0.9, 0.99))
    if n == 3:
        optimizer = torch.optim.RMSprop(params.values(), lr=0.01, alpha=0.9)
    # optimizer selected above according to n (1: SGD, 2: Adam, 3: RMSprop)
    engine = Engine()
    # Engine provides a template for the training loop, wiring together the model, DatasetIterator, Criterion and Meter
    meter_loss = tnt.meter.AverageValueMeter()  # tracks the mean and variance of whatever is added, e.g. the average loss
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)  # meter for the classification error
    confusion_meter = tnt.meter.ConfusionMeter(10,
                                               normalized=True)  # multi-class confusion matrix

    port = 8097  # Visdom port

    train_loss_logger = VisdomPlotLogger('line', port=port, opts={}, win='102')
    # the window (win) is fixed here; the trace name cannot be set here, so the legend labels must be configured via opts:
    viz.update_window_opts(
        win='101',
        opts=dict(
            legend=['Apples', 'Pears'],
            xtickmin=0,
            xtickmax=1,
            xtickstep=0.5,
            ytickmin=0,
            ytickmax=1,
            ytickstep=0.5,
            markersymbol='cross-thin-open',
        ),
    )

    # train_loss line plot
    train_err_logger = VisdomPlotLogger('line',
                                        port=port,
                                        opts={'title': 'Train Class Error'
                                              })  # train_err line plot
    test_loss_logger = VisdomPlotLogger('line',
                                        port=port,
                                        opts={'title':
                                              'Test Loss'})  # test_loss line plot
    test_err_logger = VisdomPlotLogger(
        'line',
        port=port,
        opts={'title': 'Test Class Error'},
    )  # test_err line plot
    confusion_logger = VisdomLogger('heatmap',
                                    port=port,
                                    opts={
                                        'title': 'Confusion matrix',
                                        'columnnames': list(range(10)),
                                        'rownames': list(range(10))
                                    })

    # misclassification information

    def h(sample):  # fetch the data; o = f(params, inputs, mode) is the network output
        inputs = Variable(sample[0].float() / 255.0)
        targets = Variable(torch.LongTensor(sample[1]))
        o = f(params, inputs, sample[2])
        return F.cross_entropy(o, targets), o  # return the loss and o

    def reset_meters():  # reset all meters
        classerr.reset()
        meter_loss.reset()
        confusion_meter.reset()

    # hooks = {
    # ['on_start'] = function() end, -- setup and initialisation before training starts
    # ['on_start_epoch'] = function()end, -- runs before each epoch
    # ['on_sample'] = function()end, -- runs after each sample is drawn
    # ['on_forward'] = function()end, -- runs after model:forward()
    # ?['onForwardCriterion'] = function()end, -- runs after the forward pass of the criterion
    # ?['onBackwardCriterion'] = function()end, -- runs after the backward pass of the criterion
    # ['on_backward'] = function()end, -- runs after the error has been backpropagated
    # ['on_update'] = function()end, -- runs after the weight parameters are updated
    # ['on_end_epoch'] = function()end, -- runs at the end of each epoch
    # ['on_end'] = function()end, -- cleanup once the whole training run has finished
    # }

    # state = {
    # ['network'] = network, -- the model
    # ['criterion'] = criterion, -- the loss function
    # ['iterator'] = iterator, -- the data iterator
    # ['lr'] = lr, -- the learning rate
    # ['lrcriterion'] = lrcriterion, --
    # ['maxepoch'] = maxepoch, -- maximum number of epochs
    # ['sample'] = {}, -- the current sample; it can be inspected in on_sample through this field
    # ['epoch'] = 0, -- the current epoch
    # ['t'] = 0, -- number of samples processed so far
    # ['training'] = true -- whether we are in the training phase
    # }

    # def train(self, network, iterator, maxepoch, optimizer):
    # state = {
    #      'network': network,
    #      'iterator': iterator,
    #      'maxepoch': maxepoch,
    #      'optimizer': optimizer,
    #      'epoch': 0,      # epoch
    #      't': 0,          # sample
    #      'train': True,
    #     }
    def on_sample(state):  # runs after each sample is drawn
        state['sample'].append(state['train'])  # append the train flag to the sample
        if state.get('epoch') is not None and state['t'] > 10:
            if n == 1:
                train_loss_logger.log(state['t'],
                                      meter_loss.value()[0],
                                      name="SGD")
            if n == 2:
                train_loss_logger.log(state['t'],
                                      meter_loss.value()[0],
                                      name="Adam")
            if n == 3:
                train_loss_logger.log(state['t'],
                                      meter_loss.value()[0],
                                      name="RMSprop")
        reset_meters()

    def on_forward(state):  # runs after the model's forward()
        classerr.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        confusion_meter.add(state['output'].data,
                            torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start_epoch(state):  # runs before each epoch
        reset_meters()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):  # runs at the end of each epoch
        print('Training loss: %.4f, accuracy: %.2f%%' %
              (meter_loss.value()[0], classerr.value()[0]))
        # train_loss_logger.log(state['epoch'], meter_loss.value()[0])
        # train_err_logger.log(state['epoch'], classerr.value()[0])

        # do validation at the end of each epoch
        reset_meters()
        engine.test(h, get_iterator(False))
        # test_loss_logger.log(state['epoch'], meter_loss.value()[0])
        # test_err_logger.log(state['epoch'], classerr.value()[0])
        # confusion_logger.log(confusion_meter.value())
        print('Testing loss: %.4f, accuracy: %.2f%%' %
              (meter_loss.value()[0], classerr.value()[0]))

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, get_iterator(True), maxepoch=1, optimizer=optimizer)
Esempio n. 32
0
def main():
    st = time.time()
    opt = parser.parse_args()
    epoch_step = json.loads(opt.epoch_step)
    print('parsed options:', vars(opt))

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    epoch_step = json.loads(opt.epoch_step)

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    f_s, params_s = define_student(opt.depth, opt.width)
    f_t, params_t = define_teacher(opt.teacher_params)
    params = {'student.'+k: v for k, v in params_s.items()}
    params.update({'teacher.'+k: v for k, v in params_t.items()})

    params = OrderedDict((k, p.cuda().detach().requires_grad_(p.requires_grad)) for k, p in params.items())

    optimizable = [v for v in params.values() if v.requires_grad]
    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD(optimizable, lr, momentum=0.9, weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    iter_train = get_iterator(opt.imagenetpath, opt.batch_size, opt.nthread, True)
    iter_test = get_iterator(opt.imagenetpath, opt.batch_size, opt.nthread, False)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors = state_dict['params']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    utils.print_tensor_dict(params)


    n_parameters = sum(p.numel() for p in optimizable)
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(topk=[1, 5], accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')
    meters_at = [tnt.meter.AverageValueMeter() for i in range(4)]

    def f(inputs, params, mode):
        y_s, g_s = f_s(inputs, params, mode, 'student.')
        with torch.no_grad():
            y_t, g_t = f_t(inputs, params, 'teacher.')
        return y_s, y_t, [utils.at_loss(x, y) for x, y in zip(g_s, g_t)]

    def h(sample):
        inputs, targets, mode = sample
        inputs = inputs.cuda().detach()
        targets = targets.cuda().long().detach()
        y_s, y_t, loss_groups = utils.data_parallel(f, inputs, params, mode, range(opt.ngpu))
        loss_groups = [v.sum() for v in loss_groups]
        [m.add(v.item()) for m,v in zip(meters_at, loss_groups)]
        return utils.distillation(y_s, y_t, targets, opt.temperature, opt.alpha) \
                + opt.beta * sum(loss_groups), y_s

    def log(t, state):
        torch.save(dict(params={k: v.data for k, v in params.items()},
                        optimizer=state['optimizer'].state_dict(),
                        epoch=t['epoch']),
                   os.path.join(opt.save, 'model.pt7'))
        z = vars(opt).copy(); z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classacc.add(state['output'].data, state['sample'][1])
        loss = state['loss'].item()
        meter_loss.add(loss)
        if state['train']:
            state['iterator'].set_postfix(loss=loss)

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        [meter.reset() for meter in meters_at]
        state['iterator'] = tqdm(iter_train, dynamic_ncols=True)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, iter_test)

        print(log({
            "train_loss": train_loss[0],
            "train_acc": train_acc,
            "test_loss": meter_loss.value()[0],
            "test_acc": classacc.value(),
            "epoch": state['epoch'],
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
            "at_losses": [m.value() for m in meters_at],
           }, state))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, iter_train, opt.epochs, optimizer)

    print("total time: {}".format(time.time()-st))
Esempio n. 33
0
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    torch.manual_seed(opt.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(mode):
        return DataLoader(create_dataset(opt, mode), opt.batch_size, shuffle=mode,
                          num_workers=opt.nthread, pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    f_1, params_1 = resnet(opt.depth, opt.width, num_classes)
    f_2, params_2 = resnet(opt.depth, opt.width, num_classes)

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD([v for v in params_1.values() if v.requires_grad] + [v for v in params_2.values() if v.requires_grad], lr, momentum=0.9, weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        raise NotImplementedError

    print('\nParameters:')
    print_tensor_dict(params_1)
    print_tensor_dict(params_2)

    n_parameters = sum([p.numel() for p in params_1.values() if p.requires_grad] + [p.numel() for p in params_2.values() if p.requires_grad])
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')
    classacc_ep1 = tnt.meter.ClassErrorMeter(accuracy=True)
    classacc_ep2 = tnt.meter.ClassErrorMeter(accuracy=True)

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        global _outputs, _loss

        connection_map = np.array([
            [0,0,0, 1,1,1],
            [0,0,0, 1,1,1],
            [0,0,0, 1,1,1],

            [1,1,1, 0,0,0],
            [1,1,1, 0,0,0],
            [1,1,1, 0,0,0]])

        inputs = cast(sample[0], opt.dtype)
        targets = cast(sample[1], 'long')
        net1_outputs = data_parallel(f_1, inputs, params_1, sample[2], list(range(opt.ngpu)))
        net2_outputs = data_parallel(f_2, inputs, params_2, sample[2], list(range(opt.ngpu)))
        net1_outputs = [o.float() for o in net1_outputs]
        net2_outputs = [o.float() for o in net2_outputs]

        _loss = []

        # hard supervision
        for i, o in enumerate(net1_outputs):
            _loss.append(F.cross_entropy(o, targets))

        for i, o in enumerate(net2_outputs):
            _loss.append(F.cross_entropy(o, targets))

        outputs = net1_outputs + net2_outputs
        # soft supervision
        for i, o in enumerate(outputs):
            for j, o2 in enumerate(outputs):
                if connection_map[i,j] > 0:
                    _loss.append(KL_divergence(o2.detach(),o))

        loss = sum(_loss)
        _outputs = net1_outputs

        return loss, net1_outputs[-1]

    def log(t, state):
        torch.save(dict(params=params_1, epoch=t['epoch'], optimizer=state['optimizer'].state_dict()),
                   os.path.join(opt.save, 'model.pt7'))
        z = {**vars(opt), **t}
        with open(os.path.join(opt.save, 'log.txt'), 'a') as flog:
            flog.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        loss = float(state['loss'])
        classacc.add(state['output'].data, state['sample'][1])
        classacc_ep1.add(_outputs[0].data, state['sample'][1])
        classacc_ep2.add(_outputs[1].data, state['sample'][1])
        meter_loss.add(loss)
        if state['train']:
            state['iterator'].set_postfix(loss=loss)

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        classacc_ep1.reset()
        classacc_ep2.reset()
        meter_loss.reset()
        timer_train.reset()
        state['iterator'] = tqdm(train_loader, dynamic_ncols=True)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        train_acc_ep1 = classacc_ep1.value()
        train_acc_ep2 = classacc_ep2.value()

        meter_loss.reset()
        classacc.reset()
        timer_test.reset()
        classacc_ep1.reset()
        classacc_ep2.reset()

        with torch.no_grad():
            engine.test(h, test_loader)

        test_acc = classacc.value()[0]
        test_acc_ep1 = classacc_ep1.value()[0]
        test_acc_ep2 = classacc_ep2.value()[0]
        print(log({
            "train_loss": train_loss[0],
            "train_acc": train_acc[0],
            "test_loss": meter_loss.value()[0],
            "test_acc": test_acc,
            "train_acc_ep1": train_acc_ep1[0],
            "train_acc_ep2": train_acc_ep2[0],
            "test_acc_ep1": test_acc_ep1,
            "test_acc_ep2": test_acc_ep2,

            "epoch": state['epoch'],
            "num_classes": num_classes,
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
        }, state))
        print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
              (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
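A plausible definition of the KL_divergence helper used for the soft mutual supervision above (the caller passes the detached peer output as the first argument), given as a sketch under that assumption rather than as this listing's own code.

import torch.nn.functional as F

def KL_divergence(target_logits, logits):
    # KL(p_target || p_model); target_logits arrive already detached from the caller
    return F.kl_div(F.log_softmax(logits, dim=1),
                    F.softmax(target_logits, dim=1),
                    reduction='batchmean')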