if not k in new: new[k] = torch.Tensor(1) new[k][0] = -valid['.'.join(parts[:-1] + ['weight'])].min() / 2 return new #dct = torch.load('resnet.pth') #dct = fix_state(model.state_dict(), dct) #model.load_state_dict(dct) if use_cuda: model.cuda() criterion = F.cross_entropy optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) def train(epoch): model.train() running_loss = 0 running_total = 0 correct = 0 for i, (inputs, labels) in enumerate(trainloader): if use_cuda: inputs, labels = inputs.cuda(), labels.cuda() # wrap them in Variable inputs, labels = Variable(inputs), Variable(labels)
if __name__ == "__main__":
    # Command-line configuration for single-GPU SqueezeNet training.
    parser = argparse.ArgumentParser(description='pytorch SqueezeNet on CUDA')
    parser.add_argument('--data_path', type=str, default="./data",
                        help='path where the dataset is saved')
    parser.add_argument('--ckpt_path', type=str, default="./checkpoint",
                        help='path where the checkpoint to be saved')
    parser.add_argument('--device_id', type=int, default=0,
                        help='device id of GPU. (Default: 0)')
    args = parser.parse_args()

    device = torch.device('cuda:' + str(args.device_id))

    # Build the network and move it to the selected GPU.
    version = '1.0'
    network = SqueezeNet(cfg.num_classes, version)
    network.to(device)

    # Label-smoothed cross entropy; smoothing strength comes from config.
    criterion = LabelSmoothingCrossEntropy(reduction="mean",
                                           epsilon=cfg.label_smoothing_eps)
    optimizer = optim.RMSprop(network.parameters(),
                              lr=cfg.lr_init,
                              eps=cfg.rmsprop_epsilon,
                              momentum=cfg.rmsprop_momentum,
                              alpha=cfg.rmsprop_decay)

    dataloader = create_dataset_pytorch(args.data_path, is_train=True)
    step_per_epoch = len(dataloader)

    # Step decay: LR is multiplied by cfg.lr_decay_rate every
    # cfg.lr_decay_epoch epochs.  StepLR counts scheduler.step() calls, so
    # step_size is scaled by step_per_epoch — presumably scheduler.step()
    # is invoked once per batch further down; confirm against the full file.
    scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        gamma=cfg.lr_decay_rate,
        step_size=cfg.lr_decay_epoch * step_per_epoch)
    # scheduler = optim.lr_scheduler.ExponentialLR(
    #     optimizer,
    #     gamma=cfg.lr_decay_rate)

    # Bounded queue — presumably holds recent checkpoint paths so only the
    # newest cfg.keep_checkpoint_max checkpoints are retained (usage
    # continues past this chunk; verify against the rest of the file).
    q_ckpt = Queue(maxsize=cfg.keep_checkpoint_max)