# Training-script fragment. The statements below are collapsed onto one line;
# the fragment begins inside an if/else whose `if` header is not visible here
# and ends inside the body of the `for batch` loop.
#
# Visible steps, in source order:
#   1. Wrap the CUDA copy of `net` in ParallelCaffeNet using `device_ids`;
#      the `else` branch prints '---- Single GPU ----' and calls net.cuda().
#   2. Print the network and build an SGD optimizer over net.parameters()
#      from base_lr, momentum and weight_decay.
#   3. When args.snapshot is set: torch.load() it, set
#      start_epoch = state['batch'] + 1, and restore the 'state_dict' and
#      'optimizer' entries into net and optimizer.
#   4. Put net in train mode, call adjust_learning_rate(optimizer, 0) and log
#      the returned value as the initial learning rate.
#   5. Iterate batch over range(max_iter): when batch is in stepvalues, call
#      adjust_learning_rate(optimizer, batch) and log it; when
#      (batch + 1) % test_interval == 0, call net.eval() and reset
#      average_accuracy / average_loss to 0.0.
#
# NOTE(review): original indentation is lost, so which statements belong to
# each if/else/for body (e.g. whether print(net) is inside the else) is an
# assumption -- confirm against the real file before reformatting.
# NOTE(review): start_epoch is assigned from the snapshot but the visible loop
# starts at 0 via range(max_iter); verify that resuming is handled elsewhere.
# NOTE(review): torch.load() unpickles the file -- presumably snapshots are
# trusted local files; confirm before loading anything user-supplied.
net = ParallelCaffeNet(net.cuda(), device_ids=device_ids) else: print('---- Single GPU ----') net.cuda() print(net) optimizer = optim.SGD(net.parameters(), lr=base_lr, momentum=momentum, weight_decay=weight_decay) if args.snapshot: state = torch.load(args.snapshot) start_epoch = state['batch'] + 1 net.load_state_dict(state['state_dict']) optimizer.load_state_dict(state['optimizer']) print('loaded state %s' % (args.snapshot)) net.train() lr = adjust_learning_rate(optimizer, 0) logging('[0] init_lr = %f' % lr) for batch in range(max_iter): if batch in stepvalues: lr = adjust_learning_rate(optimizer, batch) logging('[%d] lr = %f' % (batch, lr)) if (batch + 1) % test_interval == 0: net.eval() average_accuracy = 0.0 average_loss = 0.0
# Training-script fragment. The statements below are collapsed onto one line;
# the fragment begins with the trailing arguments of a call whose opening is
# not visible here and ends on an `if` header whose body is not visible.
#
# Visible steps, in source order:
#   1. Close a call taking batch_size, shuffle=True and **kwargs
#      (presumably the train_loader construction -- TODO confirm).
#   2. Build model = CaffeNet(protofile), a CrossEntropyLoss criterion, and
#      call model.print_network().
#   3. When args.gpu is set: export it as CUDA_VISIBLE_DEVICES and replace
#      model with torch.nn.DataParallel(model).cuda().
#   4. Build an SGD optimizer over model.parameters() from learning_rate,
#      momentum and weight_decay.
#   5. When args.weights is set: torch.load() it, set
#      start_epoch = state['epoch']+1, and restore the 'state_dict' and
#      'optimizer' entries into model and optimizer.
#   6. train(epoch): put model in train mode and, for every (data, target)
#      batch from train_loader, move the tensors to CUDA when args.gpu is set,
#      wrap them in Variable, zero the gradients, run the forward pass,
#      compute model_loss(output, target), backpropagate and step the
#      optimizer. Every log_interval batches a branch is entered whose body
#      lies beyond this fragment.
#
# NOTE(review): original indentation is lost, so the extent of each if/for
# body is an assumption -- confirm against the real file before reformatting.
# NOTE(review): the model may already be wrapped in DataParallel when
# load_state_dict() runs; verify the checkpoint's key names match that form.
# NOTE(review): torch.load() unpickles the file -- presumably weights are
# trusted local files; confirm before loading anything user-supplied.
# NOTE(review): `epoch` is not used in the visible part of train(); it may be
# used in the logging branch that is cut off.
batch_size=batch_size, shuffle=True, **kwargs) model = CaffeNet(protofile) model_loss = nn.CrossEntropyLoss() model.print_network() if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu model = torch.nn.DataParallel(model).cuda() optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay) if args.weights: state = torch.load(args.weights) start_epoch = state['epoch']+1 model.load_state_dict(state['state_dict']) optimizer.load_state_dict(state['optimizer']) print('loaded state %s' % (args.weights)) def train(epoch): model.train() for batch_idx, (data, target) in enumerate(train_loader): if args.gpu: data, target = data.cuda(), target.cuda() data, target = Variable(data), Variable(target) optimizer.zero_grad() output = model(data) loss = model_loss(output, target) loss.backward() optimizer.step() if batch_idx % log_interval == 0: