def build_model(self):
    """Construct the network, the SGD optimizer and the loss criterion.

    The architecture is chosen by ``self.network_config``; the "ResNet"
    setting builds the split variant (plain ResNet18 kept commented out).
    """
    # Pick the architecture requested by the configuration.
    if self.network_config == "LeNet":
        self.network = LeNet()
    elif self.network_config == "ResNet":
        #self.network=ResNet18()
        self.network = ResNetSplit18(1)
    # Cross-entropy loss paired with plain SGD + momentum.
    self.criterion = torch.nn.CrossEntropyLoss()
    self.optimizer = torch.optim.SGD(self.network.parameters(),
                                     lr=self.lr,
                                     momentum=self.momentum)
def build_model(self, num_classes=10):
    """Build the configured network, its optimizer/criterion and the
    receive buffer used for models pushed by the parameter server.

    Args:
        num_classes: number of output classes. Fix: previously the
            DenseNet and AlexNet branches hard-coded 10 and silently
            ignored this parameter; it is now forwarded everywhere.

    Raises:
        ValueError: if ``self.network_config`` names no known network
            (previously this fell through and crashed later with an
            unrelated AttributeError).
    """
    # build network
    if self.network_config == "LeNet":
        self.network = LeNet()
    elif self.network_config == "ResNet18":
        self.network = ResNet18(num_classes=num_classes)
    elif self.network_config == "ResNet34":
        self.network = ResNet34(num_classes=num_classes)
    elif self.network_config == "FC":
        self.network = FC_NN()
    elif self.network_config == "DenseNet":
        # Fix: forward num_classes instead of hard-coding nClasses=10.
        self.network = DenseNet(growthRate=40, depth=190, reduction=0.5,
                                bottleneck=True, nClasses=num_classes)
    elif self.network_config == "VGG11":
        self.network = vgg11_bn(num_classes)
    elif self.network_config == "AlexNet":
        # Fix: forward num_classes instead of hard-coding 10.
        self.network = alexnet(num_classes=num_classes)
    else:
        raise ValueError("unknown network config: {}".format(self.network_config))
    # set up optimizer
    self.optimizer = torch.optim.SGD(self.network.parameters(),
                                     lr=self.lr, momentum=self.momentum)
    self.criterion = nn.CrossEntropyLoss()
    # assign a buffer for receiving models from parameter server
    self.init_recv_buf()
    # enable GPU here
    if self._enable_gpu:
        self.network.cuda()
def build_model(self, num_classes=10):
    """Build the configured network plus the gradient accumulator that
    collects per-worker gradients on the parameter server side.

    Args:
        num_classes: number of output classes. Fix: previously the
            DenseNet and AlexNet branches hard-coded 10 and silently
            ignored this parameter; it is now forwarded everywhere.

    Raises:
        ValueError: if ``self.network_config`` names no known network
            (previously this fell through and crashed later with an
            unrelated AttributeError).
    """
    # build network
    if self.network_config == "LeNet":
        self.network = LeNet()
    elif self.network_config == "ResNet18":
        self.network = ResNet18(num_classes=num_classes)
    elif self.network_config == "ResNet34":
        self.network = ResNet34(num_classes=num_classes)
    elif self.network_config == "FC":
        self.network = FC_NN()
    elif self.network_config == "DenseNet":
        # Fix: forward num_classes instead of hard-coding nClasses=10.
        self.network = DenseNet(growthRate=40, depth=190, reduction=0.5,
                                bottleneck=True, nClasses=num_classes)
    elif self.network_config == "VGG11":
        self.network = vgg11_bn(num_classes)
    elif self.network_config == "AlexNet":
        # Fix: forward num_classes instead of hard-coding 10.
        self.network = alexnet(num_classes=num_classes)
    else:
        raise ValueError("unknown network config: {}".format(self.network_config))
    # TODO(hwang): make sure this is useful
    self.optimizer = SGD(self.network.parameters(), lr=self.lr, momentum=self.momentum)
    # assign a gradient accumulator to collect gradients from workers
    # (world_size - 1 workers: rank 0 is presumably the PS — TODO confirm)
    self.grad_accumulator = GradientAccumulator(self.network, self.world_size - 1, self._compress)
    self.init_model_shapes()
    # enable GPU here
    if self._enable_gpu:
        self.network.cuda()
def build_model(model_name):
    """Return a freshly constructed network for *model_name*.

    Unknown names yield ``None``, mirroring the original fall-through.
    """
    # Dispatch table instead of an if/elif chain.
    factories = {
        "LeNet": LeNet,
        "ResNet18": ResNet18,
        "ResNet34": ResNet34,
        "ResNet50": ResNet50,
    }
    factory = factories.get(model_name)
    return factory() if factory is not None else None
def build_model(model_name, num_classes):
    """Return a freshly constructed network for *model_name*.

    Args:
        model_name: one of "LeNet", "ResNet18", "ResNet34", "ResNet50",
            "VGG11".
        num_classes: number of output classes. Fix: previously only the
            ResNet18 and VGG11 branches received it, inconsistent with
            the rest of this file where ResNet34 takes
            ``num_classes=num_classes``.

    Raises:
        ValueError: for an unknown *model_name* (previously this
            silently returned ``None``).
    """
    # build network
    if model_name == "LeNet":
        return LeNet()
    elif model_name == "ResNet18":
        return ResNet18(num_classes=num_classes)
    elif model_name == "ResNet34":
        return ResNet34(num_classes=num_classes)
    elif model_name == "ResNet50":
        return ResNet50(num_classes=num_classes)
    elif model_name == "VGG11":
        return vgg11_bn(num_classes)
    raise ValueError("unknown model name: {}".format(model_name))
def _load_model(self, file_path, num_classes=10):
    """Re-create the configured network and load its weights from disk.

    Bug fix: ``num_classes`` was referenced in the ResNet branches but
    never defined anywhere in the function, so those branches raised
    NameError. It is now a keyword parameter (default 10, matching the
    commented-out ``build_model`` call), keeping callers unchanged.

    Args:
        file_path: path to a pickled state_dict saved via ``torch.save``.
        num_classes: number of output classes for architectures that
            take one.
    """
    #self.network = build_model(self.network_config, num_classes=10)
    # build network
    if self.network_config == "LeNet":
        self.network = LeNet()
    elif self.network_config == "ResNet18":
        self.network = ResNet18(num_classes=num_classes)
    elif self.network_config == "ResNet34":
        self.network = ResNet34(num_classes=num_classes)
    elif self.network_config == "FC":
        self.network = FC_NN()
    # load the checkpointed parameters into the freshly built network
    with open(file_path, "rb") as f_:
        self.network.load_state_dict(torch.load(f_))
def _load_model(self, file_path, num_classes=10):
    """Re-create the configured network and load its weights from disk.

    Bug fix: ``num_classes`` was referenced in the ResNet and VGG11
    branches but never defined anywhere in the function, so those
    branches raised NameError. It is now a keyword parameter (default
    10, matching the commented-out ``build_model`` call), keeping
    callers unchanged.

    Args:
        file_path: path to a pickled state_dict saved via ``torch.save``.
        num_classes: number of output classes for architectures that
            take one (DenseNet/AlexNet keep their original hard-coded 10
            so loaded checkpoints still match their shapes).
    """
    #self.network = build_model(self.network_config, num_classes=10)
    # build network
    if self.network_config == "LeNet":
        self.network = LeNet()
    elif self.network_config == "ResNet18":
        self.network = ResNet18(num_classes=num_classes)
    elif self.network_config == "ResNet34":
        self.network = ResNet34(num_classes=num_classes)
    elif self.network_config == "FC":
        self.network = FC_NN()
    elif self.network_config == "DenseNet":
        self.network = DenseNet(growthRate=40, depth=190, reduction=0.5,
                                bottleneck=True, nClasses=10)
    elif self.network_config == "VGG11":
        self.network = vgg11_bn(num_classes)
    elif self.network_config == "AlexNet":
        self.network = alexnet(num_classes=10)
    # load the checkpointed parameters into the freshly built network
    with open(file_path, "rb") as f_:
        self.network.load_state_dict(torch.load(f_))
class NN_Trainer(object):
    """Single-process trainer for the split-model experiment.

    Uses the legacy (pre-0.4) PyTorch ``Variable`` API throughout
    (``loss.data[0]``, ``volatile=True``) — NOTE(review): this code
    targets an old torch version; confirm before upgrading.
    """

    def __init__(self, **kwargs):
        # Hyper-parameters supplied by the launcher's argument dict.
        self.batch_size = kwargs['batch_size']
        self.lr = kwargs['learning_rate']
        self.max_epochs = kwargs['max_epochs']
        self.momentum = kwargs['momentum']
        self.network_config = kwargs['network']

    def build_model(self):
        """Construct the network, SGD optimizer and loss criterion."""
        # build network
        if self.network_config == "LeNet":
            self.network = LeNet()
        elif self.network_config == "ResNet":
            #self.network=ResNet18()
            self.network = ResNetSplit18(1)
        # set up optimizer
        self.optimizer = torch.optim.SGD(self.network.parameters(), lr=self.lr, momentum=self.momentum)
        self.criterion = torch.nn.CrossEntropyLoss()

    def train_and_validate(self, train_loader, test_loader):
        """Run ``max_epochs`` of training, validating after each epoch."""
        # iterate of epochs
        for i in range(self.max_epochs):
            # change back to training mode
            self.network.train()
            for batch_idx, (data, y_batch) in enumerate(train_loader):
                iter_start_time = time.time()
                data, target = Variable(data), Variable(y_batch)
                self.optimizer.zero_grad()
                ################# backward on normal model ############################
                '''
                logits = self.network(data)
                loss = self.criterion(logits, target)
                '''
                #######################################################################
                ################ backward on splitted model ###########################
                # Detach the logits into a fresh leaf Variable, compute the loss
                # on it, then feed its gradient back into the split network's
                # custom backward entry point.
                logits = self.network(data)
                logits_1 = Variable(logits.data, requires_grad=True)
                loss = self.criterion(logits_1, target)
                loss.backward()
                self.network.backward_single(logits_1.grad)
                #######################################################################
                tmp_time_0 = time.time()
                for param in self.network.parameters():
                    # get gradient from layers here
                    # in this version we fetch weights at once
                    # remember to change type here, which is essential
                    # NOTE(review): `grads` is overwritten each iteration and
                    # never used — presumably a timing probe for the fetch cost.
                    grads = param.grad.data.numpy().astype(np.float64)
                duration_backward = time.time() - tmp_time_0
                tmp_time_1 = time.time()
                self.optimizer.step()
                duration_update = time.time() - tmp_time_1
                # calculate training accuracy
                prec1, prec5 = accuracy(logits.data, y_batch, topk=(1, 5))
                # load the training info
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} Prec@1: {} Prec@5: {} Time Cost: {}'.format(
                    i, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.data[0],
                    prec1.numpy()[0], prec5.numpy()[0],
                    time.time() - iter_start_time))
            # we evaluate the model performance on end of each epoch
            self.validate(test_loader)

    def validate(self, test_loader):
        """Evaluate on *test_loader*; print average NLL loss and top-1/5 precision."""
        self.network.eval()
        test_loss = 0
        correct = 0
        # Running sums for batch-averaged top-1/top-5 precision.
        prec1_counter_ = prec5_counter_ = batch_counter_ = 0
        for data, y_batch in test_loader:
            # volatile=True: legacy inference mode (no autograd graph built)
            data, target = Variable(data, volatile=True), Variable(y_batch)
            output = self.network(data)
            test_loss += F.nll_loss(output, target, size_average=False).data[0]  # sum up batch loss
            #pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
            #correct += pred.eq(target.data.view_as(pred)).cpu().sum()
            prec1_tmp, prec5_tmp = accuracy(output.data, y_batch, topk=(1, 5))
            prec1_counter_ += prec1_tmp.numpy()[0]
            prec5_counter_ += prec5_tmp.numpy()[0]
            batch_counter_ += 1
        # Mean over batches (not over samples) — assumes roughly equal batch sizes.
        prec1 = prec1_counter_ / batch_counter_
        prec5 = prec5_counter_ / batch_counter_
        test_loss /= len(test_loader.dataset)
        print('Test set: Average loss: {:.4f}, Prec@1: {} Prec@5: {}'.format(
            test_loss, prec1, prec5))
class NN_Trainer(object):
    """Single-process trainer (plain, non-split model variant).

    Uses the legacy (pre-0.4) PyTorch ``Variable`` API
    (``loss.data[0]``) — NOTE(review): this code targets an old torch
    version; confirm before upgrading.
    """

    def __init__(self, **kwargs):
        # Hyper-parameters supplied by the launcher's argument dict.
        self.batch_size = kwargs['batch_size']
        self.lr = kwargs['learning_rate']
        self.max_epochs = kwargs['max_epochs']
        self.momentum = kwargs['momentum']
        self.network_config = kwargs['network']

    def build_model(self):
        """Construct the network, SGD optimizer and loss criterion."""
        # build network
        if self.network_config == "LeNet":
            self.network = LeNet()
        elif self.network_config == "ResNet":
            self.network = ResNet18()
        # set up optimizer
        self.optimizer = torch.optim.SGD(self.network.parameters(), lr=self.lr, momentum=self.momentum)
        self.criterion = torch.nn.CrossEntropyLoss()

    def train(self, train_loader):
        """Train for ``max_epochs`` epochs, printing per-step timings and accuracy."""
        self.network.train()
        # iterate of epochs
        for i in range(self.max_epochs):
            for batch_idx, (data, y_batch) in enumerate(train_loader):
                iter_start_time = time.time()
                data, target = Variable(data), Variable(y_batch)
                self.optimizer.zero_grad()
                logits = self.network(data)
                loss = self.criterion(logits, target)
                tmp_time_0 = time.time()
                loss.backward()
                #for key_name, param in self.network.state_dict().items():
                #    print(param)
                #    print("----------------------------------------------------------------")
                #exit()
                for param in self.network.parameters():
                    # get gradient from layers here
                    # in this version we fetch weights at once
                    # remember to change type here, which is essential
                    # NOTE(review): `grads` is overwritten each iteration and
                    # never used — presumably a timing probe for the fetch cost.
                    grads = param.grad.data.numpy().astype(np.float64)
                duration_backward = time.time() - tmp_time_0
                tmp_time_1 = time.time()
                self.optimizer.step()
                duration_update = time.time() - tmp_time_1
                print("backward duration: {}".format(duration_backward))
                print("update duration: {}".format(duration_update))
                # calculate training accuracy
                prec1, prec5 = accuracy(logits.data, y_batch, topk=(1, 5))
                # load the training info
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} Prec@1: {} Prec@5: {} Time Cost: {}'.format(
                    i, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.data[0],
                    prec1.numpy()[0], prec5.numpy()[0],
                    time.time() - iter_start_time))