def train(epochs, train_loader, dev_loader, lr, seed, log_interval, output_dir):
    """Train the model.

    Store snapshot models in the output_dir alongside evaluations on the dev
    set after each epoch.

    Args:
        epochs: Number of epochs to run; epochs are numbered 1..epochs.
        train_loader: Iterable of ``(data, target)`` batches that supports
            ``len()`` and exposes a ``dataset`` attribute supporting ``len()``.
        dev_loader: Loader handed to ``test`` after every epoch.
        lr: Learning rate for the Adam optimizer.
        seed: Seed for the torch random number generators.
        log_interval: Progress is printed every ``log_interval`` batches.
        output_dir: Directory used for the per-epoch ``dev-eer-<epoch>`` path
            passed to ``test`` and for the ``iter<epoch>.mdl`` snapshots.
    """
    # Seed before the model is built so that weight initialisation is
    # reproducible too; torch.manual_seed covers the CPU and CUDA generators.
    torch.manual_seed(seed)
    #torch.backends.cudnn.benchmark = False
    #torch.backends.cudnn.deterministic = True

    model = Net()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    measure_size(model)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    print("Using device: ", device)
    model.to(device)

    # range(1, epochs) would stop one epoch early (and never train when
    # epochs == 1), hence the inclusive upper bound.
    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0.0
        for batch_idx, (data, target) in enumerate(train_loader):
            # .to(device) is a no-op when the tensors already live there.
            data, target = data.to(device), target.to(device)
            # Insert a new axis at position 1 without mutating the loader's
            # own batch tensor in place.
            data = data.unsqueeze(1)

            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            total_loss += batch_loss
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), batch_loss))

        # NOTE(review): nll_loss averages over the batch by default, so this is
        # a sum of batch means divided by the sample count. Left unchanged to
        # keep the logged numbers comparable with earlier runs.
        print("Total loss = %.6f" % (total_loss / len(train_loader.dataset)))

        test(model, dev_loader, os.path.join(output_dir, 'dev-eer-' + str(epoch)))
        torch.save(model, os.path.join(output_dir, 'iter' + str(epoch) + '.mdl'))
# Optionally restore the model weights and the starting epoch from the
# checkpoint file named by opt.resume.
if opt.resume:
    if os.path.isfile(opt.resume):
        print("=> loading checkpoint '{}'".format(opt.resume))
        checkpoint = torch.load(opt.resume)
        # Continue with the epoch after the one recorded in the checkpoint.
        opt.start_epoch = checkpoint["epoch"] + 1
        print(opt.start_epoch)
        # checkpoint["model"] exposes state_dict(); only its weights are copied
        # into the current model.
        model.load_state_dict(checkpoint["model"].state_dict())
    else:
        print("=> no checkpoint found at '{}'".format(opt.resume))

if cuda:
    model = model.cuda()
    criterion = criterion.cuda()

optimizer = optim.Adam(model.parameters(), lr=opt.lr, betas=(0.9, 0.999), eps=1e-8)

for epoch in range(opt.start_epoch, opt.nEpochs + 1):
    train(model, epoch)

    # learning rate is divided by 10 whenever (epoch + 1) is a multiple of 500
    if (epoch + 1) % 500 == 0:
        for param_group in optimizer.param_groups:
            param_group['lr'] /= 10.0
        print('Learning rate decay: lr={}'.format(
            optimizer.param_groups[0]['lr']))

    if (epoch + 1) % (opt.snapshots) == 0:
import os

import torch
import torch.optim as optim
import torch.nn as nn
from torch.autograd import Variable

from config import MODEL_PATH, DATA_PATH
from models.model import transform, Net
from data import loadTrainData

trainset, trainloader = loadTrainData()

net = Net()
# Start from previously saved weights when a file exists at MODEL_PATH.
# (os must be imported for this check; without it the script fails with NameError.)
if os.path.exists(MODEL_PATH):
    net.load_state_dict(torch.load(MODEL_PATH))

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
transform=image_transforms['valid'])
}

# Dataloader iterators, make sure to shuffle
# NOTE(review): the 'valid' loader is shuffled as well; confirm that is intended.
dataloaders = {
    'train': DataLoader(data['train'], batch_size = 64, shuffle = True, num_workers = 8, pin_memory = True),
    'valid': DataLoader(data['valid'], batch_size =64, shuffle = True, num_workers = 8, pin_memory = True),
}

# Build the network, print its structure and move it to the target device.
net = Net()
print(net)
net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-5)
epoches=300
eval_acc_list = []
is_best = False
### Train the network
for epoch in range(epoches):
    # Accumulators are reset at the start of every epoch.
    train_loss = 0.
    train_acc = 0.
    for inputs, targets in dataloaders['train']:
        #get the inputs
        inputs = inputs.to(device)
        targets=targets.to(device)
        #zero the parameter gradients
class Trainer(object):
    """Train and validate ``Net`` on the data loaded by ``MnistDataset``.

    ``args`` must provide ``train_image_file``, ``train_label_file``,
    ``batch_size``, ``lr``, ``ckpt``, ``name`` and ``epochs``.
    """

    def __init__(self, args):
        self.args = args
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.prepare_data()
        self.setup_train()

    def prepare_data(self):
        """Build the train/validation loaders from an 80/20 random split."""
        train_val = MnistDataset(
            self.args.train_image_file,
            self.args.train_label_file,
            transform=transforms.Compose([ToTensor()]),
        )
        train_len = int(0.8 * len(train_val))
        train_ds, val_ds = torch.utils.data.random_split(
            train_val, [train_len, len(train_val) - train_len]
        )
        print("Train {}, val {}".format(len(train_ds), len(val_ds)))
        self.train_loader = torch.utils.data.DataLoader(
            train_ds,
            batch_size=self.args.batch_size,
            collate_fn=collate_fn,
            shuffle=True,
        )
        self.val_loader = torch.utils.data.DataLoader(
            val_ds,
            batch_size=self.args.batch_size,
            collate_fn=collate_fn,
            shuffle=False,
        )

    def setup_train(self):
        """Create the model, optimizer and loss; ensure the checkpoint dir exists."""
        self.model = Net().to(self.device)
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.args.lr)
        self.criterion = nn.CrossEntropyLoss().to(self.device)
        # makedirs handles nested paths and does not fail if the directory
        # already exists (or appears between a check and the creation).
        os.makedirs(self.args.ckpt, exist_ok=True)

    def train_one_epoch(self):
        """Run one pass over the training loader.

        Returns:
            The mean of the per-batch training losses.
        """
        train_loss = 0.0
        self.model.train()
        for sample in self.train_loader:
            X, Y_true = sample["X"].to(self.device), sample["Y"].to(self.device)
            self.optimizer.zero_grad()
            output = self.model(X)
            loss = self.criterion(output, Y_true)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
        return train_loss / len(self.train_loader)

    def evaluate(self):
        """Evaluate the model on the validation loader.

        Returns:
            ``(accuracy, mean_loss)`` where ``accuracy`` is a 0-dim float
            tensor (fraction of correct predictions) and ``mean_loss`` is the
            mean of the per-batch validation losses.
        """
        val_loss = 0.0
        self.model.eval()
        predicts = []
        truths = []
        with torch.no_grad():
            for sample in self.val_loader:
                X, Y_true = sample["X"].to(self.device), sample["Y"].to(self.device)
                output = self.model(X)
                loss = self.criterion(output, Y_true)
                val_loss += loss.item()
                predicts.append(torch.argmax(output, dim=1))
                truths.append(Y_true)
        predicts = torch.cat(predicts, dim=0)
        truths = torch.cat(truths, dim=0)
        correct = torch.sum(torch.eq(predicts, truths))
        # Cast before dividing: "/" on an integer tensor floors (or raises) on
        # older PyTorch releases, which would report a wrong accuracy.
        return correct.float() / len(predicts), val_loss / len(self.val_loader)

    def run(self):
        """Train for ``args.epochs`` epochs.

        The model weights are saved into ``args.ckpt`` every time the
        validation accuracy exceeds the best value seen so far.
        """
        max_acc = 0
        for epoch in range(self.args.epochs):
            train_loss = self.train_one_epoch()
            val_acc, _val_loss = self.evaluate()
            if val_acc > max_acc:
                max_acc = val_acc
                torch.save(
                    self.model.state_dict(),
                    os.path.join(
                        self.args.ckpt,
                        "{}_{}_{:.4f}.pth".format(self.args.name, epoch, max_acc),
                    ),
                )
            print(
                "Epoch {}, loss {:.4f}, val_acc {:.4f}".format(
                    epoch, train_loss, val_acc
                )
            )
}

# Neural network and optimizer
# We define neural net in model.py so that it can be reused by the evaluate.py script
# Net is sized from the second dimension of features_train
# (presumably the number of input features -- confirm against model.py).
model = Net(features_train.shape[1])
print(model)

if use_cuda:
    print('Using GPU')
    model.cuda()
else:
    print('Using CPU')

optimizer = optim.SGD(model.parameters(), lr=config["lr"], momentum=config["momentum"])
criterion = nn.CrossEntropyLoss()

# Run training; the result is unpacked into the returned model (model_ft) and
# the train/validation loss and accuracy values.
model_ft, losses_train, accuracy_train, losses_val, accuracy_val = train_model(
    model, criterion, optimizer, dataloders, dataset_sizes, use_cuda,
    num_epochs=config["epochs"])

plt.figure(1)
# Load the project configuration and read the training-data path from it.
with open("../data/configuration.json", "r") as file:
    data = json.load(file)
    # NOTE(review): dumping and re-loading the freshly parsed JSON yields an
    # equal structure; this round trip looks redundant -- confirm and remove.
    data = json.loads(json.dumps(data))
path = data['data']['paths']["train"]
type_ = "train/audio"

# Training hyper-parameters.
BATCH_SIZE = 32
N_EPOCHS = 5
N_classes = 30
lr = 0.001

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = Net(BATCH_SIZE, 1, 128, 1).to(device)
MAX_NUM_WAVS_PER_CLASS = 2**27 - 1  # ~134M
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)


def train(epoch, data):
    # `data` is unpacked as an (inputs, labels) pair; the inputs are converted
    # to a float32 tensor and then walked in steps of BATCH_SIZE.
    net.train().to(device)
    # zero the parameter gradients
    optimizer.zero_grad()
    inputs, labels = data
    # print(type(inputs))
    inputs = torch.from_numpy(np.asarray(inputs).astype(np.float32))
    # Random ordering of the sample indices along the first dimension.
    permutation = torch.randperm(inputs.size()[0])
    running_loss = 0
    # print(inputs.size()[0])
    count = 0
    batch_losses = []
    for batch_idx in range(0, inputs.size()[0], BATCH_SIZE):
for k, v in mask_checkpoint.items(): name = k[7:] # remove module. new_mask_state_dict[name] = v model.load_state_dict(new_mask_state_dict)''' model.cuda() if multi_gpus: model = nn.DataParallel(model).to(device) else: model = model.to(device) if args.optimizer == 'SGD': optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY) elif args.optimizer == 'Adam': optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.99), eps=1e-08, weight_decay=WEIGHT_DECAY) else: print('Optimizer value error!') loss_func = torch.nn.CrossEntropyLoss() eval_acc_list = []