def trainloop(model, learning_rate, optimizer): import torch.optim as optim if optimizer == "SGD": optimizer = optim.SGD(list(model.parameters()), lr=learning_rate, momentum=0.9) if optimizer == "Adam": optimizer = optim.Adam(list(model.parameters()), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False) if optimizer == "Adadelta": optimizer = optim.Adadelta(list(model.parameters()), lr=learning_rate, rho=0.9, eps=1e-06, weight_decay=0) if optimizer == "Adagrad": optimizer = optim.Adagrad(list(model.parameters()), lr=learning_rate, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10) if optimizer == "AdamW": optimizer = optim.AdamW(list(model.parameters()), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False) if optimizer == "Adamax": optimizer = optim.Adamax(list(model.parameters()), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0) if optimizer == "ASGD": optimizer = optim.ASGD(list(model.parameters()), lr=learning_rate, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0) if optimizer == "Rprop": optimizer = optim.Rprop(list(model.parameters()), lr=learning_rate, etas=(0.5, 1.2), step_sizes=(1e-06, 50)) if optimizer == "RMSprop": optimizer = optim.RMSprop(list(model.parameters()), lr=learning_rate, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False) import torch.optim as optim import numpy as np import numpy # optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9) gen = Generator() import random criterion = nn.L1Loss() for epoch in range(1, 6): # loop over the dataset multiple times running_loss = 0.0 i = 0 for i in range(0, 1000): # get the inputs; data is a list of [inputs, labels] k = 0 file_no = random.randint(0, 16000) inputs, labels = get_data_batch(file_no) INN = inputs.to(device) OUT = labels.to(device) optimizer.zero_grad() outputs = model(INN) loss = criterion(outputs, OUT) loss.backward() optimizer.step() del INN del OUT torch.cuda.empty_cache() # print statistics running_loss += loss.item() LOSS = 0.0 if i == 999: # print every 2000 mini-batches# CHANGE THIS VALUE # print('[%d, %5d] loss: %.5f' %(epoch , i + 1, running_loss / 3500)) running_loss = 0.0 g = 0 for g in range(0, 312): k = 0 file_no = random.randint(67012, 72730) inputs, labels = get_data_batch(file_no) INN = inputs.to(device) OUT = labels.to(device) optimizer.zero_grad() outputs = model(INN) loss = criterion(outputs, OUT) LOSS += loss.item() del INN del OUT torch.cuda.empty_cache() LOSS = LOSS / 312 running_loss = 0.0 if epoch == 1: best_loss = LOSS # torch.save(net, 'pca_30_points_relational_model_2.pt') if LOSS < best_loss: best_loss = LOSS # torch.save(net, 'pca_30_points_relational_model_2.pt') del LOSS del running_loss torch.cuda.empty_cache() LOSS = 0 running_loss = 0.0 # best_loss = best_loss.cpu() # best_loss = best_loss.detach().numpy() return best_loss
def optimization_algorithms(SCI_optimizer, cnn, LR, SCI_SGD_MOMENTUM, REGULARIZATION): if type(SCI_optimizer) is str: if (SCI_optimizer == 'Adam'): optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.01, 0.999), weight_decay=REGULARIZATION) if (SCI_optimizer == 'AMSGrad'): optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.01, 0.999), weight_decay=REGULARIZATION, amsgrad=True) if (SCI_optimizer == 'AdamW'): optimizer = AdamW(cnn.parameters(), lr=LR, betas=(0.01, 0.999), weight_decay=REGULARIZATION) if (SCI_optimizer == 'RMSprop'): optimizer = optim.RMSprop(cnn.parameters(), lr=LR) #if (SCI_optimizer == 'SparseAdam') or (int(SCI_optimizer) == 4) : #optimizer = optim.SparseAdam(cnn.parameters(), lr=LR) if (SCI_optimizer == 'SGD'): optimizer = optim.SGD(cnn.parameters(), lr=LR, momentum=SCI_SGD_MOMENTUM, weight_decay=REGULARIZATION) if (SCI_optimizer == 'Adadelta'): optimizer = optim.Adadelta(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION) if (SCI_optimizer == 'Rprop'): optimizer = optim.Rprop(cnn.parameters(), lr=LR) #if (SCI_optimizer == 'Adagrad') or (int(SCI_optimizer) == 7) : # optimizer = optim.Adagrad(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION) if (SCI_optimizer == 'Adamax'): optimizer = optim.Adamax(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION) if (SCI_optimizer == 'ASGD'): optimizer = optim.ASGD(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION) #if (SCI_optimizer == 'LBFGS') or (int(SCI_optimizer) == 10) : #optimizer = optim.LBFGS(cnn.parameters(), lr=LR) else: if (int(SCI_optimizer) == 7): optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.01, 0.999), weight_decay=REGULARIZATION) if (int(SCI_optimizer) == 5): optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.01, 0.999), weight_decay=REGULARIZATION, amsgrad=True) if (int(SCI_optimizer) == 4): optimizer = AdamW(cnn.parameters(), lr=LR, betas=(0.01, 0.999), weight_decay=REGULARIZATION) if (int(SCI_optimizer) == 3): optimizer = optim.RMSprop(cnn.parameters(), lr=LR) #if (SCI_optimizer == 'SparseAdam') or (int(SCI_optimizer) == 4) : #optimizer = optim.SparseAdam(cnn.parameters(), lr=LR) if (int(SCI_optimizer) == 2): optimizer = optim.SGD(cnn.parameters(), lr=LR, momentum=SCI_SGD_MOMENTUM, weight_decay=REGULARIZATION) if (int(SCI_optimizer) == 6): optimizer = optim.Adadelta(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION) if (int(SCI_optimizer) == 1): optimizer = optim.Rprop(cnn.parameters(), lr=LR) #if (SCI_optimizer == 'Adagrad') or (int(SCI_optimizer) == 7) : # optimizer = optim.Adagrad(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION) if (int(SCI_optimizer) == 8): optimizer = optim.Adamax(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION) if (int(SCI_optimizer) == 9): optimizer = optim.ASGD(cnn.parameters(), lr=LR, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=REGULARIZATION) #if (SCI_optimizer == 'LBFGS') or (int(SCI_optimizer) == 10) : #optimizer = optim.LBFGS(cnn.parameters(), lr=LR) return optimizer
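# Hedged usage sketch (not from the original source): assuming torch.nn and
# torch.optim are imported as below, the selector above can be driven either by
# an optimizer name or by its integer code; a bare `AdamW` would additionally
# need to be in scope for option 'AdamW'/4. The tiny `cnn` module here is a
# placeholder for illustration only.
import torch.nn as nn
import torch.optim as optim

cnn = nn.Linear(16, 4)  # stand-in model, illustrative only
opt_by_name = optimization_algorithms('SGD', cnn, LR=0.01,
                                      SCI_SGD_MOMENTUM=0.9, REGULARIZATION=1e-4)
opt_by_code = optimization_algorithms(2, cnn, LR=0.01,
                                      SCI_SGD_MOMENTUM=0.9, REGULARIZATION=1e-4)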
def fit(self, training_data, validation_data, options, model, device=None, detail=False, run=None): """ Perform the training. This is not called "train" because the base class already defines that method with a different meaning. The base class "train" method puts the Module into "training mode". """ print( "Training {} using {} rows of featurized training input...".format( self.name(), training_data.num_rows)) if training_data.mean is not None: self.mean = torch.from_numpy(np.array([[training_data.mean] ])).to(device) self.std = torch.from_numpy(np.array([[training_data.std] ])).to(device) else: self.mean = None self.std = None start = time.time() loss_function = nn.NLLLoss() initial_rate = options.learning_rate lr_scheduler = options.lr_scheduler oo = options.optimizer_options self.training = True if options.optimizer == "Adadelta": optimizer = optim.Adadelta(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, rho=oo.rho, eps=oo.eps) elif options.optimizer == "Adagrad": optimizer = optim.Adagrad(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, lr_decay=oo.lr_decay) elif options.optimizer == "Adam": optimizer = optim.Adam(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, betas=oo.betas, eps=oo.eps) elif options.optimizer == "Adamax": optimizer = optim.Adamax(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, betas=oo.betas, eps=oo.eps) elif options.optimizer == "ASGD": optimizer = optim.ASGD(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, lambd=oo.lambd, alpha=oo.alpha, t0=oo.t0) elif options.optimizer == "RMSprop": optimizer = optim.RMSprop(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, eps=oo.eps, alpha=oo.alpha, momentum=oo.momentum, centered=oo.centered) elif options.optimizer == "Rprop": optimizer = optim.Rprop(self.parameters(), lr=initial_rate, etas=oo.etas, step_sizes=oo.step_sizes) elif options.optimizer == "SGD": optimizer = optim.SGD(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, momentum=oo.momentum, dampening=oo.dampening) print(optimizer) num_epochs = options.max_epochs batch_size = options.batch_size learning_rate = options.learning_rate lr_min = options.lr_min lr_peaks = options.lr_peaks ticks = training_data.num_rows / batch_size # iterations per epoch # Calculation of total iterations in non-rolling vs rolling training # ticks = num_rows/batch_size (total number of iterations per epoch) # Non-Rolling Training: # Total Iteration = num_epochs * ticks # Rolling Training: # irl = Initial_rolling_length (We are using 2) # If num_epochs <= max_rolling_length: # Total Iterations = sum(range(irl, irl + num_epochs)) # If num_epochs > max_rolling_length: # Total Iterations = sum(range(irl, irl + max_rolling_length)) + (num_epochs - max_rolling_length)*ticks if options.rolling: rolling_length = 2 max_rolling_length = int(ticks) if max_rolling_length > options.max_rolling_length + rolling_length: max_rolling_length = options.max_rolling_length + rolling_length bag_count = 100 hidden_bag_size = batch_size * bag_count if num_epochs + rolling_length < max_rolling_length: max_rolling_length = num_epochs + rolling_length total_iterations = sum(range(rolling_length, max_rolling_length)) if num_epochs + rolling_length > max_rolling_length: epochs_remaining = num_epochs + rolling_length - max_rolling_length total_iterations += epochs_remaining * training_data.num_rows / batch_size ticks = total_iterations / num_epochs else: total_iterations = ticks * num_epochs gamma = 
options.lr_gamma if not lr_min: lr_min = learning_rate scheduler = None if lr_scheduler == "TriangleLR": steps = lr_peaks * 2 + 1 stepsize = num_epochs / steps scheduler = TriangularLR(optimizer, stepsize * ticks, lr_min, learning_rate, gamma) elif lr_scheduler == "CosineAnnealingLR": # divide by odd number to finish on the minimum learning rate cycles = lr_peaks * 2 + 1 scheduler = optim.lr_scheduler.CosineAnnealingLR( optimizer, T_max=total_iterations / cycles, eta_min=lr_min) elif lr_scheduler == "ExponentialLR": scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma) elif lr_scheduler == "StepLR": scheduler = optim.lr_scheduler.StepLR( optimizer, step_size=options.lr_step_size, gamma=gamma) elif lr_scheduler == "ExponentialResettingLR": reset = (num_epochs * ticks) / 3 # reset at the 1/3 mark. scheduler = ExponentialResettingLR(optimizer, gamma, reset) # optimizer = optim.Adam(model.parameters(), lr=0.0001) log = [] for epoch in range(num_epochs): self.train() if options.rolling: rolling_length += 1 if rolling_length <= max_rolling_length: hidden1_bag = torch.from_numpy( np.zeros([1, hidden_bag_size, model.hidden_units], dtype=np.float32)).to(device) if model.architecture == 'LSTM': cell1_bag = torch.from_numpy( np.zeros([1, hidden_bag_size, model.hidden_units], dtype=np.float32)).to(device) if model.num_layers >= 2: hidden2_bag = torch.from_numpy( np.zeros([1, hidden_bag_size, model.hidden_units], dtype=np.float32)).to(device) if model.architecture == 'LSTM': cell2_bag = torch.from_numpy( np.zeros( [1, hidden_bag_size, model.hidden_units], dtype=np.float32)).to(device) if model.num_layers == 3: hidden3_bag = torch.from_numpy( np.zeros([ 1, hidden_bag_size, training_data.num_keywords ], dtype=np.float32)).to(device) if model.architecture == 'LSTM': cell3_bag = torch.from_numpy( np.zeros([ 1, hidden_bag_size, training_data.num_keywords ], dtype=np.float32)).to(device) for i_batch, (audio, labels) in enumerate( training_data.get_data_loader(batch_size)): if not self.batch_first: audio = audio.transpose(1, 0) # GRU wants seq,batch,feature if device: audio = audio.to(device) labels = labels.to(device) # Also, we need to clear out the hidden state, # detaching it from its history on the last instance. 
if options.rolling: if rolling_length <= max_rolling_length: if (i_batch + 1) % rolling_length == 0: self.init_hidden() break shuffled_indices = list(range(hidden_bag_size)) np.random.shuffle(shuffled_indices) temp_indices = shuffled_indices[:batch_size] if model.architecture == 'LSTM': if self.hidden1 is not None: hidden1_bag[:, temp_indices, :], cell1_bag[:, temp_indices, :] = self.hidden1 self.hidden1 = (hidden1_bag[:, 0:batch_size, :], cell1_bag[:, 0:batch_size, :]) if model.num_layers >= 2: hidden2_bag[:, temp_indices, :], cell2_bag[:, temp_indices, :] = self.hidden2 self.hidden2 = (hidden2_bag[:, 0:batch_size, :], cell2_bag[:, 0:batch_size, :]) if model.num_layers == 3: hidden3_bag[:, temp_indices, :], cell3_bag[:, temp_indices, :] = self.hidden3 self.hidden3 = (hidden3_bag[:, 0:batch_size, :], cell3_bag[:, 0:batch_size, :]) else: if self.hidden1 is not None: hidden1_bag[:, temp_indices, :] = self.hidden1 self.hidden1 = hidden1_bag[:, 0:batch_size, :] if model.num_layers >= 2: hidden2_bag[:, temp_indices, :] = self.hidden2 self.hidden2 = hidden2_bag[:, 0:batch_size, :] if model.num_layers == 3: hidden3_bag[:, temp_indices, :] = self.hidden3 self.hidden3 = hidden3_bag[:, 0:batch_size, :] else: self.init_hidden() # Before the backward pass, use the optimizer object to zero all of the # gradients for the variables it will update (which are the learnable # weights of the model). This is because by default, gradients are # accumulated in buffers( i.e, not overwritten) whenever .backward() # is called. Checkout docs of torch.autograd.backward for more details. optimizer.zero_grad() # optionally normalize the audio if self.mean is not None: audio = (audio - self.mean) / self.std # Run our forward pass. keyword_scores = self(audio) # Compute the loss, gradients loss = loss_function(keyword_scores, labels) # Backward pass: compute gradient of the loss with respect to all the learnable # parameters of the model. Internally, the parameters of each Module are stored # in Tensors with requires_grad=True, so this call will compute gradients for # all learnable parameters in the model. loss.backward() # move to next learning rate if scheduler: scheduler.step() # Calling the step function on an Optimizer makes an update to its parameters # applying the gradients we computed during back propagation optimizer.step() learning_rate = optimizer.param_groups[0]['lr'] if detail: learning_rate = optimizer.param_groups[0]['lr'] log += [{ 'iteration': iteration, 'loss': loss.item(), 'learning_rate': learning_rate }] # Find the best prediction in each sequence and return it's accuracy passed, total, rate = self.evaluate(validation_data, batch_size, device) learning_rate = optimizer.param_groups[0]['lr'] current_loss = float(loss.item()) print( "Epoch {}, Loss {:.3f}, Validation Accuracy {:.3f}, Learning Rate {}" .format(epoch, current_loss, rate * 100, learning_rate)) log += [{ 'epoch': epoch, 'loss': current_loss, 'accuracy': rate, 'learning_rate': learning_rate }] if run is not None: run.log('progress', epoch / num_epochs) run.log('epoch', epoch) run.log('accuracy', rate) run.log('loss', current_loss) run.log('learning_rate', learning_rate) end = time.time() self.training = False print("Trained in {:.2f} seconds".format(end - start)) return log
def fit(self, X, y, early_stop=4e-06, hybrid=None, accept_bias=True, gpu=False, initial=None): t = np.array([np.arange(0, len(X) * self.dt, self.dt)]).T feature, label = Variable(torch.FloatTensor( [t]), requires_grad=True), Variable( torch.FloatTensor([y]), requires_grad=False) #input (time array) and desired output if self.fe is None: self.fe = np.array(np.zeros(len(feature[0]))).T self.fe = torch.FloatTensor([self.fe]) else: self.fe = torch.FloatTensor([self.fe]) #create Model activation_function = MemInt(dt=self.dt, fe=self.fe, h=torch.FloatTensor([X])) self.model = nn.Sequential( OrderedDict([('fc1', nn.Linear(1, self.n_hidden)), ('GLE', activation_function), ('fc2', nn.Linear(self.n_hidden, 1))])) #choose Loss function #criterion = RMSELoss criterion = nn.MSELoss(reduction='mean') #choose optimizer if self.optimizer == 'Rprop': optimizer = optim.Rprop(self.model.parameters(), lr=self.learning_rate) elif self.optimizer == 'Adam': optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate) elif self.optimizer == 'SGD': optimizer = optim.SGD(self.model.parameters(), lr=self.learning_rate) else: print('Choose an optimzer (Adam or Rprop!)') #initialize weights self.model.apply(self.weights_init_uniform_rule) self.model.fc2.weight.data.uniform_(1, 1) if not initial is None: n_weights = self.n_hidden for i in range(n_weights): #self.model.fc1.weight.data[i] = (initial)**(1/3) self.model.fc1.weight.data[i] = (initial[i][0]) self.model.fc1.weight.data[i] /= n_weights self.model.fc1.bias.data[i] = np.log(initial[i][1]) self.model.fc2.weight.data[i] = (initial[i][0]) self.model.fc2.weight.data[i] /= n_weights #create list of losses for every epoch losses = [] #start training for e in range(0, self.n_epochs): # Training pass optimizer.zero_grad() with torch.no_grad(): if accept_bias == False: self.model.fc1.bias.zero_() self.model.fc2.bias.zero_() self.model.fc2.weight.data.uniform_(1, 1) #print(self.model.fc1.weight.data) #print(self.model.fc1.bias.data) #model.fc1.weight = torch.nn.Parameter output = self.model(feature) loss = criterion(output, label) losses.append(loss.detach().numpy()) print('loss in epoch ' + str(e + 1) + ' : ' + str(loss.detach().numpy())) if loss.detach().numpy() < early_stop: print('Minimal loss reached! early stop of training!') break if not hybrid is None: #Change to SGD if training is trapped in local minimum if loss.detach().numpy() < hybrid: optimizer = optim.SGD(self.model.parameters(), lr=self.learning_rate) loss.backward(retain_graph=True) optimizer.step() return losses, self.model
def main(root_dir='data', files=['toy.train'], test_ratio=0.2, valid_ratio=0.1, batch_size=128, which_model='DRNN', cell_type='GRU', input_size=[15817], embed_dim=128, hidden_size=128, dropout_r=0.1, n_layers=1, bii=False, time=False, preTrainEmb='', output_dir='models', model_prefix='toy.train', model_customed='', lr=10**-2, L2=10**-4, eps=10**-8, num_epochs=100, patience=5, optimizer='adam', seed=0, use_cuda=False): """ Predictive Analytics on EHR Args: - root_dir: Path to the folders with pickled file(s) - files: Name(s) of pickled file(s), separated by space. so the argument will be saved as a list If list of 1: data will be first split into train, validation and test, then 3 dataloaders will be created. If list of 3: 3 dataloaders will be created from 3 files directly. Files must be in the following order: training, validation and test. - test_ratio: Test data size Default: 0.2 - valid_ratio: Validation data size Default: 0.1 - batch_size: Batch size for training, validation or test Default: 128 - which_model: Choose from {"RNN", "DRNN", "QRNN", "TLSTM", "LR", "RETAIN"} - cell_type: For RNN based models, choose from {"RNN", "GRU", "LSTM"} - input_size: Input dimension(s) separated in space the output will be a list, decide which embedding types to use. If len of 1, then 1 embedding; len of 3, embedding medical, diagnosis and others separately (3 embeddings) Default: [15817] - embed_dim: Number of embedding dimension Default: 128 - hidden_size: Size of hidden layers Default: 128 - dropout_r: Probability for dropout Default: 0.1 - n_layers: Number of Layers, for Dilated RNNs, dilations will increase exponentialy with mumber of layers Default: 1 - bii: Indicator of whether Bi-directin is activated. Default: False - time: Indicator of whether time is incorporated into embedding. Default: False - preTrainEmb: Path to pretrained embeddings file. Default:'' - output_dir: Output directory where the best model will be saved and logs written Default: we will create'../models/' - model_prefix: Prefix name for the saved model e.g: toy.train Default: [(training)file name] - model_customed: Second customed specs of name for the saved model e.g: _RNN_GRU. Default: none - lr: Learning rate Default: 0.01 - L2: L2 regularization Default: 0.0001 - eps: Term to improve numerical stability Default: 0.00000001 - num_epochs: Number of epochs for training Default: 100 - patience: Number of stagnant epochs to wait before terminating training Default: 5 - optimizer: Select which optimizer to train. Upper/lower case does not matter Default: adam - seed: Seed for reproducibility Default:0 - use_cuda: Use GPU Default:False """ ########################################################################### # 1. Data preparation ########################################################################### print("\nLoading and preparing data...") if len(files) == 1: print( '1 file found. Data will be split into train, validation and test.' ) data = EHRdataFromPickles(root_dir=root_dir, file_name=files[0], sort=False, test_ratio=test_ratio, valid_ratio=valid_ratio, model=which_model, seed=seed) #No sort before splitting # Dataloader splits train, test, valid = data.__splitdata__() #this time, sort is true # can comment out this part if you dont want to know what's going on here print("\nSee an example data structure from training data:") print(data.__getitem__(35, seeDescription=True)) elif len(files) == 2: print( '2 files found. 
2 dataloaders will be created for train and validation' ) train = EHRdataFromPickles(root_dir=root_dir, file_name=files[0], sort=True, model=which_model, seed=seed) valid = EHRdataFromPickles(root_dir=root_dir, file_name=files[1], sort=True, model=which_model, seed=seed) test = None else: print('3 files found. 3 dataloaders will be created for each') train = EHRdataFromPickles(root_dir=root_dir, file_name=files[0], sort=True, model=which_model, seed=seed) valid = EHRdataFromPickles(root_dir=root_dir, file_name=files[1], sort=True, model=which_model, seed=seed) test = EHRdataFromPickles(root_dir=root_dir, file_name=files[2], sort=True, model=which_model, seed=seed) print("\nSee an example data structure from training data:") print(train.__getitem__(40, seeDescription=True)) print(f"\nTraining data contains {len(train)} patients") print(f"Validation data contains {len(valid)} patients") print(f"Test data contains {len(test)} patients" if test else "No test file provided") ########################################################################### # 2. Model loading ########################################################################### print(f"\n{args.which_model} model initialization...", end="") pack_pad = True if which_model == "RNN" else False if which_model == 'RNN': ehr_model = models.EHR_RNN(input_size=input_size, embed_dim=embed_dim, hidden_size=hidden_size, use_cuda=use_cuda, n_layers=n_layers, dropout_r=dropout_r, cell_type=cell_type, bii=bii, time=time, preTrainEmb=preTrainEmb) elif which_model == 'DRNN': ehr_model = models.EHR_DRNN( input_size=input_size, embed_dim=embed_dim, hidden_size=hidden_size, use_cuda=use_cuda, n_layers=n_layers, dropout_r=dropout_r, #default =0 cell_type=cell_type, #default ='DRNN' bii=False, time=time, preTrainEmb=preTrainEmb) elif which_model == 'QRNN': ehr_model = models.EHR_QRNN( input_size=input_size, embed_dim=embed_dim, hidden_size=hidden_size, use_cuda=use_cuda, n_layers=n_layers, dropout_r=dropout_r, #default =0.1 cell_type='QRNN', #doesn't support normal cell types bii=False, #QRNN doesn't support bi time=time, preTrainEmb=preTrainEmb) elif which_model == 'TLSTM': ehr_model = models.EHR_TLSTM( input_size=input_size, embed_dim=embed_dim, hidden_size=hidden_size, use_cuda=use_cuda, n_layers=n_layers, dropout_r=dropout_r, #default =0.1 cell_type='TLSTM', #doesn't support normal cell types bii=False, time=time, preTrainEmb=preTrainEmb) elif which_model == 'RETAIN': ehr_model = models.RETAIN(input_size=input_size, embed_dim=embed_dim, hidden_size=hidden_size, use_cuda=use_cuda, n_layers=n_layers) else: ehr_model = models.EHR_LR_emb(input_size=input_size, embed_dim=embed_dim, use_cuda=use_cuda, preTrainEmb=preTrainEmb) print("Done") ########################################################################### # 3. 
call dataloader and create a list of minibatches ########################################################################### # separate loader and minibatches for train, test, validation # Note: mbs stands for minibatches print('\nCreating the list of training minibatches') train_mbs = list( tqdm( EHRdataloader(train, use_cuda=use_cuda, batch_size=batch_size, packPadMode=pack_pad))) print('\nCreating the list of valid minibatches') valid_mbs = list( tqdm( EHRdataloader(valid, use_cuda=use_cuda, batch_size=batch_size, packPadMode=pack_pad))) print('\nCreating the list of test minibatches') test_mbs = list( tqdm( EHRdataloader(test, use_cuda=use_cuda, batch_size=batch_size, packPadMode=pack_pad))) if test else None # make sure cuda is working if use_cuda: ehr_model = ehr_model.cuda() print(f"\n{args.optimizer.title()} optimizer initialization...", end="") #model optimizers to choose from. Upper/lower case dont matter if args.optimizer.lower() == 'adam': optimizer = optim.Adam(ehr_model.parameters(), lr=lr, weight_decay=L2, eps=eps) elif args.optimizer.lower() == 'adadelta': optimizer = optim.Adadelta(ehr_model.parameters(), lr=lr, weight_decay=L2, eps=eps) elif args.optimizer.lower() == 'adagrad': optimizer = optim.Adagrad(ehr_model.parameters(), lr=lr, weight_decay=L2) elif args.optimizer.lower() == 'adamax': optimizer = optim.Adamax(ehr_model.parameters(), lr=lr, weight_decay=L2, eps=eps) elif args.optimizer.lower() == 'asgd': optimizer = optim.ASGD(ehr_model.parameters(), lr=lr, weight_decay=L2) elif args.optimizer.lower() == 'rmsprop': optimizer = optim.RMSprop(ehr_model.parameters(), lr=lr, weight_decay=L2, eps=eps) elif args.optimizer.lower() == 'rprop': optimizer = optim.Rprop(ehr_model.parameters(), lr=lr) elif args.optimizer.lower() == 'sgd': optimizer = optim.SGD(ehr_model.parameters(), lr=lr, weight_decay=L2) else: raise NotImplementedError print("Done") ########################################################################### # 4. Train, validation and test. default: batch shuffle = true ########################################################################### try: ut.epochs_run( num_epochs, train=train_mbs, valid=valid_mbs, test=test_mbs, model=ehr_model, optimizer=optimizer, shuffle=True, #batch_size = batch_size, which_model=which_model, patience=patience, output_dir=output_dir, model_prefix=model_prefix, model_customed=model_customed) #we can keyboard interupt now except KeyboardInterrupt: print('-' * 89) print('Exiting from training early')
# get output layer output y_pred = self.out(h_output2) return y_pred # define a neural network using the customised structure net = MultiLayerNet(input_neurons, output_neurons) # define loss function loss_func = torch.nn.CrossEntropyLoss() # define optimizer for standard network optimiser = optim.Rprop(net.parameters(), lr=learning_rate, etas=(0.5, 1.2), step_sizes=(1e-06, 50)) # store all losses for visualisation all_losses = [] previous_loss = None # train a neural network for epoch in range(num_epochs): # Perform forward pass: compute predicted y by passing x to the model. Y_pred = net(X) # Compute loss loss = loss_func(Y_pred, Y)
def reconstruct_stim(features, net, img_mean=np.array((0, 0, 0)).astype(np.float32), img_std=np.array((1, 1, 1)).astype(np.float32), norm=255, bgr=False, initial_input=None, input_size=(224, 224, 3), feature_masks=None, layer_weight=None, channel=None, mask=None, opt_name='SGD', prehook_dict = {}, lr_start=0.02, lr_end=1e-12, momentum_start=0.009, momentum_end=0.009, decay_start=0.02, decay_end=1e-11, grad_normalize = True, image_jitter=False, jitter_size=4, image_blur=True, sigma_start=2, sigma_end=0.5, p=3, lamda=0.5, TVlambda = [0,0], clip_extreme=False, clip_extreme_every=4, e_pct_start=1, e_pct_end=1, clip_small_norm=False, clip_small_norm_every=4, n_pct_start=5., n_pct_end=5., loss_type='l2', iter_n=200, save_intermediate=False, save_intermediate_every=1, save_intermediate_path=None, disp_every=1, ): if loss_type == "l2": loss_fun = torch.nn.MSELoss(reduction='sum') elif loss_type == "L2_with_reg": loss_fun = MSE_with_regulariztion(L_lambda=lamda, alpha=p, TV_lambda=TVlambda) else: assert loss_type + ' is not correct' # make save dir if save_intermediate: if save_intermediate_path is None: save_intermediate_path = os.path.join('..', 'recon_img_by_icnn' + datetime.now().strftime('%Y%m%dT%H%M%S')) if not os.path.exists(save_intermediate_path): os.makedirs(save_intermediate_path) # image size input_size = input_size # image mean img_mean = img_mean img_std = img_std norm = norm # image norm noise_img = np.random.randint(0, 256, (input_size)) img_norm0 = np.linalg.norm(noise_img) img_norm0 = img_norm0/2. # initial input if initial_input is None: initial_input = np.random.randint(0, 256, (input_size)) else: input_size = initial_input.shape if save_intermediate: if len(input_size) == 3: #image save_name = 'initial_image.jpg' if bgr: PIL.Image.fromarray(np.uint8(initial_input[...,[2,1,0]])).save(os.path.join(save_intermediate_path, save_name)) else: PIL.Image.fromarray(np.uint8(initial_input)).save(os.path.join(save_intermediate_path, save_name)) elif len(input_size) == 4: # video # if you install cv2 and ffmpeg, you can use save_video function which save preferred video as video format save_name = 'initial_video.avi' save_video(initial_input, save_name, save_intermediate_path, bgr) save_name = 'initial_video.gif' save_gif(initial_input, save_name, save_intermediate_path, bgr, fr_rate=150) else: print('Input size is not appropriate for save') assert len(input_size) not in [3,4] # layer_list layer_dict = features layer_list = list(features.keys()) # number of layers num_of_layer = len(layer_list) # layer weight if layer_weight is None: weights = np.ones(num_of_layer) weights = np.float32(weights) weights = weights / weights.sum() layer_weight = {} for j, layer in enumerate(layer_list): layer_weight[layer] = weights[j] # feature mask if feature_masks is None: feature_masks = create_feature_masks(layer_dict, masks=mask, channels=channel) # iteration for gradient descent input = initial_input.copy().astype(np.float32) if len(input_size) == 3: input = img_preprocess(input, img_mean, img_std, norm) else: input = vid_preprocess(input, img_mean, img_std, norm) loss_list = np.zeros(iter_n, dtype='float32') for t in range(iter_n): # parameters lr = lr_start + t * (lr_end - lr_start) / iter_n momentum = momentum_start + t * (momentum_end - momentum_start) / iter_n decay = decay_start + t * (decay_end - decay_start) / iter_n sigma = sigma_start + t * (sigma_end - sigma_start) / iter_n # shift if image_jitter: ox, oy = np.random.randint(-jitter_size, jitter_size+1, 2) input = np.roll(np.roll(input, ox, 
-1), oy, -2) # forward input = torch.tensor(input[np.newaxis], requires_grad=True) if opt_name == 'Adam': #op = optim.Adam([input], lr = lr) op = optim.Adam([input], lr = lr) elif opt_name == 'SGD': op = optim.SGD([input], lr=lr, momentum=momentum) #op = optim.SGD([input], lr=lr) elif opt_name == 'Adadelta': op = optim.Adadelta([input],lr = lr) elif opt_name == 'Adagrad': op = optim.Adagrad([input], lr = lr) elif opt_name == 'AdamW': op = optim.AdamW([input], lr = lr) elif opt_name == 'SparseAdam': op = optim.SparseAdam([input], lr = lr) elif opt_name == 'Adamax': op = optim.Adamax([input], lr = lr) elif opt_name == 'ASGD': op = optim.ASGD([input], lr = lr) elif opt_name == 'RMSprop': op = optim.RMSprop([input], lr = lr) elif opt_name == 'Rprop': op = optim.Rprop([input], lr = lr) fw = get_cnn_features(net, input, features.keys(), prehook_dict) # backward for net err = 0. loss = 0. # set the grad of network to 0 net.zero_grad() op.zero_grad() for j in range(num_of_layer): # op.zero_grad() target_layer_id = num_of_layer -1 -j target_layer = layer_list[target_layer_id] # extract activation or mask at input true video, and mask act_j = fw[target_layer_id].clone() feat_j = features[target_layer].clone() mask_j = feature_masks[target_layer] layer_weight_j = layer_weight[target_layer] masked_act_j = torch.masked_select(act_j, torch.FloatTensor(mask_j).bool()) masked_feat_j = torch.masked_select(feat_j, torch.FloatTensor(mask_j).bool()) # calculate loss using pytorch loss function loss_j = loss_fun(masked_act_j, masked_feat_j) * layer_weight_j # backward the gradient to the video loss_j.backward(retain_graph=True) loss += loss_j.detach().numpy() if grad_normalize: grad_mean = torch.abs(input.grad).mean() if grad_mean > 0: input.grad /= grad_mean op.step() input = input.detach().numpy()[0] err = err + loss loss_list[t] = loss # clip pixels with extreme value if clip_extreme and (t+1) % clip_extreme_every == 0: e_pct = e_pct_start + t * (e_pct_end - e_pct_start) / iter_n input = clip_extreme_value(input, e_pct) # clip pixels with small norm if clip_small_norm and (t+1) % clip_small_norm_every == 0: n_pct = n_pct_start + t * (n_pct_end - n_pct_start) / iter_n input = clip_small_norm_value(input, n_pct) # unshift if image_jitter: input = np.roll(np.roll(input, -ox, -1), -oy, -2) # L_2 decay input = (1-decay) * input # gaussian blur if image_blur: if len(input_size) == 3: input = gaussian_blur(input, sigma) else: for i in range(input.shape[1]): input[:, i] = gaussian_blur(input[:, i], sigma) # disp info if (t+1) % disp_every == 0: print('iter=%d; err=%g;' % (t+1, err)) # save image if save_intermediate and ((t+1) % save_intermediate_every == 0): if len(input_size) == 3: save_name = '%05d.jpg' % (t+1) PIL.Image.fromarray(normalise_img(img_deprocess(input, img_mean, img_std, norm))).save( os.path.join(save_intermediate_path, save_name)) else: save_stim = input # if you install cv2 and ffmpeg, you can use save_video function which save preferred video as video format save_name = '%05d.avi' % (t + 1) save_video(normalise_vid(vid_deprocess(save_stim, img_mean, img_std, norm)), save_name, save_intermediate_path, bgr, fr_rate=30) save_name = '%05d.gif' % (t + 1) save_gif(normalise_vid(vid_deprocess(save_stim, img_mean, img_std, norm)), save_name, save_intermediate_path, bgr, fr_rate=150) # return img if len(input_size) == 3: return img_deprocess(input, img_mean, img_std, norm), loss_list else: return vid_deprocess(input, img_mean, img_std, norm), loss_list
def optim_selection(self): if self.config.optim == "Nesterov": return optim.SGD( self.model.parameters(), lr=self.config.lr, momentum=0.9, nesterov=True, weight_decay=0.0001, ) elif self.config.optim == "SGD": # weight_decay = l2 regularization return optim.SGD( self.model.parameters(), lr=self.config.lr, momentum=0.9, nesterov=False, weight_decay=0.0001, ) elif self.config.optim == "Adadelta": # default lr = 1.0 return optim.Adadelta( self.model.parameters(), lr=self.config.lr, rho=0.9, eps=1e-06, weight_decay=1e-6, ) elif self.config.optim == "Adagrad": # default lr = 0.01 return optim.Adagrad( self.model.parameters(), lr=self.config.lr, lr_decay=0, weight_decay=1e-6, initial_accumulator_value=0, eps=1e-10, ) elif self.config.optim == "Adam": # default lr=0.001 return optim.Adam(self.model.parameters(), lr=self.config.lr, weight_decay=1e-6) elif self.config.optim == "AdamW": # default lr=0.001 return optim.AdamW( self.model.parameters(), lr=self.config.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False, ) elif self.config.optim == "SparseAdam": # default lr = 0.001 return optim.SparseAdam( self.model.parameters(), lr=self.config.lr, betas=(0.9, 0.999), eps=1e-08, ) elif self.config.optim == "Adamax": # default lr=0.002 return optim.Adamax( self.model.parameters(), lr=self.config.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-6, ) elif self.config.optim == "ASGD": return optim.ASGD( self.model.parameters(), lr=self.config.lr, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=1e-6, ) elif self.config.optim == "RMSprop": # default lr=0.01 return optim.RMSprop( self.model.parameters(), lr=self.config.lr, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False, ) elif self.config.optim == "Rprop": # default lr=0.01 return optim.Rprop( self.model.parameters(), lr=self.config.lr, etas=(0.5, 1.2), step_sizes=(1e-06, 50), )
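# Hedged usage sketch (not from the original source): `optim_selection` only
# touches `self.config.optim`, `self.config.lr` and `self.model`, so, assuming
# the def above is reachable as a plain function and torch.optim is imported as
# `optim`, a stub "self" built from SimpleNamespace is enough to exercise it.
# The stub names below are illustrative assumptions.
from types import SimpleNamespace
import torch.nn as nn

stub = SimpleNamespace(config=SimpleNamespace(optim="AdamW", lr=1e-3),
                       model=nn.Linear(4, 2))  # placeholder config and model
optimizer = optim_selection(stub)  # dispatches to optim.AdamW with lr=1e-3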
{"params": model.parameters()}, optim.LBFGS(lr=0.1, params=model.parameters()), id="LBFGSConf", ), pytest.param( "RMSprop", {"lr": 0.1}, {"params": model.parameters()}, optim.RMSprop(lr=0.1, params=model.parameters()), id="RMSpropConf", ), pytest.param( "Rprop", {"lr": 0.1}, {"params": model.parameters()}, optim.Rprop(lr=0.1, params=model.parameters()), id="RpropConf", ), pytest.param( "SGD", {"lr": 0.1}, {"params": model.parameters()}, optim.SGD(lr=0.1, params=model.parameters()), id="SGDConf", ), pytest.param( "SparseAdam", {"lr": 0.1}, {"params": model.parameters()}, optim.SparseAdam(lr=0.1, params=model.parameters()), id="SparseAdamConf",
def run_model(trainloader, validloader, epochs, use_rprop, learning_rate, momentum=0, etas=None, step_sizes=None, num_filters=6, fc1_size=120, fc2_size=84, save_weights=False, gpu=False): ''' :param use_rprop: True if using rprop optimizer, False if using SGD optimizer :param learning_rate: :param momentum: :return: ''' # set up the model and optimizer net = create_model(num_filters, fc1_size, fc2_size) if gpu: print('using gpu!!!!!') net = net.cuda() criterion = nn.CrossEntropyLoss() if (use_rprop): print("using rprop!!!!") optimizer = optim.Rprop( net.parameters(), lr=learning_rate, etas=etas, step_sizes=step_sizes ) #(default params: lr = 0.01, etas = (0.5,1.2), step_sizes(1e-06,50)) print("etas2: ", etas) print("Step2: ", step_sizes) print("opt: ", optimizer) else: print("using sgd!!!") optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum) # train the model weights = [[net.conv1.weight], [net.conv2.weight], [net.fc1.weight], [net.fc2.weight], [net.fc3.weight]] start_time = datetime.now() for epoch in range(epochs): # loop over the dataset multiple times running_loss = 0.0 total_train = 0 for i, data in enumerate(trainloader, 0): # get the inputs; data is a list of [inputs, labels] inputs, labels = data # zero the parameter gradients optimizer.zero_grad() # forward + backward + optimize outputs = net(inputs) loss = criterion(outputs, labels) loss.backward() optimizer.step() # print statistics running_loss += loss.item() if i % 2000 == 1999: # print every 2000 mini-batches print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000)) running_loss = 0.0 total_train += labels.size(0) weights[0].append(net.conv1.weight) weights[1].append(net.conv2.weight) weights[2].append(net.fc1.weight) weights[3].append(net.fc2.weight) weights[4].append(net.fc3.weight) # test the model ont he validation set correct = 0 total = 0 with torch.no_grad(): for data in validloader: images, labels = data outputs = net(images) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() print('Accuracy of the network on the ', total, ' validation images: %d %%' % (100 * correct / total)) valid_accuracy = 100 * correct / total # do timing stuff end_time = datetime.now() total_time = end_time - start_time print('Finished Training in: ', total_time) timer_arr.append(total_time) print("train size: ", total_train) if save_weights: pickle.dump(weights, open("weights.p", "wb")) return valid_accuracy, net
        # the first 1 is the input dimension, the second is the output dimension
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        out = self.linear(x)
        return out


if __name__ == '__main__':
    model = LR()
    # define the loss function
    criterion = nn.MSELoss()
    # define the optimizer
    optimizer = optim.Rprop(model.parameters(), lr=.0001)
    # train the model
    num_epochs = 1000
    for epoch in range(num_epochs):
        inputs = Variable(x_train)
        target = Variable(y_train)  # ground-truth values
        out = model(inputs)  # predicted values
        loss = criterion(out, target)
        # the optimizer is used to update the network parameters
        optimizer.zero_grad()
        # compute the gradients
        loss.backward()
        # update the parameters
        optimizer.step()
        if (epoch + 1) % 1 == 0:
def get_optimizer(optimizer, optimizer_config, params):
    '''
    get the optimizer of the worker model; any hyper-parameter missing from
    optimizer_config is filled with the corresponding PyTorch default
    '''
    # map each supported name to its optimizer class and default hyper-parameters
    # (the original code constructed optim.SGD for every name, which rejects
    # most of these keyword arguments)
    supported = {
        'SGD': (optim.SGD, {'lr': 0, 'momentum': 0, 'dampening': 0,
                            'weight_decay': 0, 'nesterov': False}),
        'Rprop': (optim.Rprop, {'lr': 1e-2, 'etas': (0.5, 1.2),
                                'step_sizes': (1e-6, 50)}),
        'RMSprop': (optim.RMSprop, {'lr': 1e-2, 'alpha': 0.99, 'eps': 1e-8,
                                    'weight_decay': 0, 'momentum': 0,
                                    'centered': False}),
        'LBFGS': (optim.LBFGS, {'lr': 1, 'max_iter': 20, 'max_eval': None,
                                'tolerance_grad': 1e-5, 'tolerance_change': 1e-9,
                                'history_size': 100, 'line_search_fn': None}),
        # t0 default corrected to PyTorch's 1e6 (was 1e-6)
        'ASGD': (optim.ASGD, {'lr': 1e-2, 'lambd': 1e-4, 'alpha': 0.75,
                              't0': 1e6, 'weight_decay': 0}),
        'Adamax': (optim.Adamax, {'lr': 0.002, 'betas': (0.9, 0.999),
                                  'eps': 1e-08, 'weight_decay': 0}),
        'SparseAdam': (optim.SparseAdam, {'lr': 0.001, 'betas': (0.9, 0.999),
                                          'eps': 1e-08}),
        'Adam': (optim.Adam, {'lr': 0.001, 'betas': (0.9, 0.999),
                              'eps': 1e-08, 'weight_decay': 0}),
        'Adagrad': (optim.Adagrad, {'lr': 0.01, 'lr_decay': 0,
                                    'weight_decay': 0}),
        'Adadelta': (optim.Adadelta, {'lr': 1.0, 'rho': 0.9, 'eps': 1e-06,
                                      'weight_decay': 0}),
    }
    if optimizer not in supported:
        raise ValueError(
            'unsupported optimizer: the available names and defaults are the '
            'same as the original PyTorch optimizers, please check again!')
    optimizer_class, default_config = supported[optimizer]
    # fill in any missing hyper-parameters with the defaults above
    for key, value in default_config.items():
        optimizer_config.setdefault(key, value)
    kwargs = {key: optimizer_config[key] for key in default_config}
    return optimizer_class(params, **kwargs)
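# Hedged usage sketch (not from the original source): missing hyper-parameters
# are filled with PyTorch defaults, so a partial optimizer_config is enough.
# `net` is a placeholder model used only for illustration.
import torch.nn as nn

net = nn.Linear(8, 3)  # stand-in model
rmsprop = get_optimizer('RMSprop', {'lr': 0.005, 'momentum': 0.9},
                        net.parameters())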
def tune_train_eval(loader, model, criterion, metric, config, tuned, reporter):
    for key, value in tuned.items():
        config[key] = str(value)
    # choose model type (whether DataParallel or not)
    if 'multiGPU' in config.keys() and config['multiGPU'] == 'Y':
        model = nn.DataParallel(model)
    if torch.cuda.is_available():
        model = model.cuda()
    # default optimizer -> adam
    optimizer = optim.Adam(model.parameters())
    # start setting optimizer
    if 'optimizer' in config.keys():
        if config['optimizer'] == 'sgd':
            optimizer = optim.SGD(model.parameters(),
                                  lr=float(config['learning_rate']))
        elif config['optimizer'] == 'rmsprop':
            optimizer = optim.RMSprop(model.parameters())
        elif config['optimizer'] == 'adadelta':
            optimizer = optim.Adadelta(model.parameters())
        elif config['optimizer'] == 'adagrad':
            optimizer = optim.Adagrad(model.parameters())
        elif config['optimizer'] == 'sparseAdam':
            optimizer = optim.SparseAdam(model.parameters())
        elif config['optimizer'] == 'adamax':
            # was "optimier", a typo that silently kept the Adam default
            optimizer = optim.Adamax(model.parameters())
        elif config['optimizer'] == 'asgd':
            optimizer = optim.ASGD(model.parameters())
        elif config['optimizer'] == 'lbfgs':
            optimizer = optim.LBFGS(model.parameters())
        elif config['optimizer'] == 'rprop':
            optimizer = optim.Rprop(model.parameters())
        optimizer.param_groups[0]['lr'] = float(config['learning_rate'])
        if 'momentum' in config.keys():
            optimizer.param_groups[0]['momentum'] = float(config['momentum'])
        if 'lr_decay' in config.keys():
            optimizer.param_groups[0]['lr_decay'] = float(config['lr_decay'])
        if 'weight_decay' in config.keys():
            optimizer.param_groups[0]['weight_decay'] = float(
                config['weight_decay'])
        if 'amsgrad' in config.keys():
            optimizer.param_groups[0]['amsgrad'] = eval(config['amsgrad'])
        if 'nesterov' in config.keys():
            optimizer.param_groups[0]['nesterov'] = float(config['nesterov'])
    # end setting optimizer
    # prepare model save dir
    if 'model_save_dir' in config.keys():
        if not os.path.isdir(config['model_save_dir']):
            os.mkdir(config['model_save_dir'])
    # prepare trainLoader and testLoader separately
    trainLoader = loader.trainLoader
    testLoader = loader.testLoader

    def train_epoch(epoch, loader, model, criterion, config, optimizer):
        if type(model) == list:
            for eachModel in model:
                eachModel.train()
        else:
            model.train()
        for batch_idx, (data, target) in enumerate(loader):
            if torch.cuda.is_available():
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            if 'train_log_interval' in config.keys():
                if batch_idx % int(config['train_log_interval']) == 0:
                    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, batch_idx * len(data), len(loader.dataset), 100.
* batch_idx / len(loader), loss.data.item())) if 'mlflow_tracking_URI' in config.keys(): mlflow.log_metric('train_loss', loss.data.item()) def test_epoch(loader, model, criterion, metric, config): model.eval() test_loss = 0 correct = 0 predictions = [] answers = [] with torch.no_grad(): for batch_idx, (data, target) in enumerate(loader): if torch.cuda.is_available(): data, target = data.cuda(), target.cuda() data, target = Variable(data), Variable(target) output = model(data) test_loss += criterion( output, target).sum().item() # sum up batch loss #apply custom metric(in this case, Accuracy) predictions += list(output.data.max(1)[1].cpu().numpy( )) # get the index of the max log-probability answers += list(target.data.cpu().numpy()) test_loss /= len(loader.dataset) test_accuracy = metric.evaluate(predictions, answers) print('\nTest set: Average loss: {:.4f}, Accuracy: ({:.2f}%)\n'.format( test_loss, test_accuracy * 100)) if 'mlflow_tracking_URI' in config.keys(): mlflow.log_metric('test_loss', test_loss) mlflow.log_metric('test_accuracy', test_accuracy) print('test acc:' + str(test_accuracy)) reporter(mean_loss=test_loss, mean_accuracy=test_accuracy) #set MLflow tracking server if 'mlflow_tracking_URI' in config.keys(): print("MLflow Tracking URI: %s" % (config['mlflow_tracking_URI'])) mlflow.set_tracking_uri(config['mlflow_tracking_URI']) with mlflow.start_run(): print('setting parameters') for key, value in config.items(): mlflow.log_param(key, value) print(key + '\t:\t' + value) for epoch in range(1, int(config['epoch']) + 1): print('epoch: ' + str(epoch)) train_epoch(epoch, trainLoader, model, criterion, config, optimizer) if 'model_save_dir' in config.keys( ) and 'model_save_interval' in config.keys(): if epoch % int(config['model_save_interval']) == 0: if 'multiGPU' in config.keys( ) and config['multiGPU'] == 'Y': torch.save( model.module, os.getcwd() + os.sep + config['model_save_dir'] + os.sep + config['model_name_prefix'] + '_epoch_' + str(epoch) + '.pkl') else: torch.save( model, os.getcwd() + os.sep + config['model_save_dir'] + os.sep + config['model_name_prefix'] + '_epoch_' + str(epoch) + '.pkl') print('model saved: ' + os.getcwd() + os.sep + config['model_save_dir'] + os.sep + config['model_name_prefix'] + '_epoch_' + str(epoch) + '.pkl') test_epoch(testLoader, model, criterion, metric, config)
def fit(self, training_data, validation_data, options, device=None, detail=False): """ Perform the training. This is not called "train" because the base class already defines that method with a different meaning. The base class "train" method puts the Module into "training mode". """ print( "Training {} using {} rows of featurized training input...".format( self.name(), training_data.num_rows)) start = time.time() loss_function = nn.NLLLoss() initial_rate = options.learning_rate lr_scheduler = options.lr_scheduler oo = options.optimizer_options if options.optimizer == "Adadelta": optimizer = optim.Adadelta(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, rho=oo.rho, eps=oo.eps) elif options.optimizer == "Adagrad": optimizer = optim.Adagrad(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, lr_decay=oo.lr_decay) elif options.optimizer == "Adam": optimizer = optim.Adam(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, betas=oo.betas, eps=oo.eps) elif options.optimizer == "Adamax": optimizer = optim.Adamax(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, betas=oo.betas, eps=oo.eps) elif options.optimizer == "ASGD": optimizer = optim.ASGD(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, lambd=oo.lambd, alpha=oo.alpha, t0=oo.t0) elif options.optimizer == "RMSprop": optimizer = optim.RMSprop(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, eps=oo.eps, alpha=oo.alpha, momentum=oo.momentum, centered=oo.centered) elif options.optimizer == "Rprop": optimizer = optim.Rprop(self.parameters(), lr=initial_rate, etas=oo.etas, step_sizes=oo.step_sizes) elif options.optimizer == "SGD": optimizer = optim.SGD(self.parameters(), lr=initial_rate, weight_decay=oo.weight_decay, momentum=oo.momentum, dampening=oo.dampening) print(optimizer) num_epochs = options.max_epochs batch_size = options.batch_size learning_rate = options.learning_rate lr_min = options.lr_min lr_peaks = options.lr_peaks ticks = training_data.num_rows / batch_size # iterations per epoch total_iterations = ticks * num_epochs gamma = options.lr_gamma if not lr_min: lr_min = learning_rate scheduler = None if lr_scheduler == "TriangleLR": steps = lr_peaks * 2 + 1 stepsize = num_epochs / steps scheduler = TriangularLR(optimizer, stepsize * ticks, lr_min, learning_rate, gamma) elif lr_scheduler == "CosineAnnealingLR": # divide by odd number to finish on the minimum learning rate cycles = lr_peaks * 2 + 1 scheduler = optim.lr_scheduler.CosineAnnealingLR( optimizer, T_max=total_iterations / cycles, eta_min=lr_min) elif lr_scheduler == "ExponentialLR": scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma) elif lr_scheduler == "StepLR": scheduler = optim.lr_scheduler.StepLR( optimizer, step_size=options.lr_step_size, gamma=gamma) elif lr_scheduler == "ExponentialResettingLR": reset = (num_epochs * ticks) / 3 # reset at the 1/3 mark. scheduler = ExponentialResettingLR(optimizer, gamma, reset) # optimizer = optim.Adam(model.parameters(), lr=0.0001) log = [] for epoch in range(num_epochs): self.train() iteration = 0 for i_batch, (audio, labels) in enumerate( training_data.get_data_loader(batch_size)): if not self.batch_first: audio = audio.transpose(1, 0) # GRU wants seq,batch,feature if device: audio = audio.to(device) labels = labels.to(device) # Also, we need to clear out the hidden state, # detaching it from its history on the last instance. 
self.init_hidden() # Before the backward pass, use the optimizer object to zero all of the # gradients for the variables it will update (which are the learnable # weights of the model). This is because by default, gradients are # accumulated in buffers( i.e, not overwritten) whenever .backward() # is called. Checkout docs of torch.autograd.backward for more details. optimizer.zero_grad() # Run our forward pass. keyword_scores = self(audio) # Compute the loss, gradients loss = loss_function(keyword_scores, labels) # Backward pass: compute gradient of the loss with respect to all the learnable # parameters of the model. Internally, the parameters of each Module are stored # in Tensors with requires_grad=True, so this call will compute gradients for # all learnable parameters in the model. loss.backward() # move to next learning rate if scheduler: scheduler.step() # Calling the step function on an Optimizer makes an update to its parameters # applying the gradients we computed during back propagation optimizer.step() learning_rate = optimizer.param_groups[0]['lr'] if detail: learning_rate = optimizer.param_groups[0]['lr'] log += [{ 'iteration': iteration, 'loss': loss.item(), 'learning_rate': learning_rate }] iteration += 1 # Find the best prediction in each sequence and return it's accuracy passed, total, rate = self.evaluate(validation_data, batch_size, device) learning_rate = optimizer.param_groups[0]['lr'] print( "Epoch {}, Loss {}, Validation Accuracy {:.3f}, Learning Rate {}" .format(epoch, loss.item(), rate * 100, learning_rate)) log += [{ 'epoch': epoch, 'loss': loss.item(), 'accuracy': rate, 'learning_rate': learning_rate }] end = time.time() print("Trained in {:.2f} seconds".format(end - start)) return log
def get_optimizer(optimizer_name, optimizer_dict, network_params):
    if optimizer_name == "Adadelta":
        optimizer = optim.Adadelta(network_params, lr=optimizer_dict["lr"],
                                   rho=optimizer_dict["rho"], eps=optimizer_dict["eps"],
                                   weight_decay=optimizer_dict["weight_decay"])
    if optimizer_name == "Adagrad":
        optimizer = optim.Adagrad(network_params, lr=optimizer_dict["lr"],
                                  lr_decay=optimizer_dict["lr_decay"], eps=optimizer_dict["eps"],
                                  weight_decay=optimizer_dict["weight_decay"])
    if optimizer_name == "Adam":
        optimizer = optim.Adam(network_params, lr=optimizer_dict["lr"],
                               betas=eval(optimizer_dict["betas"]), eps=optimizer_dict["eps"],
                               weight_decay=optimizer_dict["weight_decay"],
                               amsgrad=optimizer_dict["amsgrad"])
    if optimizer_name == "AdamW":
        optimizer = optim.AdamW(network_params, lr=optimizer_dict["lr"],
                                betas=eval(optimizer_dict["betas"]), eps=optimizer_dict["eps"],
                                weight_decay=optimizer_dict["weight_decay"],
                                amsgrad=optimizer_dict["amsgrad"])
    if optimizer_name == "SparseAdam":
        optimizer = optim.SparseAdam(network_params, lr=optimizer_dict["lr"],
                                     betas=eval(optimizer_dict["betas"]),
                                     eps=optimizer_dict["eps"])
    if optimizer_name == "Adamax":
        optimizer = optim.Adamax(network_params, lr=optimizer_dict["lr"],
                                 betas=eval(optimizer_dict["betas"]), eps=optimizer_dict["eps"],
                                 weight_decay=optimizer_dict["weight_decay"])
    if optimizer_name == "ASGD":
        optimizer = optim.ASGD(network_params, lr=optimizer_dict["lr"],
                               lambd=optimizer_dict["lambd"], alpha=optimizer_dict["alpha"],
                               t0=optimizer_dict["t0"],
                               weight_decay=optimizer_dict["weight_decay"])
    if optimizer_name == "LBFGS":
        optimizer = optim.LBFGS(network_params, lr=optimizer_dict["lr"],
                                max_iter=optimizer_dict["max_iter"],
                                max_eval=optimizer_dict["max_eval"],
                                tolerance_grad=optimizer_dict["tolerance_grad"],
                                tolerance_change=optimizer_dict["tolerance_change"],
                                history_size=optimizer_dict["history_size"],
                                line_search_fn=optimizer_dict["line_search_fn"])
    if optimizer_name == "RMSprop":
        optimizer = optim.RMSprop(network_params, lr=optimizer_dict["lr"],
                                  alpha=optimizer_dict["alpha"], eps=optimizer_dict["eps"],
                                  momentum=optimizer_dict["momentum"],
                                  weight_decay=optimizer_dict["weight_decay"],
                                  centered=optimizer_dict["centered"])
    if optimizer_name == "Rprop":
        # Rprop takes "etas", not "eta"
        optimizer = optim.Rprop(network_params, lr=optimizer_dict["lr"],
                                etas=optimizer_dict["etas"],
                                step_sizes=optimizer_dict["step_sizes"])
    if optimizer_name == "SGD":
        optimizer = optim.SGD(network_params, lr=optimizer_dict["lr"],
                              momentum=optimizer_dict["momentum"],
                              dampening=optimizer_dict["dampening"],
                              weight_decay=optimizer_dict["weight_decay"],
                              nesterov=optimizer_dict["nesterov"])
    if optimizer_name is None:
        optimizer = None
    return optimizer
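# Hedged usage sketch (not from the original source): unlike the earlier
# variant, this get_optimizer fills in no defaults, so optimizer_dict must
# carry every key the chosen branch reads. The SGD dictionary and stand-in
# model below are illustrative assumptions.
import torch.nn as nn

model = nn.Linear(8, 3)  # placeholder model
sgd_dict = {"lr": 0.01, "momentum": 0.9, "dampening": 0,
            "weight_decay": 1e-4, "nesterov": True}
optimizer = get_optimizer("SGD", sgd_dict, model.parameters())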
def neuro_fitt_q(epoch, train_env_max_steps, eval_env_max_steps, discount, init_experience=0, seed=None): """Run NFQ.""" CONFIG = AlgorithmConfig( EPOCH=epoch, TRAIN_ENV_MAX_STEPS=train_env_max_steps, EVAL_ENV_MAX_STEPS=eval_env_max_steps, DISCOUNT=discount, INIT_EXPERIENCE=init_experience, INCREMENT_EXPERIENCE=True, HINT_TO_GOAL=True, RANDOM_SEED=seed, TRAIN_RENDER=False, EVAL_RENDER=False, SAVE_PATH="", LOAD_PATH="", USE_TENSORBOARD=False, USE_WANDB=False, ) # Log to File, Console, TensorBoard, W&B logger = get_logger() # Setup environment train_env = CartPoleRegulatorEnv(mode="train", max_steps=train_env_max_steps) eval_env = CartPoleRegulatorEnv(mode="eval", max_steps=eval_env_max_steps) # Fix random seeds if CONFIG.RANDOM_SEED is not None: make_reproducible(CONFIG.RANDOM_SEED, use_numpy=True, use_torch=True) train_env.seed(CONFIG.RANDOM_SEED) eval_env.seed(CONFIG.RANDOM_SEED) #else: # logger.warning("Running without a random seed: this run is NOT reproducible.") # Setup agent nfq_net = NFQNetwork() optimizer = optim.Rprop(nfq_net.parameters()) nfq_agent = NFQAgent(nfq_net, optimizer) # Load trained agent # if CONFIG.LOAD_PATH: # load_models(CONFIG.LOAD_PATH, nfq_net=nfq_net, optimizer=optimizer) # NFQ Main loop # A set of transition samples denoted as D all_rollouts = [] total_cost = 0 if CONFIG.INIT_EXPERIENCE: for _ in range(CONFIG.INIT_EXPERIENCE): rollout, episode_cost = train_env.generate_rollout( None, render=CONFIG.TRAIN_RENDER) all_rollouts.extend(rollout) total_cost += episode_cost # Sized EPOCH + 1 so the final index of the loop below stays in bounds stats = EpisodeStats(episode_lengths=np.zeros(CONFIG.EPOCH + 1), episode_rewards=np.zeros(CONFIG.EPOCH + 1)) for epoch in range(CONFIG.EPOCH + 1): # Variant 1: Incrementally add transitions (Section 3.4) # TODO(seungjaeryanlee): Done before or after training? if CONFIG.INCREMENT_EXPERIENCE: new_rollout, episode_cost = train_env.generate_rollout( nfq_agent.get_best_action, render=CONFIG.TRAIN_RENDER) all_rollouts.extend(new_rollout) total_cost += episode_cost state_action_b, target_q_values = nfq_agent.generate_pattern_set( all_rollouts) # Variant 2: Clamp function to zero in goal region # TODO(seungjaeryanlee): Since this is a regulator setting, should it # not be clamped to zero? if CONFIG.HINT_TO_GOAL: goal_state_action_b, goal_target_q_values = train_env.get_goal_pattern_set( ) goal_state_action_b = torch.FloatTensor(goal_state_action_b) goal_target_q_values = torch.FloatTensor(goal_target_q_values) state_action_b = torch.cat([state_action_b, goal_state_action_b], dim=0) target_q_values = torch.cat( [target_q_values, goal_target_q_values], dim=0) loss = nfq_agent.train((state_action_b, target_q_values)) # TODO(seungjaeryanlee): Evaluation should be done with 3000 episodes eval_episode_length, eval_success, eval_episode_cost = nfq_agent.evaluate( eval_env, CONFIG.EVAL_RENDER) if eval_success: break #stats.episode_rewards[epoch] = eval_episode_cost stats.episode_rewards[epoch] = eval_episode_length + 1 stats.episode_lengths[epoch] = eval_episode_length train_env.close() eval_env.close() return stats
model.cuda() log_tr = LogMeters(args.prefix + args.optimizer + '_Train', n_classes) log_te = LogMeters(args.prefix + args.optimizer + '_Test', n_classes) if args.optimizer == 'SGD': optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0.9) elif args.optimizer == 'Adam': optimizer = optim.Adam(model.parameters()) elif args.optimizer == 'LBFGS': optimizer = optim.LBFGS(model.parameters()) elif args.optimizer == 'RMSprop': optimizer = optim.RMSprop(model.parameters()) elif args.optimizer == 'Rprop': optimizer = optim.Rprop(model.parameters()) elif args.optimizer == 'Adadelta': optimizer = optim.Adadelta(model.parameters()) elif args.optimizer == 'Adabound': optimizer = adabound.AdaBound(model.parameters(), lr=1e-3, final_lr=0.1) else: raise ValueError('Unsupported optimizer: {}'.format(args.optimizer)) # learning rate decay # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[4,8,12], gamma=0.9) def train(epoch, data_loader, log): model.train() log.reset() total_loss, total_batches = 0.0, 0.0
# define a neural network using the customised structure net = CasperNetwork(input_neurons, output_neurons) # define loss function loss_func = torch.nn.CrossEntropyLoss() # define optimiser with per layer learning rates # optimiser without any hidden neurons optimiser = optim.Rprop([{ 'params': net.Initial.parameters(), 'lr': L1 }, { 'params': net.output_layer.parameters() }, { 'params': net.old_input_neurons.parameters() }, { 'params': net.old_output_neurons.parameters() }], lr=L3, etas=(0.5, 1.2), step_sizes=(1e-06, 50)) # store all losses for visualisation all_losses = [] previous_loss = None # train a neural network for epoch in range(num_epochs): # Perform forward pass: compute predicted y by passing x to the model.
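# Per-parameter-group options with Rprop, as in the Casper optimiser above: groups
# that omit 'lr' fall back to the constructor default (L3), while the newly added
# layer gets its own, larger rate (L1). The two-layer stand-in network and the
# L1/L3 values below are illustrative assumptions, not the CasperNetwork itself.
import torch.nn as nn
import torch.optim as optim

net = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 3))
L1, L3 = 0.2, 0.005
optimiser = optim.Rprop(
    [{'params': net[0].parameters(), 'lr': L1},  # freshly added neurons: larger step
     {'params': net[1].parameters()}],           # inherits the default lr=L3
    lr=L3, etas=(0.5, 1.2), step_sizes=(1e-06, 50))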
def rprop_constructor(params): rprop = optim.Rprop(params, lr=1e-2) return StochasticWeightAveraging(rprop, swa_start=1000, swa_freq=1, swa_lr=1e-3)
x_test = [data[0] for data in test] y_test = [data[1] for data in test] # print(f"Total amount of 1's test: {sum(y_test)}/{len(y_test)}") print("creating network") test = PieceSelection() # cwd = os.getcwd() # # string = cwd + f"\\model games_{500} epoch_{10}.pb" # # test.load_state_dict(torch.load(string)) criterion = nn.CrossEntropyLoss() lr = .01 opt = optim.Rprop(test.parameters(), lr=.01) print("training") for epoch in range(36): running_loss = 0.0 opt = optim.SGD(test.parameters(), lr=lr * (.9**epoch), weight_decay=1e-5) for index, data in enumerate(x): # print(index) opt.zero_grad() output = test(torch.tensor([data]).type(torch.FloatTensor)) # print(len(x)) # print(len(y)) # print(output)
def rprop_constructor(params): rprop = optim.Rprop(params, lr=1e-2) return contriboptim.SWA(rprop, swa_start=1000, swa_freq=1, swa_lr=1e-3)
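# A usage sketch for the SWA-wrapped Rprop constructors above, assuming the
# torchcontrib package (github.com/pytorch/contrib): the wrapper is stepped like a
# normal optimizer, and swap_swa_sgd() copies the averaged weights back into the
# model once training ends. The model, data, and loop length are assumptions.
import torch
import torch.nn as nn
import torch.optim as optim
from torchcontrib.optim import SWA

model = nn.Linear(3, 1)
base = optim.Rprop(model.parameters(), lr=1e-2)
opt = SWA(base, swa_start=1000, swa_freq=1, swa_lr=1e-3)
loss_fn = nn.MSELoss()
for _ in range(2000):
    opt.zero_grad()
    loss = loss_fn(model(torch.randn(16, 3)), torch.randn(16, 1))
    loss.backward()
    opt.step()  # weight averaging kicks in after swa_start steps
opt.swap_swa_sgd()  # replace the weights with their SWA running average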
def get_optimizer(type, model, lr, wd): if type == 'SGD': optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=wd, momentum=0.9) elif type == 'ASGD': optimizer = optim.ASGD(model.parameters(), lr=lr, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=wd) elif type == 'Adam': optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd, amsgrad=True) elif type == 'Rprop': optimizer = optim.Rprop(model.parameters(), lr=lr, etas=(0.5, 1.2), step_sizes=(1e-06, 50)) elif type == 'Adagrad': optimizer = optim.Adagrad(model.parameters(), lr=lr, lr_decay=0, weight_decay=wd, initial_accumulator_value=0) elif type == 'Adadelta': optimizer = optim.Adadelta(model.parameters(), lr=lr, rho=0.9, eps=1e-06, weight_decay=wd) elif type == 'RMSprop': optimizer = optim.RMSprop(model.parameters(), lr=lr, alpha=0.99, eps=1e-08, weight_decay=wd, momentum=0, centered=False) elif type == 'Adamax': optimizer = optim.Adamax(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=wd) elif type == 'SparseAdam': optimizer = torch.optim.SparseAdam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08) elif type == 'LBFGS': optimizer = optim.LBFGS(model.parameters(), lr=lr, max_iter=20, max_eval=None, tolerance_grad=1e-05, tolerance_change=1e-09, history_size=100, line_search_fn=None) else: optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd, amsgrad=True) return optimizer
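# A minimal call into get_optimizer above; the model and the hyperparameter values
# are placeholder assumptions. Unknown names fall through to the Adam default branch.
import torch.nn as nn

model = nn.Linear(20, 2)
optimizer = get_optimizer('Rprop', model, lr=1e-2, wd=0.0)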
num_features = 1 for s in size: num_features *= s return num_features net = [Net(), Net(), Net(), Net()] optimizer = [None] * len(net) criterion = nn.MSELoss() lossrec = np.zeros([4, niter]) outputfin = [None] * 4 # train for ii, neti in enumerate(net): neti.cuda() optimizeri = optim.Rprop(neti.parameters(), lr=0.001) optimizer[ii] = optimizeri # wrap them in Variable inputstn = torch.FloatTensor( data['nninput'][ii] [0:-nvalid]).cuda() # cast tensors to a CUDA datatype labelstn = torch.FloatTensor( data['nnoutput'][ii] [0:-nvalid]).cuda() # cast tensors to a CUDA datatype inputs, labels = Variable(inputstn), Variable(labelstn) # ??? # zero the parameter gradients for jj in range(niter): optimizeri.zero_grad() # forward + backward + optimize
#import net2net as n2n loss_dict = {"categorical_crossentropy":F.nll_loss, "binary_crossentropy": F.binary_cross_entropy, "dice_loss": lf.dice_loss, "dice_loss_modified": lf.dice_loss_modified, "seg_binary_cross_entropy": lf.seg_binary_cross_entropy, "mse":torch.nn.MSELoss(size_average=False), "L1Loss":torch.nn.L1Loss(), "bce_localize":lf.bce_localize} optimizer_dict = {"adadelta":lambda model, lr: optim.Adadelta(model.parameters(), lr=lr), "adam":lambda model, lr: optim.Adam(model.parameters(), lr=lr), "svrg":lambda model, lr: opt.SVRG(model, lr=lr), "sgd":lambda model, lr: optim.SGD(model.parameters(), lr=lr), "rprop":lambda model, lr: optim.Rprop(model.parameters(), lr=lr)} weight_dict = {0:"weight", 1:"bias"} class BaseModel(nn.Module): """INSPIRED BY KERAS AND SCIKIT-LEARN API""" def __init__(self, problem_type="classification", loss_name="categorical_crossentropy", optimizer_name="adadelta"): super(BaseModel, self).__init__() self.loss_name = loss_name self.problem_type = problem_type self.my_optimizer = None self.optimizer_name = optimizer_name
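# A lookup sketch for the optimizer_dict factories above: each entry maps a name to
# a callable taking (model, lr). The placeholder model and learning rate below are
# assumptions for illustration only.
import torch.nn as nn

model = nn.Linear(5, 2)
my_optimizer = optimizer_dict["rprop"](model, 1e-2)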
def main(): """Run NFQ.""" # Setup hyperparameters parser = configargparse.ArgParser() parser.add("-c", "--config", required=True, is_config_file=True) parser.add("--EPOCH", type=int) parser.add("--TRAIN_ENV_MAX_STEPS", type=int) parser.add("--EVAL_ENV_MAX_STEPS", type=int) parser.add("--DISCOUNT", type=float) parser.add("--INIT_EXPERIENCE", type=int) parser.add("--INCREMENT_EXPERIENCE", action="store_true") parser.add("--HINT_TO_GOAL", action="store_true") parser.add("--RANDOM_SEED", type=int) parser.add("--TRAIN_RENDER", action="store_true") parser.add("--EVAL_RENDER", action="store_true") parser.add("--SAVE_PATH", type=str, default="") parser.add("--LOAD_PATH", type=str, default="") parser.add("--USE_TENSORBOARD", action="store_true") parser.add("--USE_WANDB", action="store_true") CONFIG = parser.parse_args() if not hasattr(CONFIG, "INCREMENT_EXPERIENCE"): CONFIG.INCREMENT_EXPERIENCE = False if not hasattr(CONFIG, "HINT_TO_GOAL"): CONFIG.HINT_TO_GOAL = False if not hasattr(CONFIG, "TRAIN_RENDER"): CONFIG.TRAIN_RENDER = False if not hasattr(CONFIG, "EVAL_RENDER"): CONFIG.EVAL_RENDER = False if not hasattr(CONFIG, "USE_TENSORBOARD"): CONFIG.USE_TENSORBOARD = False if not hasattr(CONFIG, "USE_WANDB"): CONFIG.USE_WANDB = False print() print( "+--------------------------------+--------------------------------+") print( "| Hyperparameters | Value |") print( "+--------------------------------+--------------------------------+") for arg in vars(CONFIG): print("| {:30} | {:<30} |".format( arg, getattr(CONFIG, arg) if getattr(CONFIG, arg) is not None else "")) print( "+--------------------------------+--------------------------------+") print() # Log to File, Console, TensorBoard, W&B logger = get_logger() if CONFIG.USE_TENSORBOARD: from torch.utils.tensorboard import SummaryWriter writer = SummaryWriter(log_dir="tensorboard_logs") if CONFIG.USE_WANDB: import wandb wandb.init(project="implementations-nfq", config=CONFIG) # Setup environment train_env = CartPoleRegulatorEnv(mode="train") eval_env = CartPoleRegulatorEnv(mode="eval") # Fix random seeds if CONFIG.RANDOM_SEED is not None: make_reproducible(CONFIG.RANDOM_SEED, use_numpy=True, use_torch=True) train_env.seed(CONFIG.RANDOM_SEED) eval_env.seed(CONFIG.RANDOM_SEED) else: logger.warning( "Running without a random seed: this run is NOT reproducible.") # Setup agent nfq_net = NFQNetwork() optimizer = optim.Rprop(nfq_net.parameters()) nfq_agent = NFQAgent(nfq_net, optimizer) # Load trained agent if CONFIG.LOAD_PATH: load_models(CONFIG.LOAD_PATH, nfq_net=nfq_net, optimizer=optimizer) # NFQ Main loop # A set of transition samples denoted as D all_rollouts = [] total_cost = 0 if CONFIG.INIT_EXPERIENCE: for _ in range(CONFIG.INIT_EXPERIENCE): rollout, episode_cost = train_env.generate_rollout( None, render=CONFIG.TRAIN_RENDER) all_rollouts.extend(rollout) total_cost += episode_cost for epoch in range(CONFIG.EPOCH + 1): # Variant 1: Incrementally add transitions (Section 3.4) # TODO(seungjaeryanlee): Done before or after training? if CONFIG.INCREMENT_EXPERIENCE: new_rollout, episode_cost = train_env.generate_rollout( nfq_agent.get_best_action, render=CONFIG.TRAIN_RENDER) all_rollouts.extend(new_rollout) total_cost += episode_cost state_action_b, target_q_values = nfq_agent.generate_pattern_set( all_rollouts) # Variant 2: Clamp function to zero in goal region # TODO(seungjaeryanlee): Since this is a regulator setting, should it # not be clamped to zero? 
if CONFIG.HINT_TO_GOAL: goal_state_action_b, goal_target_q_values = train_env.get_goal_pattern_set( ) goal_state_action_b = torch.FloatTensor(goal_state_action_b) goal_target_q_values = torch.FloatTensor(goal_target_q_values) state_action_b = torch.cat([state_action_b, goal_state_action_b], dim=0) target_q_values = torch.cat( [target_q_values, goal_target_q_values], dim=0) loss = nfq_agent.train((state_action_b, target_q_values)) # TODO(seungjaeryanlee): Evaluation should be done with 3000 episodes eval_episode_length, eval_success, eval_episode_cost = nfq_agent.evaluate( eval_env, CONFIG.EVAL_RENDER) if CONFIG.INCREMENT_EXPERIENCE: logger.info( "Epoch {:4d} | Train {:3d} / {:4.2f} | Eval {:4d} / {:5.2f} | Train Loss {:.4f}" .format( # noqa: B950 epoch, len(new_rollout), episode_cost, eval_episode_length, eval_episode_cost, loss, )) if CONFIG.USE_TENSORBOARD: writer.add_scalar("train/episode_length", len(new_rollout), epoch) writer.add_scalar("train/episode_cost", episode_cost, epoch) writer.add_scalar("train/loss", loss, epoch) writer.add_scalar("eval/episode_length", eval_episode_length, epoch) writer.add_scalar("eval/episode_cost", eval_episode_cost, epoch) if CONFIG.USE_WANDB: wandb.log({"Train Episode Length": len(new_rollout)}, step=epoch) wandb.log({"Train Episode Cost": episode_cost}, step=epoch) wandb.log({"Train Loss": loss}, step=epoch) wandb.log({"Evaluation Episode Length": eval_episode_length}, step=epoch) wandb.log({"Evaluation Episode Cost": eval_episode_cost}, step=epoch) else: logger.info( "Epoch {:4d} | Eval {:4d} / {:5.2f} | Train Loss {:.4f}". format(epoch, eval_episode_length, eval_episode_cost, loss)) if CONFIG.USE_TENSORBOARD: writer.add_scalar("train/loss", loss, epoch) writer.add_scalar("eval/episode_length", eval_episode_length, epoch) writer.add_scalar("eval/episode_cost", eval_episode_cost, epoch) if CONFIG.USE_WANDB: wandb.log({"Train Loss": loss}, step=epoch) wandb.log({"Evaluation Episode Length": eval_episode_length}, step=epoch) wandb.log({"Evaluation Episode Cost": eval_episode_cost}, step=epoch) if eval_success: logger.info( "Epoch {:4d} | Total Cycles {:6d} | Total Cost {:4.2f}".format( epoch, len(all_rollouts), total_cost)) if CONFIG.USE_TENSORBOARD: writer.add_scalar("summary/total_cycles", len(all_rollouts), epoch) writer.add_scalar("summary/total_cost", total_cost, epoch) if CONFIG.USE_WANDB: wandb.log({"Total Cycles": len(all_rollouts)}, step=epoch) wandb.log({"Total Cost": total_cost}, step=epoch) break # Save trained agent if CONFIG.SAVE_PATH: save_models(CONFIG.SAVE_PATH, nfq_net=nfq_net, optimizer=optimizer) train_env.close() eval_env.close()
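# The NFQ agents above build optim.Rprop(nfq_net.parameters()) with no explicit
# hyperparameters, i.e. PyTorch's defaults: lr=0.01, etas=(0.5, 1.2),
# step_sizes=(1e-06, 50). An equivalent explicit construction, using a toy network
# as a stand-in (an assumption, not the real NFQNetwork):
import torch.nn as nn
import torch.optim as optim

toy_net = nn.Sequential(nn.Linear(5, 5), nn.ReLU(), nn.Linear(5, 2))
optimizer = optim.Rprop(toy_net.parameters(), lr=0.01, etas=(0.5, 1.2),
                        step_sizes=(1e-06, 50))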
model.classifier = classifier if (args.optim == 'SGD'): optimizer = optim.SGD(model.classifier.parameters(), lr=args.rate) elif (args.optim == 'Adadelta'): optimizer = optim.Adadelta(model.classifier.parameters(), lr=args.rate) elif (args.optim == 'Adagrad'): optimizer = optim.Adagrad(model.classifier.parameters(), lr=args.rate) elif (args.optim == 'Adam'): optimizer = optim.Adam(model.classifier.parameters(), lr=args.rate) elif (args.optim == 'RMS'): optimizer = optim.RMSprop(model.classifier.parameters(), lr=args.rate) else: optimizer = optim.Rprop(model.classifier.parameters(), lr=args.rate) if args.loss == 'L1': criterion = nn.L1Loss() elif args.loss == 'NLL': criterion = nn.NLLLoss() elif args.loss == 'Poisson': criterion = nn.PoissonNLLLoss() elif args.loss == 'MSE': criterion = nn.MSELoss() else: criterion = nn.CrossEntropyLoss() optimizer.zero_grad() model.classifier = classifier epochs = args.epoch
# F.load_state_dict(torch.load("./model_tmp_pretr_10.pth", map_location=device)) print("Num. of params: {:d}".format(utils.get_parameters_count(model))) data = utils.read_pickle(['t', 'x', 'u'], args.data_path) dataset = utils.generate_torchgeom_dataset(data) if args.batch_size is None: batch_size = len(dataset) else: batch_size = args.batch_size print(dataset) loader = DataLoader(dataset, batch_size=batch_size, shuffle=True) optimizer = optim.Rprop(F.parameters(), lr=args.lr, step_sizes=(1e-8, 10.)) loss_fn = nn.MSELoss() # Training ts = dataset[0].t.shape[0] # assumes the same time grid for all sim-s. print(dataset[0].t) for epoch in range(args.epochs): losses = torch.zeros(len(loader)) for i, dp in enumerate(loader): optimizer.zero_grad() edge_index = dp.edge_index pos = dp.pos with torch.no_grad():
dataset = utils.generate_torchgeom_dataset(data, sig=0.0) # ######### sim_inds = [0] #np.random.choice(len(dataset), n_s, replace=False) print(f'sim_inds = {sim_inds}') dataset = [ds for i, ds in enumerate(dataset) if i in sim_inds] print(f'dataset length: {len(dataset)}') # ######### if args.batch_size is None: batch_size = len(dataset) loader = DataLoader(dataset, batch_size=batch_size, shuffle=True) # Optimizer, loss optimizer = optim.Rprop(F.parameters(), lr=args.lr) loss_fn = nn.MSELoss() # Training ts = dataset[0].t.shape[0] # assumes the same time grid for all sim-s. print("%%%%%%%%%%%%%%%%%%") print(dataset[0]) for epoch in range(args.epochs): losses = torch.zeros(len(loader)) for i, dp in enumerate(loader): optimizer.zero_grad() params_dict = {"edge_index": dp.edge_index.to(device), "pos": dp.pos.to(device)} F.update_params(params_dict)
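# Note on the Rprop arguments used in the two training scripts above: etas is the
# pair (eta_minus, eta_plus) of multiplicative decrease/increase factors applied to
# each parameter's step size, and step_sizes=(min, max) clamps that step, so a
# narrow clamp such as step_sizes=(1e-8, 10.) bounds how large any single update
# can grow. An illustrative constructor call with an assumed stand-in for the
# network F:
import torch.nn as nn
import torch.optim as optim

F_net = nn.Linear(6, 6)
optimizer = optim.Rprop(F_net.parameters(), lr=1e-3, etas=(0.5, 1.2),
                        step_sizes=(1e-08, 10.0))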
if __name__ == "__main__": epoches = 100 lr = 1e-2 batch_size = 30 transform = torch.tensor device = torch.device('cpu') trainset = PointDataset('./labels/label.csv', transform=transform) trainloader = DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True, num_workers=4) testset = PointDataset('./labels/test.csv', transform=transform) testloader = DataLoader(dataset=testset, batch_size=batch_size, shuffle=True, num_workers=4) # show_original_points() classifier_net = Network(2, 5, 3).to(device) optimizer1 = optim.SGD(classifier_net.parameters(), lr=lr, momentum=0) optimizer2 = optim.Adam(classifier_net.parameters(), lr=lr) optimizer3 = optim.Rprop(classifier_net.parameters(), lr=lr) classifier_net = train(classifier_net, trainloader, testloader, device, lr, optimizer3)
def load_network(self): logger.info("Start loading network, loss function and optimizer") # Load a network # self.net = VGG('VGG11') self.net = ResNet18() # Move network to GPU if needed if self.args.gpu: self.net.to('cuda') # Define the loss function and the optimizer self.criterion = nn.CrossEntropyLoss() if self.args.optimizer.lower() == 'adadelta': logger.info("Selected adadelta as optimizer") self.optimizer = optim.Adadelta(self.net.parameters(), lr=1.0, rho=0.9, eps=1e-06, weight_decay=0) elif self.args.optimizer.lower() == 'adagrad': logger.info("Selected adagrad as optimizer") self.optimizer = optim.Adagrad(self.net.parameters(), lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0) elif self.args.optimizer.lower() == 'adam': logger.info("Selected adam as optimizer") self.optimizer = optim.Adam(self.net.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False) elif self.args.optimizer.lower() == 'sparseadam': logger.info("Selected sparseadam as optimizer") self.optimizer = optim.SparseAdam(self.net.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08) elif self.args.optimizer.lower() == 'adamax': logger.info("Selected adamax as optimizer") self.optimizer = optim.Adamax(self.net.parameters(), lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0) elif self.args.optimizer.lower() == 'asgd': logger.info("Selected asgd as optimizer") self.optimizer = optim.ASGD(self.net.parameters(), lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0) elif self.args.optimizer.lower() == 'lbfgs': logger.info("Selected lbfgs as optimizer") self.optimizer = optim.LBFGS(self.net.parameters(), lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-05, tolerance_change=1e-09, history_size=100, line_search_fn=None) elif self.args.optimizer.lower() == 'rmsprop': logger.info("Selected rmsprop as optimizer") self.optimizer = optim.RMSprop(self.net.parameters(), lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False) elif self.args.optimizer.lower() == 'rprop': logger.info("Selected rprop as optimizer") self.optimizer = optim.Rprop(self.net.parameters(), lr=0.01, etas=(0.5, 1.2), step_sizes=(1e-06, 50)) elif self.args.optimizer.lower() == 'sgd': logger.info("Selected sgd as optimizer") self.optimizer = optim.SGD(self.net.parameters(), lr=0.001, momentum=0, dampening=0, weight_decay=0, nesterov=False) else: logger.info("Unknown optimizer given, SGD is chosen instead.") self.optimizer = optim.SGD(self.net.parameters(), lr=0.001, momentum=0.9) logger.info( "Loading network, loss function and %s optimizer was successful", self.args.optimizer)