def __init__(self, input_layer_neurons: int, hidden_layer_neurons: list,
             output_layer_neurons: int, learning_rate=0.002, decay_rate=0.99,
             dropout_rate=0.0, tanh=False):
    super(CriticNetwork, self).__init__()
    # Build the critic's layer stack from the given sizes.
    self.layers = CommonHelpers.create_network(
        input_layer_neurons, hidden_layer_neurons, output_layer_neurons,
        dropout_rate, tanh)
    # Note: decay_rate is forwarded as weight_decay (an L2 penalty),
    # not as RMSprop's smoothing constant alpha.
    self.optimizer = rmsprop.RMSprop(self.parameters(), lr=learning_rate,
                                     weight_decay=decay_rate)
def __init__(self, state_space, action_space, args):
    self.state_action_space = state_space + action_space
    self.device = torch.device(
        "cuda" if args.cuda and torch.cuda.is_available() else "cpu")
    self.discriminator = Discriminator_Net(
        self.state_action_space, args.dis_hidden_dim).to(self.device)
    self.dis_optim = rmsprop.RMSprop(
        self.discriminator.parameters(),
        lr=args.dis_rms_lr,
        alpha=args.dis_rms_alpha,
        eps=args.dis_rms_eps,
        weight_decay=args.dis_rms_weight_decay,
        momentum=args.dis_rms_momentum)
def __init__(self, device):
    self.epsilon = EPS_START
    # Set up policy and target DQN, optimizer and replay memory.
    self.policy_net = DQN().to(device)
    self.target_net = DQN().to(device)
    self.optimizer = optim.RMSprop(self.policy_net.parameters(),
                                   lr=LEARNING_RATE,
                                   momentum=GRAD_MOM,
                                   eps=MIN_SQ_GRAD)
    self.memory = ReplayMemory()
    # Initialize target net
    self.update_target_net()
def train_model(self):
    """Train the AutoEncoder model."""
    # Fetch the training data
    train_dataloader = torch.utils.data.DataLoader(
        self.train_dataset, batch_size=_BATCH_SIZE, num_workers=_NUM_WORKERS)

    # Keep track of progress
    counter = []
    loss_history = []
    iteration_number = 0

    # Set the loss and optimizer functions
    criterion = nn.MSELoss()
    optimizer = rmsprop.RMSprop(self.model.parameters(), lr=_LEARNING_RATE)

    for epoch in range(_EPOCHS):
        for i, data in enumerate(train_dataloader, 0):
            # Get data
            level_data = data.to(self.device)
            # Clear gradients
            optimizer.zero_grad()
            # Perform a forward pass
            outputs = self.model(level_data)
            # Calculate the loss
            loss = criterion(outputs, level_data)
            # Perform a backward pass
            loss.backward()
            # Parameter update - optimization step
            optimizer.step()

            if i % 10 == 0:
                print("Epoch number {}: Current loss {}\n".format(
                    epoch, loss.item()))
                iteration_number += 10
                counter.append(iteration_number)
                loss_history.append(loss.item())

    # Plot the loss history
    self.show_plot(counter, loss_history)
def __init__(self, state_dim, action_dim, args, num_dynamic=1):
    super(DynamicModel, self).__init__()
    self.device = torch.device("cuda:0" if args.cuda else "cpu")
    self.dy_network = DynamicNetwork(
        state_dim, action_dim, args.dynamic_hidden_dim).to(self.device)
    self.num_dynamic = num_dynamic
    # Alternative optimizers, kept commented out for reference:
    # self.dy_network_optim = Adam(self.dy_network.parameters(), lr=args.lr, eps=1e-9)
    # self.dy_network_optim = adadelta.Adadelta(self.dy_network.parameters(),
    #                                           lr=args.lr, eps=args.eps,
    #                                           weight_decay=args.weight_decay)
    # self.dy_network_optim = SGD(self.dy_network.parameters(), lr=args.lr2)
    self.dy_network_optim = rmsprop.RMSprop(
        self.dy_network.parameters(),
        lr=args.dy_rms_lr,
        alpha=args.dy_rms_alpha,
        eps=args.dy_rms_eps,
        weight_decay=args.dy_rms_weight_decay,
        momentum=args.dy_rms_momentum)
def torch_get_optimizer(optimizer_type, lr, params, momentum=0., weight_decay=0.):
    # momentum=0.9
    if optimizer_type == OPTIMIZER_SGD:
        return torch.optim.SGD(params, lr, momentum, weight_decay=weight_decay)
    elif optimizer_type == OPTIMIZER_Adagrad:
        return torch_opt_adagrad.Adagrad(params, lr, weight_decay=weight_decay)
    elif optimizer_type == OPTIMIZER_Adadelta:
        return torch_opt_adadelta.Adadelta(params, lr, weight_decay=weight_decay)
    elif optimizer_type == OPTIMIZER_RMSprop:
        return torch_opt_rmsprop.RMSprop(params, lr, weight_decay=weight_decay,
                                         momentum=momentum)
        # return torch_opt_rmsprop.RMSprop(params, lr, eps=1e-6, weight_decay=0.99, momentum=momentum)
    else:  # optimizer_type == OPTIMIZER_Adam
        return torch.optim.Adam(params, lr, weight_decay=weight_decay)
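# A minimal usage sketch for the dispatcher above, not part of the original
# snippet. It assumes the OPTIMIZER_* constants defined there; the model and
# hyperparameter values below are illustrative only.
import torch.nn as nn

_demo_model = nn.Linear(16, 4)
_demo_optimizer = torch_get_optimizer(OPTIMIZER_RMSprop, lr=1e-3,
                                      params=_demo_model.parameters(),
                                      momentum=0.9, weight_decay=1e-4)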
def str2optim(optimiser: Optimiserlike, model: Module, lr: float) -> Optimiser:
    # Pass through anything that is already an optimiser instance.
    if not isinstance(optimiser, str):
        return optimiser
    elif optimiser == 'adam':
        return adam.Adam(model.parameters(), lr=lr)
    elif optimiser == 'adadelta':
        return adadelta.Adadelta(model.parameters(), lr=lr)
    elif optimiser == 'adagrad':
        return adagrad.Adagrad(model.parameters(), lr=lr)
    elif optimiser == 'adamw':
        return adamw.AdamW(model.parameters(), lr=lr)
    elif optimiser == 'sparse_adam':
        return sparse_adam.SparseAdam(model.parameters(), lr=lr)
    elif optimiser == 'adamax':
        return adamax.Adamax(model.parameters(), lr=lr)
    elif optimiser == 'rmsprop':
        return rmsprop.RMSprop(model.parameters(), lr=lr)
    elif optimiser == 'sgd':
        return sgd.SGD(model.parameters(), lr=lr)
    else:
        raise RuntimeError(f'Optimiser {optimiser} not found.')
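# Hedged usage sketch (not from the original source): str2optim accepts either a
# string name or an existing optimiser instance. The module below is illustrative.
import torch.nn as nn
from torch.optim import rmsprop

_net = nn.Linear(8, 2)
_opt_from_name = str2optim('rmsprop', _net, lr=1e-3)            # built by name
_opt_passthrough = str2optim(rmsprop.RMSprop(_net.parameters(), lr=1e-3),
                             _net, lr=1e-3)                     # returned unchanged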
def train_net(net, device, data_path, epochs=40, batch_size=1, lr=0.00001):
    # Load the training set
    isbi_dataset = ISBI_Loader(data_path)
    train_loader = torch.utils.data.DataLoader(dataset=isbi_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    # Define the RMSprop optimizer
    optimizer = rms.RMSprop(net.parameters(), lr=lr, weight_decay=1e-8,
                            momentum=0.9)
    # Define the loss function
    criterion = nn.BCEWithLogitsLoss()
    # Track the best loss, initialised to +inf
    best_loss = float('inf')
    # Train for `epochs` epochs
    for epoch in range(epochs):
        # Training mode
        net.train()
        # Iterate over the data in batches of batch_size
        for image, label in train_loader:
            optimizer.zero_grad()
            # Move the data to the target device
            image = image.to(device=device, dtype=torch.float32)
            label = label.to(device=device, dtype=torch.float32)
            # Forward pass through the network
            pred = net(image)
            print(pred)
            # Compute the loss
            loss = criterion(pred, label)
            print('Loss/train', loss.item())
            # Save the network weights with the lowest loss so far
            if loss < best_loss:
                best_loss = loss
                torch.save(net.state_dict(), './best_model.pth')
            # Update the parameters
            loss.backward()
            optimizer.step()
    plt.hlines(0, 0, len(ave_grads) + 1, linewidth=1, color="k")
    plt.xticks(range(0, len(ave_grads), 1), layers, rotation="vertical")
    plt.xlim(xmin=0, xmax=len(ave_grads))
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient flow")
    plt.grid(True)
    plt.show()


if __name__ == "__main__":
    from torch.optim import rmsprop

    testInput = torch.randn(size=[5, 3, 32, 32]).float()
    model = CombineNet(3, 10, 0.5, 1, 2)
    optimizer = rmsprop.RMSprop(model.parameters(), 5e-4, momentum=0.9,
                                weight_decay=1e-5)
    outputs = model(testInput)
    print(outputs)
    lossCri = nn.CrossEntropyLoss(reduction="mean")

    import numpy as np
    loss = lossCri(outputs, torch.from_numpy(np.array([0, 1, 2, 3, 4])).long())
    loss.backward()
    # optimizer.zero_grad()
    # optimizer.step()
    # outputs2 = model(testInput)
    # loss = lossCri(outputs2, torch.from_numpy(np.array([0, 1, 2, 3, 4])).long())
    # loss.backward()
    plot_grad_flow(model.named_parameters())
# Print the parameters that are currently trainable.
for name, param in model_ft.named_parameters():
    if param.requires_grad:
        print('\t', name)

# Freeze everything except parameters whose names start with 'c'
# (e.g. the classifier head).
for name, param in model_ft.named_parameters():
    if param.requires_grad and name[0] == 'c':
        continue
    else:
        param.requires_grad = False

print('Params to learn after:')
for name, param in model_ft.named_parameters():
    if param.requires_grad:
        print('\t', name)

optimizer_ft = rmsprop.RMSprop(params_to_update, lr=0.001)
criterion = torch.nn.BCELoss()
model_ft, hist = train_model(model_ft, dataloaders_dict, criterion,
                             optimizer_ft, num_epochs=num_epochs)

print('-' * 64)
print()
print('-' * 64)

unfrozen = [
    'features.28.weight',
    'features.28.bias',
    'features.26.weight',
import torch
from torch.optim import rmsprop

# Test 1
torch.manual_seed(0)
torch.set_printoptions(precision=6)
param = torch.rand(1, 2, 3, 4)
param.grad = torch.rand(1, 2, 3, 4)
print("Parameter: ", param)
print("Gradient: ", param.grad)

# First step
opt = rmsprop.RMSprop([param], lr=0.1, alpha=0.9, eps=0.1, weight_decay=0.1,
                      momentum=0.1, centered=True)
opt.step()
print("Parameter (after first step): ", param)

# Second step
opt.step()
print("Parameter (after second step): ", param)

# Test 2
param = torch.rand(1, 2, 3, 4)
param.grad = torch.rand(1, 2, 3, 4)
print("Parameter: ", param)
print("Gradient: ", param.grad)
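# Hedged cross-check (not part of the original test): a manual sketch of the
# centered-RMSprop-with-momentum update that opt.step() performs above, following
# the update rule documented for torch.optim.RMSprop. It is offered for comparing
# against the printed values, not as the library's exact code path; the helper
# name and the zero-initialised state dict are assumptions of this sketch.
def manual_rmsprop_step(p, g, state, lr, alpha, eps, weight_decay, momentum):
    g = g + weight_decay * p                                   # fold L2 penalty into the gradient
    state['square_avg'] = alpha * state['square_avg'] + (1 - alpha) * g * g
    state['grad_avg'] = alpha * state['grad_avg'] + (1 - alpha) * g   # centered variant
    avg = (state['square_avg'] - state['grad_avg'] ** 2).sqrt() + eps
    state['buf'] = momentum * state['buf'] + g / avg           # momentum buffer
    return p - lr * state['buf']

torch.manual_seed(0)
ref_param = torch.rand(1, 2, 3, 4)
ref_grad = torch.rand(1, 2, 3, 4)
ref_state = {'square_avg': torch.zeros_like(ref_param),
             'grad_avg': torch.zeros_like(ref_param),
             'buf': torch.zeros_like(ref_param)}
ref_param = manual_rmsprop_step(ref_param, ref_grad, ref_state,
                                lr=0.1, alpha=0.9, eps=0.1,
                                weight_decay=0.1, momentum=0.1)
print("Manual reference (after first step): ", ref_param)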