def main():
    """Entry point: spawn the shared-model training processes, or evaluate.

    In "train" mode one monitor process (``test``) and ``args.n_workers``
    training processes share the same network and optimizer; in "test"
    mode a saved model is evaluated.
    """
    args = arg_parser()
    if args.mode == "train":
        env = environment.make(args.env, args)
        # The shared network architecture is selected by a command-line flag.
        if args.networks == "MLP":
            shared_net = MLP(env.observation_space.shape[0], env.action_space,
                             args.n_frames)
        elif args.networks == "CONV":
            shared_net = CONV(args.n_frames, env.action_space)
        optimizer = SharedAdam(shared_net.parameters())

        workers = []
        # One monitoring/checkpointing process...
        monitor = mp.Process(target=test, args=(args, shared_net))
        monitor.start()
        workers.append(monitor)
        # ...plus n_workers asynchronous training processes.
        for rank in range(args.n_workers):
            worker = mp.Process(target=train,
                                args=(rank, args, shared_net, optimizer))
            worker.start()
            workers.append(worker)
        for proc in workers:
            proc.join()
    elif args.mode == "test":
        evaluate(args)
def __init__(self, obs_dim, action_dim, hiddens_actor, hiddens_critic,
             layer_norm=False, memory_size=50000):
    """Build actor/critic networks, frozen target copies, trainers and memory.

    :param obs_dim: size of the observation vector
    :param action_dim: size of the action vector
    :param hiddens_actor: hidden-layer sizes for the actor MLP
    :param hiddens_critic: hidden-layer sizes for the critic MLP
    :param layer_norm: whether the MLPs apply layer normalisation
    :param memory_size: replay buffer capacity
    """
    self.obs_dim = obs_dim
    self.action_dim = action_dim

    # Exploration noise schedule: std starts at 1.0 and decays toward a floor.
    self.noise_stddev = 1.
    self.noise_stddev_decrease = 5e-4
    self.noise_stddev_lower = 5e-2

    # tanh on every actor layer (including the output, which bounds actions);
    # the critic's output layer is linear.
    actor_acts = [dy.tanh] * len(hiddens_actor) + [dy.tanh]
    critic_acts = [dy.tanh] * len(hiddens_critic) + [None]

    def make_actor():
        return MLP(inpt_shape=(obs_dim,),
                   hiddens=hiddens_actor + [action_dim],
                   activation=actor_acts,
                   layer_norm=layer_norm)

    def make_critic():
        # The critic scores (observation, action) pairs with a single value.
        return MLP(inpt_shape=(obs_dim + action_dim,),
                   hiddens=hiddens_critic + [1],
                   activation=critic_acts,
                   layer_norm=layer_norm)

    self.actor = make_actor()
    self.critic = make_critic()
    self.actor_target = make_actor()
    self.critic_target = make_critic()

    # Hard-copy the online weights into the target networks.
    self.actor_target.update(self.actor, soft=False)
    self.critic_target.update(self.critic, soft=False)

    self.trainer_actor = dy.AdamTrainer(self.actor.pc)
    self.trainer_critic = dy.AdamTrainer(self.critic.pc)
    self.trainer_actor.set_learning_rate(1e-4)
    self.trainer_critic.set_learning_rate(1e-3)

    self.memory = Memory(memory_size)
def main():
    """Classify a single image with a trained MLP and print the top-3 labels."""
    parser = argparse.ArgumentParser(description='Pytorch example: CIFAR-10')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--model', '-m', default='result/model_final',
                        help='Path to the model for test')
    # BUG FIX: help text was copy-pasted from --model; this flag is the image.
    parser.add_argument('--image', '-i', default='image.png',
                        help='Path to the input image')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('')

    # Set up a neural network to test and load the saved weights.
    net = MLP(args.unit, 28 * 28, 10)
    net.load_state_dict(torch.load(args.model))

    # Choose the device once and move the model there.
    device = 'cpu' if args.gpu < 0 else 'cuda:' + str(args.gpu)
    net = net.to(device)

    # Load the image as a 1x1x28x28 grayscale tensor on the chosen device.
    transform = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
    ])
    image = transform(Image.open(args.image, 'r')).unsqueeze(0).to(device)

    with torch.no_grad():
        # Flatten to match the MLP input; the tensor is already on `device`,
        # so the redundant second .to(device) was removed.
        image = image.view(-1, 28 * 28)
        outputs = net(image)
        # Top-3 predicted class indices.
        _, predicted = torch.topk(outputs, 3)
        print('Predicted label : {0}, {1}, {2}'.format(
            predicted[0][0].tolist(), predicted[0][1].tolist(),
            predicted[0][2].tolist()))
def test(args, nn):
    """Monitoring agent for A3C training.

    Runs evaluation episodes forever: after each finished episode it syncs
    weights from the shared model ``nn``, logs the episode reward and the
    running mean, checkpoints the best-scoring weights, and sleeps 60s.
    """
    ptitle('Test Agent')
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger(
        '{}_log'.format(args.env))
    # Dump all parsed arguments into the log for reproducibility.
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    env = environment.make(args.env, args)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    # CONSISTENCY FIX: honour args.networks the same way main()/evaluate() do.
    # Previously this always built an MLP, so a CONV shared model's
    # state_dict could not be loaded here.
    if args.networks == "CONV":
        player.model = CONV(args.n_frames, player.env.action_space)
    else:
        player.model = MLP(player.env.observation_space.shape[0],
                           player.env.action_space, args.n_frames)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    player.model.eval()
    max_score = 0
    while True:
        # Re-sync with the shared model at the start of every episode.
        if player.done:
            player.model.load_state_dict(nn.state_dict())
        player.action_test()
        reward_sum += player.reward
        if player.done:
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, reward {1}, average reward {2:.4f}".format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, reward_mean))
            # Checkpoint whenever the episode ties or beats the best so far.
            if reward_sum >= max_score:
                max_score = reward_sum
                state_to_save = player.model.state_dict()
                torch.save(state_to_save,
                           '{}.dat'.format(args.model_save_dir))
            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            # Throttle evaluation so training workers get the CPU.
            time.sleep(60)
            player.state = torch.from_numpy(state).float()
def evaluate(args):
    """Load a saved model and roll it out for ``args.rollout`` episodes,
    logging per-episode reward and the running mean; episodes are recorded
    by a gym Monitor wrapper."""
    torch.set_default_tensor_type('torch.FloatTensor')
    # map_location forces CPU loading regardless of where the model was saved.
    saved_state = torch.load(
        '{}.dat'.format(args.model_load_dir),
        map_location=lambda storage, loc: storage)

    log = {}
    setup_logger('{}_eval_log'.format(args.env),
                 r'{0}{1}_eval_log'.format(args.log, args.env))
    log['{}_eval_log'.format(args.env)] = logging.getLogger(
        '{}_eval_log'.format(args.env))
    # Dump all parsed arguments into the log for reproducibility.
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_eval_log'.format(args.env)].info('{0}: {1}'.format(
            k, d_args[k]))

    env = environment.make("{}".format(args.env), args)
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    if args.networks == "MLP":
        player.model = MLP(env.observation_space.shape[0], env.action_space,
                           args.n_frames)
    elif args.networks == "CONV":
        player.model = CONV(args.n_frames, env.action_space)

    # FIX: removed the dead `if True:` guard that unconditionally wrapped the
    # environment; record every episode.
    player.env = gym.wrappers.Monitor(
        player.env, "{}_monitor".format(args.env),
        lambda episode_id: True, force=True)

    player.model.load_state_dict(saved_state)
    player.model.eval()
    for i_episode in range(args.rollout):
        player.state = player.env.reset()
        player.state = torch.from_numpy(player.state).float()
        player.eps_len = 0
        reward_sum = 0
        while True:
            if args.render:
                # FIX: dropped the always-true `i_episode % 1 == 0` test.
                player.env.render()
            player.action_test()
            reward_sum += player.reward
            if player.done:
                num_tests += 1
                reward_total_sum += reward_sum
                reward_mean = reward_total_sum / num_tests
                log['{}_eval_log'.format(args.env)].info(
                    "reward, {0}, average reward, {1:.4f}".format(
                        reward_sum, reward_mean))
                break
def from_directory(path):
    # Build a NetworkDriver whose sub-networks (accel / steering / brake /
    # gear) are loaded from pickle files found in `path`; the first file
    # matching each name prefix wins, later matches are ignored.
    # NOTE: Python 2 module (print statements).
    # check for .pkl files in the given directoy
    ndr = NetworkDriver()
    file_list = glob.glob(os.path.join(path, "*.pkl"))
    for pickle_file in file_list:
        filename = os.path.basename(pickle_file)
        if filename.startswith('accel') and ndr.accel is None:
            ndr.accel = MLP.from_file(pickle_file)
            print "loaded accel. ",
        elif filename.startswith('steering') and ndr.steering is None:
            ndr.steering = MLP.from_file(pickle_file)
            print "loaded steering. ",
        elif filename.startswith('brake') and ndr.brake is None:
            ndr.brake = MLP.from_file(pickle_file)
            print "loaded brake. ",
        elif filename.startswith('gear') and ndr.gear is None:
            ndr.gear = MLP.from_file(pickle_file)
            # gear predicts a discrete class, not a continuous value
            ndr.gear.regression = False
            print "loaded gear. ",
    # terminate the comma-continued progress line
    print ""
    return ndr
def predict(model_path, im_path):
    """Load a saved MLP checkpoint and print the predicted letter for an image.

    :param model_path: path of the saved model checkpoint
    :param im_path: path of the image to classify
    """
    # Restore the architecture configuration and weights from the checkpoint.
    checkpoint = torch.load(model_path)
    cfg = checkpoint['configs']
    norm_size = cfg['norm_size']
    model = MLP(norm_size[0] * norm_size[1], cfg['output_size'],
                cfg['hidden_size'], cfg['n_layers'], cfg['act_type'])
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    # Pre-process exactly like ListDataset(): grayscale -> resize -> tensor,
    # then normalise to [-1, 1] in place.
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(norm_size),
        transforms.ToTensor()
    ])
    gray = cv2.cvtColor(cv2.imread(im_path), cv2.COLOR_BGR2GRAY)
    im = transform(gray)
    im.sub_(0.5).div_(0.5)

    # Forward pass on the flattened image; take the argmax class index.
    with torch.no_grad():
        out = model(im.view(1, -1))
        prediction = out.argmax(1)[0].item()

    # Map the class index onto an uppercase letter.
    letters = string.ascii_letters[-26:]  # ABCD...XYZ
    print('Prediction: {}'.format(letters[prediction]))
def __init__(self, obs_dim, action_dim, hiddens_actor, hiddens_critic,
             layer_norm=False, memory_size=50000):
    """Build DDPG actor/critic networks, target copies, trainers and memory.

    :param obs_dim: size of the observation vector
    :param action_dim: size of the action vector
    :param hiddens_actor: hidden-layer sizes for the actor MLP
    :param hiddens_critic: hidden-layer sizes for the critic MLP
    :param layer_norm: whether the MLPs apply layer normalisation
    :param memory_size: replay buffer capacity
    """
    self.obs_dim = obs_dim
    self.action_dim = action_dim
    # Exploration noise: std starts at 1.0 and decays to a floor of 5e-2.
    self.noise_stddev = 1.
    self.noise_stddev_decrease = 5e-4
    self.noise_stddev_lower = 5e-2
    # tanh on all actor layers (bounded output); linear critic output.
    actor_activations = [dy.tanh for _ in range(len(hiddens_actor))] + [dy.tanh]
    critic_activations = [dy.tanh for _ in range(len(hiddens_critic))] + [None]
    self.actor = MLP(inpt_shape=(obs_dim, ),
                     hiddens=hiddens_actor + [action_dim],
                     activation=actor_activations,
                     layer_norm=layer_norm)
    # The critic scores concatenated (observation, action) pairs.
    self.critic = MLP(inpt_shape=(obs_dim + action_dim, ),
                      hiddens=hiddens_critic + [1],
                      activation=critic_activations,
                      layer_norm=layer_norm)
    self.actor_target = MLP(inpt_shape=(obs_dim, ),
                            hiddens=hiddens_actor + [action_dim],
                            activation=actor_activations,
                            layer_norm=layer_norm)
    self.critic_target = MLP(inpt_shape=(obs_dim + action_dim, ),
                             hiddens=hiddens_critic + [1],
                             activation=critic_activations,
                             layer_norm=layer_norm)
    # Hard-copy the online weights into the target networks.
    self.actor_target.update(self.actor, soft=False)
    self.critic_target.update(self.critic, soft=False)
    self.trainer_actor = dy.AdamTrainer(self.actor.pc)
    self.trainer_critic = dy.AdamTrainer(self.critic.pc)
    self.trainer_actor.set_learning_rate(1e-4)
    self.trainer_critic.set_learning_rate(1e-3)
    self.memory = Memory(memory_size)
class DDPG:
    """Deep Deterministic Policy Gradient agent (DyNet implementation).

    Holds an actor, a critic, soft-updated target copies of both, Adam
    trainers, a decaying Gaussian exploration noise schedule and a replay
    memory.
    """

    def __init__(self, obs_dim, action_dim, hiddens_actor, hiddens_critic,
                 layer_norm=False, memory_size=50000):
        """Build actor/critic networks, target copies, trainers and memory."""
        self.obs_dim = obs_dim
        self.action_dim = action_dim
        # Exploration noise: std starts at 1.0 and decays to a floor of 5e-2.
        self.noise_stddev = 1.
        self.noise_stddev_decrease = 5e-4
        self.noise_stddev_lower = 5e-2
        # tanh on all actor layers (bounded output); linear critic output.
        actor_activations = [dy.tanh for _ in range(len(hiddens_actor))] + [dy.tanh]
        critic_activations = [dy.tanh for _ in range(len(hiddens_critic))] + [None]
        self.actor = MLP(inpt_shape=(obs_dim, ),
                         hiddens=hiddens_actor + [action_dim],
                         activation=actor_activations,
                         layer_norm=layer_norm)
        # The critic scores concatenated (observation, action) pairs.
        self.critic = MLP(inpt_shape=(obs_dim + action_dim, ),
                          hiddens=hiddens_critic + [1],
                          activation=critic_activations,
                          layer_norm=layer_norm)
        self.actor_target = MLP(inpt_shape=(obs_dim, ),
                                hiddens=hiddens_actor + [action_dim],
                                activation=actor_activations,
                                layer_norm=layer_norm)
        self.critic_target = MLP(inpt_shape=(obs_dim + action_dim, ),
                                 hiddens=hiddens_critic + [1],
                                 activation=critic_activations,
                                 layer_norm=layer_norm)
        # Hard-copy the online weights into the target networks.
        self.actor_target.update(self.actor, soft=False)
        self.critic_target.update(self.critic, soft=False)
        self.trainer_actor = dy.AdamTrainer(self.actor.pc)
        self.trainer_critic = dy.AdamTrainer(self.critic.pc)
        self.trainer_actor.set_learning_rate(1e-4)
        self.trainer_critic.set_learning_rate(1e-3)
        self.memory = Memory(memory_size)

    def act(self, obs):
        """Return a noisy action for `obs`, clipped to [-1, 1]."""
        # DyNet requires a fresh computation graph per forward pass.
        dy.renew_cg()
        action = self.actor(obs).npvalue()
        if self.noise_stddev > 0:
            # Additive Gaussian exploration noise.
            noise = np.random.randn(self.action_dim) * self.noise_stddev
            action += noise
        return np.clip(action, -1, 1)

    def store(self, exp):
        """Append one experience tuple to the replay memory."""
        self.memory.store(exp)

    def learn(self, batch_size):
        """Sample a batch, update critic then actor, decay noise, soft-update
        targets; returns the sum of the actor and critic loss values."""
        exps = self.memory.sample(batch_size)
        obss, actions, rewards, obs_nexts, dones = self._process(exps)
        # Update critic: one-step TD target from the target networks,
        # with a hard-coded discount of 0.99.
        dy.renew_cg()
        target_actions = self.actor_target(obs_nexts, batched=True)
        target_values = self.critic_target(dy.concatenate(
            [dy.inputTensor(obs_nexts, batched=True), target_actions]),
            batched=True)
        # (1 - dones) zeroes the bootstrap term on terminal transitions.
        target_values = rewards + 0.99 * target_values.npvalue() * (1 - dones)
        # New graph: the target computation above must not receive gradients.
        dy.renew_cg()
        values = self.critic(np.concatenate([obss, actions]), batched=True)
        loss = dy.mean_batches(
            (values - dy.inputTensor(target_values, batched=True))**2)
        loss_value_critic = loss.npvalue()
        loss.backward()
        self.trainer_critic.update()
        # update actor: maximise the critic's score of the actor's actions.
        dy.renew_cg()
        actions = self.actor(obss, batched=True)
        obs_and_actions = dy.concatenate(
            [dy.inputTensor(obss, batched=True), actions])
        loss = -dy.mean_batches(self.critic(obs_and_actions, batched=True))
        loss_value_actor = loss.npvalue()
        loss.backward()
        self.trainer_actor.update()
        # Linear noise decay, clamped at the lower bound.
        self.noise_stddev = (
            self.noise_stddev - self.noise_stddev_decrease
        ) if self.noise_stddev > self.noise_stddev_lower else self.noise_stddev_lower
        # Polyak (soft) update of the target networks.
        self.actor_target.update(self.actor, soft=True)
        self.critic_target.update(self.critic, soft=True)
        return loss_value_actor + loss_value_critic

    # data in memory: [memory_size, exp], exp: [obs, action, reward, obs_next, done]
    # output: [obss, actions, rewards, obs_nexts, dones], 'X's: [x, batch_size]
    @staticmethod
    def _process(exps):
        """Transpose a list of experience tuples into five batched arrays."""
        n = len(exps)
        ret = []
        for i in range(5):
            ret.append([])
            for j in range(n):
                ret[i].append(exps[j][i])
        # Transpose each field so the batch dimension comes last.
        ret = [np.transpose(arr) for arr in ret]
        return ret

    @property
    def epsilon(self):
        # Expose the current exploration noise std under a generic name.
        return self.noise_stddev
def test(model_path,
         im_dir='data/character_classification/images',
         test_file_path='data/character_classification/test.json',
         batch_size=8,
         device='cpu'):
    '''
    Test procedure: report accuracy on the test set and draw a 2-D t-SNE
    scatter of the model's logits, one coloured letter per sample.
    ---------------
    :param model_path: path of the saved model
    :param im_dir: path to directory with images
    :param test_file_path: file with test image paths and labels
    :param batch_size: test batch size
    :param device: 'cpu' or 'cuda'
    '''
    # load configurations from saved model, initialize and test the model
    checkpoint = torch.load(model_path)
    configs = checkpoint['configs']
    norm_size = configs['norm_size']
    output_size = configs['output_size']
    hidden_size = configs['hidden_size']
    n_layers = configs['n_layers']
    act_type = configs['act_type']
    # initialize the model by MLP(); input is the flattened image
    model = MLP(norm_size[0] * norm_size[1], output_size, hidden_size,
                n_layers, act_type)
    # load model parameters we saved in model_path
    model.load_state_dict(checkpoint['state_dict'])
    model = model.to(device)
    print('[Info] Load model from {}'.format(model_path))
    # enter the evaluation mode
    model.eval()
    # test loader
    testloader = dataLoader(im_dir, test_file_path, norm_size, batch_size)
    # run the test process
    n_correct = 0.
    n_ims = 0.
    logits = []      # raw model outputs, collected for the t-SNE plot
    all_labels = []  # ground-truth labels aligned with `logits`
    with torch.no_grad(
    ):  # we do not need to compute gradients during test stage
        for ims, labels in testloader:
            ims, labels = ims.to(device), labels.type(torch.float).to(device)
            # flatten each image into a vector for the MLP
            input = ims.view(ims.size(0), -1)
            out = model(input)
            predictions = out.argmax(1)
            n_correct += torch.sum(predictions == labels)
            n_ims += ims.size(0)
            logits.append(out)
            all_labels.append(labels)
    logits = torch.cat(logits, dim=0).detach().cpu().numpy()
    all_labels = torch.cat(all_labels, dim=0).cpu().numpy()
    # 2-D t-SNE embedding of the logits, min-max normalised for plotting
    tsne = TSNE(n_components=2, init='pca')
    Y = tsne.fit_transform(logits)
    letters = list(string.ascii_letters[-26:])
    Y = (Y - Y.min(0)) / (Y.max(0) - Y.min(0))
    for i in range(len(all_labels)):
        if (all_labels[i] < 26):
            # colour each point by its class, letter glyph as the marker
            c = plt.cm.rainbow(float(all_labels[i]) / 26)
            plt.text(Y[i, 0], Y[i, 1], s=letters[int(all_labels[i])], color=c)
    plt.show()
    print('[Info] Test accuracy = {:.1f}%'.format(100 * n_correct / n_ims))
def train_val(im_dir, train_file_path, val_file_path, hidden_size, n_layers,
              act_type, norm_size, n_epochs, batch_size, n_letters, lr,
              optim_type, momentum, weight_decay, valInterval, device='cpu'):
    '''
    The main training procedure
    ----------------------------
    :param im_dir: path to directory with images
    :param train_file_path: file list of training image paths and labels
    :param val_file_path: file list of validation image paths and labels
    :param hidden_size: a list of hidden size for each hidden layer
    :param n_layers: number of layers in the MLP
    :param act_type: type of activation function, can be none, sigmoid, tanh, or relu
    :param norm_size: image normalization size, (height, width)
    :param n_epochs: number of training epochs
    :param batch_size: batch size of training and validation
    :param n_letters: number of classes, in this task it is 26 English letters
    :param lr: learning rate
    :param optim_type: optimizer, can be 'sgd', 'adagrad', 'rmsprop', 'adam', or 'adadelta'
    :param momentum: only used if optim_type == 'sgd'
    :param weight_decay: the factor of L2 penalty on network weights
    :param valInterval: the frequency of validation, e.g., if valInterval = 5,
                        then do validation after each 5 training epochs
    :param device: 'cpu' or 'cuda'
    '''
    # training and validation data loader
    trainloader = dataLoader(im_dir, train_file_path, norm_size, batch_size)
    valloader = dataLoader(im_dir, val_file_path, norm_size, batch_size)

    # The MLP takes a flattened image of norm_size[0] * norm_size[1] pixels
    # and emits one logit per letter class.
    model = MLP(norm_size[0] * norm_size[1], n_letters, hidden_size, n_layers,
                act_type)
    # loss function
    cal_loss = CrossEntropyLoss.apply

    # put the model on CPU or GPU
    model = model.to(device)

    # optimizer
    if optim_type == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr, momentum=momentum,
                              weight_decay=weight_decay)
    elif optim_type == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr,
                                  weight_decay=weight_decay)
    elif optim_type == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr,
                                  weight_decay=weight_decay)
    elif optim_type == 'adam':
        optimizer = optim.Adam(model.parameters(), lr,
                               weight_decay=weight_decay)
    elif optim_type == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(), lr,
                                   weight_decay=weight_decay)
    else:
        print(
            '[Error] optim_type should be one of sgd, adagrad, rmsprop, adam, or adadelta'
        )
        raise NotImplementedError

    # training: keep the average loss of each epoch for the final plot
    losses = []
    for epoch in range(n_epochs):
        # set the model in training mode
        model.train()
        total_loss = 0.
        for step, (ims, labels) in enumerate(trainloader):
            # move a batch to the target device
            ims = ims.to(device)
            labels = labels.to(device)
            # BUG FIX: reshape with the actual batch size (ims.size(0)) —
            # the old view(batch_size, ...) crashed on a smaller final batch.
            ims = ims.view(ims.size(0), norm_size[0] * norm_size[1])
            # clear gradients accumulated from the previous step
            optimizer.zero_grad()
            # forward, loss, backward, parameter update
            pred = model(ims)
            loss = cal_loss(pred, labels)
            loss.backward()
            # loss.item() detaches the scalar; not differentiable
            total_loss += loss.item()
            optimizer.step()

        # average of the total loss for iterations
        avg_loss = total_loss / len(trainloader)
        losses.append(avg_loss)
        print('Epoch {:02d}: loss = {:.3f}'.format(epoch + 1, avg_loss))

        # validation every valInterval epochs
        if (epoch + 1) % valInterval == 0:
            model.eval()
            n_correct = 0.  # images correctly classified
            n_ims = 0.      # total images seen
            with torch.no_grad():  # no gradients needed during validation
                for ims, labels in valloader:
                    ims, labels = ims.to(device), labels.type(
                        torch.float).to(device)
                    # flatten each image into a vector for the MLP
                    input = ims.view(ims.size(0), -1)
                    out = model(input)
                    predictions = out.argmax(1)
                    n_correct += torch.sum(predictions == labels)
                    n_ims += ims.size(0)

            print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(
                epoch + 1, 100 * n_correct / n_ims))

            # FIX: removed the no-op .format() call on a placeholder-free
            # string; each validation overwrites the same checkpoint file.
            model_save_path = 'saved_models/recognition.pth'
            torch.save(
                {
                    'state_dict': model.state_dict(),
                    'configs': {
                        'norm_size': norm_size,
                        'output_size': n_letters,
                        'hidden_size': hidden_size,
                        'n_layers': n_layers,
                        'act_type': act_type
                    }
                }, model_save_path)
            print('Model saved in {}\n'.format(model_save_path))

    # draw the loss curve
    plot_loss(losses)
# --- model/optimizer setup (module level; relies on `args` defined earlier) ---
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('building models...')
# vis.log('building models...')
# clf = models.resnet50(pretrained=True)
# clf.avgpool = nn.AdaptiveAvgPool2d(1)
# for param in clf.parameters():
#     param.requires_grad = False
# clf.to(device)
# clf.eval()
# clf_norm = Normalization(IMAGENET_MEAN, IMAGENET_STD)
# clf_norm.to(device)
# clf_norm.eval()
"""fixed"""
# MLP sized from the command line; wrapped for multi-GPU data parallelism
net = MLP(n_layers=args.n_layers, width=args.width)
net.to(device)
net = nn.DataParallel(net)
criterion = nn.CrossEntropyLoss().to(device)
# Adam with args.momentum reused as beta1
optimizer = optim.Adam(net.parameters(), lr=args.lr,
                       betas=(args.momentum, 0.999), weight_decay=args.wd)
# scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# checkpoint-resume path, currently disabled:
# ckpt = torch.load(args.checkpoint)
# net.load_state_dict(ckpt['model'])
# optimizer.load_state_dict(ckpt['optimizer'])
# train_loss = ckpt['train_loss']
# test_loss = ckpt['test_loss']
from data import MyDataset
from network import MLP
from tqdm import tqdm
from tqdm.auto import trange
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from loss import FocalLoss
from loss import cross_entropy
from utils import visualize_3D
from torch.utils.tensorboard import SummaryWriter

# module-level experiment setup: tensorboard writer, device, model, data
writer = SummaryWriter()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MLP()
print(model)
dataset = MyDataset()
# NOTE(review): shuffle=False on the training loader — confirm intended
trainloader = torch.utils.data.DataLoader(dataset, batch_size=128,
                                          shuffle=False)
criterion = cross_entropy
# criterion = FocalLoss(num_classes=1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
# loop over the dataset multiple times
epochs = 50


def train():
# --- model/optimizer setup (module level; relies on `args` defined earlier) ---
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('building models...')
# vis.log('building models...')
# clf = models.resnet50(pretrained=True)
# clf.avgpool = nn.AdaptiveAvgPool2d(1)
# for param in clf.parameters():
#     param.requires_grad = False
# clf.to(device)
# clf.eval()
# clf_norm = Normalization(IMAGENET_MEAN, IMAGENET_STD)
# clf_norm.to(device)
# clf_norm.eval()
"""fixed"""
# MLP sized from the command line; wrapped for multi-GPU data parallelism
net = MLP(n_layers=args.n_layers, width=args.width)
net.to(device)
net = nn.DataParallel(net)
criterion = nn.CrossEntropyLoss().to(device)
# Adam with args.momentum reused as beta1
optimizer = optim.Adam(net.parameters(), lr=args.lr,
                       betas=(args.momentum, 0.999), weight_decay=args.wd)
# scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# checkpoint-resume path, currently disabled:
# ckpt = torch.load(args.checkpoint)
# net.load_state_dict(ckpt['model'])
# optimizer.load_state_dict(ckpt['optimizer'])
# train_loss = ckpt['train_loss']
# test_loss = ckpt['test_loss']
# test_acc = ckpt['test_acc']
# epochs = ckpt['epoch']
# --- training data + network setup (module level; expects proj_path, option,
#     batch_size, drop_last_batch, load_data, ft to be defined earlier) ---
print("Load training data ... ")
train_folder = os.path.join(proj_path, 'data',
                            'featureset_cnn_twostep{}'.format(option),
                            'train')
trainset = load_data(train_folder)
trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=8,
    drop_last=drop_last_batch
)
# batch size is usually set to 4, for debug, we can use 1
print("Initialize Network ... ")
# input length is tied to the feature dimensionality encoded in `option`
# NOTE(review): `net` stays undefined if option matches none of 2D/5D/4D
if '2D' in option:
    net = MLP(input_lenth=4096)
elif '5D' in option:
    net = MLP(input_lenth=10240)
elif '4D' in option:
    net = MLP(input_lenth=8192)
train_GPU = True
device = torch.device("cuda" if (
    torch.cuda.is_available() and train_GPU) else "cpu")
print(device)
net.to(device)
print("Loaded Network to GPU ... ")
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
# create the checkpoint directory if missing (body continues past this chunk)
if not ft.dir_exist(
        os.path.join(proj_path, "checkpoint", "MLPTwoStep{}".format(option))):
def main():
    """Train an MLP (MNIST) or CNN (CIFAR-10) classifier with a choice of
    optimizers: Adam, momentum SGD, or first/second-order explicit
    Hamiltonian descent."""
    parser = argparse.ArgumentParser(description='Hamiltonian Descent Methods')
    parser.add_argument('--batchsize', '-b', type=int, default=100)
    parser.add_argument('--epoch', '-e', type=int, default=200)
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        choices=[-1, 0, 1, 2, 3])
    parser.add_argument('--out', '-o', type=str, default='verification/')
    parser.add_argument('--data', '-d', type=str, default='mnist',
                        choices=['mnist', 'cifar10'])
    parser.add_argument('--method', '-m', type=str, default='sem',
                        choices=['adam', 'sgd', 'fem', 'sem'])
    args = parser.parse_args()

    # Experiment setup: the dataset determines the model architecture.
    if args.data == 'mnist':
        model = MLP(n_units=500, n_out=10)
        train, test = chainer.datasets.get_mnist()
    elif args.data == 'cifar10':
        model = NNet(n_out=10)
        train, test = chainer.datasets.get_cifar10()
    model = L.Classifier(model)
    chainer.cuda.get_device_from_id(args.gpu).use()
    model.to_gpu()

    # Optimizer
    if args.method == 'adam':
        optimizer = chainer.optimizers.Adam()
    elif args.method == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=0.01)
    elif args.method == 'fem':
        optimizer = Hamiltonian.Hamiltonian(approx='first')
    elif args.method == 'sem':
        optimizer = Hamiltonian.Hamiltonian(approx='second')
    # BUG FIX: was `optimier.setup(model)` — a NameError at runtime.
    optimizer.setup(model)

    # iterator
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Setup a trainer
    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    if args.method == 'sgd':
        # step-decay the SGD learning rate by 10x every 50 epochs
        trainer.extend(extensions.ExponentialShift('lr', 0.1),
                       trigger=(50, 'epoch'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
def train(train_data, train_target, test_data, test_target, learning_rate=0.1,
          epochs=1000, file_name=None):
    """Train a two-hidden-layer MLP on the given dataset and plot accuracies.

    Splits the training data 90/10 into train/validation, runs per-sample
    backprop with a linearly decaying learning rate, logs train/validation/
    test accuracy every 50 epochs to ``log/<file_name>.log``, and saves an
    accuracy plot under ``result/``.

    :param train_data / train_target: file paths consumed by load_dataset
    :param test_data / test_target: file paths for the held-out test set
    :param learning_rate: initial learning rate passed to the MLP
    :param epochs: number of training epochs
    :param file_name: basename for the log file and result image
    """
    bpn = MLP(input_size=10, output_size=2, hidden_size_1=40,
              hidden_size_2=20, learning_rate=learning_rate)
    # x = np.array([1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0,0], dtype=np.float64).reshape(-1,1)
    # print(bpn.forward(x))
    # t = np.array([0,1,0,0,0,0]).reshape(-1,1)
    # bpn.backward(t)
    # One-hot-style label table must be identical for train and test splits.
    parell_table = parellize_target(train_target)
    indices_1, target = load_dataset(train_target, parell_table)
    indices_2, data = load_dataset(train_data)
    assert indices_1 == indices_2, 'label does not match data'
    pdata, ptarget, categories = preprocessing(data, target)
    parell_table_test = parellize_target(test_target)
    indices_1, target = load_dataset(test_target, parell_table_test)
    indices_2, data = load_dataset(test_data)
    assert indices_1 == indices_2, 'label does not match data'
    pdata_test, ptarget_test, categories_test = preprocessing(data, target)
    assert parell_table == parell_table_test, "parell table not identical"
    # data, target, _ = LoadData("./GlassData.csv")
    # pdata, ptarget, categories = Preprocessing(data, target)
    # Samples are stored column-wise: shape[1] is the sample count.
    train_size = pdata.shape[1]
    test_size = pdata_test.shape[1]
    # 90/10 split of the training data into train/validation.
    traininput = pdata[:, :int(0.9 * train_size)]
    traintarget = ptarget[:, :int(0.9 * train_size)]
    validationinput = pdata[:, int(0.9 * train_size):int(train_size)]
    validationtarget = ptarget[:, int(0.9 * train_size):int(train_size)]
    testinput = pdata_test
    testtarget = ptarget_test
    errors = []
    accuracies_of_train = []
    accuracies_of_vali = []
    accuracies_of_test = []
    with open('log/' + file_name + '.log', 'w+') as file:
        for e in range(epochs):
            # --- training pass: per-sample forward/backward ---
            train_correct_cnt = 0
            for i in range(traininput.shape[1]):
                out = bpn.forward(traininput[:, i:i + 1])
                # learning rate decays linearly from 1.0 to 0.9 over training
                bpn.backward(traintarget[:, i:i + 1],
                             learning_rate=1 - 0.1 * e / epochs)
                if np.argmax(out) == np.argmax(traintarget[:, i:i + 1]):
                    train_correct_cnt += 1
            accuracy = train_correct_cnt / traininput.shape[1]
            if e % 50 == 0:
                file.write("train accuracy = {}".format(accuracy) + '\n')
            accuracies_of_train.append(accuracy)
            # --- validation pass: forward only, track L1 error too ---
            error = 0
            vali_correct_cnt = 0
            for i in range(validationinput.shape[1]):
                out = bpn.forward(validationinput[:, i:i + 1])
                error += np.sum(np.abs(out - validationtarget[:, i:i + 1]))
                if np.argmax(out) == np.argmax(validationtarget[:, i:i + 1]):
                    vali_correct_cnt += 1
            # print("error = {}".format(error))
            errors.append(error)
            accuracy = vali_correct_cnt / validationinput.shape[1]
            if e % 50 == 0:
                file.write("cross-validation accuracy = {}".format(accuracy)
                           + '\n')
            accuracies_of_vali.append(accuracy)
            # if accuracy > 0.78:
            #     break
            # --- test pass: uses bpn.test (no weight updates) ---
            test_correct_cnt = 0
            for i in range(testinput.shape[1]):
                out = bpn.test(testinput[:, i:i + 1])
                if np.argmax(out) == np.argmax(testtarget[:, i:i + 1]):
                    # print(np.argmax(out))
                    test_correct_cnt += 1
            accuracy = test_correct_cnt / testinput.shape[1]
            if e % 50 == 0:
                file.write("test accuracy= {}".format(accuracy) + '\n')
            accuracies_of_test.append(accuracy)
    # print("max accuracy of validation set is {}".format(max(accuracies_of_vali)))
    # x = range(len(accuracies_of_vali))
    # y = accuracies_of_vali
    # print(min(y))
    # plt.plot(x,y)
    # plt.savefig('lr_train_1_test_1' + (str)(learning_rate) + '.png')
    # plt.show()
    # Plot all three accuracy curves plus their maxima as figure text.
    plt.title('accuracies')
    plt.plot(accuracies_of_train, label='train_acc')
    plt.plot(accuracies_of_vali, label='vali_acc')
    plt.plot(accuracies_of_test, label='test_acc')
    x_axis_len = max(len(accuracies_of_train), len(accuracies_of_vali),
                     len(accuracies_of_test))
    plt.text(0, 1, 'max train accuracy: ' + (str)(max(accuracies_of_train)),
             transform=plt.gcf().transFigure)
    plt.text(0, 0.9, 'max vali accuracy: ' + (str)(max(accuracies_of_vali)),
             transform=plt.gcf().transFigure)
    plt.text(0, 0.8, 'max test accuracy: ' + (str)(max(accuracies_of_test)),
             transform=plt.gcf().transFigure)
    plt.legend(loc='lower right')
    plt.subplots_adjust(left=0.3)
    plt.savefig('result/' + file_name + '_lr' + (str)(learning_rate) + '_e' +
                (str)(epochs) + '.png')
    plt.close()
def train(model, X, y, epoch):
    """Train `model` with plain gradient descent on (X, y).

    :param model: network exposing forward / zero_gradient /
                  backpropagation / optimize
    :param X: input samples, one per row
    :param y: regression targets aligned with the rows of X
    :param epoch: number of passes over the whole dataset
    """
    n = X.shape[0]
    loss_all = []  # per-epoch average MSE, kept for inspection
    # FIX: removed the dead `k = 0` assignment — `k` is (re)bound by the loop.
    for k in range(epoch):
        average_loss = 0
        for i in range(n):
            # the model expects a column-vector input
            x = X[i].reshape((-1, 1))
            prediction = model.forward(x)
            loss = mse_loss(prediction, y[i])
            average_loss += loss
            # per-sample update: clear grads, backprop, GD step
            model.zero_gradient()
            model.backpropagation(y[i])
            model.optimize('GD', lr=0.0001, batch_size=1)
        average_loss = average_loss / n
        loss_all.append(average_loss)
        print('Epoch {} average_loss:{:.6f}'.format(k, average_loss))


if __name__ == '__main__':
    # 2 -> 1024 -> 1024 -> 1024 -> 1 regression network
    paras = [[2, 1024, 'sigmoid'], [1024, 1024, 'sigmoid'],
             [1024, 1024, 'sigmoid'], [1024, 1, 'identity']]
    model = MLP(len(paras), paras)
    # 50 random 2-D points in [-5, 5)^2 as the training set
    x1 = np.random.uniform(-5., 5., size=50)
    x2 = np.random.uniform(-5., 5., size=50)
    X, y = gen_data(x1, x2)
    train(model, X, y, epoch=100)
def train(rank, args, nn, optimizer):
    """A3C training worker: repeatedly syncs from the shared model `nn`,
    rolls out up to args.n_steps actions, computes the actor-critic loss
    with Generalized Advantage Estimation, and pushes gradients into the
    shared optimizer.

    :param rank: worker index, also offsets the environment seed
    :param args: parsed command-line arguments
    :param nn: shared (global) model whose weights are synced and updated
    :param optimizer: shared optimizer stepping the global parameters
    """
    ptitle('Training Agent: {}'.format(rank))
    env = environment.make(args.env, args)
    # distinct seed per worker so rollouts decorrelate
    env.seed(RANDOM_SEED + rank)
    player = Agent(None, env, args, None)
    player.model = MLP(player.env.observation_space.shape[0],
                       player.env.action_space, args.n_frames)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    player.model.train()
    while True:
        # pull the latest shared weights before each rollout
        player.model.load_state_dict(nn.state_dict())
        # reset or detach the recurrent state (1x128 hidden/cell vectors)
        if player.done:
            player.cx = Variable(torch.zeros(1, 128))
            player.hx = Variable(torch.zeros(1, 128))
        else:
            player.cx = Variable(player.cx.data)
            player.hx = Variable(player.hx.data)
        # rollout of at most n_steps actions (stops early on episode end)
        for step in range(args.n_steps):
            player.action_train()
            if player.done:
                break
        if player.done:
            player.eps_len = 0
            state = player.env.reset()
            player.state = torch.from_numpy(state).float()
        # bootstrap value: 0 on terminal states, V(s_T) otherwise
        R = torch.zeros(1, 1)
        if not player.done:
            state = player.state
            value, _, _, _ = player.model(
                (Variable(state), (player.hx, player.cx)))
            R = value.data
        player.values.append(Variable(R))
        policy_loss = 0
        value_loss = 0
        R = Variable(R)
        gae = torch.zeros(1, 1)
        # walk the rollout backwards accumulating returns and GAE
        for i in reversed(range(len(player.rewards))):
            R = args.gamma * R + player.rewards[i]
            advantage = R - player.values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)
            # Generalized Advantage Estimataion
            # print(player.rewards[i])
            delta_t = player.rewards[i] + args.gamma * \
                player.values[i + 1].data - player.values[i].data
            gae = gae * args.gamma * args.tau + delta_t
            # policy gradient term plus 0.01-weighted entropy bonus
            policy_loss = policy_loss - \
                (player.log_probs[i].sum() * Variable(gae)) - \
                (0.01 * player.entropies[i].sum())
        player.model.zero_grad()
        # joint backward pass; value loss weighted by 0.5
        (policy_loss + 0.5 * value_loss).backward()
        # copy local gradients onto the shared model, then step
        ensure_shared_grads(player.model, nn, gpu=False)
        optimizer.step()
        player.clear_actions()
def train_dpi(args):
    """Train a DPI policy on a MiniGrid or custom CartPole task.

    Builds the environment and an MLP policy net, wraps the environment in
    a tabular view, then alternates between (1) filling the tabular Q table
    by visiting every state-action pair with the current policy and
    (2) re-fitting the policy on that table; finally plots per-step
    evaluation scores.

    Args:
        args: parsed CLI namespace (task, seed, lr, policy_epoch,
            batch_size, resume_path, device, step, render).
    """
    if "Custom-CartPole" in args.task:
        # env = make_CartPole_env()
        env = gym.make(args.task)
        state_shape = env.observation_space.shape or env.observation_space.n
        action_shape = 2
    else:
        env = make_minigrid_env(
            args.task, flatten=True)  ## FIXME change to false if ConvNet
        state_shape = env.observation_space.shape or env.observation_space.n
        if "Empty" in args.task:
            action_shape = 3  # selecting Basic actions in minigrid
        else:
            action_shape = 6  # all except done
    print("Observations shape:", state_shape)
    print("Actions shape:", action_shape)

    # seed FIXME, envs dont use seed on reset
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    env.seed(args.seed)
    # train_envs.seed(args.seed)
    # test_envs.seed(args.seed)

    net = MLP(state_shape, action_shape)
    # net = ConvNet(state_shape, action_shape)
    optim = torch.optim.Adam(net.parameters(), lr=args.lr)

    # define policy
    policy = DPI(net, optim, discount_factor=0.99,
                 train_epochs=args.policy_epoch, batch_size=args.batch_size)

    # # load a previous policy
    if args.resume_path:
        policy.load_state_dict(
            torch.load(args.resume_path, map_location=args.device))
        print("Loaded agent from: ", args.resume_path)

    # Tabular wrapper enumerates the (discretized) state space so the
    # policy can be evaluated at every state-action pair.
    if "MiniGrid" in args.task:
        tabular_env = Tabular_Minigrid(env)
    else:
        tabular_env = Tabular_CartPole(env)
    print(
        f"Num states: {tabular_env.nr_states}, Q table entries: {tabular_env.q.numel()}"
    )

    # Training loop
    steps = args.step
    scores_steps = np.zeros(steps)
    for s in range(steps):
        print(f"STEP {s}")
        # collect qs — reset the Q table, then visit every state-action
        # pair under the current policy.
        tabular_env.__init_q_states__()
        tabular_env.travel_state_actions(policy)

        # net = MLP(state_shape, action_shape)
        # optim = torch.optim.Adam(net.parameters(), lr=args.lr)
        # policy = DPI(net, optim, discount_factor=0.99,train_epochs=args.policy_epoch,batch_size=args.batch_size)

        # learn new policy
        policy.learn(tabular_env)

        # evaluation
        scores = evaluate(policy, env, render=args.render)
        scores_steps[s] = scores.mean()
        # NOTE(review): q entries of -1 appear to mean "unvisited" — the sum
        # below excludes them; confirm against the Tabular_* implementation.
        print(
            f"Eval Score: {scores.mean():.2f} +- {scores.std():.2f} Total registered q: {tabular_env.q[tabular_env.q!=-1].sum().item()}\n"
        )
    plt.figure()
    plt.plot(scores_steps, label="score")
    plt.legend()
    plt.xlabel("step")
    plt.ylabel("score")
download=True, transform=transforms.Compose([ transforms.Resize((32, 32)), transforms.ToTensor()])) data_test = MNIST('./data/mnist', train=False, download=True, transform=transforms.Compose([ transforms.Resize((32, 32)), transforms.ToTensor()])) data_train_loader = DataLoader(data_train, batch_size=256, shuffle=True, num_workers=8) data_test_loader = DataLoader(data_test, batch_size=1024, num_workers=8) # net = LeNet() n_hidden = 20 net = MLP(n_hidden=n_hidden) criterion = nn.CrossEntropyLoss() net = net.cuda() criterion = criterion.cuda() # optimizer = optim.Adam(net.parameters(), lr=1e-1) optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9) cur_batch_win = None cur_batch_win_opts = { 'title': 'Epoch Loss Trace', 'xlabel': 'Batch Number',
class DDPG:
    """Deep Deterministic Policy Gradient agent implemented with DyNet.

    Holds an actor/critic MLP pair with matching target networks, Gaussian
    exploration noise that decays linearly to a floor, and an experience
    replay memory.
    """

    def __init__(self, obs_dim, action_dim, hiddens_actor, hiddens_critic, layer_norm=False, memory_size=50000):
        """Build actor/critic (+ targets), trainers, and replay memory.

        Args:
            obs_dim: observation vector length.
            action_dim: action vector length.
            hiddens_actor: hidden-layer sizes for the actor MLP.
            hiddens_critic: hidden-layer sizes for the critic MLP.
            layer_norm: whether the MLPs use layer normalization.
            memory_size: replay memory capacity.
        """
        self.obs_dim = obs_dim
        self.action_dim = action_dim
        # Exploration noise stddev: starts at 1.0, decreases by 5e-4 per
        # learn() call down to a floor of 5e-2.
        self.noise_stddev = 1.
        self.noise_stddev_decrease = 5e-4
        self.noise_stddev_lower = 5e-2
        # tanh on every actor layer including the output (keeps actions in
        # [-1, 1]); the critic's Q-value output layer is linear (None).
        actor_activations = [dy.tanh for _ in range(len(hiddens_actor))] + [dy.tanh]
        critic_activations = [dy.tanh for _ in range(len(hiddens_critic))] + [None]
        self.actor = MLP(inpt_shape=(obs_dim,), hiddens=hiddens_actor + [action_dim], activation=actor_activations, layer_norm=layer_norm)
        # Critic consumes the concatenated (obs, action) vector.
        self.critic = MLP(inpt_shape=(obs_dim + action_dim,), hiddens=hiddens_critic + [1], activation=critic_activations, layer_norm=layer_norm)
        self.actor_target = MLP(inpt_shape=(obs_dim,), hiddens=hiddens_actor + [action_dim], activation=actor_activations, layer_norm=layer_norm)
        self.critic_target = MLP(inpt_shape=(obs_dim + action_dim,), hiddens=hiddens_critic + [1], activation=critic_activations, layer_norm=layer_norm)
        # Hard-copy weights into the targets so both pairs start identical.
        self.actor_target.update(self.actor, soft=False)
        self.critic_target.update(self.critic, soft=False)
        self.trainer_actor = dy.AdamTrainer(self.actor.pc)
        self.trainer_critic = dy.AdamTrainer(self.critic.pc)
        # Standard DDPG learning rates: actor slower than critic.
        self.trainer_actor.set_learning_rate(1e-4)
        self.trainer_critic.set_learning_rate(1e-3)
        self.memory = Memory(memory_size)

    def act(self, obs):
        """Return the actor's action for `obs`, with exploration noise, clipped to [-1, 1]."""
        dy.renew_cg()
        action = self.actor(obs).npvalue()
        if self.noise_stddev > 0:
            noise = np.random.randn(self.action_dim) * self.noise_stddev
            action += noise
        return np.clip(action, -1, 1)

    def store(self, exp):
        """Add one experience tuple [obs, action, reward, obs_next, done] to replay memory."""
        self.memory.store(exp)

    def learn(self, batch_size):
        """Run one DDPG update from a sampled minibatch.

        Updates the critic toward the one-step TD target, updates the actor
        to maximize the critic's value, decays the exploration noise, and
        soft-updates both target networks.

        Returns:
            Sum of the actor and critic loss values (numpy).
        """
        exps = self.memory.sample(batch_size)
        obss, actions, rewards, obs_nexts, dones = self._process(exps)
        # Update critic
        dy.renew_cg()
        # TD target: r + 0.99 * Q_target(s', pi_target(s')), masked on done.
        target_actions = self.actor_target(obs_nexts, batched=True)
        target_values = self.critic_target(dy.concatenate([dy.inputTensor(obs_nexts, batched=True), target_actions]), batched=True)
        target_values = rewards + 0.99 * target_values.npvalue() * (1 - dones)
        # New graph: the target above is now a constant numpy value.
        dy.renew_cg()
        # NOTE(review): raw np.concatenate is passed here rather than
        # dy.inputTensor as elsewhere — confirm MLP accepts numpy inputs.
        values = self.critic(np.concatenate([obss, actions]), batched=True)
        loss = dy.mean_batches((values - dy.inputTensor(target_values, batched=True)) ** 2)
        loss_value_critic = loss.npvalue()
        loss.backward()
        self.trainer_critic.update()
        # update actor: ascend the critic's Q by minimizing its negation.
        dy.renew_cg()
        actions = self.actor(obss, batched=True)
        obs_and_actions = dy.concatenate([dy.inputTensor(obss, batched=True), actions])
        loss = -dy.mean_batches(self.critic(obs_and_actions, batched=True))
        loss_value_actor = loss.npvalue()
        loss.backward()
        self.trainer_actor.update()
        # Linear noise decay with a lower bound.
        self.noise_stddev = (
            self.noise_stddev - self.noise_stddev_decrease) if self.noise_stddev > self.noise_stddev_lower else self.noise_stddev_lower
        # Polyak (soft) target updates.
        self.actor_target.update(self.actor, soft=True)
        self.critic_target.update(self.critic, soft=True)
        return loss_value_actor + loss_value_critic

    # data in memory: [memory_size, exp], exp: [obs, action, reward, obs_next, done]
    # output: [obss, actions, rewards, obs_nexts, dones], 'X's: [x, batch_size]
    @staticmethod
    def _process(exps):
        """Transpose a list of experience tuples into five batched arrays (feature-major)."""
        n = len(exps)
        ret = []
        for i in range(5):
            ret.append([])
            for j in range(n):
                ret[i].append(exps[j][i])
        # Transpose so each array is laid out [x, batch_size] for DyNet batching.
        ret = [np.transpose(arr) for arr in ret]
        return ret

    @property
    def epsilon(self):
        # Exposes the current exploration-noise stddev under the usual name.
        return self.noise_stddev
def main():
    """Train an MLP on MNIST, validate each epoch, and plot accuracy curves.

    Fixes over the previous revision:
      * `device` is always defined, so the CPU path never depends on an
        undefined name.
      * snapshots are only written when --frequency is positive (the default
        of -1 previously matched every epoch, since `x % -1 == 0` in Python).
      * accuracy is counted with vectorized tensor ops, which also avoids the
        0-dim `.squeeze()` failure when the final mini-batch has one sample,
        and removes the inner loop that shadowed the batch index.
    """
    parser = argparse.ArgumentParser(description='Pytorch example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the training data')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    net = MLP(args.unit, 28*28, 10)
    # Load designated network weight
    if args.resume:
        net.load_state_dict(torch.load(args.resume))
    # Select the device; 'cpu' is the safe default when no GPU is requested.
    device = 'cpu'
    if args.gpu >= 0:
        # Make a specified GPU current
        device = 'cuda:' + str(args.gpu)
    net = net.to(device)

    # Setup a loss and an optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    # Load the MNIST dataset
    transform = transforms.Compose([transforms.ToTensor()])
    trainvalset = datasets.MNIST(root='./data', train=True,
                                 download=True, transform=transform)
    # Split train/val 90/10
    n_samples = len(trainvalset)
    trainsize = int(n_samples * 0.9)
    valsize = n_samples - trainsize
    trainset, valset = torch.utils.data.random_split(
        trainvalset, [trainsize, valsize])
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=args.batchsize, shuffle=True, num_workers=2)
    valloader = torch.utils.data.DataLoader(
        valset, batch_size=args.batchsize, shuffle=True, num_workers=2)

    # Result holders: epoch index, train accuracy, validation accuracy
    x = []
    ac_train = []
    ac_val = []

    # Train
    for ep in range(args.epoch):  # Loop over the dataset multiple times
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        correct_val = 0
        total_val = 0
        for data in trainloader:
            # Get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Flatten images to (batch, 784) for the MLP
            inputs = inputs.view(-1, 28*28)

            # Reset the parameter gradients
            optimizer.zero_grad()

            # Forward
            outputs = net(inputs)
            # Predict the label and count correct predictions (vectorized)
            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

            # Backward + Optimize
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Add loss
            running_loss += loss.item()

        # Report loss of the epoch
        print('[epoch %d] loss: %.3f' % (ep + 1, running_loss))

        # Save a snapshot only when a positive frequency was requested
        if args.frequency > 0 and (ep + 1) % args.frequency == 0:
            path = args.out + "/model_" + str(ep + 1)
            torch.save(net.state_dict(), path)

        # Validation
        with torch.no_grad():
            for data in valloader:
                images, labels = data
                images = images.to(device)
                labels = labels.to(device)
                # Reshape the input
                images = images.view(-1, 28*28)
                # Forward
                outputs = net(images)
                # Predict the label and count correct predictions
                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == labels).sum().item()
                total_val += labels.size(0)

        # Record result
        x.append(ep + 1)
        ac_train.append(100 * correct_train / total_train)
        ac_val.append(100 * correct_val / total_val)

    print('Finished Training')
    path = args.out + "/model_final"
    torch.save(net.state_dict(), path)

    # Draw graph
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(x, ac_train, label='Training')
    ax.plot(x, ac_val, label='Validation')
    ax.legend()
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Accuracy [%]")
    ax.set_ylim(80, 100)
    plt.savefig(args.out + '/accuracy_mnist_mlp.png')
def train(model, epochs=10, batch_size=32): x_train, x_test, y_train, y_test = load_data() for e in range(epochs): for batch_x, batch_y in create_batches(x_train, y_train, batch_size): model.zero_grad() for x, y in zip(batch_x, batch_y): model.forward(x, y) model.backward() model.step() print(f"Finished epoch {e}") test(model, x_test, y_test) def test(model, x_test, y_test): predictions = [] for x, y in zip(x_test, y_test): output = model.forward(x, y) predictions.append(np.argmax(output) == np.argmax(y)) accuracy = np.mean(predictions) print(f"Accuracy: {round(accuracy, 4)}") if __name__ == "__main__": np.random.seed(42) model = MLP([Layer(28 * 28, 128), Layer(128, 64), Layer(64, 10)]) train(model)
result_file = os.path.join(proj_path, "checkpoint", option, "testing_result_{:03d}.json".format(epoch_num)) checkpoint = torch.load(os.path.join(proj_path, "checkpoint", option, "cp_{:03d}.pth".format(epoch_num))) elif option == "CuboidTwoStep5D": net = cuboidCNN(64, True, inputChannel=5) result_file = os.path.join(proj_path, "checkpoint", option, "testing_result_{:03d}.json".format(epoch_num)) checkpoint = torch.load(os.path.join(proj_path, "checkpoint", option, "cp_{:03d}.pth".format(epoch_num))) elif option == "CuboidTwoStep4D": net = cuboidCNN(64, True, inputChannel=4) result_file = os.path.join(proj_path, "checkpoint", option, "testing_result_{:03d}.json".format(epoch_num)) checkpoint = torch.load(os.path.join(proj_path, "checkpoint", option, "cp_{:03d}.pth".format(epoch_num))) else: if option == "MLP": test_folder = os.path.join(proj_path, "data", "featureset_cnn", "test") testset = load_data(test_folder) testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=number_workers, drop_last=drop_last_batch) net = MLP(input_lenth=10240) result_file = os.path.join(proj_path, "checkpoint", option, "testing_result_{:03d}.json".format(epoch_num)) checkpoint = torch.load(os.path.join("..", "checkpoint", option, "cp_{:03d}.pth".format(epoch_num))) elif option == "MLPTwoStep2D": print("Evaluation for {}".format(option)) test_folder = os.path.join(proj_path, "data", "featureset_cnn_twostep2D", "test") testset = load_data(test_folder) testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=number_workers, drop_last=drop_last_batch) net = MLP(input_lenth=4096) result_file = os.path.join(proj_path, "checkpoint", option, "testing_result_{:03d}.json".format(epoch_num)) checkpoint = torch.load(os.path.join(proj_path, "checkpoint", option, "cp_{:03d}.pth".format(epoch_num))) elif option == "MLPTwoStep2DScaled": test_folder = os.path.join(proj_path, "data", "featureset_cnn_twostep2DScaled", "test") testset = 
load_data(test_folder) testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=number_workers,