def main():
    """Train a CNN or MLP classifier on FashionMNIST and log to TensorBoard.

    Settings are read from the command line (see the parser below). Every
    ``--log-interval`` training batches the current training loss is printed,
    the whole test set is evaluated, and both losses plus the validation
    accuracy are written to a ``SummaryWriter``. After training, the model
    weights are saved to ``<save_dir>/<model>.pt`` and the model graph is
    added to the TensorBoard log.
    """
    # The parser holds the training/testing settings. A default is used
    # unless the option is overridden on the command line.
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    # `choices` guarantees that exactly one of the two model branches below
    # is taken, so `model` can never be left unbound.
    parser.add_argument('--model', default='CNN', choices=['CNN', 'MLP'],
                        help='CNN or MLP')
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before logging training status')
    # NOTE(review): `store_true` combined with `default=True` means this flag
    # can never be switched off from the command line. Kept for compatibility.
    parser.add_argument('--save-model', action='store_true', default=True,
                        help='For Saving the current Model')
    # Directory the trained weights are written to.
    parser.add_argument('--save_dir', default='output/',
                        help='dir saved models')
    args = parser.parse_args()

    # Train on the GPU only when one is available and it was not disabled.
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    print(use_cuda)

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    train_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST('./fashionmnist_data/', train=True,
                              download=True, transform=transform),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST('./fashionmnist_data/', train=False,
                              transform=transform),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    # Records training/validation information (loss, accuracy, graph).
    writer = SummaryWriter()

    if args.model == 'CNN':
        model = CNN().to(device)
    else:  # 'MLP'; argparse `choices` rules out every other value
        model = MLP().to(device)

    # The optimizer keeps references to all parameters (and their gradients).
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum)
    # Learning rate is multiplied by `gamma` at each milestone epoch.
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[12, 24], gamma=0.1)

    model.train()
    log_acc = 0  # global step counter for the TensorBoard scalars
    for epoch in range(1, args.epochs + 1):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            # Negative log-likelihood loss (the model is expected to output
            # log-probabilities).
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()  # update the parameters from their gradients

            # Every `log_interval` batches: print the training status, run a
            # full validation pass and log the results to the writer.
            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))

                # Validation pass. Separate variable names are used so the
                # current training batch is not overwritten.
                model.eval()
                test_loss = 0
                correct = 0
                with torch.no_grad():  # no gradients needed for evaluation
                    for val_data, val_target in test_loader:
                        val_data = val_data.to(device)
                        val_target = val_target.to(device)
                        val_output = model(val_data)
                        # Sum up the batch loss.
                        test_loss += F.nll_loss(
                            val_output, val_target, reduction='sum').item()
                        # Index of the max log-probability.
                        pred = val_output.argmax(dim=1, keepdim=True)
                        correct += pred.eq(
                            val_target.view_as(pred)).sum().item()

                test_loss /= len(test_loader.dataset)
                # Log a plain float rather than a tensor that is still
                # attached to the autograd graph.
                writer.add_scalars(
                    'loss',
                    {'train_loss': loss.item(), 'val_loss': test_loss},
                    global_step=log_acc)
                writer.add_scalar('val_accuracy',
                                  correct / len(test_loader.dataset),
                                  global_step=log_acc)
                log_acc += 1
                print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
                    test_loss, correct, len(test_loader.dataset),
                    100. * correct / len(test_loader.dataset)))
                model.train()

        # Advance the LR schedule once per epoch; without this call the
        # milestones configured above would never take effect.
        scheduler.step()

    if args.save_model:  # save the trained weights
        os.makedirs(args.save_dir, exist_ok=True)
        torch.save(model.state_dict(),
                   os.path.join(args.save_dir, args.model + ".pt"))
    # Store the model structure as a graph, traced with the last input batch.
    writer.add_graph(model, (data,))
    writer.close()
class PPO():
    """PPO (clipped surrogate objective) agent for a batch of pysc2 environments.

    The agent owns the policy/value network ``net``, a frozen copy
    ``old_net`` used to compute the old-policy log-probabilities, and an
    Adam optimizer. ``reset`` must be called before ``train`` so that
    ``last_obs`` is populated.
    """

    def __init__(self, envs):
        """Set the hyper-parameters and build the networks and optimizer.

        Args:
            envs: vectorised environment exposing ``observation_spec()``,
                ``reset()``, ``step(actions)`` and ``n_envs``.
        """
        self.value_loss_coefficient = 0.5
        self.entropy_coefficient = 0.05
        self.learning_rate = 1e-4
        self.envs = envs
        self.env_num = 8
        self.processor = Preprocessor(self.envs.observation_spec()[0])
        self.sum_score = 0
        self.n_steps = 512  # rollout length per call to `train`
        self.gamma = 0.999  # discount factor
        self.clip = 0.27  # clipping range of the probability ratio
        self.sum_episode = 0
        self.total_updates = -1
        self.net = CNN().cuda()
        self.old_net = copy.deepcopy(self.net)
        self.old_net.cuda()
        self.epoch = 4  # optimisation passes over each rollout
        # Despite its name this is the NUMBER of minibatches a rollout is
        # split into (see `learn`), not the size of one minibatch.
        self.batch_size = 8
        self.optimizer = optim.Adam(
            self.net.parameters(), self.learning_rate, weight_decay=0.01)

    def reset(self):
        """Reset the environments and cache the first preprocessed observation."""
        self.obs_start = self.envs.reset()
        self.last_obs = self.processor.preprocess_obs(self.obs_start)

    @staticmethod
    def _forward(net, observation):
        """Move the observation arrays to the GPU and run them through `net`."""
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        return net(screen, minimap, flat)

    def grad_step(self, observation):
        """Forward pass through ``net`` WITH gradient tracking.

        Returns:
            ``(policy, value)`` as produced by the network.
        """
        policy, value = self._forward(self.net, observation)
        return policy, value

    def step(self, observation):
        """Forward pass through ``net`` without gradient tracking."""
        with torch.no_grad():
            policy, value = self._forward(self.net, observation)
        return policy, value

    def old_step(self, observation):
        """Forward pass through the frozen ``old_net`` without gradients."""
        with torch.no_grad():
            policy, value = self._forward(self.old_net, observation)
        return policy, value

    def select_actions(self, policy, last_obs):
        """Sample a function id and its arguments from the policy.

        Unavailable functions are masked out with
        ``last_obs['available_actions']`` and the remaining probabilities are
        renormalised before sampling.

        Args:
            policy: ``(function_pi, args_pi)`` where ``args_pi`` maps an
                argument type (an object with a ``name`` attribute) to its
                probability tensor.
            last_obs: preprocessed observation dict.

        Returns:
            ``(function_sample, args_sample)``, or the sentinel ``0`` when the
            function distribution cannot be sampled (for example every
            function is masked out, which makes the probabilities NaN).
        """
        function_pi, args_pi = policy
        available_actions = torch.FloatTensor(last_obs['available_actions'])
        function_pi = available_actions * function_pi.cpu()
        function_pi /= torch.sum(function_pi, dim=1, keepdim=True)
        try:
            function_sample = Categorical(function_pi).sample()
        except (ValueError, RuntimeError):
            # Invalid probabilities: keep the historical sentinel so that
            # `train` can abort the rollout.
            return 0

        args_sample = dict()
        for arg_type, pi in args_pi.items():
            if arg_type.name == 'queued':
                # 'queued' is always forced to 0; sized from the sampled batch
                # instead of the hard-coded `env_num`.
                args_sample[arg_type] = torch.zeros(
                    function_sample.shape[0], dtype=torch.int64)
            else:
                args_sample[arg_type] = Categorical(pi).sample().cpu()
        return function_sample, args_sample

    def mask_unused_action(self, actions):
        """Set to -1 every argument the chosen function does not take.

        ``arg_ids`` is modified in place and also returned.
        """
        fn_id, arg_ids = actions
        for n in range(fn_id.shape[0]):
            a_0 = fn_id[n]
            unused_types = set(ACTION_TYPES) - \
                set(FUNCTIONS._func_list[a_0].args)
            for arg_type in unused_types:
                arg_ids[arg_type][n] = -1
        return (fn_id, arg_ids)

    def functioncall_action(self, actions, size):
        """Convert sampled ids into a list of pysc2 ``FunctionCall`` objects.

        Args:
            actions: ``(fn_id, arg_ids)`` as returned by `mask_unused_action`.
            size: ``(height, width)`` of the spatial feature layers.

        Returns:
            One ``FunctionCall`` per environment.
        """
        height, width = size
        fn_id, arg_ids = actions
        fn_id = fn_id.numpy().tolist()
        actions_list = []
        for n, a_0 in enumerate(fn_id):
            a_l = []
            for arg_type in FUNCTIONS._func_list[a_0].args:
                arg_id = arg_ids[arg_type][n].detach(
                ).numpy().squeeze().tolist()
                if is_spatial_action[arg_type]:
                    # The column is `arg_id % width`, i.e. the flat index is
                    # laid out in rows of `width` cells, so the row must be
                    # `arg_id // width` (dividing by `height` is only correct
                    # for square screens).
                    arg = [arg_id % width, arg_id // width]
                else:
                    arg = [arg_id]
                a_l.append(arg)
            actions_list.append(FunctionCall(a_0, a_l))
        return actions_list

    def get_value(self, observation):
        """Return only the value estimate of ``net`` (no gradients)."""
        with torch.no_grad():
            _, value = self._forward(self.net, observation)
        return value

    def train(self):
        """Collect an ``n_steps`` rollout and run one PPO update on it.

        Returns early (with ``None``) when `select_actions` reports that no
        action could be sampled.
        """
        shape = (self.n_steps, self.envs.n_envs)
        sample_values = np.zeros(shape, dtype=np.float32)
        sample_obersavation = []
        sample_rewards = np.zeros(shape, dtype=np.float32)
        sample_actions = []
        sample_dones = np.zeros(shape, dtype=np.float32)
        last_obs = self.last_obs

        for step in range(self.n_steps):
            policy, value = self.step(last_obs)
            actions = self.select_actions(policy, last_obs)
            if actions == 0:
                # Sampling failed; abort this rollout.
                self.sum_episode = 7
                self.sum_score = 0
                return
            actions = self.mask_unused_action(actions)
            size = last_obs['screen'].shape[2:4]
            sample_values[step, :] = value.cpu()
            sample_obersavation.append(last_obs)
            sample_actions.append(actions)
            pysc2_action = self.functioncall_action(actions, size)

            obs_raw = self.envs.step(pysc2_action)
            last_obs = self.processor.preprocess_obs(obs_raw)
            sample_rewards[step, :] = [i.reward for i in obs_raw]
            sample_dones[step, :] = [i.last() for i in obs_raw]

            for i in obs_raw:
                if i.last():
                    score = i.observation['score_cumulative'][0]
                    self.sum_score += score
                    self.sum_episode += 1
                    print("episode %d: score = %f" % (self.sum_episode, score))

        self.last_obs = last_obs

        # Discounted returns, bootstrapped from the value of the final
        # observation and cut at episode boundaries.
        next_value = self.get_value(last_obs).cpu()
        returns = np.zeros(
            [sample_rewards.shape[0] + 1, sample_rewards.shape[1]])
        returns[-1, :] = next_value
        for i in reversed(range(sample_rewards.shape[0])):
            next_rewards = self.gamma * returns[i + 1, :] * \
                (1 - sample_dones[i, :])
            returns[i, :] = sample_rewards[i, :] + next_rewards
        returns = returns[:-1, :]
        advantages = returns - sample_values

        # Freeze the current policy as the "old" policy for this update.
        self.old_net.load_state_dict(self.net.state_dict())
        actions = stack_and_flatten_actions(sample_actions)
        observation = flatten_first_dims_dict(
            stack_ndarray_dicts(sample_obersavation))
        returns = flatten_first_dims(returns)
        advantages = flatten_first_dims(advantages)
        self.learn(observation, actions, returns, advantages)

    def learn(self, observation, actions, returns, advantages):
        """Run ``epoch`` passes of clipped-PPO minibatch updates.

        Args:
            observation: dict of flattened observation arrays with keys
                'screen', 'minimap', 'flat' and 'available_actions'.
            actions: ``(fn_ids, arg_ids_dict)`` flattened over the rollout.
            returns: flattened discounted returns.
            advantages: flattened advantages.
        """
        n_samples = returns.shape[0]
        indices = np.arange(n_samples)
        # At least 1 so that `range` never receives a zero step when there
        # are fewer samples than requested minibatches.
        minibatch = max(1, n_samples // self.batch_size)
        screen = observation['screen']
        flat = observation['flat']
        minimap = observation['minimap']
        a_actions = observation['available_actions']
        args_id = actions[1]

        for _ in range(self.epoch):
            np.random.shuffle(indices)
            for start in range(0, n_samples, minibatch):
                batch_idx = indices[start:start + minibatch]
                batch_observation = {
                    'screen': screen[batch_idx],
                    'minimap': minimap[batch_idx],
                    'flat': flat[batch_idx],
                    'available_actions': a_actions[batch_idx],
                }
                batch_actions = (
                    actions[0][batch_idx],
                    {k: v[batch_idx] for k, v in args_id.items()},
                )
                batch_advantages = torch.FloatTensor(
                    advantages[batch_idx]).cuda()
                batch_returns = torch.FloatTensor(returns[batch_idx]).cuda()
                # Normalise the advantages per minibatch. The standard
                # deviation of a single sample is NaN, so skip that case.
                if batch_advantages.numel() > 1:
                    batch_advantages = (
                        batch_advantages - batch_advantages.mean()
                    ) / (batch_advantages.std() + 1e-8)

                policy, batch_value = self.grad_step(batch_observation)
                log_probs = compute_policy_log_probs(
                    batch_observation['available_actions'], policy,
                    batch_actions).squeeze()

                old_policy, _ = self.old_step(batch_observation)
                old_log_probs = compute_policy_log_probs(
                    batch_observation['available_actions'], old_policy,
                    batch_actions).squeeze().detach()

                # Clipped surrogate objective.
                ratio = torch.exp(log_probs - old_log_probs)
                unclipped = ratio * batch_advantages
                clipped = torch.clamp(
                    ratio, 1 - self.clip, 1 + self.clip) * batch_advantages
                policy_loss = -torch.min(unclipped, clipped).mean()

                value_loss = (batch_returns - batch_value).pow(2).mean()
                entropy_loss = compute_policy_entropy(
                    batch_observation['available_actions'], policy,
                    batch_actions)
                loss = policy_loss + value_loss * self.value_loss_coefficient + \
                    entropy_loss * self.entropy_coefficient

                self.optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.net.parameters(), 1.0)
                self.optimizer.step()
# Inspect both datasets: class names, class -> index mapping and size.
print(train_set.classes)
print(train_set.class_to_idx)
# `len(...)` prints the number of samples; printing the `__len__` attribute
# without calling it would only show the bound-method object.
print(len(train_set))
print(test_set.classes)
print(test_set.class_to_idx)
print(len(test_set))


# In[5]:


cnn = CNN()
print(cnn)


# In[6]:


optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
cross_loss = nn.CrossEntropyLoss()  # the target label is not one-hotted
triplet_loss = TripletLoss(0.5)  # choose the loss function
alpha = 0.5

for epoch in range(EPOCH):
    print('EPOCH ' + str(epoch))
    # for step, (b_x, b_y) in enumerate(train_loader):
    for step, (anchor, positive, negative) in enumerate(train_loader):
        #output = cnn(b_x)[0]
        #loss = loss_func(output, b_y)
        # Each triplet member is indexed with [0] before being fed to the
        # network (presumably element 0 is the image tensor; verify against
        # the dataset class).
        anchor_output = cnn(anchor[0])
        positive_output = cnn(positive[0])
        negative_output = cnn(negative[0])
        #print(anchor_output.detach().numpy().shape)
# Build the training and test datasets (windows of size WSIZE over text files)
# and wrap them in DataLoaders. Only the training loader is shuffled.
# NOTE(review): the training set is read from a file named "test.txt" and both
# paths are absolute, machine-specific paths; confirm this is intended.
train_dataset = WindowedData("/Users/fdhcg/Desktop/clshen/data/test.txt", WSIZE)
train_loader = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataset = WindowedData("/Users/fdhcg/Desktop/clshen/data/1998.txt", WSIZE)
test_loader = data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
# Model on the GPU.
cnn = CNN().cuda()
# Optimizer.
# NOTE(review): weight_decay=1 is an unusually strong L2 penalty; confirm.
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR, weight_decay=1)
loss_fun = Myloss().cuda()
# Training loop.
for epoch in range(EPOCH):
    for i, (x, y) in enumerate(train_loader):
        batch_x = Variable(x).cuda()
        batch_y = Variable(y).cuda()
        # Feed the training batch through the network.
        output = cnn(batch_x)
        # Compute the loss.
        loss = loss_fun(output, batch_y)
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()
        # Back-propagate the error.
def main():
    """Train and evaluate a CNN on CIFAR10 with logging and checkpointing.

    Creates (or, when ``args.checkpoint_dir_name`` is given, re-uses) a run
    directory ``<parents[1] of this file>/log/<dir_name>`` containing:

    * ``tensorboard/``      - TensorBoard event files,
    * ``model/model.tar``   - latest checkpoint (epoch, model and optimizer
      state), rewritten after every epoch,
    * ``csv/args.csv``      - the run's arguments (new runs only),
    * ``csv/results.csv``   - one row of metrics per epoch.

    Metrics are additionally sent to hyperdash unless ``args.no_hyperdash``
    is set.
    """
    start_time = time()
    args = get_args()

    if args.checkpoint_dir_name:
        dir_name = args.checkpoint_dir_name
    else:
        dir_name = datetime.datetime.now().strftime('%y%m%d%H%M%S')
    path_to_dir = Path(__file__).resolve().parents[1]
    path_to_dir = os.path.join(path_to_dir, *['log', dir_name])
    os.makedirs(path_to_dir, exist_ok=True)

    # tensorboard
    path_to_tensorboard = os.path.join(path_to_dir, 'tensorboard')
    os.makedirs(path_to_tensorboard, exist_ok=True)
    writer = SummaryWriter(path_to_tensorboard)

    # model saving
    os.makedirs(os.path.join(path_to_dir, 'model'), exist_ok=True)
    path_to_model = os.path.join(path_to_dir, *['model', 'model.tar'])

    # csv
    os.makedirs(os.path.join(path_to_dir, 'csv'), exist_ok=True)
    path_to_results_csv = os.path.join(path_to_dir, *['csv', 'results.csv'])
    path_to_args_csv = os.path.join(path_to_dir, *['csv', 'args.csv'])
    if not args.checkpoint_dir_name:
        # `newline=''` is what the csv module expects of files it writes to;
        # it prevents blank rows on platforms that translate line endings.
        with open(path_to_args_csv, 'a', newline='') as f:
            args_dict = vars(args)
            param_writer = csv.DictWriter(f, list(args_dict.keys()))
            param_writer.writeheader()
            param_writer.writerow(args_dict)

    # logging using hyperdash
    if not args.no_hyperdash:
        from hyperdash import Experiment
        exp = Experiment('Classification task on CIFAR10 dataset with CNN')
        # Register every argument with hyperdash and store the value it
        # returns. Plain getattr/setattr replaces the former `exec` call.
        for key in list(vars(args)):
            setattr(args, key, exp.param(key, getattr(args, key)))
    else:
        exp = None

    path_to_dataset = os.path.join(
        Path(__file__).resolve().parents[2], 'datasets')
    os.makedirs(path_to_dataset, exist_ok=True)
    train_loader, eval_loader, classes = get_loader(
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        path_to_dataset=path_to_dataset)

    # show some of the training images, for fun.
    dataiter = iter(train_loader)
    # The builtin `next()` works on every iterator; the `.next()` method is
    # not available on current DataLoader iterators.
    images, labels = next(dataiter)
    img_grid = torchvision.utils.make_grid(images)
    matplotlib_imshow(img_grid)
    writer.add_image('four_CIFAR10_images', img_grid)

    # define a network, loss function and optimizer
    model = CNN()
    writer.add_graph(model, images)
    model = torch.nn.DataParallel(model)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)
    start_epoch = 0

    # resume training
    if args.checkpoint_dir_name:
        print('\nLoading the model...')
        checkpoint = torch.load(path_to_model)
        # `load_state_dict` copies the saved weights into the model. Calling
        # `state_dict(...)` here would leave the freshly initialised weights
        # in place and silently restart training from scratch.
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1

    summary(model, input_size=(3, 32, 32))
    model.to(args.device)

    # train the network
    print('\n--------------------')
    print('Start training and evaluating the CNN')
    for epoch in range(start_epoch, args.n_epoch):
        start_time_per_epoch = time()
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, args.device, writer, epoch,
                                      classes)
        eval_loss, eval_acc = eval(eval_loader, model, criterion, args.device)
        elapsed_time_per_epoch = time() - start_time_per_epoch
        result_dict = {
            'epoch': epoch,
            'train_loss': train_loss,
            'eval_loss': eval_loss,
            'train_acc': train_acc,
            'eval_acc': eval_acc,
            'elapsed time': elapsed_time_per_epoch
        }
        with open(path_to_results_csv, 'a', newline='') as f:
            result_writer = csv.DictWriter(f, list(result_dict.keys()))
            # The header is written once, on the very first epoch of a run.
            if epoch == 0:
                result_writer.writeheader()
            result_writer.writerow(result_dict)

        # checkpoint
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, path_to_model)

        if exp:
            exp.metric('train loss', train_loss)
            exp.metric('eval loss', eval_loss)
            exp.metric('train acc', train_acc)
            exp.metric('eval acc', eval_acc)
        else:
            print(result_dict)

        writer.add_scalar('loss/train_loss', train_loss,
                          epoch * len(train_loader))
        writer.add_scalar('loss/eval_loss', eval_loss,
                          epoch * len(eval_loader))
        writer.add_scalar('acc/train_acc', train_acc,
                          epoch * len(train_loader))
        writer.add_scalar('acc/eval_acc', eval_acc,
                          epoch * len(eval_loader))

    elapsed_time = time() - start_time
    print('\nFinished Training, elapsed time ===> %f' % elapsed_time)
    if exp:
        exp.end()
    writer.close()
class A2C():
    """Advantage actor-critic agent for a batch of pysc2 environments.

    The agent owns the policy/value network ``net`` and an Adam optimizer.
    ``reset`` must be called before ``train`` so that ``last_obs`` is
    populated.
    """

    def __init__(self, envs, args):
        """Read the hyper-parameters from `args` and build the network.

        Args:
            envs: vectorised environment exposing ``observation_spec()``,
                ``reset()``, ``step(actions)`` and ``n_envs``.
            args: namespace providing ``value_loss_weight``,
                ``entropy_weight``, ``lr``, ``map``, ``envs``,
                ``save_eposides``, ``save_dir`` and ``process_screen``.
        """
        self.value_loss_coefficient = args.value_loss_weight
        self.entropy_coefficient = args.entropy_weight
        self.learning_rate = args.lr
        self.envs = envs
        self.map = args.map
        self.env_num = args.envs
        self.save = args.save_eposides  # checkpoint every `save` episodes
        self.save_dir = args.save_dir
        self.processor = Preprocessor(self.envs.observation_spec()[0],
                                      self.map, args.process_screen)
        self.sum_score = 0
        self.n_steps = 8  # rollout length per call to `train`
        self.gamma = 0.999  # discount factor
        self.sum_episode = 0
        self.total_updates = -1
        if args.process_screen:
            self.net = CNN(348, 1985).cuda()
        else:
            self.net = CNN().cuda()
        self.optimizer = optim.Adam(self.net.parameters(),
                                    self.learning_rate,
                                    weight_decay=0.01)

    def reset(self):
        """Reset the environments and cache the first preprocessed observation."""
        self.obs_start = self.envs.reset()
        self.last_obs = self.processor.preprocess_obs(self.obs_start)

    def _forward(self, observation):
        """Move the observation arrays to the GPU and run them through ``net``."""
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        return self.net(screen, minimap, flat)

    def grad_step(self, observation):
        """Forward pass through ``net`` WITH gradient tracking.

        Returns:
            ``(policy, value)`` as produced by the network.
        """
        policy, value = self._forward(observation)
        return policy, value

    def step(self, observation):
        """Forward pass through ``net`` without gradient tracking."""
        with torch.no_grad():
            policy, value = self._forward(observation)
        return policy, value

    def _choose_actions(self, policy, last_obs, pick):
        """Mask unavailable functions, then choose ids with `pick`.

        Args:
            policy: ``(function_pi, args_pi)`` where ``args_pi`` maps an
                argument type (an object with a ``name`` attribute) to its
                probability tensor.
            last_obs: preprocessed observation dict; its
                ``'available_actions'`` entry masks ``function_pi``.
            pick: callable mapping a probability tensor to one id per row.

        Returns:
            ``(function_ids, arg_ids)``, or the sentinel ``0`` when `pick`
            rejects the masked function distribution.
        """
        function_pi, args_pi = policy
        available_actions = torch.FloatTensor(last_obs['available_actions'])
        function_pi = available_actions * function_pi.cpu()
        function_pi /= torch.sum(function_pi, dim=1, keepdim=True)
        try:
            function_sample = pick(function_pi)
        except (ValueError, RuntimeError):
            # Invalid probabilities (e.g. NaN when every function is masked
            # out): keep the historical sentinel so `train` can abort.
            return 0

        args_sample = dict()
        for arg_type, pi in args_pi.items():
            if arg_type.name == 'queued':
                # 'queued' is always forced to 0; sized from the chosen batch.
                args_sample[arg_type] = torch.zeros(
                    function_sample.shape[0], dtype=torch.int64)
            else:
                args_sample[arg_type] = pick(pi).cpu()
        return function_sample, args_sample

    def select_actions(self, policy, last_obs):
        """Stochastically sample a function id and its arguments.

        Returns ``(function_sample, args_sample)`` or ``0`` on failure.
        """
        return self._choose_actions(
            policy, last_obs, lambda prob: Categorical(prob).sample())

    def determined_actions(self, policy, last_obs):
        """Greedily pick the most probable function id and arguments.

        Returns ``(function_sample, args_sample)`` or ``0`` on failure.
        """
        return self._choose_actions(
            policy, last_obs, lambda prob: torch.argmax(prob, dim=1))

    def mask_unused_action(self, actions):
        """Set to -1 every argument the chosen function does not take.

        ``arg_ids`` is modified in place and also returned.
        """
        fn_id, arg_ids = actions
        for n in range(fn_id.shape[0]):
            a_0 = fn_id[n]
            unused_types = set(ACTION_TYPES) - \
                set(FUNCTIONS._func_list[a_0].args)
            for arg_type in unused_types:
                arg_ids[arg_type][n] = -1
        return (fn_id, arg_ids)

    def functioncall_action(self, actions, size):
        """Convert chosen ids into a list of pysc2 ``FunctionCall`` objects.

        Args:
            actions: ``(fn_id, arg_ids)`` as returned by `mask_unused_action`.
            size: ``(height, width)`` of the spatial feature layers.

        Returns:
            One ``FunctionCall`` per environment.
        """
        height, width = size
        fn_id, arg_ids = actions
        fn_id = fn_id.numpy().tolist()
        actions_list = []
        for n, a_0 in enumerate(fn_id):
            a_l = []
            for arg_type in FUNCTIONS._func_list[a_0].args:
                arg_id = arg_ids[arg_type][n].detach().numpy().squeeze(
                ).tolist()
                if is_spatial_action[arg_type]:
                    # The column is `arg_id % width`, i.e. the flat index is
                    # laid out in rows of `width` cells, so the row must be
                    # `arg_id // width` (dividing by `height` is only correct
                    # for square screens).
                    arg = [arg_id % width, arg_id // width]
                else:
                    arg = [arg_id]
                a_l.append(arg)
            actions_list.append(FunctionCall(a_0, a_l))
        return actions_list

    def get_value(self, observation):
        """Return only the value estimate of ``net`` (no gradients)."""
        with torch.no_grad():
            _, value = self._forward(observation)
        return value

    def train(self):
        """Collect an ``n_steps`` rollout and run one A2C update on it.

        Saves the network weights every ``self.save`` finished episodes.
        Returns early (with ``None``) when `select_actions` reports that no
        action could be sampled.
        """
        shape = (self.n_steps, self.envs.n_envs)
        sample_values = np.zeros(shape, dtype=np.float32)
        sample_obersavation = []
        sample_rewards = np.zeros(shape, dtype=np.float32)
        sample_actions = []
        sample_dones = np.zeros(shape, dtype=np.float32)
        last_obs = self.last_obs

        for step in range(self.n_steps):
            policy, value = self.step(last_obs)
            actions = self.select_actions(policy, last_obs)
            if actions == 0:
                # Sampling failed; abort this rollout.
                self.sum_episode = 7
                self.sum_score = 0
                return
            actions = self.mask_unused_action(actions)
            size = last_obs['screen'].shape[2:4]
            sample_values[step, :] = value.cpu()
            sample_obersavation.append(last_obs)
            sample_actions.append(actions)
            pysc2_action = self.functioncall_action(actions, size)

            obs_raw = self.envs.step(pysc2_action)
            last_obs = self.processor.preprocess_obs(obs_raw)
            # Shaped reward: +1 for any non-zero environment reward,
            # otherwise a small per-step penalty.
            sample_rewards[step, :] = [
                1 if i.reward else -0.1 for i in obs_raw
            ]
            sample_dones[step, :] = [i.last() for i in obs_raw]

            for i in obs_raw:
                if i.last():
                    score = i.observation['score_cumulative'][0]
                    self.sum_score += score
                    self.sum_episode += 1
                    print("episode %d: score = %f" % (self.sum_episode, score))
                    if self.sum_episode % self.save == 0:
                        torch.save(
                            self.net.state_dict(),
                            self.save_dir + '/' + str(self.sum_episode) +
                            '_score' + str(score) + '.pkl')

        self.last_obs = last_obs

        # Discounted returns, bootstrapped from the value of the final
        # observation and cut at episode boundaries.
        next_value = self.get_value(last_obs).cpu()
        returns = np.zeros(
            [sample_rewards.shape[0] + 1, sample_rewards.shape[1]])
        returns[-1, :] = next_value
        for i in reversed(range(sample_rewards.shape[0])):
            next_rewards = self.gamma * returns[i + 1, :] * (
                1 - sample_dones[i, :])
            returns[i, :] = sample_rewards[i, :] + next_rewards
        returns = returns[:-1, :]
        advantages = returns - sample_values

        actions = stack_and_flatten_actions(sample_actions)
        observation = flatten_first_dims_dict(
            stack_ndarray_dicts(sample_obersavation))
        returns = flatten_first_dims(returns)
        advantages = flatten_first_dims(advantages)
        self.learn(observation, actions, returns, advantages)

    def learn(self, observation, actions, returns, advantages):
        """Run a single gradient step on the whole rollout.

        Args:
            observation: dict of flattened observation arrays with keys
                'screen', 'minimap', 'flat' and 'available_actions'.
            actions: ``(fn_ids, arg_ids_dict)`` flattened over the rollout.
            returns: flattened discounted returns.
            advantages: flattened advantages.
        """
        advantages = torch.FloatTensor(advantages).cuda()
        returns = torch.FloatTensor(returns).cuda()
        policy, value = self.grad_step(observation)
        log_probs = compute_policy_log_probs(
            observation['available_actions'], policy, actions).squeeze()

        policy_loss = -(log_probs * advantages).mean()
        value_loss = (returns - value).pow(2).mean()
        entropy_loss = compute_policy_entropy(
            observation['available_actions'], policy, actions)
        loss = policy_loss + value_loss * self.value_loss_coefficient + \
            entropy_loss * self.entropy_coefficient

        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.net.parameters(), 1.0)
        self.optimizer.step()
def main():
    """Train a CNN on MNIST and record per-epoch train/test accuracy.

    Command-line options select the mini-batch size, the number of epochs
    and how often (in mini-batches) the running loss is printed. After
    training, the weights are written to ``model.pth`` and the accuracy
    history to ``train.log`` (CSV).
    """
    parser = argparse.ArgumentParser(description='pytorch example: MNIST')
    parser.add_argument('--batch', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--display', '-d', type=int, default=100,
                        help='Number of interval to show progress')
    args = parser.parse_args()

    batch_size = args.batch
    epoch_size = args.epoch
    display_interval = args.display

    transform = transforms.Compose(
        [transforms.ToTensor(),  # transform to torch.Tensor
         transforms.Normalize(mean=(0.5,), std=(0.5,))])

    trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                          download=True, transform=transform)
    # trainset is a torchvision.datasets.mnist.MNIST (a torch Dataset);
    # trainset[0][0] is a torch.Tensor of torch.Size([1, 28, 28]).
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=2)

    testset = torchvision.datasets.MNIST(root='./data', train=False,
                                         download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=2)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('device:', device)

    net = CNN()
    print(net)
    print()

    net.to(device)  # for GPU

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    epoch_list = []
    train_acc_list = []
    test_acc_list = []
    for epoch in range(epoch_size):  # loop over the dataset multiple times

        # Training mode: layers such as dropout/batch-norm (if the model has
        # any) must behave differently here than during evaluation below.
        net.train()
        running_loss = 0.0
        train_true = []
        train_pred = []
        for i, data in enumerate(trainloader, 0):

            # get the inputs; with the default batch size `inputs` is a
            # torch.Tensor of torch.Size([100, 1, 28, 28])
            inputs, labels = data
            train_true.extend(labels.tolist())

            inputs, labels = inputs.to(device), labels.to(device)  # for GPU

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Predicted class = index of the largest output; `detach()` keeps
            # the bookkeeping out of the autograd graph.
            _, predicted = torch.max(outputs.detach(), 1)
            train_pred.extend(predicted.tolist())

            # print statistics
            running_loss += loss.item()
            if i % display_interval == display_interval - 1:  # every `display_interval` mini-batches
                print('[epochs: {}, mini-batches: {}, images: {}] loss: {:.3f}'.format(
                    epoch + 1, i + 1, (i + 1) * batch_size, running_loss / display_interval))
                running_loss = 0.0

        # Evaluation mode for the test pass (no-op for models without
        # dropout/batch-norm layers).
        net.eval()
        test_true = []
        test_pred = []
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                test_true.extend(labels.tolist())
                images, labels = images.to(device), labels.to(device)  # for GPU

                outputs = net(images)
                _, predicted = torch.max(outputs, 1)
                test_pred.extend(predicted.tolist())

        train_acc = accuracy_score(train_true, train_pred)
        test_acc = accuracy_score(test_true, test_pred)
        print('    epocs: {}, train acc.: {:.3f}, test acc.: {:.3f}'.format(epoch + 1, train_acc, test_acc))
        print()

        epoch_list.append(epoch + 1)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

    print('Finished Training')

    print('Save Network')
    torch.save(net.state_dict(), 'model.pth')

    df = pd.DataFrame({'epoch': epoch_list,
                       'train/accuracy': train_acc_list,
                       'test/accuracy': test_acc_list})

    print('Save Training Log')
    df.to_csv('train.log', index=False)