Example #1
# Assumed imports for this snippet; CNN and MLP are defined elsewhere in the project.
import argparse
import os

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms

def main():
    # The parser holds the training/testing settings. If an argument has a
    # default it is used as-is; override it by editing the default or on the command line.
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--model', default='CNN',  # the model to train
                        help='CNN or MLP')
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=True,
                        help='For Saving the current Model')
    parser.add_argument('--save_dir', default='output/',  # where trained models are saved
                        help='directory for saved models')
    args = parser.parse_args()
    # torch.cuda.is_available() reports whether a usable GPU exists; otherwise train on the CPU.
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    print(use_cuda)
    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST('./fashionmnist_data/', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST('./fashionmnist_data/', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    writer = SummaryWriter()  # records training/testing metrics: loss, accuracy, etc.
    if args.model == 'CNN':
        model = CNN().to(device)
    elif args.model == 'MLP':
        model = MLP().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)  # the optimizer holds references to all parameters; each parameter carries its own gradient
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[12, 24], gamma=0.1)  # decay the learning rate tenfold at epochs 12 and 24
    model.train()
    log_acc = 0  # global step counter for the TensorBoard writer
    for epoch in range(1, args.epochs + 1):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)  # negative log-likelihood loss (mean over the batch)
            loss.backward()
            optimizer.step()  # update each parameter from its gradient
            # Every args.log_interval batches, print training progress, run one
            # validation pass, and log the validation accuracy to the writer.
            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                           100. * batch_idx / len(train_loader), loss.item()))
                # validation pass below
                model.eval()
                test_loss = 0
                correct = 0
                with torch.no_grad():  # no gradients needed for evaluation
                    for data, target in test_loader:
                        data, target = data.to(device), target.to(device)
                        output = model(data)
                        test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
                        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
                        correct += pred.eq(target.view_as(pred)).sum().item()
                test_loss /= len(test_loader.dataset)
                writer.add_scalars('loss', {'train_loss': loss.item(), 'val_loss': test_loss}, global_step=log_acc)
                writer.add_scalar('val_accuracy', correct / len(test_loader.dataset), global_step=log_acc)
                log_acc += 1
                print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
                    test_loss, correct, len(test_loader.dataset),
                    100. * correct / len(test_loader.dataset)))
                model.train()
        scheduler.step()  # advance the learning-rate schedule once per epoch
    if args.save_model:  # save the trained model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        torch.save(model.state_dict(), os.path.join(args.save_dir, args.model + ".pt"))
    writer.add_graph(model, (data,))  # save the model structure as a graph to trace the data flow
    writer.close()
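# An assumed entry-point guard (the snippet defines main() but never calls it):
if __name__ == '__main__':
    main()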
Example #2
# Assumed imports for this snippet; Preprocessor, CNN, compute_policy_log_probs,
# compute_policy_entropy, is_spatial_action, and the flatten/stack helpers are
# project code defined elsewhere.
import copy

import numpy as np
import torch
import torch.optim as optim
from torch.distributions import Categorical
from pysc2.lib.actions import FUNCTIONS, FunctionCall, TYPES as ACTION_TYPES

class PPO():

    def __init__(self, envs):
        self.value_loss_coefficient = 0.5
        self.entropy_coefficient = 0.05
        self.learning_rate = 1e-4
        self.envs = envs
        self.env_num=8
        self.processor = Preprocessor(self.envs.observation_spec()[0])
        self.sum_score = 0
        self.n_steps = 512
        self.gamma = 0.999
        self.clip=0.27
        self.sum_episode = 0
        self.total_updates = -1
        self.net = CNN().cuda()
        self.old_net = copy.deepcopy(self.net)
        self.old_net.cuda()
        self.epoch=4
        self.batch_size=8
        self.optimizer = optim.Adam(
            self.net.parameters(), self.learning_rate, weight_decay=0.01)

    def reset(self):
        self.obs_start = self.envs.reset()
        self.last_obs = self.processor.preprocess_obs(self.obs_start)

    def grad_step(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        policy, value = self.net(screen, minimap, flat)
        return policy, value

    def step(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        with torch.no_grad():
            policy, value = self.net(screen, minimap, flat)
        return policy, value
    def old_step(self,observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        with torch.no_grad():
            policy, value = self.old_net(screen, minimap, flat)
        return policy, value
    def select_actions(self, policy, last_obs):
        available_actions = last_obs['available_actions']

        def sample(prob):
            actions = Categorical(prob).sample()
            return actions
        function_pi, args_pi = policy
        available_actions = torch.FloatTensor(available_actions)
        function_pi = available_actions*function_pi.cpu()
        function_pi /= torch.sum(function_pi, dim=1, keepdim=True)
        try:
            function_sample = sample(function_pi)
        except Exception:  # sampling can fail if no action has positive probability
            return 0
        args_sample = dict()
        for arg_type, pi in args_pi.items():  # 'queued' args are forced to 0 (not queued)
            if arg_type.name == 'queued':
                args_sample[arg_type] = torch.zeros((self.env_num,), dtype=int)
            else:
                args_sample[arg_type] = sample(pi).cpu()
        return function_sample, args_sample

    def mask_unused_action(self, actions):
        fn_id, arg_ids = actions
        for n in range(fn_id.shape[0]):
            a_0 = fn_id[n]
            unused_types = set(ACTION_TYPES) - \
                set(FUNCTIONS._func_list[a_0].args)
            for arg_type in unused_types:
                arg_ids[arg_type][n] = -1
        return (fn_id, arg_ids)

    def functioncall_action(self, actions, size):
        height, width = size
        fn_id, arg_ids = actions
        fn_id = fn_id.numpy().tolist()
        actions_list = []
        for n in range(len(fn_id)):
            a_0 = fn_id[n]
            a_l = []
            for arg_type in FUNCTIONS._func_list[a_0].args:
                arg_id = arg_ids[arg_type][n].detach().numpy().squeeze().tolist()
                if is_spatial_action[arg_type]:
                    arg = [arg_id % width, arg_id // height]
                else:
                    arg = [arg_id]
                a_l.append(arg)
            action = FunctionCall(a_0, a_l)

            actions_list.append(action)
        return actions_list

    def get_value(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        with torch.no_grad():
            _, value = self.net(screen, minimap, flat)
        return value

    def train(self):
        obs_raw = self.obs_start
        shape = (self.n_steps, self.envs.n_envs)
        sample_values = np.zeros(shape, dtype=np.float32)
        sample_observation = []
        sample_rewards = np.zeros(shape, dtype=np.float32)
        sample_actions = []
        sample_dones = np.zeros(shape, dtype=np.float32)
        scores = []
        last_obs = self.last_obs
        for step in range(self.n_steps):
            policy, value = self.step(last_obs)

            actions = self.select_actions(policy, last_obs)
            if actions == 0:
                self.sum_episode = 7
                self.sum_score = 0
                return
            actions = self.mask_unused_action(actions)

            size = last_obs['screen'].shape[2:4]
            sample_values[step, :] = value.cpu()
            sample_observation.append(last_obs)
            sample_actions.append(actions)
            pysc2_action = self.functioncall_action(actions, size)

            obs_raw = self.envs.step(pysc2_action)
            # print("0:",pysc2_action[0].function)
            # print("1:",pysc2_action[1].function)

            last_obs = self.processor.preprocess_obs(obs_raw)
            sample_rewards[step, :] = [
                i.reward for i in obs_raw]
            sample_dones[step, :] = [i.last() for i in obs_raw]

            for i in obs_raw:
                if i.last():
                    score = i.observation['score_cumulative'][0]
                    self.sum_score += score
                    self.sum_episode += 1
                    print("episode %d: score = %f" % (self.sum_episode, score))
                    # if self.sum_episode % 10 == 0:
                    #     torch.save(self.net.state_dict(), './save/episode' +
                    #                str(self.sum_episode)+'_score'+str(score)+'.pkl')

        self.last_obs = last_obs
        next_value = self.get_value(last_obs).cpu()

        # Bootstrapped n-step returns: start from the value estimate of the
        # state after the rollout and accumulate discounted rewards backwards,
        # zeroing the bootstrap across episode boundaries (dones).
        returns = np.zeros(
            [sample_rewards.shape[0] + 1, sample_rewards.shape[1]])
        returns[-1, :] = next_value
        for i in reversed(range(sample_rewards.shape[0])):
            next_rewards = self.gamma * returns[i + 1, :] * (1 - sample_dones[i, :])
            returns[i, :] = sample_rewards[i, :] + next_rewards
        returns = returns[:-1, :]
        advantages = returns - sample_values
        self.old_net.load_state_dict(self.net.state_dict())
        actions = stack_and_flatten_actions(sample_actions)
        observation = flatten_first_dims_dict(
            stack_ndarray_dicts(sample_observation))
        returns = flatten_first_dims(returns)
        advantages = flatten_first_dims(advantages)
        self.learn(observation, actions, returns, advantages)

    def learn(self, observation, actions, returns, advantages):
        indices = np.arange(returns.shape[0])
        minibatch_size = returns.shape[0] // self.batch_size
        screen = observation['screen']
        flat = observation['flat']
        minimap = observation['minimap']
        a_actions = observation['available_actions']
        args_id = actions[1]
        for _ in range(self.epoch):
            np.random.shuffle(indices)
            for i in range(0, returns.shape[0], minibatch_size):
                j = i + minibatch_size
                batch_indices = indices[i:j]
                batch_screen = screen[batch_indices]
                batch_minimap = minimap[batch_indices]
                batch_flat = flat[batch_indices]
                batch_a_actions = a_actions[batch_indices]
                batch_observation = {'screen': batch_screen,
                                     'minimap': batch_minimap,
                                     'flat': batch_flat,
                                     'available_actions': batch_a_actions}
                batch_advantages = advantages[batch_indices]
                batch_fn_id = actions[0][batch_indices]

                batch_args_id = {k: v[batch_indices] for k, v in args_id.items()}
                batch_actions = (batch_fn_id, batch_args_id)
                batch_returns = returns[batch_indices]

                batch_advantages = torch.FloatTensor(batch_advantages).cuda()
                batch_returns = torch.FloatTensor(batch_returns).cuda()
                # normalize advantages within the minibatch for stable updates
                batch_advantages = (batch_advantages - batch_advantages.mean()) / (batch_advantages.std() + 1e-8)

                policy, batch_value = self.grad_step(batch_observation)
                log_probs = compute_policy_log_probs(
                    batch_observation['available_actions'], policy, batch_actions).squeeze()

                old_policy, _ = self.old_step(batch_observation)
                old_log_probs = compute_policy_log_probs(
                    batch_observation['available_actions'], old_policy, batch_actions).squeeze().detach()
                # PPO clipped surrogate objective
                ratio = torch.exp(log_probs - old_log_probs)
                unclipped = ratio * batch_advantages
                clipped = torch.clamp(ratio, 1 - self.clip, 1 + self.clip) * batch_advantages

                policy_loss = -torch.min(unclipped, clipped).mean()

                value_loss = (batch_returns - batch_value).pow(2).mean()
                entropy_loss = compute_policy_entropy(
                    batch_observation['available_actions'], policy, batch_actions)
                loss = policy_loss + value_loss * self.value_loss_coefficient + \
                    entropy_loss * self.entropy_coefficient
                self.optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.net.parameters(), 1.0)
                self.optimizer.step()
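# The core of learn() above is the PPO clipped surrogate objective; a minimal
# standalone sketch of just that objective (a hypothetical helper, mirroring
# the ratio/clamp/min lines in the class):
import torch

def ppo_clip_loss(log_probs, old_log_probs, advantages, clip=0.2):
    # probability ratio between the current and the old policy
    ratio = torch.exp(log_probs - old_log_probs)
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio, 1 - clip, 1 + clip) * advantages
    # pessimistic (min) bound, negated for gradient descent
    return -torch.min(unclipped, clipped).mean()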
Example #3
# Notebook fragment; train_set, test_set, train_loader, CNN, TripletLoss, LR,
# and EPOCH are assumed to come from earlier cells.
import torch
from torch import nn

print(train_set.classes)
print(train_set.class_to_idx)
print(len(train_set))

print(test_set.classes)
print(test_set.class_to_idx)
print(len(test_set))

# In[5]:

cnn = CNN()
print(cnn)

# In[6]:

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
cross_loss = nn.CrossEntropyLoss()  # the target label is not one-hot encoded
triplet_loss = TripletLoss(0.5)  # the chosen loss function (margin 0.5)
alpha = 0.5

for epoch in range(EPOCH):
    print('EPOCH ' + str(epoch))
    # for step, (b_x, b_y) in enumerate(train_loader):
    for step, (anchor, positive, negative) in enumerate(train_loader):
        #output = cnn(b_x)[0]
        #loss = loss_func(output, b_y)
        anchor_output = cnn(anchor[0])
        positive_output = cnn(positive[0])
        negative_output = cnn(negative[0])

        #print(anchor_output.detach().numpy().shape)
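        # A minimal completion of the truncated training step, assuming
        # TripletLoss(0.5) is callable on (anchor, positive, negative) embeddings:
        loss = triplet_loss(anchor_output, positive_output, negative_output)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()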
Example #4
# Assumed imports; WindowedData, Myloss, CNN, WSIZE, BATCH_SIZE, LR, and EPOCH
# are defined elsewhere in the project.
import torch
from torch.utils import data

train_dataset = WindowedData("/Users/fdhcg/Desktop/clshen/data/test.txt",
                             WSIZE)

train_loader = data.DataLoader(train_dataset,
                               batch_size=BATCH_SIZE,
                               shuffle=True)

test_dataset = WindowedData("/Users/fdhcg/Desktop/clshen/data/1998.txt", WSIZE)

test_loader = data.DataLoader(test_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=False)

cnn = CNN().cuda()
#optimizer
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR, weight_decay=1)

loss_fun = Myloss().cuda()

#training loop
for epoch in range(EPOCH):
    for i, (x, y) in enumerate(train_loader):
        batch_x = x.cuda()  # Variable is deprecated; move the tensors to the GPU directly
        batch_y = y.cuda()
        # feed in the training data
        output = cnn(batch_x)
        # compute the loss
        loss = loss_fun(output, batch_y)
        # clear the gradients from the previous step
        optimizer.zero_grad()
        # backpropagate the error, then update the parameters
        loss.backward()
        optimizer.step()
Example #5
# Assumed imports; get_args, get_loader, matplotlib_imshow, train, eval, and
# CNN are defined elsewhere in the project.
import csv
import datetime
import os
from pathlib import Path
from time import time

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary

def main():
    start_time = time()
    args = get_args()
    if args.checkpoint_dir_name:
        dir_name = args.checkpoint_dir_name
    else:
        dir_name = datetime.datetime.now().strftime('%y%m%d%H%M%S')
    path_to_dir = Path(__file__).resolve().parents[1]
    path_to_dir = os.path.join(path_to_dir, *['log', dir_name])
    os.makedirs(path_to_dir, exist_ok=True)
    # tensorboard
    path_to_tensorboard = os.path.join(path_to_dir, 'tensorboard')
    os.makedirs(path_to_tensorboard, exist_ok=True)
    writer = SummaryWriter(path_to_tensorboard)
    # model saving
    os.makedirs(os.path.join(path_to_dir, 'model'), exist_ok=True)
    path_to_model = os.path.join(path_to_dir, *['model', 'model.tar'])
    # csv
    os.makedirs(os.path.join(path_to_dir, 'csv'), exist_ok=True)
    path_to_results_csv = os.path.join(path_to_dir, *['csv', 'results.csv'])
    path_to_args_csv = os.path.join(path_to_dir, *['csv', 'args.csv'])
    if not args.checkpoint_dir_name:
        with open(path_to_args_csv, 'a') as f:
            args_dict = vars(args)
            param_writer = csv.DictWriter(f, list(args_dict.keys()))
            param_writer.writeheader()
            param_writer.writerow(args_dict)

    # logging using hyperdash
    if not args.no_hyperdash:
        from hyperdash import Experiment
        exp = Experiment('Classification task on CIFAR10 dataset with CNN')
        for key in vars(args).keys():
            setattr(args, key, exp.param(key, getattr(args, key)))  # register each arg with hyperdash
    else:
        exp = None

    path_to_dataset = os.path.join(
        Path(__file__).resolve().parents[2], 'datasets')
    os.makedirs(path_to_dataset, exist_ok=True)
    train_loader, eval_loader, classes = get_loader(
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        path_to_dataset=path_to_dataset)

    # show some of the training images, for fun.
    dataiter = iter(train_loader)
    images, labels = next(dataiter)
    img_grid = torchvision.utils.make_grid(images)
    matplotlib_imshow(img_grid)
    writer.add_image('four_CIFAR10_images', img_grid)

    # define a network, loss function and optimizer
    model = CNN()
    writer.add_graph(model, images)
    model = torch.nn.DataParallel(model)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)
    start_epoch = 0
    # resume training
    if args.checkpoint_dir_name:
        print('\nLoading the model...')
        checkpoint = torch.load(path_to_model)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
    summary(model, input_size=(3, 32, 32))
    model.to(args.device)

    # train the network
    print('\n--------------------')
    print('Start training and evaluating the CNN')
    for epoch in range(start_epoch, args.n_epoch):
        start_time_per_epoch = time()
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, args.device, writer, epoch,
                                      classes)
        eval_loss, eval_acc = eval(eval_loader, model, criterion, args.device)
        elapsed_time_per_epoch = time() - start_time_per_epoch
        result_dict = {
            'epoch': epoch,
            'train_loss': train_loss,
            'eval_loss': eval_loss,
            'train_acc': train_acc,
            'eval_acc': eval_acc,
            'elapsed time': elapsed_time_per_epoch
        }
        with open(path_to_results_csv, 'a') as f:
            result_writer = csv.DictWriter(f, list(result_dict.keys()))
            if epoch == 0: result_writer.writeheader()
            result_writer.writerow(result_dict)
        # checkpoint
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, path_to_model)
        if exp:
            exp.metric('train loss', train_loss)
            exp.metric('eval loss', eval_loss)
            exp.metric('train acc', train_acc)
            exp.metric('eval acc', eval_acc)
        else:
            print(result_dict)

        writer.add_scalar('loss/train_loss', train_loss,
                          epoch * len(train_loader))
        writer.add_scalar('loss/eval_loss', eval_loss,
                          epoch * len(eval_loader))
        writer.add_scalar('acc/train_acc', train_acc,
                          epoch * len(train_loader))
        writer.add_scalar('acc/eval_acc', eval_acc, epoch * len(eval_loader))

    elapsed_time = time() - start_time
    print('\nFinished Training, elapsed time ===> %f' % elapsed_time)
    if exp:
        exp.end()
    writer.close()
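# Assumed entry point, as in the other examples:
if __name__ == '__main__':
    main()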
Example #6
# Assumed imports, as in Example #2; Preprocessor, CNN, the compute_* helpers,
# is_spatial_action, and the flatten/stack helpers are project code defined elsewhere.
import numpy as np
import torch
import torch.optim as optim
from torch.distributions import Categorical
from pysc2.lib.actions import FUNCTIONS, FunctionCall, TYPES as ACTION_TYPES

class A2C():
    def __init__(self, envs, args):
        self.value_loss_coefficient = args.value_loss_weight
        self.entropy_coefficient = args.entropy_weight
        self.learning_rate = args.lr
        self.envs = envs

        self.map = args.map
        self.env_num = args.envs
        self.save = args.save_eposides
        self.save_dir = args.save_dir
        self.processor = Preprocessor(self.envs.observation_spec()[0],
                                      self.map, args.process_screen)
        self.sum_score = 0
        self.n_steps = 8
        self.gamma = 0.999
        self.sum_episode = 0
        self.total_updates = -1

        if args.process_screen:
            self.net = CNN(348, 1985).cuda()
        else:
            self.net = CNN().cuda()
        self.optimizer = optim.Adam(self.net.parameters(),
                                    self.learning_rate,
                                    weight_decay=0.01)

    def reset(self):
        self.obs_start = self.envs.reset()
        self.last_obs = self.processor.preprocess_obs(self.obs_start)

    def grad_step(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        policy, value = self.net(screen, minimap, flat)
        return policy, value

    def step(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        with torch.no_grad():
            policy, value = self.net(screen, minimap, flat)
        return policy, value

    def select_actions(self, policy, last_obs):
        available_actions = last_obs['available_actions']

        def sample(prob):
            actions = Categorical(prob).sample()
            return actions

        function_pi, args_pi = policy
        available_actions = torch.FloatTensor(available_actions)
        function_pi = available_actions * function_pi.cpu()
        function_pi /= torch.sum(function_pi, dim=1, keepdim=True)
        try:
            function_sample = sample(function_pi)
        except Exception:  # sampling can fail if no action has positive probability
            return 0
        args_sample = dict()
        for arg_type, pi in args_pi.items():  # 'queued' args are forced to 0 (not queued)
            if arg_type.name == 'queued':
                args_sample[arg_type] = torch.zeros((self.env_num, ), dtype=int)
            else:
                args_sample[arg_type] = sample(pi).cpu()
        return function_sample, args_sample

    def determined_actions(self, policy, last_obs):
        available_actions = last_obs['available_actions']

        def sample(prob):
            actions = torch.argmax(prob, dim=1)
            return actions

        function_pi, args_pi = policy
        available_actions = torch.FloatTensor(available_actions)
        function_pi = available_actions * function_pi.cpu()
        function_pi /= torch.sum(function_pi, dim=1, keepdim=True)
        try:
            function_sample = sample(function_pi)
        except Exception:  # mirrors select_actions; bail out if the masked policy is degenerate
            return 0
        args_sample = dict()
        for arg_type, pi in args_pi.items():
            if arg_type.name == 'queued':
                args_sample[arg_type] = torch.zeros((self.env_num, ), dtype=int)
            else:
                args_sample[arg_type] = sample(pi).cpu()
        return function_sample, args_sample

    def mask_unused_action(self, actions):
        fn_id, arg_ids = actions
        for n in range(fn_id.shape[0]):
            a_0 = fn_id[n]
            unused_types = set(ACTION_TYPES) - \
                set(FUNCTIONS._func_list[a_0].args)
            for arg_type in unused_types:
                arg_ids[arg_type][n] = -1
        return (fn_id, arg_ids)

    def functioncall_action(self, actions, size):
        height, width = size
        fn_id, arg_ids = actions
        fn_id = fn_id.numpy().tolist()
        actions_list = []
        for n in range(len(fn_id)):
            a_0 = fn_id[n]
            a_l = []
            for arg_type in FUNCTIONS._func_list[a_0].args:
                arg_id = arg_ids[arg_type][n].detach().numpy().squeeze().tolist()
                if is_spatial_action[arg_type]:
                    arg = [arg_id % width, arg_id // height]
                else:
                    arg = [arg_id]
                a_l.append(arg)
            action = FunctionCall(a_0, a_l)

            actions_list.append(action)
        return actions_list

    def get_value(self, observation):
        screen = torch.FloatTensor(observation['screen']).cuda()
        minimap = torch.FloatTensor(observation['minimap']).cuda()
        flat = torch.FloatTensor(observation['flat']).cuda()
        with torch.no_grad():
            _, value = self.net(screen, minimap, flat)
        return value

    def train(self):
        obs_raw = self.obs_start
        shape = (self.n_steps, self.envs.n_envs)
        sample_values = np.zeros(shape, dtype=np.float32)
        sample_observation = []
        sample_rewards = np.zeros(shape, dtype=np.float32)
        sample_actions = []
        sample_dones = np.zeros(shape, dtype=np.float32)
        scores = []
        last_obs = self.last_obs
        for step in range(self.n_steps):
            policy, value = self.step(last_obs)

            actions = self.select_actions(policy, last_obs)
            if actions == 0:
                self.sum_episode = 7
                self.sum_score = 0
                return
            actions = self.mask_unused_action(actions)

            size = last_obs['screen'].shape[2:4]
            sample_values[step, :] = value.cpu()
            sample_observation.append(last_obs)
            sample_actions.append(actions)
            pysc2_action = self.functioncall_action(actions, size)

            obs_raw = self.envs.step(pysc2_action)
            # print("0:",pysc2_action[0].function)
            # print("1:",pysc2_action[1].function)

            last_obs = self.processor.preprocess_obs(obs_raw)
            sample_rewards[step, :] = [
                1 if i.reward else -0.1 for i in obs_raw
            ]
            sample_dones[step, :] = [i.last() for i in obs_raw]

            for i in obs_raw:
                if i.last():
                    score = i.observation['score_cumulative'][0]
                    self.sum_score += score
                    self.sum_episode += 1
                    print("episode %d: score = %f" % (self.sum_episode, score))
                    if self.sum_episode % self.save == 0:
                        torch.save(
                            self.net.state_dict(),
                            self.save_dir + '/' + str(self.sum_episode) +
                            '_score' + str(score) + '.pkl')

        self.last_obs = last_obs
        next_value = self.get_value(last_obs).cpu()

        # Bootstrapped n-step returns, accumulated backwards from the value
        # estimate of the state after the rollout; dones cut the bootstrap
        # at episode boundaries.
        returns = np.zeros(
            [sample_rewards.shape[0] + 1, sample_rewards.shape[1]])
        returns[-1, :] = next_value
        for i in reversed(range(sample_rewards.shape[0])):
            next_rewards = self.gamma * returns[i + 1, :] * (
                1 - sample_dones[i, :])
            returns[i, :] = sample_rewards[i, :] + next_rewards
        returns = returns[:-1, :]
        advantages = returns - sample_values
        actions = stack_and_flatten_actions(sample_actions)
        observation = flatten_first_dims_dict(
            stack_ndarray_dicts(sample_observation))
        returns = flatten_first_dims(returns)
        advantages = flatten_first_dims(advantages)
        self.learn(observation, actions, returns, advantages)

    def learn(self, observation, actions, returns, advantages):
        advantages = torch.FloatTensor(advantages).cuda()
        returns = torch.FloatTensor(returns).cuda()
        policy, value = self.grad_step(observation)
        log_probs = compute_policy_log_probs(observation['available_actions'],
                                             policy, actions).squeeze()

        policy_loss = -(log_probs * advantages).mean()
        value_loss = (returns - value).pow(2).mean()
        entropy_loss = compute_policy_entropy(observation['available_actions'],
                                              policy, actions)
        loss = policy_loss + value_loss * self.value_loss_coefficient + \
            entropy_loss * self.entropy_coefficient
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.net.parameters(), 1.0)
        self.optimizer.step()
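# The bootstrapped-return computation shared by train() here and in Example #2,
# as a standalone sketch (a hypothetical helper, not part of the original code):
import numpy as np

def n_step_returns(rewards, dones, next_value, gamma=0.999):
    # rewards, dones: (n_steps, n_envs) arrays; next_value: (n_envs,) bootstrap
    returns = np.zeros((rewards.shape[0] + 1, rewards.shape[1]), dtype=np.float32)
    returns[-1] = next_value
    for t in reversed(range(rewards.shape[0])):
        # discounted bootstrap, cut off where an episode ended
        returns[t] = rewards[t] + gamma * returns[t + 1] * (1.0 - dones[t])
    return returns[:-1]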
Example #7
# Assumed imports; CNN is defined elsewhere in the project.
import argparse

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score

def main():
    parser = argparse.ArgumentParser(description='pytorch example: MNIST')
    parser.add_argument('--batch', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--display', '-d', type=int, default=100,
                        help='Number of interval to show progress')
    args = parser.parse_args()

    batch_size = args.batch
    epoch_size = args.epoch
    display_interval = args.display

    transform = transforms.Compose(
        [transforms.ToTensor(),  # convert images to torch.Tensor
         transforms.Normalize(mean=(0.5,), std=(0.5,))])

    trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                          download=True, transform=transform)
    # trainset...
    # <class 'object'>
    #     <class 'torch.utils.data.dataset.Dataset'>
    #         <class 'torchvision.datasets.mnist.MNIST'>
    
    # trainset[0][0]...
    # <class 'object'>
    #     <class 'torch._C._TensorBase'>
    #         <class 'torch.Tensor'> torch.Size([1, 28, 28])

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=2)

    testset = torchvision.datasets.MNIST(root='./data', train=False,
                                         download=True, transform=transform)

    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=2)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('device:', device)

    net = CNN()
    print(net)
    print()

    net.to(device)  # for GPU

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    epoch_list = []
    train_acc_list = []
    test_acc_list = []
    for epoch in range(epoch_size):  # loop over the dataset multiple times

        running_loss = 0.0
        train_true = []
        train_pred = []
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data
            
            # inputs...
            # <class 'object'>
            #     <class 'torch._C._TensorBase'>
            #         <class 'torch.Tensor'> torch.Size([100, 1, 28, 28])
            
            train_true.extend(labels.tolist())

            inputs, labels = inputs.to(device), labels.to(device)   # for GPU

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs.data, 1)
            train_pred.extend(predicted.tolist())

            # print statistics
            running_loss += loss.item()
            if i % display_interval == display_interval - 1:    # print every display_interval mini-batches
                print('[epochs: {}, mini-batches: {}, images: {}] loss: {:.3f}'.format(
                    epoch + 1, i + 1, (i + 1) * batch_size, running_loss / display_interval))
                running_loss = 0.0

        test_true = []
        test_pred = []
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                test_true.extend(labels.tolist())
                images, labels = images.to(device), labels.to(device)  # for GPU

                outputs = net(images)
                _, predicted = torch.max(outputs.data, 1)            
                test_pred.extend(predicted.tolist())

        train_acc = accuracy_score(train_true, train_pred)
        test_acc = accuracy_score(test_true, test_pred)
        print('    epochs: {}, train acc.: {:.3f}, test acc.: {:.3f}'.format(epoch + 1, train_acc, test_acc))
        print()
        
        epoch_list.append(epoch + 1)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

    print('Finished Training')

    print('Save Network')
    torch.save(net.state_dict(), 'model.pth')

    df = pd.DataFrame({'epoch': epoch_list,
                       'train/accuracy': train_acc_list,
                       'test/accuracy': test_acc_list})

    print('Save Training Log')
    df.to_csv('train.log', index=False)
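# Assumed entry point for this example; the weights saved above can later be
# restored with net.load_state_dict(torch.load('model.pth')).
if __name__ == '__main__':
    main()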