Code Example #1
def prepareModel(path):
    '''
    Loads the model and prepares it for inference
    :param path: path to the pytorch state dict file
    :return: model
    '''
    net = Net()
    net.load_state_dict(torch.load(path))
    net.eval()
    return net
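
A minimal usage sketch for the helper above, assuming a Net class with a no-argument constructor and an existing state-dict file; the path and input shape below are placeholders, and inference runs under torch.no_grad():

import torch

model = prepareModel("model.pt")      # placeholder path to a saved state dict
example = torch.rand(1, 3, 32, 32)    # placeholder shape; depends on the actual Net
with torch.no_grad():                 # inference only, no gradient tracking
    prediction = model(example)
print(prediction.shape)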
Code Example #2
def run():
    example = torch.rand(1, 9, 17, 17)

    for maindir, subdir, file_name_list in os.walk("model"):
        for file in file_name_list:
            ext = file.split(".")[-1]
            if ext == "pt":
                d = int(file.split("_")[1])
                w = int(file.split("_")[2])

                model_black = Net(d, w, 9)
                model_black.load_state_dict(torch.load("model/" + file))
                model_black.eval()
                traced = torch.jit.trace(model_black, example)
                print(traced.code)
                traced.save("model/" + file + "s")
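
For reference, a module saved this way (a TorchScript archive with a ".pts" suffix) is normally loaded back with torch.jit.load rather than load_state_dict. A small sketch, with the file name assumed for illustration:

import torch

# Load the TorchScript archive written by traced.save(...) above.
loaded = torch.jit.load("model/model_5_64_.pts")  # hypothetical file name
loaded.eval()
with torch.no_grad():
    out = loaded(torch.rand(1, 9, 17, 17))  # same example shape as in run()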
Code Example #3
def reduced_ann_net(old_net, unit_in, unit_remove, new_hidden_num):
    old_net.hidden.weight[unit_in] += old_net.hidden.weight[unit_remove]
    old_net.hidden.bias[unit_in] += old_net.hidden.bias[unit_remove]

    # Slice the remaining weight and bias values into a new, smaller network.
    new_net = Net(11, new_hidden_num, 3)
    new_net.hidden.weight[:unit_remove] = old_net.hidden.weight[:unit_remove]
    new_net.hidden.weight[unit_remove:] = old_net.hidden.weight[unit_remove +
                                                                1:]

    new_net.hidden.bias[:unit_remove] = old_net.hidden.bias[0:unit_remove]
    new_net.hidden.bias[unit_remove:] = old_net.hidden.bias[unit_remove + 1:]

    new_net.output.weight[:, :unit_remove] = old_net.output.weight[:, 0:
                                                                   unit_remove]
    new_net.output.weight[:,
                          unit_remove:] = old_net.output.weight[:,
                                                                unit_remove +
                                                                1:]

    new_net.output.bias[:] = old_net.output.bias[:]
    new_net.eval()

    return new_net
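
One caveat: the in-place assignments to .weight and .bias above generally need autograd disabled, otherwise PyTorch rejects in-place operations on leaf tensors that require grad. A hedged usage sketch, with made-up unit indices and a Net sized like the ones used elsewhere in these examples:

import torch

old_net = Net(11, 30, 3)   # 11 features, 30 hidden units, 3 outputs (assumed sizes)
old_net.eval()
with torch.no_grad():      # required for the in-place weight/bias edits
    smaller_net = reduced_ann_net(old_net, unit_in=2, unit_remove=16, new_hidden_num=29)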
Code Example #4
# print(net)

if os.path.isfile('model.pt'):
    net.load_state_dict(torch.load('model.pt'))

x = np.arange(-math.pi + 0.1, math.pi - 0.05, math.pi / 50).tolist()
y = [fun(i) for i in x]

dataset = (convert_arr(x), convert_arr(y))

# print(dataset)

train(model=net, dataset=dataset, epochs=500, lr=1e-3, device=device)

print(len(x), len(y))
plt.plot(x, y, label='Original')
plt.xlabel('X')
plt.ylabel('Y')

predicted = []

# net.to(torch.device("cpu"))
net.eval()
for i in x:
    tmp = Tensor([i]).to(device)
    pred = net(tmp).tolist()
    predicted.append(pred)

plt.plot(x, predicted, label='Predicted')
plt.legend(shadow=False)
plt.show()
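
As a side note, the per-point loop above can be replaced by one batched forward pass under torch.no_grad(), assuming the net accepts an (N, 1) input batch; a small sketch reusing the same net, x, and device:

import torch

net.eval()
with torch.no_grad():
    batch = torch.tensor(x, dtype=torch.float32).unsqueeze(1).to(device)  # shape (N, 1)
    predicted = net(batch).squeeze(1).cpu().tolist()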
Code Example #5
def search_algo(args):
    # initialize random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.set_num_threads(1)

    # initialize/load
    task_class = getattr(tasks, args.task)
    if args.no_noise:
        task = task_class(force_std=0.0, torque_std=0.0)
    else:
        task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # initialize preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))

    # TODO: use 80 to fit the input of trained MPC GNN, use args.depth * 3 later for real mpc
    max_nodes = args.depth * 3

    global preprocessor
    # preprocessor = Preprocessor(max_nodes = max_nodes, all_labels = all_labels)
    preprocessor = Preprocessor(all_labels=all_labels)

    # initialize the env
    env = RobotGrammarEnv(task,
                          rules,
                          seed=args.seed,
                          mpc_num_processes=args.mpc_num_processes)

    # initialize Value function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = preprocessor.preprocess(
        state)
    num_features = sample_features.shape[1]
    V = Net(max_nodes=max_nodes, num_channels=num_features,
            num_outputs=1).to(device)

    # load pretrained V function
    if args.load_V_path is not None:
        V.load_state_dict(torch.load(args.load_V_path))
        print_info('Loaded pretrained V function from {}'.format(
            args.load_V_path))

    # initialize target V_hat look up table
    V_hat = dict()

    # load pretrained V_hat
    if args.load_Vhat_path is not None:
        V_hat_fp = open(args.load_Vhat_path, 'rb')
        V_hat = pickle.load(V_hat_fp)
        V_hat_fp.close()
        print_info('Loaded pretrained Vhat from {}'.format(
            args.load_Vhat_path))

    # initialize invalid_his
    invalid_his = dict()
    num_invalid_samples, num_valid_samples = 0, 0
    repeated_cnt = 0

    # initialize the seen states pool
    states_pool = StatesPool(capacity=args.states_pool_capacity)
    states_set = set()

    # explored designs
    designs = []
    design_rewards = []
    design_opt_seeds = []

    # record prediction error
    prediction_error_sum = 0.0

    if not args.test:
        # initialize save folders and files
        fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'w')
        fp_log.close()
        fp_eval = open(os.path.join(args.save_dir, 'eval.txt'), 'w')
        fp_eval.close()
        design_csv_path = os.path.join(args.save_dir, 'designs.csv')
        fp_csv = open(design_csv_path, 'w')
        fieldnames = ['rule_seq', 'reward', 'opt_seed']
        writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
        writer.writeheader()
        fp_csv.close()

        # initialize the optimizer
        global optimizer
        optimizer = torch.optim.Adam(V.parameters(), lr=args.lr)

        # initialize best design rule sequence
        best_design, best_reward = None, -np.inf

        # reward history
        epoch_rew_his = []
        last_checkpoint = -1

        # recording time
        t_sample_sum = 0.

        # record the count for invalid samples
        no_action_samples, step_exceeded_samples, self_collision_samples = 0, 0, 0

        for epoch in range(args.num_iterations):
            t_start = time.time()

            V.eval()

            # update eps and eps_sample
            if args.eps_schedule == 'linear-decay':
                eps = args.eps_start + epoch / args.num_iterations * (
                    args.eps_end - args.eps_start)
            elif args.eps_schedule == 'exp-decay':
                eps = args.eps_end + (args.eps_start - args.eps_end) * np.exp(
                    -1.0 * epoch / args.num_iterations / args.eps_decay)

            if args.eps_sample_schedule == 'linear-decay':
                eps_sample = args.eps_sample_start + epoch / args.num_iterations * (
                    args.eps_sample_end - args.eps_sample_start)
            elif args.eps_sample_schedule == 'exp-decay':
                eps_sample = args.eps_sample_end + (
                    args.eps_sample_start - args.eps_sample_end) * np.exp(
                        -1.0 * epoch / args.num_iterations /
                        args.eps_sample_decay)

            t_sample, t_update, t_mpc, t_opt = 0, 0, 0, 0

            selected_design, selected_reward = None, -np.inf
            selected_state_seq, selected_rule_seq = None, None

            p = random.random()
            if p < eps_sample:
                num_samples = 1
            else:
                num_samples = args.num_samples

            # use e-greedy to sample a design within maximum #steps.
            for _ in range(num_samples):
                valid = False
                while not valid:
                    t0 = time.time()

                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    no_action_flag = False
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            no_action_flag = True
                            break
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        state = next_state
                        if not has_nonterminals(state):
                            break

                    valid = env.is_valid(state)

                    t_sample += time.time() - t0

                    t0 = time.time()

                    if not valid:
                        # update the invalid sample's count
                        if no_action_flag:
                            no_action_samples += 1
                        elif has_nonterminals(state):
                            step_exceeded_samples += 1
                        else:
                            self_collision_samples += 1

                        # update the Vhat for invalid designs
                        update_Vhat(args,
                                    V_hat,
                                    state_seq,
                                    -2.0,
                                    invalid=True,
                                    invalid_cnt=invalid_his)
                        # update states pool
                        update_states_pool(states_pool, state_seq, states_set,
                                           V_hat)
                        num_invalid_samples += 1
                    else:
                        num_valid_samples += 1

                    t_update += time.time() - t0

                predicted_value = predict(V, state)
                if predicted_value > selected_reward:
                    selected_design, selected_reward = state, predicted_value
                    selected_rule_seq, selected_state_seq = rule_seq, state_seq

            t0 = time.time()

            repeated = False
            if (hash(selected_design)
                    in V_hat) and (V_hat[hash(selected_design)] > -2.0 + 1e-3):
                repeated = True
                repeated_cnt += 1

            reward, best_seed = -np.inf, None

            for _ in range(args.num_eval):
                _, rew = env.get_reward(selected_design)
                if rew > reward:
                    reward, best_seed = rew, env.last_opt_seed

            t_mpc += time.time() - t0

            # save the design and the reward in the list
            designs.append(selected_rule_seq)
            design_rewards.append(reward)
            design_opt_seeds.append(best_seed)

            # update best design
            if reward > best_reward:
                best_design, best_reward = selected_rule_seq, reward
                print_info(
                    'new best: reward = {:.4f}, predicted reward = {:.4f}, num_samples = {}'
                    .format(reward, selected_reward, num_samples))

            t0 = time.time()

            # update V_hat for the valid design
            update_Vhat(args, V_hat, selected_state_seq, reward)

            # update states pool for the valid design
            update_states_pool(states_pool, selected_state_seq, states_set,
                               V_hat)

            t_update += time.time() - t0

            t0 = time.time()

            # optimize
            V.train()
            total_loss = 0.0
            for _ in range(args.opt_iter):
                minibatch = states_pool.sample(
                    min(len(states_pool), args.batch_size))

                train_adj_matrix, train_features, train_masks, train_reward = [], [], [], []
                max_nodes = 0
                for robot_graph in minibatch:
                    hash_key = hash(robot_graph)
                    target_reward = V_hat[hash_key]
                    # adj_matrix, features, masks = preprocessor.preprocess(robot_graph)
                    adj_matrix, features, _ = preprocessor.preprocess(
                        robot_graph)
                    max_nodes = max(max_nodes, len(features))
                    train_adj_matrix.append(adj_matrix)
                    train_features.append(features)
                    # train_masks.append(masks)
                    train_reward.append(target_reward)
                for i in range(len(minibatch)):
                    train_adj_matrix[i], train_features[i], masks = \
                        preprocessor.pad_graph(train_adj_matrix[i], train_features[i], max_nodes)
                    train_masks.append(masks)

                train_adj_matrix_torch = torch.tensor(train_adj_matrix)
                train_features_torch = torch.tensor(train_features)
                train_masks_torch = torch.tensor(train_masks)
                train_reward_torch = torch.tensor(train_reward)

                optimizer.zero_grad()
                output, loss_link, loss_entropy = V(train_features_torch,
                                                    train_adj_matrix_torch,
                                                    train_masks_torch)
                loss = F.mse_loss(output[:, 0], train_reward_torch)
                loss.backward()
                total_loss += loss.item()
                optimizer.step()

            t_opt += time.time() - t0

            t_end = time.time()

            t_sample_sum += t_sample

            # logging
            if (epoch + 1
                ) % args.log_interval == 0 or epoch + 1 == args.num_iterations:
                iter_save_dir = os.path.join(args.save_dir,
                                             '{}'.format(epoch + 1))
                os.makedirs(os.path.join(iter_save_dir), exist_ok=True)
                # save model
                save_path = os.path.join(iter_save_dir, 'V_model.pt')
                torch.save(V.state_dict(), save_path)
                # save V_hat
                save_path = os.path.join(iter_save_dir, 'V_hat')
                fp = open(save_path, 'wb')
                pickle.dump(V_hat, fp)
                fp.close()

            # save explored design and its reward
            fp_csv = open(design_csv_path, 'a')
            fieldnames = ['rule_seq', 'reward', 'opt_seed']
            writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
            for i in range(last_checkpoint + 1, len(designs)):
                writer.writerow({
                    'rule_seq': str(designs[i]),
                    'reward': design_rewards[i],
                    'opt_seed': design_opt_seeds[i]
                })
            last_checkpoint = len(designs) - 1
            fp_csv.close()

            epoch_rew_his.append(reward)

            avg_loss = total_loss / args.opt_iter
            len_his = min(len(epoch_rew_his), 30)
            avg_reward = np.sum(epoch_rew_his[-len_his:]) / len_his
            prediction_error_sum += (selected_reward - reward)**2
            avg_prediction_error = prediction_error_sum / (epoch + 1)

            if repeated:
                print_white('Epoch {:4}: T_sample = {:5.2f}, T_update = {:5.2f}, T_mpc = {:5.2f}, T_opt = {:5.2f}, eps = {:5.3f}, eps_sample = {:5.3f}, #samples = {:2}, training loss = {:7.4f}, pred_error = {:6.4f}, predicted_reward = {:6.4f}, reward = {:6.4f}, last 30 epoch reward = {:6.4f}, best reward = {:6.4f}'.format(\
                    epoch, t_sample, t_update, t_mpc, t_opt, eps, eps_sample, num_samples, \
                    avg_loss, avg_prediction_error, selected_reward, reward, avg_reward, best_reward))
            else:
                print_warning('Epoch {:4}: T_sample = {:5.2f}, T_update = {:5.2f}, T_mpc = {:5.2f}, T_opt = {:5.2f}, eps = {:5.3f}, eps_sample = {:5.3f}, #samples = {:2}, training loss = {:7.4f}, pred_error = {:6.4f}, predicted_reward = {:6.4f}, reward = {:6.4f}, last 30 epoch reward = {:6.4f}, best reward = {:6.4f}'.format(\
                    epoch, t_sample, t_update, t_mpc, t_opt, eps, eps_sample, num_samples, \
                    avg_loss, avg_prediction_error, selected_reward, reward, avg_reward, best_reward))

            fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'a')
            fp_log.write('eps = {:.4f}, eps_sample = {:.4f}, num_samples = {}, T_sample = {:4f}, T_update = {:4f}, T_mpc = {:.4f}, T_opt = {:.4f}, loss = {:.4f}, predicted_reward = {:.4f}, reward = {:.4f}, avg_reward = {:.4f}\n'.format(\
                eps, eps_sample, num_samples, t_sample, t_update, t_mpc, t_opt, avg_loss, selected_reward, reward, avg_reward))
            fp_log.close()

            if (epoch + 1) % args.log_interval == 0:
                print_info(
                    'Avg sampling time for last {} epoch: {:.4f} second'.
                    format(args.log_interval,
                           t_sample_sum / args.log_interval))
                t_sample_sum = 0.
                print_info('size of states_pool = {}'.format(len(states_pool)))
                print_info(
                    '#valid samples = {}, #invalid samples = {}, #valid / #invalid = {}'
                    .format(
                        num_valid_samples, num_invalid_samples,
                        num_valid_samples / num_invalid_samples
                        if num_invalid_samples > 0 else 10000.0))
                print_info(
                    'Invalid samples: #no_action_samples = {}, #step_exceeded_samples = {}, #self_collision_samples = {}'
                    .format(no_action_samples, step_exceeded_samples,
                            self_collision_samples))
                max_trials, cnt = 0, 0
                for key in invalid_his.keys():
                    if invalid_his[key] > max_trials:
                        if key not in V_hat:
                            max_trials = invalid_his[key]
                        elif V_hat[key] < -2.0 + 1e-3:
                            max_trials = invalid_his[key]
                    if invalid_his[key] >= args.max_trials:
                        if V_hat[key] < -2.0 + 1e-3:
                            cnt += 1

                print_info(
                    'max invalid_trials = {}, #failed nodes = {}'.format(
                        max_trials, cnt))
                print_info('repeated rate = {}'.format(repeated_cnt /
                                                       (epoch + 1)))

        save_path = os.path.join(args.save_dir, 'model_state_dict_final.pt')
        torch.save(V.state_dict(), save_path)
    else:
        import IPython
        IPython.embed()

        # test
        V.eval()
        print('Start testing')
        test_epoch = 30
        y0 = []
        y1 = []
        x = []
        for ii in range(0, 11):
            eps = 1.0 - 0.1 * ii

            print('------------------------------------------')
            print('eps = ', eps)

            reward_sum = 0.
            best_reward = -np.inf
            for epoch in range(test_epoch):
                t0 = time.time()

                # use e-greedy to sample a design within maximum #steps.
                valid = False
                while not valid:
                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            break
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        if not has_nonterminals(next_state):
                            valid = True
                            break
                        state = next_state

                _, reward = env.get_reward(state)
                reward_sum += reward
                best_reward = max(best_reward, reward)
                print(
                    f'design {epoch}: reward = {reward}, time = {time.time() - t0}'
                )

            print('test avg reward = ', reward_sum / test_epoch)
            print('best reward found = ', best_reward)
            x.append(eps)
            y0.append(reward_sum / test_epoch)
            y1.append(best_reward)

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        ax[0].plot(x, y0)
        ax[0].set_title('Avg Reward')
        ax[0].set_xlabel('eps')
        ax[0].set_ylabel('reward')

        ax[1].plot(x, y1)
        ax[1].set_title('Best Reward')
        ax[1].set_xlabel('eps')
        ax[1].set_ylabel('reward')

        plt.show()
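
The two epsilon schedules used in the training loop above can also be written as small standalone helpers; this sketch simply mirrors the formulas in the code, with parameters named after the corresponding args fields:

import numpy as np

def linear_decay(epoch, num_iterations, eps_start, eps_end):
    # Interpolates from eps_start at epoch 0 toward eps_end at the final epoch.
    return eps_start + epoch / num_iterations * (eps_end - eps_start)

def exp_decay(epoch, num_iterations, eps_start, eps_end, eps_decay):
    # Decays exponentially from eps_start toward eps_end.
    return eps_end + (eps_start - eps_end) * np.exp(
        -1.0 * epoch / num_iterations / eps_decay)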
Code Example #6
    gas_eval_y = data['gas_eval_y']
    gas_eval_global_prob = data['gas_eval_global_prob']
else:
    import torch
    import torch.nn as nn
    from torch.optim import *
    from torch.optim.lr_scheduler import *
    from torch.autograd import Variable
    from Net import Net
    from util_in import *

    # Load model
    args.kernel_size = tuple(int(x) for x in args.kernel_size.split('x'))
    model = Net(args).cuda()
    model.load_state_dict(torch.load(MODEL_FILE)['model'])
    model.eval()

    # Load DCASE data
    dcase_valid_x, dcase_valid_y, _ = bulk_load('DCASE_valid')
    dcase_test_x, dcase_test_y, dcase_test_hashes = bulk_load('DCASE_test')
    dcase_test_frame_y = load_dcase_test_frame_truth()
    DCASE_CLASS_IDS = [
        318, 324, 341, 321, 307, 310, 314, 397, 325, 326, 323, 319, 14, 342,
        329, 331, 316
    ]

    # Predict on DCASE data
    dcase_valid_global_prob = model.predict(dcase_valid_x,
                                            verbose=False)[:, DCASE_CLASS_IDS]
    dcase_thres = optimize_micro_avg_f1(dcase_valid_global_prob, dcase_valid_y)
    dcase_test_outputs = model.predict(dcase_test_x, verbose=True)
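
A hedged sketch of the downstream step implied here: apply the per-class thresholds tuned on the validation set to the test probabilities and score with micro-averaged F1 via scikit-learn. It assumes dcase_thres holds one threshold per DCASE class and that dcase_test_y is a binary label matrix:

import numpy as np
from sklearn.metrics import f1_score

# Binarize the test probabilities with the tuned thresholds, then score.
dcase_test_global_prob = dcase_test_outputs[:, DCASE_CLASS_IDS]
pred = (dcase_test_global_prob >= np.asarray(dcase_thres)).astype(int)
micro_f1 = f1_score(dcase_test_y, pred, average='micro')
print('DCASE test micro-F1 = {:.4f}'.format(micro_f1))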
Code Example #7
idx2Wd = {idx: wd for idx, wd in enumerate(vocab)}

#读取测试集
test_data_path = root_path + '\\Data\\qtest7'
testline, testvec = get_test_data(test_data_path, wd2Idx, sentence_len)
testbatch = get_test_batch(testvec, BATCH_SIZE, 0)  # Get one batch of test data

#读取网络
net7 = Net(sentence_len=sentence_len,
           batch_size=BATCH_SIZE,
           vocab_size=vocab_size,
           embed_size=embed_size,
           hidden_size=hidden_size)
net_path = root_path + '\\Models\\rnn\\rnn7_epoch_1.pth'
net7.load_state_dict(torch.load(net_path))
net7.eval()  # Put the network in evaluation mode (not training)
testbatch = np.array(testbatch)
print(testbatch.shape)

output = net7(testbatch, False)  # Output for one test batch

output = torch.reshape(output, (BATCH_SIZE * 3 * sentence_len,
                                vocab_size))  # Each input yields a probability distribution for 3 (sentences) * sentence_len characters

wordidx = torch.argmax(output, dim=1)  # Take the most probable character at each position

#输出测试集的结果
with open("out7.txt", "w", encoding="utf-8") as f:
    for i in range(BATCH_SIZE):
        f.write('\n')
        f.write('\n')
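
The output loop above only writes blank lines; a hedged sketch of how the predicted indices could be mapped back to characters with idx2Wd and written out, assuming wordidx holds BATCH_SIZE * 3 * sentence_len entries laid out sample by sample (the output file name is made up):

chars_per_sample = 3 * sentence_len
with open("out7_decoded.txt", "w", encoding="utf-8") as f:
    for i in range(BATCH_SIZE):
        idxs = wordidx[i * chars_per_sample:(i + 1) * chars_per_sample].tolist()
        f.write(''.join(idx2Wd[idx] for idx in idxs) + '\n')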
Code Example #8
idx2Wd = {idx: wd for idx, wd in enumerate(vocab)}

#读取测试集
test_data_path = root_path + '\\Data\\qtest5'
testline, testvec = get_test_data(test_data_path, wd2Idx, 5)
testbatch = get_test_batch(testvec, BATCH_SIZE, 0)  # Get one batch of test data

#读取网络
net5 = Net(sentence_len=sentence_len,
           batch_size=BATCH_SIZE,
           vocab_size=vocab_size,
           embed_size=embed_size,
           hidden_size=hidden_size)
net_path = root_path + '\\Models\\rnn\\rnn5_epoch_4.pth'
net5.load_state_dict(torch.load(net_path))
net5.eval()  # Put the network in evaluation mode (not training)
testbatch = np.array(testbatch)
print(testbatch.shape)

output = net5(testbatch, False)  # Output for one test batch

output = torch.reshape(output, (BATCH_SIZE * 3 * sentence_len,
                                vocab_size))  # Each input yields a probability distribution for 3 (sentences) * sentence_len characters

wordidx = torch.argmax(output, dim=1)  # Take the most probable character at each position

#输出测试集的结果
with open("out5.txt", "w", encoding="utf-8") as f:
    for i in range(BATCH_SIZE):
        f.write('\n')
        f.write('\n')
Code Example #9
class TrainModel:
    def __init__(self):
        torch.cuda.empty_cache()
        self.learningRate = LEARNING_RATE
        db = ImageClassifierDataset()
        db.loadData()
        train_set, test_set = db.splitDataSet()
        self.trainSetSize = len(train_set)
        self.testSetSize = len(test_set)

        print(f"Train set: {self.trainSetSize} Test set: {self.testSetSize}")

        self.train_loader = DataLoader(train_set,
                                       batch_size=BATCH_SIZE,
                                       shuffle=True,
                                       num_workers=4,
                                       pin_memory=True)

        self.test_loader = DataLoader(test_set,
                                      batch_size=BATCH_SIZE,
                                      shuffle=False,
                                      num_workers=4,
                                      pin_memory=True)

        self.cuda_avail = torch.cuda.is_available()

        self.model = Net()

        print(f"Cuda: {self.cuda_avail}")
        if self.cuda_avail:
            self.model.cuda()

        self.optimizer = Adam(self.model.parameters(),
                              lr=LEARNING_RATE)  #, weight_decay=WEIGHT_DECAY)
        self.loss_fn = nn.BCELoss()

    def adjust_learning_rate(self, epoch):
        """if epoch > 180:
            self.learningRate /= 1000000
        elif epoch > 150:
            self.learningRate /= 100000
        elif epoch > 120:
            self.learningRate /= 10000
        elif epoch > 90:
            self.learningRate /= 1000
        elif epoch > 60:
            self.learningRate /= 100
        elif epoch > 30:
            self.learningRate /= 10"""
        for param_group in self.optimizer.param_groups:
            param_group["lr"] = self.learningRate * LEARNING_RATE_DECAY

    def save_models(self):
        torch.save(self.model.state_dict(), "weights/myModel.model")
        print("Checkpoint saved")

    def test(self):
        self.model.eval()
        test_acc = 0.0
        test_loss = 0.0
        for i, (images, labels) in enumerate(self.test_loader):

            if self.cuda_avail:
                images = Variable(images.cuda())
                labels = Variable(labels.cuda())

            outputs = self.model(images)
            # _, prediction = torch.max(outputs.data, 1)
            # prediction = prediction.cpu().numpy()
            prediction = torch.round(outputs.data)
            #print(outputs, labels, prediction)

            loss = self.loss_fn(outputs, labels)
            test_loss += loss.cpu().data.item() * images.size(0)
            test_acc += torch.sum(torch.eq(prediction, labels.data))

        test_acc /= self.testSetSize
        test_loss /= self.testSetSize

        return test_acc, test_loss

    def train(self):
        best_acc = 0.0
        best_loss = 1
        epochsSinceLastImprovement = 0
        losses = []
        print("Starting train...")
        for epoch in range(EPOCHS):
            startTime = time.time()
            self.model.train()
            train_acc = 0.0
            train_loss = 0.0
            for i, (images, labels) in enumerate(self.train_loader):
                if self.cuda_avail:
                    images = Variable(images.cuda())
                    labels = Variable(labels.cuda())

                self.optimizer.zero_grad()
                outputs = self.model(images)

                loss = self.loss_fn(outputs, labels)

                loss.backward()

                self.optimizer.step()

                train_loss += loss.cpu().data.item() * images.size(0)
                prediction = torch.round(outputs.data)
                train_acc += torch.sum(torch.eq(prediction, labels.data))

            self.adjust_learning_rate(epoch)

            train_acc /= self.trainSetSize
            train_loss /= self.trainSetSize

            test_acc, test_loss = self.test()

            if test_loss + LOSS_IMPROVEMENT < best_loss:
                if epoch != 0:
                    self.save_models()
                # best_acc = test_acc
                best_loss = test_loss
                epochsSinceLastImprovement = 0
            else:
                epochsSinceLastImprovement += 1

                if epochsSinceLastImprovement == EARLY_STOP:
                    print(
                        f"Epoch {epoch}, Train Accuracy: {train_acc} , TrainLoss: {train_loss} , Test Accuracy: {test_acc}, TestLoss: {test_loss} Time: {time.time() - startTime}"
                    )

                    print(
                        f"No improvement in {epochsSinceLastImprovement} epochs, stopping..."
                    )
                    losses.append(test_loss)
                    break

            print(
                f"Epoch {epoch}, Train Accuracy: {train_acc} , TrainLoss: {train_loss} , Test Accuracy: {test_acc}, TestLoss: {test_loss} Time: {time.time() - startTime}"
            )
            losses.append(test_loss)
        plt.plot(losses)
        plt.show()
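
A minimal driver sketch for this class. The module-level constants it relies on (LEARNING_RATE, LEARNING_RATE_DECAY, BATCH_SIZE, EPOCHS, EARLY_STOP, LOSS_IMPROVEMENT) are not part of the snippet, so the values below are placeholders only:

LEARNING_RATE = 1e-3        # placeholder hyperparameters; the real values live elsewhere
LEARNING_RATE_DECAY = 0.95
BATCH_SIZE = 32
EPOCHS = 100
EARLY_STOP = 10
LOSS_IMPROVEMENT = 1e-4

if __name__ == "__main__":
    trainer = TrainModel()
    trainer.train()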
Code Example #10
import torch
import pandas as pd
from reducing_net import reduced_ann_net
from Net import Net, test_model
from utils import confusion, F1_score, loadDataset, saveNNParas
import time

# Loading the previous network status.
feature_num = 11
hidden_num = 30
output_num = 3

load_net = Net(feature_num, hidden_num, output_num)
load_net.load_state_dict(torch.load('ann_net_model_genre.pt'))
#load_net.load_state_dict(torch.load('net_model_subjective_rating.pt'))
load_net.eval()

# Loading testing dataset to evaluate new network.
x_test, y_test = loadDataset('testing')

# Loading the information of vector.
vectors = pd.read_excel('ann_vector_angle_sample.xls', header=None)
raw_df = pd.DataFrame({
    'row': vectors.iloc[:, 0],
    'col': vectors.iloc[:, 1],
    'vector': vectors.iloc[:, 2]
})

# Sorting by the values of vector angle in ascending order.
increase_res = raw_df.sort_values('vector', ascending=True)
unique_row = increase_res.row.unique()
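
A hedged sketch of how this sorted table might feed the reduction routine from Code Example #3: take the pair with the smallest vector angle and merge one hidden unit into the other. Treating 'row' and 'col' as hidden-unit indices is an assumption, as is the single-merge policy:

import torch

# Pick the hidden-unit pair with the smallest angle between their weight vectors.
best_pair = increase_res.iloc[0]
unit_keep, unit_remove = int(best_pair['row']), int(best_pair['col'])

with torch.no_grad():  # in-place parameter edits require autograd to be disabled
    new_net = reduced_ann_net(load_net, unit_keep, unit_remove, hidden_num - 1)
new_net.eval()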
Code Example #11
def search_algo_1(args):
    # initialize random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # initialize/load
    # TODO: use 80 to fit the input of trained MPC GNN, use args.depth * 3 later for real mpc
    max_nodes = 80
    task_class = getattr(tasks, args.task)
    task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # state preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))
    global preprocessor
    preprocessor = Preprocessor(max_nodes=max_nodes, all_labels=all_labels)

    # initialize the env
    env = RobotGrammarEnv(task,
                          rules,
                          enable_reward_oracle=True,
                          preprocessor=preprocessor)

    # initialize Value function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = preprocessor.preprocess(
        state)
    num_features = sample_features.shape[1]
    V = Net(max_nodes=max_nodes, num_channels=num_features,
            num_outputs=1).to(device)

    # load pretrained V function
    if args.load_V_path is not None:
        V.load_state_dict(torch.load(args.load_V_path))
        print_info('Loaded pretrained V function from {}'.format(
            args.load_V_path))

    # initialize target V_hat look up table
    V_hat = dict()

    # load pretrained V_hat
    if args.load_Vhat_path is not None:
        V_hat_fp = open(args.load_Vhat_path, 'rb')
        V_hat = pickle.load(V_hat_fp)
        V_hat_fp.close()
        print_info('Loaded pretrained Vhat from {}'.format(
            args.load_Vhat_path))

    # initialize the seen states pool
    states_pool = StatesPool(capacity=args.states_pool_capacity)
    all_sample_designs = []

    # explored designs
    designs = []
    design_rewards = []

    # load previously explored designs
    if args.load_designs_path is not None:
        fp_csv = open(args.load_designs_path, newline='')
        reader = csv.DictReader(fp_csv)
        for row in reader:
            rule_seq = ast.literal_eval(row['rule_seq'])
            reward = float(row['reward'])
            state = make_initial_graph()
            for i in range(len(rule_seq)):
                state = env.transite(state, rule_seq[i])
            designs.append(state)
            design_rewards.append(reward)
            if not np.isclose(V_hat[hash(state)], reward):
                print(rule_seq)
                print(V_hat[hash(state)], reward)
                print_error("Vhat and designs don't match")
        fp_csv.close()
        print_info('Loaded pretrained designs from {}'.format(
            args.load_designs_path))

    if not args.test:
        # initialize save folders and files
        fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'w')
        fp_log.close()
        fp_eval = open(os.path.join(args.save_dir, 'eval.txt'), 'w')
        fp_eval.close()
        design_csv_path = os.path.join(args.save_dir, 'designs.csv')
        fp_csv = open(design_csv_path, 'w')
        fieldnames = ['rule_seq', 'reward']
        writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
        writer.writeheader()
        fp_csv.close()

        # initialize the optimizer
        global optimizer
        optimizer = torch.optim.Adam(V.parameters(), lr=args.lr)

        # initialize best design rule sequence
        best_design, best_reward = None, -np.inf

        # reward history
        epoch_rew_his = []
        last_checkpoint = -1

        # recording time
        t_sample_sum = 0.

        # record the count for invalid samples
        no_action_samples, step_exceeded_samples = 0, 0

        for epoch in range(args.num_iterations):
            t_start = time.time()

            V.eval()

            # update eps and eps_sample
            if args.eps_schedule == 'linear-decay':
                eps = args.eps_start + epoch / args.num_iterations * (
                    args.eps_end - args.eps_start)
            elif args.eps_schedule == 'exp-decay':
                eps = args.eps_end + (args.eps_start - args.eps_end) * np.exp(
                    -1.0 * epoch / args.num_iterations / args.eps_decay)

            if args.eps_sample_schedule == 'linear-decay':
                eps_sample = args.eps_sample_start + epoch / args.num_iterations * (
                    args.eps_sample_end - args.eps_sample_start)
            elif args.eps_sample_schedule == 'exp-decay':
                eps_sample = args.eps_sample_end + (
                    args.eps_sample_start - args.eps_sample_end) * np.exp(
                        -1.0 * epoch / args.num_iterations /
                        args.eps_sample_decay)

            t_sample, t_update, t_mpc, t_opt = 0, 0, 0, 0

            best_candidate_design, best_candidate_reward = None, -1.0
            best_candidate_state_seq, best_candidate_rule_seq = None, None

            p = random.random()
            if p < eps_sample:
                num_samples = 1
            else:
                num_samples = args.num_samples

            # use e-greedy to sample a design within maximum #steps.
            for _ in range(num_samples):
                valid = False
                while not valid:
                    t0 = time.time()

                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    random_step_cnt, optimal_step_cnt = 0, 0
                    no_action_flag = False
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            no_action_flag = True
                            break
                        if step_type == 'random':
                            random_step_cnt += 1
                        elif step_type == 'optimal':
                            optimal_step_cnt += 1
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        state = next_state
                        if env.is_valid(next_state):
                            valid = True
                            break

                    t_sample += time.time() - t0

                    t0 = time.time()

                    # update the invalid sample's count
                    if not valid:
                        if no_action_flag:
                            no_action_samples += 1
                        else:
                            step_exceeded_samples += 1

                    # update the Vhat for invalid designs
                    if not valid:
                        update_Vhat(V_hat, state_seq, 0.0)
                        # update states pool
                        update_states_pool(states_pool, state_seq)

                    # if valid but has been explored as a valid design before, then put in state pool but resample it
                    if valid and (hash(state)
                                  in V_hat) and (V_hat[hash(state)] > 1e-3):
                        update_Vhat(V_hat, state_seq, V_hat[hash(state)])
                        update_states_pool(states_pool, state_seq)
                        valid = False

                    # record the sampled design
                    all_sample_designs.append(rule_seq)

                    t_update += time.time() - t0

                predicted_value = predict(V, state)
                if predicted_value > best_candidate_reward:
                    best_candidate_design, best_candidate_reward = state, predicted_value
                    best_candidate_rule_seq, best_candidate_state_seq = rule_seq, state_seq

            t0 = time.time()

            _, reward = env.get_reward(best_candidate_design)

            t_mpc += time.time() - t0

            # save the design and the reward in the list
            designs.append(best_candidate_rule_seq)
            design_rewards.append(reward)

            # update best design
            if reward > best_reward:
                best_design, best_reward = best_candidate_rule_seq, reward
                print_info(
                    'new best: reward = {:.4f}, predicted reward = {:.4f}, num_samples = {}'
                    .format(reward, best_candidate_reward, num_samples))

            t0 = time.time()

            # update V_hat for the valid design
            update_Vhat(V_hat, best_candidate_state_seq, reward)

            # update states pool for the valid design
            update_states_pool(states_pool, best_candidate_state_seq)

            t_update += time.time() - t0

            t0 = time.time()

            # optimize
            V.train()
            total_loss = 0.0
            for _ in range(args.opt_iter):
                minibatch = states_pool.sample(
                    min(len(states_pool), args.batch_size))

                train_adj_matrix, train_features, train_masks, train_reward = [], [], [], []
                for robot_graph in minibatch:
                    hash_key = hash(robot_graph)
                    target_reward = V_hat[hash_key]
                    adj_matrix, features, masks = preprocessor.preprocess(
                        robot_graph)
                    train_adj_matrix.append(adj_matrix)
                    train_features.append(features)
                    train_masks.append(masks)
                    train_reward.append(target_reward)

                train_adj_matrix_torch = torch.tensor(train_adj_matrix)
                train_features_torch = torch.tensor(train_features)
                train_masks_torch = torch.tensor(train_masks)
                train_reward_torch = torch.tensor(train_reward)

                optimizer.zero_grad()
                output, loss_link, loss_entropy = V(train_features_torch,
                                                    train_adj_matrix_torch,
                                                    train_masks_torch)
                loss = F.mse_loss(output[:, 0], train_reward_torch)
                loss.backward()
                total_loss += loss.item()
                optimizer.step()

            t_opt += time.time() - t0

            t_end = time.time()

            t_sample_sum += t_sample

            # logging
            if (epoch + 1
                ) % args.log_interval == 0 or epoch + 1 == args.num_iterations:
                iter_save_dir = os.path.join(args.save_dir,
                                             '{}'.format(epoch + 1))
                os.makedirs(os.path.join(iter_save_dir), exist_ok=True)
                # save model
                save_path = os.path.join(iter_save_dir, 'V_model.pt')
                torch.save(V.state_dict(), save_path)
                # save V_hat
                save_path = os.path.join(iter_save_dir, 'V_hat')
                fp = open(save_path, 'wb')
                pickle.dump(V_hat, fp)
                fp.close()
                # save all_sampled_designs
                save_path = os.path.join(iter_save_dir, 'all_sampled_designs')
                fp = open(save_path, 'wb')
                pickle.dump(all_sample_designs, fp)
                fp.close()
                # save explored design and its reward
                fp_csv = open(design_csv_path, 'a')
                fieldnames = ['rule_seq', 'reward']
                writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
                for i in range(last_checkpoint + 1, len(designs)):
                    writer.writerow({
                        'rule_seq': str(designs[i]),
                        'reward': design_rewards[i]
                    })
                last_checkpoint = len(designs) - 1
                fp_csv.close()

            epoch_rew_his.append(reward)

            avg_loss = total_loss / args.opt_iter
            len_his = min(len(epoch_rew_his), 30)
            avg_reward = np.sum(epoch_rew_his[-len_his:]) / len_his
            print('Epoch {}: T_sample = {:.2f}, T_update = {:.2f}, T_mpc = {:.2f}, T_opt = {:.2f}, eps = {:.3f}, eps_sample = {:.3f}, #samples = {} = {}, training loss = {:.4f}, predicted_reward = {:.4f}, reward = {:.4f}, last 30 epoch reward = {:.4f}, best reward = {:.4f}'.format(\
                epoch, t_sample, t_update, t_mpc, t_opt, eps, eps_sample, num_samples, \
                avg_loss, best_candidate_reward, reward, avg_reward, best_reward))

            fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'a')
            fp_log.write('eps = {:.4f}, eps_sample = {:.4f}, num_samples = {}, T_sample = {:4f}, T_update = {:4f}, T_mpc = {:.4f}, T_opt = {:.4f}, loss = {:.4f}, predicted_reward = {:.4f}, reward = {:.4f}, avg_reward = {:.4f}\n'.format(\
                eps, eps_sample, num_samples, t_sample, t_update, t_mpc, t_opt, avg_loss, best_candidate_reward, reward, avg_reward))
            fp_log.close()

            if (epoch + 1) % args.log_interval == 0:
                print_info(
                    'Avg sampling time for last {} epoch: {:.4f} second'.
                    format(args.log_interval,
                           t_sample_sum / args.log_interval))
                t_sample_sum = 0.
                invalid_cnt, valid_cnt = 0, 0
                for state in states_pool.pool:
                    if np.isclose(V_hat[hash(state)], 0.):
                        invalid_cnt += 1
                    else:
                        valid_cnt += 1
                print_info(
                    'states_pool size = {}, #valid = {}, #invalid = {}, #valid / #invalid = {}'
                    .format(len(states_pool), valid_cnt, invalid_cnt,
                            valid_cnt / invalid_cnt))
                print_info(
                    'Invalid samples: #no_action_samples = {}, #step_exceeded_samples = {}, #no_action / #step_exceeded = {}'
                    .format(no_action_samples, step_exceeded_samples,
                            no_action_samples / step_exceeded_samples))

            # evaluation
            if args.eval_interval > 0 and (
                (epoch + 1) % args.eval_interval == 0
                    or epoch + 1 == args.num_iterations):
                print_info('-------- Doing evaluation --------')
                print_info('#states = {}'.format(len(states_pool)))
                loss_total = 0.
                for state in states_pool.pool:
                    value = predict(V, state)
                    loss_total += (V_hat[hash(state)] - value)**2
                print_info('Loss = {:.3f}'.format(loss_total /
                                                  len(states_pool)))
                fp_eval = open(os.path.join(args.save_dir, 'eval.txt'), 'a')
                fp_eval.write('epoch = {}, loss = {:.3f}\n'.format(
                    epoch + 1, loss_total / len(states_pool)))
                fp_eval.close()

        save_path = os.path.join(args.save_dir, 'model_state_dict_final.pt')
        torch.save(V.state_dict(), save_path)
    else:
        import IPython
        IPython.embed()

        # test
        V.eval()
        print('Start testing')
        test_epoch = 30
        y0 = []
        y1 = []
        x = []
        for ii in range(0, 11):
            eps = 1.0 - 0.1 * ii

            print('------------------------------------------')
            print('eps = ', eps)

            reward_sum = 0.
            best_reward = -np.inf
            for epoch in range(test_epoch):
                t0 = time.time()

                # use e-greedy to sample a design within maximum #steps.
                valid = False
                while not valid:
                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            break
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        if env.is_valid(next_state):
                            valid = True
                            break
                        state = next_state

                _, reward = env.get_reward(state)
                reward_sum += reward
                best_reward = max(best_reward, reward)
                print(
                    f'design {epoch}: reward = {reward}, time = {time.time() - t0}'
                )

            print('test avg reward = ', reward_sum / test_epoch)
            print('best reward found = ', best_reward)
            x.append(eps)
            y0.append(reward_sum / test_epoch)
            y1.append(best_reward)

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        ax[0].plot(x, y0)
        ax[0].set_title('Avg Reward')
        ax[0].set_xlabel('eps')
        ax[0].set_ylabel('reward')

        ax[1].plot(x, y1)
        ax[1].set_title('Best Reward')
        ax[1].set_xlabel('eps')
        ax[1].set_ylabel('reward')

        plt.show()
Code Example #12
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=LR)

epochs = 7
for epoch in range(epochs):
    sum_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = Variable(inputs), Variable(labels)
        optimizer.zero_grad()  # Zero the gradients
        outputs = net(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Compute the loss
        loss.backward()  # Backpropagate
        optimizer.step()  # Update the parameters

        print(loss.item())

# 测试
net.eval()  # 转为测试模式
correct = 0
total = 0
for data_test in test_loader:
    images, labels = data_test
    images, labels = Variable(images), Variable(labels)
    output_test = net(images)
    _, predicted = torch.max(output_test, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
print("correct1: ", correct)
print("test acc: {0}".format(correct.item() / len(test_dataset)))
Code Example #13
# -*- coding: utf-8 -*-
"""
@author: Ulrich
"""

import torch
from Net import Net, test_model
from utils import confusion, F1_score, loadDataset

# Reload the parameters of the trained model.
load_net = Net(11, 30, 3)
load_net.load_state_dict(torch.load('net_model_subjective_rating.pt'))
load_net.eval()
"""
Manual operation on network reduction.

Scheme for unit removal:
    17 -> 3
    24 -> 6
    23 -> 9
The units that will be removed are 17, 23, and 24.
"""

# Operation of addition.
load_net.hidden.weight[2] += load_net.hidden.weight[16]
load_net.hidden.weight[5] += load_net.hidden.weight[23]
load_net.hidden.weight[8] += load_net.hidden.weight[22]

# Slice the remaining weight and bias values into a new, smaller network.
new_net = Net(11, 27, 3)
new_net.hidden.weight[:16] = load_net.hidden.weight[:16]
Code Example #14
File: PoseEstimation.py  Project: cmacw/indiv_project
class PoseEstimation:
    def __init__(self,
                 trainset_info,
                 testset_info=None,
                 lr=0.001,
                 wd=0,
                 radial=False):
        self.trainset_info = trainset_info
        self.radial = radial

        # Tensor using CPU or GPU
        self.device = self._use_cuda()

        # model setup
        self.net = Net()
        self.net.to(self.device)
        if radial:
            self.criterion = nn.L1Loss()
        else:
            self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.net.parameters(),
                                    lr=lr,
                                    weight_decay=wd)

        # Input data setup
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        self.trsfm = transforms.Compose(
            [transforms.Resize((128, 128)),
             transforms.ToTensor(), normalize])
        # self.trsfm = transforms.Compose([transforms.ToTensor()])
        self.trainset = PosEstimationDataset(self.trainset_info,
                                             transform=self.trsfm,
                                             radial=radial)
        self.norm_range = self.trainset.get_norm_range()
        self.trainloader = DataLoader(
            self.trainset,
            batch_size=self.trainset_info["batch_size"],
            shuffle=True)

        # Set up testset
        if testset_info is not None:
            self.load_test_set(testset_info, radial=radial)

        # initialise directory for saving training results
        self.save_dir = os.path.join(
            trainset_info["path"], trainset_info["dataset_name"] + "_results",
            "eph{}_bs{}_lr{}_wd{}".format(trainset_info["epochs"],
                                          trainset_info["batch_size"], lr, wd))

    def load_test_set(self, testset_info, radial=False, webcam_test=False):
        self.testset_info = testset_info
        self.testset = PosEstimationDataset(self.testset_info, self.trsfm,
                                            self.norm_range, radial,
                                            webcam_test)
        self.testloader = DataLoader(self.testset, shuffle=True)

    def train(self, show_fig=True, save_output=True, eval_eph=False):
        # Create directory for saving results
        os.makedirs(self.save_dir, exist_ok=False)

        loss_sample_size = len(self.trainloader) // 4

        # Initialise loss array
        train_losses = np.zeros(self.trainset_info["epochs"] *
                                len(self.trainloader))

        # Initialise distance and angle diff array
        eph_losses = np.zeros([self.trainset_info["epochs"], 2])
        eph_diff = np.zeros([self.trainset_info["epochs"], 4])

        # Begin training
        t0 = time.time()
        try:
            for epoch in range(self.trainset_info["epochs"]
                               ):  # loop over the dataset multiple times
                print('\n[Epoch', epoch + 1, ']')
                running_loss = 0.0
                for i, data in enumerate(self.trainloader):
                    # Set network to training mode
                    self.net.train()

                    # get the inputs; data is a dictionary of {image, pos}
                    image, pos = data['image'].to(self.device), data['pos'].to(
                        self.device)

                    # zero the parameter gradients
                    self.optimizer.zero_grad()

                    # forward + backward + optimize
                    outputs = self.net(image)
                    loss = self.criterion(outputs, pos)
                    loss.backward()
                    self.optimizer.step()

                    # Calculate the difference in euclidean distance and angles
                    train_losses[epoch * len(self.trainloader) +
                                 i] = loss.item()

                    # print statistics
                    # running_loss += loss.item()
                    # if i % loss_sample_size == loss_sample_size - 1:
                    #     print('[{}, {}] loss: {:.5f}'.
                    #           format(epoch + 1, i + 1, running_loss / loss_sample_size))
                    #     running_loss = 0.0

                # Run evaluation and show results
                if eval_eph:
                    eph_losses[epoch], eph_diff[epoch, :] = self.evaluation()

        except KeyboardInterrupt:
            pass

        t1 = time.time()
        print('Time taken: {}'.format(t1 - t0))

        # Save output
        if save_output:
            self.save_model_output(train_losses, eph_losses, eph_diff)

        if show_fig:
            self.display_training_fig(train_losses, eph_losses, eph_diff)

        print('\n--- Finished Training ---\n')

    # Evaluation uses the model held by the class.

    def evaluation(self):
        assert self.testset is not None, \
            "No testset is supplied. Make sure PoseEstimation.load_test_set(set_info) is called"
        # Initialise loss array
        losses = np.zeros(len(self.testloader))

        # Initialise distance and angle diff array
        diff = np.zeros([len(self.testloader), 2])

        # turn on evaluation mode
        self.net.eval()

        # start evaluation
        for i, data in enumerate(self.testloader):
            # get the inputs; data is a dictionary of {image, pos}
            image, pos = data['image'].to(self.device), data['pos'].to(
                self.device)

            # forward
            outputs = self.net(image)
            loss = self.criterion(outputs, pos)

            # Calculate the error
            losses[i] = loss.item()
            diff[i] = self.cal_error(outputs, pos)

        print("true   : {}".format(pos[-1]))
        print("predict: {}".format(outputs[-1]))
        return self.print_avg_stat(losses, diff)

    def _use_cuda(self):
        device = torch.device("cpu")
        if torch.cuda.is_available():
            device = torch.device("cuda:0")
            with warnings.catch_warnings(record=True) as w:
                warnings.filterwarnings("error")
                try:
                    torch.cuda.get_device_capability(device)
                except Exception:
                    device = torch.device("cpu")

        print(device)
        return device

    def show_batch_image(self):
        for i_batch, sample_batched in enumerate(self.trainloader):
            print(i_batch, sample_batched['image'].size(),
                  sample_batched['pos'].size())

            images_batch = sample_batched["image"]

            if i_batch == 0:
                plt.figure()
                grid = torchvision.utils.make_grid(images_batch)
                plt.imshow(grid.numpy().transpose((1, 2, 0)))
                plt.axis('off')
                plt.ioff()
                plt.show()
                break

    # Save model and losses
    def save_model_output(self, train_losses, test_losses, test_diff):
        self.net.save_model_parameter(self.trainset_info, self.save_dir)
        self.save_array2csv(self.trainset_info, train_losses, "train_loss")
        self.save_array2csv(self.trainset_info, test_losses, "eph_loss")
        self.save_array2csv(self.trainset_info, test_diff, "diff")

    # Visualise the losses and deviation
    def display_training_fig(self, train_losses, test_losses, test_diff):
        self.plot_array(train_losses, "Loss", self.trainset_info, scatter=True)
        if self.radial:
            self.plot_array(test_diff[:, 0],
                            "Difference_in_distance(m)",
                            self.trainset_info,
                            std=test_diff[:, 1])
        else:
            self.plot_array(test_diff[:, 0],
                            "Difference_in_distance(m)",
                            self.trainset_info,
                            std=test_diff[:, 2])
            self.plot_array(test_diff[:, 1],
                            "Difference_in_angle(deg)",
                            self.trainset_info,
                            std=test_diff[:, 3])

        avg_train_losses = np.average(train_losses.reshape(
            -1, len(self.trainloader)),
                                      axis=1)
        plt.figure()
        plt.plot(range(1,
                       len(avg_train_losses) + 1),
                 avg_train_losses,
                 label="train")
        # evaluation() returns [avg_loss, std_loss] per epoch, so column 0 holds the
        # average test loss
        plt.plot(range(1, len(test_losses) + 1),
                 test_losses[:, 0],
                 label="test")
        plt.ylabel("Loss")
        plt.xlabel("epoch")
        plt.legend()
        fig_name = "fig_{}_eph{}_bs{}_{}.png".format(
            self.trainset_info["dataset_name"], self.trainset_info["epochs"],
            self.trainset_info["batch_size"], "Loss_comp")
        file_path = os.path.join(self.save_dir, fig_name)
        plt.savefig(file_path)

    def plot_array(self, data, ylabel, trainset_info, scatter=False, std=None):
        plt.figure()
        if scatter:
            x = np.arange(len(data))
            plt.plot(x, data, marker='o', markersize=0.6, linewidth=0)
            plt.yscale("log")
            plt.xlabel("batch")
        else:
            plt.errorbar(range(1,
                               len(data) + 1),
                         data,
                         yerr=std,
                         ecolor="k",
                         capsize=3)
            plt.xlabel("epoch")

        plt.ylabel(ylabel)
        fig_name = "fig_{}_eph{}_bs{}_{}.png".format(
            trainset_info["dataset_name"], trainset_info["epochs"],
            trainset_info["batch_size"], ylabel)
        file_path = os.path.join(self.save_dir, fig_name)
        plt.savefig(file_path)
        plt.close('all')

    def save_array2csv(self, trainset_info, data, name):
        file_name = "{}_{}_eph{}_bs{}.csv".format(
            name, trainset_info["dataset_name"], trainset_info["epochs"],
            trainset_info["batch_size"])
        file_path = os.path.join(self.save_dir, file_name)
        np.savetxt(file_path, data, delimiter=",")

    def cal_error(self, predict, true):

        # predict and true have shape [batch_size, n]:
        #   [:, :3] is the translational position
        #   [:, 3:] is the rotation (a quaternion in the non-radial case below)
        # De-normalise back into the original units
        predict_np = self._denormalise(predict.cpu().detach().numpy())
        true_np = self._denormalise(true.cpu().detach().numpy())

        if self.radial:
            return predict_np - true_np
        else:
            # Get the euclidean distance
            error_distances = np.linalg.norm(
                (predict_np[:, :3] - true_np[:, :3]), axis=1)

            # Calculate the rotation angle from the prediction (output) to the truth:
            #   diff * predict = true  =>  diff = true * inv(predict)
            # Since the rotation vector is the rotation axis multiplied by the angle,
            # the angle is the magnitude of that vector.
            # (A small standalone check of this identity is sketched after the class.)
            predict_rot = Rotation.from_quat(predict_np[:, 3:])
            true_rot = Rotation.from_quat(true_np[:, 3:])
            rot = true_rot * predict_rot.inv()
            diff_angle = rot.as_rotvec()
            error_rot = np.linalg.norm(diff_angle, axis=1)
            error_rot = np.rad2deg(error_rot)

            return [error_distances, error_rot]

    def _denormalise(self, pos):
        return pos * (self.norm_range["max"] -
                      self.norm_range["min"]) + self.norm_range["min"]

    def load_model_parameter(self, path):
        self.net.load_state_dict(torch.load(path))

    def print_avg_stat(self, losses, diff):
        avg_loss = np.average(losses)
        avg_diff = np.average(diff, axis=0)
        std_loss = np.std(losses)
        std_diff = np.std(diff, axis=0)
        print(self.trainset.dataset_name)
        print("Test avg loss: {:.5f} | avg[distance, angle] {}".format(
            avg_loss, avg_diff))
        print("Test std loss: {:.5f} | std[distance, angle] {}".format(
            std_loss, std_diff))

        return [avg_loss, std_loss], np.concatenate((avg_diff, std_diff),
                                                    axis=0)
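

# Illustrative, standalone check of the rotation-error identity used in cal_error above
# (not part of the original code). It assumes scipy is available, which the class
# already relies on via Rotation.from_quat.
def _rotation_error_demo():
    import numpy as np
    from scipy.spatial.transform import Rotation

    # a 30 degree true rotation about z vs a 10 degree predicted rotation about z
    true_rot = Rotation.from_euler('z', 30, degrees=True)
    predict_rot = Rotation.from_euler('z', 10, degrees=True)

    # diff * predict = true  =>  diff = true * inv(predict)
    diff = true_rot * predict_rot.inv()

    # the rotation vector is axis * angle, so its norm is the angle in radians
    angle_deg = np.rad2deg(np.linalg.norm(diff.as_rotvec()))
    print(angle_deg)  # approximately 20.0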
コード例 #15
0
def search_algo_2(args):
    # initialize random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # initialize/load
    # TODO: use 80 to fit the input of trained MPC GNN, use args.depth * 3 later for real mpc
    max_nodes = 80
    task_class = getattr(tasks, args.task)
    task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # state preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))
    global preprocessor
    preprocessor = Preprocessor(max_nodes = max_nodes, all_labels = all_labels)

    # initialize the env
    env = RobotGrammarEnv(task, rules, enable_reward_oracle = True, preprocessor = preprocessor)

    # initialize Value function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = preprocessor.preprocess(state)
    num_features = sample_features.shape[1]
    V = Net(max_nodes = max_nodes, num_channels = num_features, num_outputs = 1).to(device)
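    # V maps a padded robot graph (node features, adjacency matrix, node masks) to a
    # single scalar value estimate; see how output[:, 0] is used in the training loop below.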

    # load pretrained V function
    if args.load_V_path is not None:
        V.load_state_dict(torch.load(args.load_V_path))
        print_info('Loaded pretrained V function from {}'.format(args.load_V_path))

    if not args.test:
        # initialize save folders and files
        fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'w')
        fp_log.close()
        design_csv_path = os.path.join(args.save_dir, 'designs.csv')
        fp_csv = open(design_csv_path, 'w')
        fieldnames = ['rule_seq', 'reward']
        writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
        writer.writeheader()
        fp_csv.close()

        # initialize the optimizer
        global optimizer
        optimizer = torch.optim.Adam(V.parameters(), lr = args.lr)

        # initialize best design
        best_design, best_reward = None, -np.inf
        
        # initialize the seen states pool
        states_pool = []
        
        # initialize visited states
        state_set = set()

        # TODO: load previously explored designs
        
        # explored designs
        designs = []
        design_rewards = []
        
        # reward history
        epoch_rew_his = []

        for epoch in range(args.num_iterations):
            t_start = time.time()

            V.eval()

            t0 = time.time()

            # use epsilon-greedy to sample a design within the maximum number of steps (args.depth)
            if args.eps_schedule == 'linear-decay':
                # linear schedule
                eps = args.eps_start + epoch / args.num_iterations * (args.eps_end - args.eps_start)
            elif args.eps_schedule == 'exp-decay':
                # exp schedule
                eps = args.eps_end + (args.eps_start - args.eps_end) * np.exp(-1.0 * epoch / args.num_iterations / args.eps_decay)
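            # Example (illustrative): with eps_start = 1.0, eps_end = 0.1 and
            # num_iterations = 1000, the linear schedule gives eps = 1.0 at epoch 0,
            # 0.55 at epoch 500 and about 0.1 at the last epoch; the exponential
            # schedule instead decays from eps_start towards eps_end at a rate
            # controlled by eps_decay.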

            done = False
            while not done:
                state = env.reset()
                rule_seq = []
                state_seq = [state]
                total_reward = 0.
                for _ in range(args.depth):
                    action = select_action(env, V, state, eps)
                    if action is None:
                        break
                    rule_seq.append(action)
                    next_state, reward, done = env.step(action)
                    total_reward += reward
                    state_seq.append(next_state)
                    state = next_state
                    if done:
                        break
            
            # save the design and the reward in the list
            designs.append(rule_seq)
            design_rewards.append(total_reward)

            # update best design
            if total_reward > best_reward:
                best_design, best_reward = rule_seq, total_reward
            
            # update state pool
            for ancestor in state_seq:
                state_hash_key = hash(ancestor)
                if not (state_hash_key in state_set):
                    state_set.add(state_hash_key)
                    states_pool.append(ancestor)
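            # (the dedup above assumes hash(state) is stable and unique enough for
            #  robot graphs; states with a previously seen hash are skipped)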

            t1 = time.time()

            # optimize: fit the value network V on minibatches sampled from the states pool
            V.train()
            total_loss = 0.0
            for _ in range(args.depth):
                minibatch = random.sample(states_pool, min(len(states_pool), args.batch_size))

                train_adj_matrix, train_features, train_masks, train_reward = [], [], [], []
                for robot_graph in minibatch:
                    V_hat = compute_Vhat(robot_graph, env, V)
                    adj_matrix, features, masks = preprocessor.preprocess(robot_graph)
                    train_adj_matrix.append(adj_matrix)
                    train_features.append(features)
                    train_masks.append(masks)
                    train_reward.append(V_hat)
                
                train_adj_matrix_torch = torch.tensor(train_adj_matrix)
                train_features_torch = torch.tensor(train_features)
                train_masks_torch = torch.tensor(train_masks)
                train_reward_torch = torch.tensor(train_reward)
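                # note (illustrative): building tensors from lists of numpy arrays works
                # but is slow; stacking with np.array(...) first is usually faster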
                
                optimizer.zero_grad()
                output, loss_link, loss_entropy = V(train_features_torch, train_adj_matrix_torch, train_masks_torch)
                loss = F.mse_loss(output[:, 0], train_reward_torch)
                loss.backward()
                total_loss += loss.item()
                optimizer.step()

            t2 = time.time()

            # logging
            if (epoch + 1) % args.log_interval == 0 or epoch + 1 == args.num_iterations:
                iter_save_dir = os.path.join(args.save_dir, '{}'.format(epoch + 1))
                os.makedirs(os.path.join(iter_save_dir), exist_ok = True)
                # save model
                save_path = os.path.join(iter_save_dir, 'V_model.pt')
                torch.save(V.state_dict(), save_path)
                # save explored designs and their rewards
                fp_csv = open(design_csv_path, 'a')
                fieldnames = ['rule_seq', 'reward']
                writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
                for i in range(epoch - args.log_interval + 1, epoch + 1):
                    writer.writerow({'rule_seq': str(designs[i]), 'reward': design_rewards[i]})
                fp_csv.close()

            epoch_rew_his.append(total_reward)

            t_end = time.time()
            avg_loss = total_loss / args.depth
            len_his = min(len(epoch_rew_his), 30)
            avg_reward = np.sum(epoch_rew_his[-len_his:]) / len_his
            print('Epoch {}: Time = {:.2f}, T_sample = {:.2f}, T_opt = {:.2f}, eps = {:.3f}, training loss = {:.4f}, reward = {:.4f}, last 30 epoch reward = {:.4f}, best reward = {:.4f}'.format(epoch, t_end - t_start, t1 - t0, t2 - t1, eps, avg_loss, total_reward, avg_reward, best_reward))
            fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'a')
            fp_log.write('eps = {:.4f}, loss = {:.4f}, reward = {:.4f}, avg_reward = {:.4f}\n'.format(eps, avg_loss, total_reward, avg_reward))
            fp_log.close()

        save_path = os.path.join(args.save_dir, 'model_state_dict_final.pt')
        torch.save(V.state_dict(), save_path)
    else:
        import IPython
        IPython.embed()
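        # NOTE: the interactive shell above must be exited before the eps sweep below runs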

        # test
        V.eval()
        print('Start testing')
        test_epoch = 30
        y0 = []
        y1 = []
        x = []
        for ii in range(10):
            eps = 1.0 - 0.1 * ii

            print('------------------------------------------')
            print('eps = ', eps)

            reward_sum = 0.
            best_reward = -np.inf
            for epoch in range(test_epoch):
                t0 = time.time()

                # use epsilon-greedy to sample a design within the maximum number of steps (args.depth)
                done = False
                while not done:
                    state = env.reset() 
                    rule_seq = []
                    state_seq = [state]
                    total_reward = 0.
                    for _ in range(args.depth):
                        action = select_action(env, V, state, eps)
                        if action is None:
                            break
                        rule_seq.append(action)
                        next_state, reward, done = env.step(action)
                        total_reward += reward
                        state_seq.append(next_state)
                        state = next_state
                        if done:
                            break

                reward_sum += total_reward
                best_reward = max(best_reward, total_reward)
                print(f'design {epoch}: reward = {total_reward}, time = {time.time() - t0}')

            print('test avg reward = ', reward_sum / test_epoch)
            print('best reward found = ', best_reward)
            x.append(eps)
            y0.append(reward_sum / test_epoch)
            y1.append(best_reward)

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize = (10, 5))
        ax[0].plot(x, y0)
        ax[1].plot(x, y1)
        plt.show()
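

# search_algo_2 above calls select_action(env, V, state, eps) without defining it here.
# The sketch below shows one plausible epsilon-greedy implementation; it is an
# illustration only, and env.get_available_actions(...) / env.transite(...) are assumed
# helpers of RobotGrammarEnv, not necessarily the original API.
def select_action_sketch(env, V, state, eps):
    import random

    import numpy as np
    import torch

    actions = env.get_available_actions(state)  # assumed helper
    if len(actions) == 0:
        return None
    if random.random() < eps:
        # explore: apply a random applicable rule
        return random.choice(actions)
    # exploit: pick the action whose successor state has the highest predicted value
    best_action, best_value = None, -np.inf
    for action in actions:
        next_state = env.transite(state, action)  # assumed helper
        adj_matrix, features, masks = preprocessor.preprocess(next_state)
        with torch.no_grad():
            output, _, _ = V(torch.tensor(features).unsqueeze(0),
                             torch.tensor(adj_matrix).unsqueeze(0),
                             torch.tensor(masks).unsqueeze(0))
        value = output[0, 0].item()
        if value > best_value:
            best_action, best_value = action, value
    return best_action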