def prepareModel(path):
    '''
    Loads the model and prepares it for inference.

    :param path: path to the PyTorch state dict file
    :return: model
    '''
    net = Net()
    net.load_state_dict(torch.load(path))
    net.eval()
    return net
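# Example usage -- an illustrative sketch, not part of the source. Assumes `Net` is
# importable and a matching checkpoint exists; "model.pt" and the input shape are
# placeholders (the shape follows the example tensor used in run() below).
if __name__ == "__main__":
    model = prepareModel("model.pt")
    with torch.no_grad():  # no gradients needed for inference
        out = model(torch.rand(1, 9, 17, 17))
        print(out)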
def run():
    example = torch.rand(1, 9, 17, 17)
    for maindir, subdir, file_name_list in os.walk("model"):
        for file in file_name_list:
            ext = file.split(".")[-1]
            if ext == "pt":
                # file names encode the network size: <prefix>_<depth>_<width>...
                d = int(file.split("_")[1])
                w = int(file.split("_")[2])
                model_black = Net(d, w, 9)
                model_black.load_state_dict(torch.load("model/" + file))
                model_black.eval()
                # trace to TorchScript and save next to the original as "<name>.pts"
                traced = torch.jit.trace(model_black, example)
                print(traced.code)
                traced.save("model/" + file + "s")
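# The traced archives saved by run() end in ".pts" and can be loaded back with
# TorchScript, without needing the Net class definition. An illustrative sketch,
# not part of the source; the file name is a placeholder following the
# "<prefix>_<depth>_<width>.pt" pattern parsed above.
if __name__ == "__main__":
    scripted = torch.jit.load("model/net_4_64.pts")
    scripted.eval()
    with torch.no_grad():
        out = scripted(torch.rand(1, 9, 17, 17))
        print(out.shape)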
def reduced_ann_net(old_net, unit_in, unit_remove, new_hidden_num):
    # In-place edits on parameters must run without autograd tracking.
    with torch.no_grad():
        # Fold the removed hidden unit's parameters into the absorbing unit.
        old_net.hidden.weight[unit_in] += old_net.hidden.weight[unit_remove]
        old_net.hidden.bias[unit_in] += old_net.hidden.bias[unit_remove]

        # Slice the remaining weight and bias values into a new-sized network.
        new_net = Net(11, new_hidden_num, 3)
        new_net.hidden.weight[:unit_remove] = old_net.hidden.weight[:unit_remove]
        new_net.hidden.weight[unit_remove:] = old_net.hidden.weight[unit_remove + 1:]
        new_net.hidden.bias[:unit_remove] = old_net.hidden.bias[:unit_remove]
        new_net.hidden.bias[unit_remove:] = old_net.hidden.bias[unit_remove + 1:]
        new_net.output.weight[:, :unit_remove] = old_net.output.weight[:, :unit_remove]
        new_net.output.weight[:, unit_remove:] = old_net.output.weight[:, unit_remove + 1:]
        new_net.output.bias[:] = old_net.output.bias[:]
    new_net.eval()
    return new_net
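# Example usage -- an illustrative sketch, not part of the source. The checkpoint
# name matches the one loaded elsewhere in this repo; the indices mirror the
# "17 -> 3" merge from the manual reduction script (0-based: 16 -> 2).
if __name__ == "__main__":
    old_net = Net(11, 30, 3)
    old_net.load_state_dict(torch.load('ann_net_model_genre.pt'))
    old_net.eval()
    # merge hidden unit 16 into unit 2, shrinking the hidden layer from 30 to 29
    small_net = reduced_ann_net(old_net, unit_in=2, unit_remove=16, new_hidden_num=29)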
# print(net)
if os.path.isfile('model.pt'):
    net.load_state_dict(torch.load('model.pt'))

x = np.arange(-math.pi + 0.1, math.pi - 0.05, math.pi / 50).tolist()
y = [fun(i) for i in x]
dataset = (convert_arr(x), convert_arr(y))
# print(dataset)

train(model=net, dataset=dataset, epochs=500, lr=1e-3, device=device)
print(len(x), len(y))

plt.plot(x, y, label='Original')
plt.xlabel('X')
plt.ylabel('Y')

predicted = []
# net.to(torch.device("cpu"))
net.eval()
for i in x:
    tmp = Tensor([i]).to(device)
    pred = net(tmp).tolist()
    predicted.append(pred)

plt.plot(x, predicted, label='Predicted')
plt.legend(shadow=False)
plt.show()
def search_algo(args):
    # initialize random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.set_num_threads(1)

    # initialize/load
    task_class = getattr(tasks, args.task)
    if args.no_noise:
        task = task_class(force_std=0.0, torque_std=0.0)
    else:
        task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # initialize preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))

    # TODO: use 80 to fit the input of trained MPC GNN, use args.depth * 3 later for real MPC
    max_nodes = args.depth * 3
    global preprocessor
    # preprocessor = Preprocessor(max_nodes=max_nodes, all_labels=all_labels)
    preprocessor = Preprocessor(all_labels=all_labels)

    # initialize the env
    env = RobotGrammarEnv(task, rules, seed=args.seed,
                          mpc_num_processes=args.mpc_num_processes)

    # initialize value function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = preprocessor.preprocess(state)
    num_features = sample_features.shape[1]
    V = Net(max_nodes=max_nodes, num_channels=num_features, num_outputs=1).to(device)

    # load pretrained V function
    if args.load_V_path is not None:
        V.load_state_dict(torch.load(args.load_V_path))
        print_info('Loaded pretrained V function from {}'.format(args.load_V_path))

    # initialize target V_hat lookup table
    V_hat = dict()

    # load pretrained V_hat
    if args.load_Vhat_path is not None:
        with open(args.load_Vhat_path, 'rb') as V_hat_fp:
            V_hat = pickle.load(V_hat_fp)
        print_info('Loaded pretrained Vhat from {}'.format(args.load_Vhat_path))

    # initialize invalid_his
    invalid_his = dict()
    num_invalid_samples, num_valid_samples = 0, 0
    repeated_cnt = 0

    # initialize the seen states pool
    states_pool = StatesPool(capacity=args.states_pool_capacity)
    states_set = set()

    # explored designs
    designs = []
    design_rewards = []
    design_opt_seeds = []

    # record prediction error
    prediction_error_sum = 0.0

    if not args.test:
        # initialize save folders and files
        fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'w')
        fp_log.close()
        fp_eval = open(os.path.join(args.save_dir, 'eval.txt'), 'w')
        fp_eval.close()

        design_csv_path = os.path.join(args.save_dir, 'designs.csv')
        fp_csv = open(design_csv_path, 'w')
        fieldnames = ['rule_seq', 'reward', 'opt_seed']
        writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
        writer.writeheader()
        fp_csv.close()

        # initialize the optimizer
        global optimizer
        optimizer = torch.optim.Adam(V.parameters(), lr=args.lr)

        # initialize best design rule sequence
        best_design, best_reward = None, -np.inf

        # reward history
        epoch_rew_his = []
        last_checkpoint = -1

        # recording time
        t_sample_sum = 0.
        # record the count for invalid samples
        no_action_samples, step_exceeded_samples, self_collision_samples = 0, 0, 0

        for epoch in range(args.num_iterations):
            t_start = time.time()

            V.eval()

            # update eps and eps_sample
            if args.eps_schedule == 'linear-decay':
                eps = args.eps_start + epoch / args.num_iterations * (args.eps_end - args.eps_start)
            elif args.eps_schedule == 'exp-decay':
                eps = args.eps_end + (args.eps_start - args.eps_end) * np.exp(-1.0 * epoch / args.num_iterations / args.eps_decay)

            if args.eps_sample_schedule == 'linear-decay':
                eps_sample = args.eps_sample_start + epoch / args.num_iterations * (args.eps_sample_end - args.eps_sample_start)
            elif args.eps_sample_schedule == 'exp-decay':
                eps_sample = args.eps_sample_end + (args.eps_sample_start - args.eps_sample_end) * np.exp(-1.0 * epoch / args.num_iterations / args.eps_sample_decay)

            t_sample, t_update, t_mpc, t_opt = 0, 0, 0, 0

            selected_design, selected_reward = None, -np.inf
            selected_state_seq, selected_rule_seq = None, None

            p = random.random()
            if p < eps_sample:
                num_samples = 1
            else:
                num_samples = args.num_samples

            # use e-greedy to sample a design within the maximum number of steps
            for _ in range(num_samples):
                valid = False
                while not valid:
                    t0 = time.time()

                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    no_action_flag = False
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            no_action_flag = True
                            break
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        state = next_state
                        if not has_nonterminals(state):
                            break

                    valid = env.is_valid(state)

                    t_sample += time.time() - t0

                    t0 = time.time()

                    if not valid:
                        # update the invalid samples' counts
                        if no_action_flag:
                            no_action_samples += 1
                        elif has_nonterminals(state):
                            step_exceeded_samples += 1
                        else:
                            self_collision_samples += 1

                        # update the Vhat for invalid designs
                        update_Vhat(args, V_hat, state_seq, -2.0, invalid=True, invalid_cnt=invalid_his)

                        # update states pool
                        update_states_pool(states_pool, state_seq, states_set, V_hat)

                        num_invalid_samples += 1
                    else:
                        num_valid_samples += 1

                    t_update += time.time() - t0

                predicted_value = predict(V, state)
                if predicted_value > selected_reward:
                    selected_design, selected_reward = state, predicted_value
                    selected_rule_seq, selected_state_seq = rule_seq, state_seq

            t0 = time.time()

            repeated = False
            if (hash(selected_design) in V_hat) and (V_hat[hash(selected_design)] > -2.0 + 1e-3):
                repeated = True
                repeated_cnt += 1

            reward, best_seed = -np.inf, None
            for _ in range(args.num_eval):
                _, rew = env.get_reward(selected_design)
                if rew > reward:
                    reward, best_seed = rew, env.last_opt_seed

            t_mpc += time.time() - t0

            # save the design and the reward in the list
            designs.append(selected_rule_seq)
            design_rewards.append(reward)
            design_opt_seeds.append(best_seed)

            # update best design
            if reward > best_reward:
                best_design, best_reward = selected_rule_seq, reward
                print_info('new best: reward = {:.4f}, predicted reward = {:.4f}, num_samples = {}'.format(reward, selected_reward, num_samples))

            t0 = time.time()

            # update V_hat for the valid design
            update_Vhat(args, V_hat, selected_state_seq, reward)

            # update states pool for the valid design
            update_states_pool(states_pool, selected_state_seq, states_set, V_hat)

            t_update += time.time() - t0

            t0 = time.time()

            # optimize
            V.train()
            total_loss = 0.0
            for _ in range(args.opt_iter):
                minibatch = states_pool.sample(min(len(states_pool), args.batch_size))

                train_adj_matrix, train_features, train_masks, train_reward = [], [], [], []
                max_nodes = 0
                for robot_graph in minibatch:
                    hash_key = hash(robot_graph)
                    target_reward = V_hat[hash_key]
                    # adj_matrix, features, masks = preprocessor.preprocess(robot_graph)
                    adj_matrix, features, _ = preprocessor.preprocess(robot_graph)
                    max_nodes = max(max_nodes, len(features))
                    train_adj_matrix.append(adj_matrix)
                    train_features.append(features)
                    # train_masks.append(masks)
                    train_reward.append(target_reward)

                # pad all graphs in the minibatch to the same node count
                for i in range(len(minibatch)):
                    train_adj_matrix[i], train_features[i], masks = \
                        preprocessor.pad_graph(train_adj_matrix[i], train_features[i], max_nodes)
                    train_masks.append(masks)

                train_adj_matrix_torch = torch.tensor(train_adj_matrix)
                train_features_torch = torch.tensor(train_features)
                train_masks_torch = torch.tensor(train_masks)
                train_reward_torch = torch.tensor(train_reward)

                optimizer.zero_grad()
                output, loss_link, loss_entropy = V(train_features_torch, train_adj_matrix_torch, train_masks_torch)
                loss = F.mse_loss(output[:, 0], train_reward_torch)
                loss.backward()
                total_loss += loss.item()
                optimizer.step()

            t_opt += time.time() - t0

            t_end = time.time()

            t_sample_sum += t_sample

            # logging
            if (epoch + 1) % args.log_interval == 0 or epoch + 1 == args.num_iterations:
                iter_save_dir = os.path.join(args.save_dir, '{}'.format(epoch + 1))
                os.makedirs(os.path.join(iter_save_dir), exist_ok=True)

                # save model
                save_path = os.path.join(iter_save_dir, 'V_model.pt')
                torch.save(V.state_dict(), save_path)

                # save V_hat
                save_path = os.path.join(iter_save_dir, 'V_hat')
                with open(save_path, 'wb') as fp:
                    pickle.dump(V_hat, fp)

                # save explored designs and their rewards
                fp_csv = open(design_csv_path, 'a')
                fieldnames = ['rule_seq', 'reward', 'opt_seed']
                writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
                for i in range(last_checkpoint + 1, len(designs)):
                    writer.writerow({'rule_seq': str(designs[i]),
                                     'reward': design_rewards[i],
                                     'opt_seed': design_opt_seeds[i]})
                last_checkpoint = len(designs) - 1
                fp_csv.close()

            epoch_rew_his.append(reward)

            avg_loss = total_loss / args.opt_iter
            len_his = min(len(epoch_rew_his), 30)
            avg_reward = np.sum(epoch_rew_his[-len_his:]) / len_his
            prediction_error_sum += (selected_reward - reward) ** 2
            avg_prediction_error = prediction_error_sum / (epoch + 1)

            # repeated designs are printed in a different color
            log_msg = ('Epoch {:4}: T_sample = {:5.2f}, T_update = {:5.2f}, T_mpc = {:5.2f}, T_opt = {:5.2f}, '
                       'eps = {:5.3f}, eps_sample = {:5.3f}, #samples = {:2}, training loss = {:7.4f}, '
                       'pred_error = {:6.4f}, predicted_reward = {:6.4f}, reward = {:6.4f}, '
                       'last 30 epoch reward = {:6.4f}, best reward = {:6.4f}').format(
                           epoch, t_sample, t_update, t_mpc, t_opt, eps, eps_sample, num_samples,
                           avg_loss, avg_prediction_error, selected_reward, reward, avg_reward, best_reward)
            if repeated:
                print_white(log_msg)
            else:
                print_warning(log_msg)

            fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'a')
            fp_log.write('eps = {:.4f}, eps_sample = {:.4f}, num_samples = {}, T_sample = {:4f}, T_update = {:4f}, T_mpc = {:.4f}, T_opt = {:.4f}, loss = {:.4f}, predicted_reward = {:.4f}, reward = {:.4f}, avg_reward = {:.4f}\n'.format(eps, eps_sample, num_samples, t_sample, t_update, t_mpc, t_opt, avg_loss, selected_reward, reward, avg_reward))
            fp_log.close()

            if (epoch + 1) % args.log_interval == 0:
                print_info('Avg sampling time for last {} epochs: {:.4f} second'.format(args.log_interval, t_sample_sum / args.log_interval))
                t_sample_sum = 0.

                print_info('size of states_pool = {}'.format(len(states_pool)))
                print_info('#valid samples = {}, #invalid samples = {}, #valid / #invalid = {}'.format(num_valid_samples, num_invalid_samples, num_valid_samples / num_invalid_samples if num_invalid_samples > 0 else 10000.0))
                print_info('Invalid samples: #no_action_samples = {}, #step_exceeded_samples = {}, #self_collision_samples = {}'.format(no_action_samples, step_exceeded_samples, self_collision_samples))

                max_trials, cnt = 0, 0
                for key in invalid_his.keys():
                    if invalid_his[key] > max_trials:
                        if key not in V_hat:
                            max_trials = invalid_his[key]
                        elif V_hat[key] < -2.0 + 1e-3:
                            max_trials = invalid_his[key]
                    if invalid_his[key] >= args.max_trials:
                        if V_hat[key] < -2.0 + 1e-3:
                            cnt += 1
                print_info('max invalid_trials = {}, #failed nodes = {}'.format(max_trials, cnt))

                print_info('repeated rate = {}'.format(repeated_cnt / (epoch + 1)))

        save_path = os.path.join(args.save_dir, 'model_state_dict_final.pt')
        torch.save(V.state_dict(), save_path)
    else:
        import IPython
        IPython.embed()

        # test
        V.eval()
        print('Start testing')
        test_epoch = 30
        y0 = []
        y1 = []
        x = []
        for ii in range(0, 11):
            eps = 1.0 - 0.1 * ii

            print('------------------------------------------')
            print('eps = ', eps)

            reward_sum = 0.
            best_reward = -np.inf
            for epoch in range(test_epoch):
                t0 = time.time()

                # use e-greedy to sample a valid design within the maximum number of steps
                valid = False
                while not valid:
                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            break
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        # advance the state so the completed design is the one evaluated below
                        state = next_state
                        if not has_nonterminals(state):
                            valid = True
                            break

                _, reward = env.get_reward(state)
                reward_sum += reward
                best_reward = max(best_reward, reward)
                print(f'design {epoch}: reward = {reward}, time = {time.time() - t0}')

            print('test avg reward = ', reward_sum / test_epoch)
            print('best reward found = ', best_reward)
            x.append(eps)
            y0.append(reward_sum / test_epoch)
            y1.append(best_reward)

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        ax[0].plot(x, y0)
        ax[0].set_title('Avg Reward')
        ax[0].set_xlabel('eps')
        ax[0].set_ylabel('reward')
        ax[1].plot(x, y1)
        ax[1].set_title('Best Reward')
        ax[1].set_xlabel('eps')
        ax[1].set_ylabel('reward')
        plt.show()
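# Both eps schedules above interpolate between eps_start and eps_end. A standalone
# restatement of the same formulas for reference -- an illustrative helper, not part
# of the source; `decay` plays the role of args.eps_decay / args.eps_sample_decay,
# and numpy is assumed imported as np, as elsewhere in this file.
def eps_by_schedule(schedule, epoch, num_iterations, eps_start, eps_end, decay=1.0):
    if schedule == 'linear-decay':
        # straight line from eps_start (epoch 0) to eps_end (final epoch)
        return eps_start + epoch / num_iterations * (eps_end - eps_start)
    if schedule == 'exp-decay':
        # exponential relaxation from eps_start toward eps_end
        return eps_end + (eps_start - eps_end) * np.exp(-1.0 * epoch / num_iterations / decay)
    raise ValueError('unknown schedule: {}'.format(schedule))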
    gas_eval_y = data['gas_eval_y']
    gas_eval_global_prob = data['gas_eval_global_prob']
else:
    import torch
    import torch.nn as nn
    from torch.optim import *
    from torch.optim.lr_scheduler import *
    from torch.autograd import Variable
    from Net import Net
    from util_in import *

    # Load model
    args.kernel_size = tuple(int(x) for x in args.kernel_size.split('x'))
    model = Net(args).cuda()
    model.load_state_dict(torch.load(MODEL_FILE)['model'])
    model.eval()

    # Load DCASE data
    dcase_valid_x, dcase_valid_y, _ = bulk_load('DCASE_valid')
    dcase_test_x, dcase_test_y, dcase_test_hashes = bulk_load('DCASE_test')
    dcase_test_frame_y = load_dcase_test_frame_truth()
    DCASE_CLASS_IDS = [318, 324, 341, 321, 307, 310, 314, 397, 325,
                       326, 323, 319, 14, 342, 329, 331, 316]

    # Predict on DCASE data
    dcase_valid_global_prob = model.predict(dcase_valid_x, verbose=False)[:, DCASE_CLASS_IDS]
    dcase_thres = optimize_micro_avg_f1(dcase_valid_global_prob, dcase_valid_y)
    dcase_test_outputs = model.predict(dcase_test_x, verbose=True)
idx2Wd = {idx: wd for idx, wd in enumerate(vocab)}

# Load the test set.
test_data_path = root_path + '\\Data\\qtest7'
testline, testvec = get_test_data(test_data_path, wd2Idx, sentence_len)
testbatch = get_test_batch(testvec, BATCH_SIZE, 0)  # get one batch of test data

# Load the network.
net7 = Net(sentence_len=sentence_len,
           batch_size=BATCH_SIZE,
           vocab_size=vocab_size,
           embed_size=embed_size,
           hidden_size=hidden_size)
net_path = root_path + '\\Models\\rnn\\rnn7_epoch_1.pth'
net7.load_state_dict(torch.load(net_path))
net7.eval()  # the network is no longer training

testbatch = np.array(testbatch)
print(testbatch.shape)
output = net7(testbatch, False)  # output for one test batch
# Each input yields probability distributions for 3 (sentences) * sentence_len characters.
output = torch.reshape(output, (BATCH_SIZE * 3 * sentence_len, vocab_size))
wordidx = torch.argmax(output, dim=1)  # take the most probable character

# Write out the test-set results.
with open("out7.txt", "w", encoding="utf-8") as f:
    for i in range(BATCH_SIZE):
        f.write('\n')
        f.write('\n')
idx2Wd = {idx: wd for idx, wd in enumerate(vocab)}

# Load the test set.
test_data_path = root_path + '\\Data\\qtest5'
testline, testvec = get_test_data(test_data_path, wd2Idx, 5)
testbatch = get_test_batch(testvec, BATCH_SIZE, 0)  # get one batch of test data

# Load the network.
net5 = Net(sentence_len=sentence_len,
           batch_size=BATCH_SIZE,
           vocab_size=vocab_size,
           embed_size=embed_size,
           hidden_size=hidden_size)
net_path = root_path + '\\Models\\rnn\\rnn5_epoch_4.pth'
net5.load_state_dict(torch.load(net_path))
net5.eval()  # the network is no longer training

testbatch = np.array(testbatch)
print(testbatch.shape)
output = net5(testbatch, False)  # output for one test batch
# Each input yields probability distributions for 3 (sentences) * sentence_len characters.
output = torch.reshape(output, (BATCH_SIZE * 3 * sentence_len, vocab_size))
wordidx = torch.argmax(output, dim=1)  # take the most probable character

# Write out the test-set results.
with open("out5.txt", "w", encoding="utf-8") as f:
    for i in range(BATCH_SIZE):
        f.write('\n')
        f.write('\n')
class TrainModel:
    def __init__(self):
        torch.cuda.empty_cache()
        self.learningRate = LEARNING_RATE

        db = ImageClassifierDataset()
        db.loadData()
        train_set, test_set = db.splitDataSet()
        self.trainSetSize = len(train_set)
        self.testSetSize = len(test_set)
        print(f"Train set: {self.trainSetSize} Test set: {self.testSetSize}")

        self.train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True,
                                       num_workers=4, pin_memory=True)
        self.test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False,
                                      num_workers=4, pin_memory=True)

        self.cuda_avail = torch.cuda.is_available()
        self.model = Net()
        print(f"Cuda: {self.cuda_avail}")
        if self.cuda_avail:
            self.model.cuda()

        self.optimizer = Adam(self.model.parameters(), lr=LEARNING_RATE)  # , weight_decay=WEIGHT_DECAY)
        self.loss_fn = nn.BCELoss()

    def adjust_learning_rate(self, epoch):
        """if epoch > 180:
            self.learningRate /= 1000000
        elif epoch > 150:
            self.learningRate /= 100000
        elif epoch > 120:
            self.learningRate /= 10000
        elif epoch > 90:
            self.learningRate /= 1000
        elif epoch > 60:
            self.learningRate /= 100
        elif epoch > 30:
            self.learningRate /= 10"""
        for param_group in self.optimizer.param_groups:
            param_group["lr"] = self.learningRate * LEARNING_RATE_DECAY

    def save_models(self):
        torch.save(self.model.state_dict(), "weights/myModel.model")
        print("Checkpoint saved")

    def test(self):
        self.model.eval()
        test_acc = 0.0
        test_loss = 0.0
        for i, (images, labels) in enumerate(self.test_loader):
            if self.cuda_avail:
                images = Variable(images.cuda())
                labels = Variable(labels.cuda())

            outputs = self.model(images)
            # _, prediction = torch.max(outputs.data, 1)
            # prediction = prediction.cpu().numpy()
            prediction = torch.round(outputs.data)
            # print(outputs, labels, prediction)
            loss = self.loss_fn(outputs, labels)
            test_loss += loss.cpu().data.item() * images.size(0)
            test_acc += torch.sum(torch.eq(prediction, labels.data))

        test_acc /= self.testSetSize
        test_loss /= self.testSetSize
        return test_acc, test_loss

    def train(self):
        best_acc = 0.0
        best_loss = 1
        epochsSinceLastImprovement = 0
        losses = []
        print("Starting train...")
        for epoch in range(EPOCHS):
            startTime = time.time()
            self.model.train()
            train_acc = 0.0
            train_loss = 0.0
            for i, (images, labels) in enumerate(self.train_loader):
                if self.cuda_avail:
                    images = Variable(images.cuda())
                    labels = Variable(labels.cuda())

                self.optimizer.zero_grad()
                outputs = self.model(images)
                loss = self.loss_fn(outputs, labels)
                loss.backward()
                self.optimizer.step()

                train_loss += loss.cpu().data.item() * images.size(0)
                prediction = torch.round(outputs.data)
                train_acc += torch.sum(torch.eq(prediction, labels.data))

            self.adjust_learning_rate(epoch)
            train_acc /= self.trainSetSize
            train_loss /= self.trainSetSize

            test_acc, test_loss = self.test()
            if test_loss + LOSS_IMPROVEMENT < best_loss:
                if epoch != 0:
                    self.save_models()
                # best_acc = test_acc
                best_loss = test_loss
                epochsSinceLastImprovement = 0
            else:
                epochsSinceLastImprovement += 1
                if epochsSinceLastImprovement == EARLY_STOP:
                    print(f"Epoch {epoch}, Train Accuracy: {train_acc} , TrainLoss: {train_loss} , Test Accuracy: {test_acc}, TestLoss: {test_loss} Time: {time.time() - startTime}")
                    print(f"No improvement in {epochsSinceLastImprovement} epochs, stopping...")
                    losses.append(test_loss)
                    break

            print(f"Epoch {epoch}, Train Accuracy: {train_acc} , TrainLoss: {train_loss} , Test Accuracy: {test_acc}, TestLoss: {test_loss} Time: {time.time() - startTime}")
            losses.append(test_loss)

        plt.plot(losses)
        plt.show()
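# Example driver -- an illustrative sketch, not part of the source. Assumes the
# module-level constants (BATCH_SIZE, EPOCHS, LEARNING_RATE, LEARNING_RATE_DECAY,
# LOSS_IMPROVEMENT, EARLY_STOP) and the "weights/" directory exist as the class expects.
if __name__ == "__main__":
    trainer = TrainModel()
    trainer.train()  # trains with early stopping and plots the test loss
    test_acc, test_loss = trainer.test()
    print(f"Final test accuracy: {test_acc}, test loss: {test_loss}")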
import torch
import pandas as pd
from reducing_net import reduced_ann_net
from Net import Net, test_model
from utils import confusion, F1_score, loadDataset, saveNNParas
import time

# Load the previous network state.
feature_num = 11
hidden_num = 30
output_num = 3

load_net = Net(feature_num, hidden_num, output_num)
load_net.load_state_dict(torch.load('ann_net_model_genre.pt'))
# load_net.load_state_dict(torch.load('net_model_subjective_rating.pt'))
load_net.eval()

# Load the testing dataset to evaluate the new network.
x_test, y_test = loadDataset('testing')

# Load the vector-angle information.
vectors = pd.read_excel('ann_vector_angle_sample.xls', header=None)
raw_df = pd.DataFrame({
    'row': vectors.iloc[:, 0],
    'col': vectors.iloc[:, 1],
    'vector': vectors.iloc[:, 2]
})

# Sort by vector angle in ascending order.
increase_res = raw_df.sort_values('vector', ascending=True)
unique_row = increase_res.row.unique()
def search_algo_1(args):
    # initialize random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # initialize/load
    # TODO: use 80 to fit the input of trained MPC GNN, use args.depth * 3 later for real MPC
    max_nodes = 80
    task_class = getattr(tasks, args.task)
    task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # state preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))
    global preprocessor
    preprocessor = Preprocessor(max_nodes=max_nodes, all_labels=all_labels)

    # initialize the env
    env = RobotGrammarEnv(task, rules, enable_reward_oracle=True, preprocessor=preprocessor)

    # initialize value function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = preprocessor.preprocess(state)
    num_features = sample_features.shape[1]
    V = Net(max_nodes=max_nodes, num_channels=num_features, num_outputs=1).to(device)

    # load pretrained V function
    if args.load_V_path is not None:
        V.load_state_dict(torch.load(args.load_V_path))
        print_info('Loaded pretrained V function from {}'.format(args.load_V_path))

    # initialize target V_hat lookup table
    V_hat = dict()

    # load pretrained V_hat
    if args.load_Vhat_path is not None:
        with open(args.load_Vhat_path, 'rb') as V_hat_fp:
            V_hat = pickle.load(V_hat_fp)
        print_info('Loaded pretrained Vhat from {}'.format(args.load_Vhat_path))

    # initialize the seen states pool
    states_pool = StatesPool(capacity=args.states_pool_capacity)
    all_sample_designs = []

    # explored designs
    designs = []
    design_rewards = []

    # load previously explored designs
    if args.load_designs_path is not None:
        fp_csv = open(args.load_designs_path, newline='')
        reader = csv.DictReader(fp_csv)
        for row in reader:
            rule_seq = ast.literal_eval(row['rule_seq'])
            reward = float(row['reward'])
            state = make_initial_graph()
            for i in range(len(rule_seq)):
                state = env.transite(state, rule_seq[i])
            designs.append(state)
            design_rewards.append(reward)
            if not np.isclose(V_hat[hash(state)], reward):
                print(rule_seq)
                print(V_hat[hash(state)], reward)
                print_error("Vhat and designs don't match")
        fp_csv.close()
        print_info('Loaded pretrained designs from {}'.format(args.load_designs_path))

    if not args.test:
        # initialize save folders and files
        fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'w')
        fp_log.close()
        fp_eval = open(os.path.join(args.save_dir, 'eval.txt'), 'w')
        fp_eval.close()

        design_csv_path = os.path.join(args.save_dir, 'designs.csv')
        fp_csv = open(design_csv_path, 'w')
        fieldnames = ['rule_seq', 'reward']
        writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
        writer.writeheader()
        fp_csv.close()

        # initialize the optimizer
        global optimizer
        optimizer = torch.optim.Adam(V.parameters(), lr=args.lr)

        # initialize best design rule sequence
        best_design, best_reward = None, -np.inf

        # reward history
        epoch_rew_his = []
        last_checkpoint = -1

        # recording time
        t_sample_sum = 0.
        # record the count for invalid samples
        no_action_samples, step_exceeded_samples = 0, 0

        for epoch in range(args.num_iterations):
            t_start = time.time()

            V.eval()

            # update eps and eps_sample
            if args.eps_schedule == 'linear-decay':
                eps = args.eps_start + epoch / args.num_iterations * (args.eps_end - args.eps_start)
            elif args.eps_schedule == 'exp-decay':
                eps = args.eps_end + (args.eps_start - args.eps_end) * np.exp(-1.0 * epoch / args.num_iterations / args.eps_decay)

            if args.eps_sample_schedule == 'linear-decay':
                eps_sample = args.eps_sample_start + epoch / args.num_iterations * (args.eps_sample_end - args.eps_sample_start)
            elif args.eps_sample_schedule == 'exp-decay':
                eps_sample = args.eps_sample_end + (args.eps_sample_start - args.eps_sample_end) * np.exp(-1.0 * epoch / args.num_iterations / args.eps_sample_decay)

            t_sample, t_update, t_mpc, t_opt = 0, 0, 0, 0

            best_candidate_design, best_candidate_reward = None, -1.0
            best_candidate_state_seq, best_candidate_rule_seq = None, None

            p = random.random()
            if p < eps_sample:
                num_samples = 1
            else:
                num_samples = args.num_samples

            # use e-greedy to sample a design within the maximum number of steps
            for _ in range(num_samples):
                valid = False
                while not valid:
                    t0 = time.time()

                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    random_step_cnt, optimal_step_cnt = 0, 0
                    no_action_flag = False
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            no_action_flag = True
                            break
                        if step_type == 'random':
                            random_step_cnt += 1
                        elif step_type == 'optimal':
                            optimal_step_cnt += 1
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        state = next_state
                        if env.is_valid(next_state):
                            valid = True
                            break

                    t_sample += time.time() - t0

                    t0 = time.time()

                    # update the invalid samples' counts
                    if not valid:
                        if no_action_flag:
                            no_action_samples += 1
                        else:
                            step_exceeded_samples += 1

                    # update the Vhat for invalid designs
                    if not valid:
                        update_Vhat(V_hat, state_seq, 0.0)
                        # update states pool
                        update_states_pool(states_pool, state_seq)

                    # if valid but already explored as a valid design before, put it
                    # in the states pool but resample
                    if valid and (hash(state) in V_hat) and (V_hat[hash(state)] > 1e-3):
                        update_Vhat(V_hat, state_seq, V_hat[hash(state)])
                        update_states_pool(states_pool, state_seq)
                        valid = False

                    # record the sampled design
                    all_sample_designs.append(rule_seq)

                    t_update += time.time() - t0

                predicted_value = predict(V, state)
                if predicted_value > best_candidate_reward:
                    best_candidate_design, best_candidate_reward = state, predicted_value
                    best_candidate_rule_seq, best_candidate_state_seq = rule_seq, state_seq

            t0 = time.time()

            _, reward = env.get_reward(best_candidate_design)

            t_mpc += time.time() - t0

            # save the design and the reward in the list
            designs.append(best_candidate_rule_seq)
            design_rewards.append(reward)

            # update best design
            if reward > best_reward:
                best_design, best_reward = best_candidate_rule_seq, reward
                print_info('new best: reward = {:.4f}, predicted reward = {:.4f}, num_samples = {}'.format(reward, best_candidate_reward, num_samples))

            t0 = time.time()

            # update V_hat for the valid design
            update_Vhat(V_hat, best_candidate_state_seq, reward)

            # update states pool for the valid design
            update_states_pool(states_pool, best_candidate_state_seq)

            t_update += time.time() - t0

            t0 = time.time()

            # optimize
            V.train()
            total_loss = 0.0
            for _ in range(args.opt_iter):
                minibatch = states_pool.sample(min(len(states_pool), args.batch_size))

                train_adj_matrix, train_features, train_masks, train_reward = [], [], [], []
                for robot_graph in minibatch:
                    hash_key = hash(robot_graph)
                    target_reward = V_hat[hash_key]
                    adj_matrix, features, masks = preprocessor.preprocess(robot_graph)
                    train_adj_matrix.append(adj_matrix)
                    train_features.append(features)
                    train_masks.append(masks)
                    train_reward.append(target_reward)

                train_adj_matrix_torch = torch.tensor(train_adj_matrix)
                train_features_torch = torch.tensor(train_features)
                train_masks_torch = torch.tensor(train_masks)
                train_reward_torch = torch.tensor(train_reward)

                optimizer.zero_grad()
                output, loss_link, loss_entropy = V(train_features_torch, train_adj_matrix_torch, train_masks_torch)
                loss = F.mse_loss(output[:, 0], train_reward_torch)
                loss.backward()
                total_loss += loss.item()
                optimizer.step()

            t_opt += time.time() - t0

            t_end = time.time()

            t_sample_sum += t_sample

            # logging
            if (epoch + 1) % args.log_interval == 0 or epoch + 1 == args.num_iterations:
                iter_save_dir = os.path.join(args.save_dir, '{}'.format(epoch + 1))
                os.makedirs(os.path.join(iter_save_dir), exist_ok=True)

                # save model
                save_path = os.path.join(iter_save_dir, 'V_model.pt')
                torch.save(V.state_dict(), save_path)

                # save V_hat
                save_path = os.path.join(iter_save_dir, 'V_hat')
                with open(save_path, 'wb') as fp:
                    pickle.dump(V_hat, fp)

                # save all sampled designs
                save_path = os.path.join(iter_save_dir, 'all_sampled_designs')
                with open(save_path, 'wb') as fp:
                    pickle.dump(all_sample_designs, fp)

                # save explored designs and their rewards
                fp_csv = open(design_csv_path, 'a')
                fieldnames = ['rule_seq', 'reward']
                writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
                for i in range(last_checkpoint + 1, len(designs)):
                    writer.writerow({'rule_seq': str(designs[i]), 'reward': design_rewards[i]})
                last_checkpoint = len(designs) - 1
                fp_csv.close()

            epoch_rew_his.append(reward)

            avg_loss = total_loss / args.opt_iter
            len_his = min(len(epoch_rew_his), 30)
            avg_reward = np.sum(epoch_rew_his[-len_his:]) / len_his
            print('Epoch {}: T_sample = {:.2f}, T_update = {:.2f}, T_mpc = {:.2f}, T_opt = {:.2f}, eps = {:.3f}, eps_sample = {:.3f}, #samples = {}, training loss = {:.4f}, predicted_reward = {:.4f}, reward = {:.4f}, last 30 epoch reward = {:.4f}, best reward = {:.4f}'.format(epoch, t_sample, t_update, t_mpc, t_opt, eps, eps_sample, num_samples, avg_loss, best_candidate_reward, reward, avg_reward, best_reward))

            fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'a')
            fp_log.write('eps = {:.4f}, eps_sample = {:.4f}, num_samples = {}, T_sample = {:4f}, T_update = {:4f}, T_mpc = {:.4f}, T_opt = {:.4f}, loss = {:.4f}, predicted_reward = {:.4f}, reward = {:.4f}, avg_reward = {:.4f}\n'.format(eps, eps_sample, num_samples, t_sample, t_update, t_mpc, t_opt, avg_loss, best_candidate_reward, reward, avg_reward))
            fp_log.close()

            if (epoch + 1) % args.log_interval == 0:
                print_info('Avg sampling time for last {} epochs: {:.4f} second'.format(args.log_interval, t_sample_sum / args.log_interval))
                t_sample_sum = 0.
                invalid_cnt, valid_cnt = 0, 0
                for state in states_pool.pool:
                    if np.isclose(V_hat[hash(state)], 0.):
                        invalid_cnt += 1
                    else:
                        valid_cnt += 1
                print_info('states_pool size = {}, #valid = {}, #invalid = {}, #valid / #invalid = {}'.format(len(states_pool), valid_cnt, invalid_cnt, valid_cnt / invalid_cnt))
                print_info('Invalid samples: #no_action_samples = {}, #step_exceeded_samples = {}, #no_action / #step_exceeded = {}'.format(no_action_samples, step_exceeded_samples, no_action_samples / step_exceeded_samples))

            # evaluation
            if args.eval_interval > 0 and ((epoch + 1) % args.eval_interval == 0 or epoch + 1 == args.num_iterations):
                print_info('-------- Doing evaluation --------')
                print_info('#states = {}'.format(len(states_pool)))
                loss_total = 0.
                for state in states_pool.pool:
                    value = predict(V, state)
                    loss_total += (V_hat[hash(state)] - value) ** 2
                print_info('Loss = {:.3f}'.format(loss_total / len(states_pool)))

                fp_eval = open(os.path.join(args.save_dir, 'eval.txt'), 'a')
                fp_eval.write('epoch = {}, loss = {:.3f}\n'.format(epoch + 1, loss_total / len(states_pool)))
                fp_eval.close()

        save_path = os.path.join(args.save_dir, 'model_state_dict_final.pt')
        torch.save(V.state_dict(), save_path)
    else:
        import IPython
        IPython.embed()

        # test
        V.eval()
        print('Start testing')
        test_epoch = 30
        y0 = []
        y1 = []
        x = []
        for ii in range(0, 11):
            eps = 1.0 - 0.1 * ii

            print('------------------------------------------')
            print('eps = ', eps)

            reward_sum = 0.
            best_reward = -np.inf
            for epoch in range(test_epoch):
                t0 = time.time()

                # use e-greedy to sample a valid design within the maximum number of steps
                valid = False
                while not valid:
                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    for _ in range(args.depth):
                        action, step_type = select_action(env, V, state, eps)
                        if action is None:
                            break
                        rule_seq.append(action)
                        next_state = env.transite(state, action)
                        state_seq.append(next_state)
                        # advance the state so the completed design is the one evaluated below
                        state = next_state
                        if env.is_valid(state):
                            valid = True
                            break

                _, reward = env.get_reward(state)
                reward_sum += reward
                best_reward = max(best_reward, reward)
                print(f'design {epoch}: reward = {reward}, time = {time.time() - t0}')

            print('test avg reward = ', reward_sum / test_epoch)
            print('best reward found = ', best_reward)
            x.append(eps)
            y0.append(reward_sum / test_epoch)
            y1.append(best_reward)

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        ax[0].plot(x, y0)
        ax[0].set_title('Avg Reward')
        ax[0].set_xlabel('eps')
        ax[0].set_ylabel('reward')
        ax[1].plot(x, y1)
        ax[1].set_title('Best Reward')
        ax[1].set_xlabel('eps')
        ax[1].set_ylabel('reward')
        plt.show()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=LR)

epochs = 7
for epoch in range(epochs):
    sum_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = Variable(inputs), Variable(labels)
        optimizer.zero_grad()              # zero the gradients
        outputs = net(inputs)              # forward pass
        loss = criterion(outputs, labels)  # compute the loss
        loss.backward()                    # backpropagation
        optimizer.step()                   # update the parameters
        print(loss.item())

# testing
net.eval()  # switch to evaluation mode
correct = 0
total = 0
for data_test in test_loader:
    images, labels = data_test
    images, labels = Variable(images), Variable(labels)
    output_test = net(images)
    _, predicted = torch.max(output_test, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
print("correct1: ", correct)
print("test acc: {0}".format(correct.item() / len(test_dataset)))
# -*- coding: utf-8 -*-
"""
@author: Ulrich
"""

import torch
from Net import Net, test_model
from utils import confusion, F1_score, loadDataset

# Reload the parameters of the trained model.
load_net = Net(11, 30, 3)
load_net.load_state_dict(torch.load('net_model_subjective_rating.pt'))
load_net.eval()

"""
Manual operation on network reduction.

Scheme for unit removal:
    17 -> 3
    24 -> 6
    23 -> 9

The units that will be removed are 17, 23, 24.
"""

# Addition step: fold each removed unit into its absorbing unit.
# In-place edits on parameters must run without autograd tracking.
with torch.no_grad():
    load_net.hidden.weight[2] += load_net.hidden.weight[16]
    load_net.hidden.weight[5] += load_net.hidden.weight[23]
    load_net.hidden.weight[8] += load_net.hidden.weight[22]

    # Slice the remaining weight and bias values into a new-sized network.
    new_net = Net(11, 27, 3)
    new_net.hidden.weight[:16] = load_net.hidden.weight[:16]
class PoseEstimation:
    def __init__(self, trainset_info, testset_info=None, lr=0.001, wd=0, radial=False):
        self.trainset_info = trainset_info
        self.radial = radial

        # Tensor using CPU or GPU
        self.device = self._use_cuda()

        # model setup
        self.net = Net()
        self.net.to(self.device)
        if radial:
            self.criterion = nn.L1Loss()
        else:
            self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.net.parameters(), lr=lr, weight_decay=wd)

        # Input data setup
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        self.trsfm = transforms.Compose(
            [transforms.Resize((128, 128)), transforms.ToTensor(), normalize])
        # self.trsfm = transforms.Compose([transforms.ToTensor()])
        self.trainset = PosEstimationDataset(self.trainset_info,
                                             transform=self.trsfm, radial=radial)
        self.norm_range = self.trainset.get_norm_range()
        self.trainloader = DataLoader(self.trainset,
                                      batch_size=self.trainset_info["batch_size"],
                                      shuffle=True)

        # Set up testset
        if testset_info is not None:
            self.load_test_set(testset_info, radial=radial)

        # initialise directory for saving training results
        self.save_dir = os.path.join(
            trainset_info["path"], trainset_info["dataset_name"] + "_results",
            "eph{}_bs{}_lr{}_wd{}".format(trainset_info["epochs"],
                                          trainset_info["batch_size"], lr, wd))

    def load_test_set(self, testset_info, radial=False, webcam_test=False):
        self.testset_info = testset_info
        self.testset = PosEstimationDataset(self.testset_info, self.trsfm,
                                            self.norm_range, radial, webcam_test)
        self.testloader = DataLoader(self.testset, shuffle=True)

    def train(self, show_fig=True, save_output=True, eval_eph=False):
        # Create directory for saving results
        os.makedirs(self.save_dir, exist_ok=False)

        loss_sample_size = len(self.trainloader) // 4

        # Initialise loss array
        train_losses = np.zeros(self.trainset_info["epochs"] * len(self.trainloader))

        # Initialise distance and angle diff arrays
        eph_losses = np.zeros([self.trainset_info["epochs"], 2])
        eph_diff = np.zeros([self.trainset_info["epochs"], 4])

        # Begin training
        t0 = time.time()
        try:
            for epoch in range(self.trainset_info["epochs"]):  # loop over the dataset multiple times
                print('\n[Epoch', epoch + 1, ']')
                running_loss = 0.0
                for i, data in enumerate(self.trainloader):
                    # Set network to training mode
                    self.net.train()

                    # get the inputs; data is a dictionary of {image, pos}
                    image, pos = data['image'].to(self.device), data['pos'].to(self.device)

                    # zero the parameter gradients
                    self.optimizer.zero_grad()

                    # forward + backward + optimize
                    outputs = self.net(image)
                    loss = self.criterion(outputs, pos)
                    loss.backward()
                    self.optimizer.step()

                    # Record the training loss for this batch
                    train_losses[epoch * len(self.trainloader) + i] = loss.item()

                    # print statistics
                    # running_loss += loss.item()
                    # if i % loss_sample_size == loss_sample_size - 1:
                    #     print('[{}, {}] loss: {:.5f}'.
                    #           format(epoch + 1, i + 1, running_loss / loss_sample_size))
                    #     running_loss = 0.0

                # Run evaluation and show results
                if eval_eph:
                    eph_losses[epoch], eph_diff[epoch, :] = self.evaluation()
        except KeyboardInterrupt:
            pass

        t1 = time.time()
        print('Time taken: {}'.format(t1 - t0))

        # Save output
        if save_output:
            self.save_model_output(train_losses, eph_losses, eph_diff)
        if show_fig:
            self.display_training_fig(train_losses, eph_losses, eph_diff)

        print('\n--- Finished Training ---\n')

    # Evaluation uses the model held by the class
    def evaluation(self):
        assert self.testset is not None, \
            "No testset is supplied. Make sure PoseEstimation.load_test_set(set_info) is called"

        # Initialise loss array
        losses = np.zeros(len(self.testloader))

        # Initialise distance and angle diff array
        diff = np.zeros([len(self.testloader), 2])

        # turn on evaluation mode
        self.net.eval()

        # start evaluation
        for i, data in enumerate(self.testloader):
            # get the inputs; data is a dictionary of {image, pos}
            image, pos = data['image'].to(self.device), data['pos'].to(self.device)

            # forward
            outputs = self.net(image)
            loss = self.criterion(outputs, pos)

            # Calculate the error
            losses[i] = loss.item()
            diff[i] = self.cal_error(outputs, pos)

            print("true   : {}".format(pos[-1]))
            print("predict: {}".format(outputs[-1]))

        return self.print_avg_stat(losses, diff)

    def _use_cuda(self):
        device = torch.device("cpu")
        if torch.cuda.is_available():
            device = torch.device("cuda:0")
            with warnings.catch_warnings(record=True) as w:
                warnings.filterwarnings("error")
                try:
                    torch.cuda.get_device_capability(device)
                except Exception:
                    device = torch.device("cpu")
        print(device)
        return device

    def show_batch_image(self):
        for i_batch, sample_batched in enumerate(self.trainloader):
            print(i_batch, sample_batched['image'].size(), sample_batched['pos'].size())

            images_batch = sample_batched["image"]
            if i_batch == 0:
                plt.figure()
                grid = torchvision.utils.make_grid(images_batch)
                plt.imshow(grid.numpy().transpose((1, 2, 0)))
                plt.axis('off')
                plt.ioff()
                plt.show()
                break

    # Save model and losses
    def save_model_output(self, train_losses, test_losses, test_diff):
        self.net.save_model_parameter(self.trainset_info, self.save_dir)
        self.save_array2csv(self.trainset_info, train_losses, "train_loss")
        self.save_array2csv(self.trainset_info, test_losses, "eph_loss")
        self.save_array2csv(self.trainset_info, test_diff, "diff")

    # Visualise the losses and deviation
    def display_training_fig(self, train_losses, test_losses, test_diff):
        self.plot_array(train_losses, "Loss", self.trainset_info, scatter=True)
        if self.radial:
            self.plot_array(test_diff[:, 0], "Difference_in_distance(m)",
                            self.trainset_info, std=test_diff[:, 1])
        else:
            self.plot_array(test_diff[:, 0], "Difference_in_distance(m)",
                            self.trainset_info, std=test_diff[:, 2])
            self.plot_array(test_diff[:, 1], "Difference_in_angle(deg)",
                            self.trainset_info, std=test_diff[:, 3])

        avg_train_losses = np.average(train_losses.reshape(-1, len(self.trainloader)), axis=1)
        plt.figure()
        plt.plot(range(1, len(avg_train_losses) + 1), avg_train_losses, label="train")
        plt.plot(range(1, len(test_losses) + 1), test_losses[:, 1], label="test")
        plt.ylabel("Loss")
        plt.xlabel("epoch")
        plt.legend()
        fig_name = "fig_{}_eph{}_bs{}_{}.png".format(
            self.trainset_info["dataset_name"], self.trainset_info["epochs"],
            self.trainset_info["batch_size"], "Loss_comp")
        file_path = os.path.join(self.save_dir, fig_name)
        plt.savefig(file_path)

    def plot_array(self, data, ylabel, trainset_info, scatter=False, std=None):
        plt.figure()
        if scatter:
            x = np.arange(len(data))
            plt.plot(x, data, marker='o', markersize=0.6, linewidth=0)
            plt.yscale("log")
            plt.xlabel("batch")
        else:
            plt.errorbar(range(1, len(data) + 1), data, yerr=std, ecolor="k", capsize=3)
            plt.xlabel("epoch")
        plt.ylabel(ylabel)
        fig_name = "fig_{}_eph{}_bs{}_{}.png".format(
            trainset_info["dataset_name"], trainset_info["epochs"],
            trainset_info["batch_size"], ylabel)
        file_path = os.path.join(self.save_dir, fig_name)
        plt.savefig(file_path)
        plt.close('all')

    def save_array2csv(self, trainset_info, data, name):
        file_name = "{}_{}_eph{}_bs{}.csv".format(name, trainset_info["dataset_name"],
                                                  trainset_info["epochs"],
                                                  trainset_info["batch_size"])
        file_path = os.path.join(self.save_dir, file_name)
        np.savetxt(file_path, data, delimiter=",")

    def cal_error(self, predict, true):
        # predict and true have size [batch_size, 6]
        # [:, :3] is the translational position
        # [:, 3:] is the rotation as a quaternion (consumed by Rotation.from_quat below)
        # De-normalise
        predict_np = self._denormalise(predict.cpu().detach().numpy())
        true_np = self._denormalise(true.cpu().detach().numpy())

        if self.radial:
            return predict_np - true_np
        else:
            # Get the euclidean distance
            error_distances = np.linalg.norm((predict_np[:, :3] - true_np[:, :3]), axis=1)

            # Calculate the rotation angle from predicted (output) to true (input):
            #   diff * output = pos  =>  diff = pos * inv(output)
            # Since the rotvec is the axis multiplied by the angle,
            # the angle is the magnitude of the vector.
            predict_rot = Rotation.from_quat(predict_np[:, 3:])
            true_rot = Rotation.from_quat(true_np[:, 3:])
            rot = true_rot * predict_rot.inv()
            diff_angle = rot.as_rotvec()
            error_rot = np.linalg.norm(diff_angle, axis=1)
            error_rot = np.rad2deg(error_rot)
            return [error_distances, error_rot]

    def _denormalise(self, pos):
        return pos * (self.norm_range["max"] - self.norm_range["min"]) + self.norm_range["min"]

    def load_model_parameter(self, path):
        self.net.load_state_dict(torch.load(path))

    def print_avg_stat(self, losses, diff):
        avg_loss = np.average(losses)
        avg_diff = np.average(diff, axis=0)
        std_loss = np.std(losses)
        std_diff = np.std(diff, axis=0)

        print(self.trainset.dataset_name)
        print("Test avg loss: {:.5f} | avg[distance, angle] {}".format(avg_loss, avg_diff))
        print("Test std loss: {:.5f} | std[distance, angle] {}".format(std_loss, std_diff))

        return [avg_loss, std_loss], np.concatenate((avg_diff, std_diff), axis=0)
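# Example usage -- an illustrative sketch, not part of the source. The dict keys are
# the ones this class reads (path, dataset_name, epochs, batch_size); all values and
# paths here are placeholders, and PosEstimationDataset may expect more.
if __name__ == "__main__":
    trainset_info = {"path": "data", "dataset_name": "train_run", "epochs": 30, "batch_size": 32}
    testset_info = {"path": "data", "dataset_name": "test_run", "epochs": 30, "batch_size": 32}
    estimator = PoseEstimation(trainset_info, testset_info, lr=0.001, wd=0)
    estimator.train(show_fig=True, save_output=True, eval_eph=True)  # evaluate after each epoch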
def search_algo_2(args):
    # initialize random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # initialize/load
    # TODO: use 80 to fit the input of trained MPC GNN, use args.depth * 3 later for real MPC
    max_nodes = 80
    task_class = getattr(tasks, args.task)
    task = task_class()
    graphs = rd.load_graphs(args.grammar_file)
    rules = [rd.create_rule_from_graph(g) for g in graphs]

    # state preprocessor
    # Find all possible link labels, so they can be one-hot encoded
    all_labels = set()
    for rule in rules:
        for node in rule.lhs.nodes:
            all_labels.add(node.attrs.require_label)
    all_labels = sorted(list(all_labels))
    global preprocessor
    preprocessor = Preprocessor(max_nodes=max_nodes, all_labels=all_labels)

    # initialize the env
    env = RobotGrammarEnv(task, rules, enable_reward_oracle=True, preprocessor=preprocessor)

    # initialize value function
    device = 'cpu'
    state = env.reset()
    sample_adj_matrix, sample_features, sample_masks = preprocessor.preprocess(state)
    num_features = sample_features.shape[1]
    V = Net(max_nodes=max_nodes, num_channels=num_features, num_outputs=1).to(device)

    # load pretrained V function
    if args.load_V_path is not None:
        V.load_state_dict(torch.load(args.load_V_path))
        print_info('Loaded pretrained V function from {}'.format(args.load_V_path))

    if not args.test:
        # initialize save folders and files
        fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'w')
        fp_log.close()

        design_csv_path = os.path.join(args.save_dir, 'designs.csv')
        fp_csv = open(design_csv_path, 'w')
        fieldnames = ['rule_seq', 'reward']
        writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
        writer.writeheader()
        fp_csv.close()

        # initialize the optimizer
        global optimizer
        optimizer = torch.optim.Adam(V.parameters(), lr=args.lr)

        # initialize best design
        best_design, best_reward = None, -np.inf

        # initialize the seen states pool
        states_pool = []

        # initialize visited states
        state_set = set()

        # TODO: load previously explored designs
        # explored designs
        designs = []
        design_rewards = []

        # reward history
        epoch_rew_his = []

        for epoch in range(args.num_iterations):
            t_start = time.time()

            V.eval()

            t0 = time.time()

            # use e-greedy to sample a design within the maximum number of steps
            if args.eps_schedule == 'linear-decay':
                # linear schedule
                eps = args.eps_start + epoch / args.num_iterations * (args.eps_end - args.eps_start)
            elif args.eps_schedule == 'exp-decay':
                # exp schedule
                eps = args.eps_end + (args.eps_start - args.eps_end) * np.exp(-1.0 * epoch / args.num_iterations / args.eps_decay)

            done = False
            while not done:
                state = env.reset()
                rule_seq = []
                state_seq = [state]
                total_reward = 0.
                for _ in range(args.depth):
                    action = select_action(env, V, state, eps)
                    if action is None:
                        break
                    rule_seq.append(action)
                    next_state, reward, done = env.step(action)
                    total_reward += reward
                    state_seq.append(next_state)
                    state = next_state
                    if done:
                        break

            # save the design and the reward in the list
            designs.append(rule_seq)
            design_rewards.append(total_reward)

            # update best design
            if total_reward > best_reward:
                best_design, best_reward = rule_seq, total_reward

            # update state pool
            for ancestor in state_seq:
                state_hash_key = hash(ancestor)
                if state_hash_key not in state_set:
                    state_set.add(state_hash_key)
                    states_pool.append(ancestor)

            t1 = time.time()

            # optimize
            V.train()
            total_loss = 0.0
            for _ in range(args.depth):
                minibatch = random.sample(states_pool, min(len(states_pool), args.batch_size))

                train_adj_matrix, train_features, train_masks, train_reward = [], [], [], []
                for robot_graph in minibatch:
                    V_hat = compute_Vhat(robot_graph, env, V)
                    adj_matrix, features, masks = preprocessor.preprocess(robot_graph)
                    train_adj_matrix.append(adj_matrix)
                    train_features.append(features)
                    train_masks.append(masks)
                    train_reward.append(V_hat)

                train_adj_matrix_torch = torch.tensor(train_adj_matrix)
                train_features_torch = torch.tensor(train_features)
                train_masks_torch = torch.tensor(train_masks)
                train_reward_torch = torch.tensor(train_reward)

                optimizer.zero_grad()
                output, loss_link, loss_entropy = V(train_features_torch, train_adj_matrix_torch, train_masks_torch)
                loss = F.mse_loss(output[:, 0], train_reward_torch)
                loss.backward()
                total_loss += loss.item()
                optimizer.step()

            t2 = time.time()

            # logging
            if (epoch + 1) % args.log_interval == 0 or epoch + 1 == args.num_iterations:
                iter_save_dir = os.path.join(args.save_dir, '{}'.format(epoch + 1))
                os.makedirs(os.path.join(iter_save_dir), exist_ok=True)

                # save model
                save_path = os.path.join(iter_save_dir, 'V_model.pt')
                torch.save(V.state_dict(), save_path)

                # save explored designs and their rewards
                fp_csv = open(design_csv_path, 'a')
                fieldnames = ['rule_seq', 'reward']
                writer = csv.DictWriter(fp_csv, fieldnames=fieldnames)
                for i in range(epoch - args.log_interval + 1, epoch + 1):
                    writer.writerow({'rule_seq': str(designs[i]), 'reward': design_rewards[i]})
                fp_csv.close()

            epoch_rew_his.append(total_reward)

            t_end = time.time()

            avg_loss = total_loss / args.depth
            len_his = min(len(epoch_rew_his), 30)
            avg_reward = np.sum(epoch_rew_his[-len_his:]) / len_his
            print('Epoch {}: Time = {:.2f}, T_sample = {:.2f}, T_opt = {:.2f}, eps = {:.3f}, training loss = {:.4f}, reward = {:.4f}, last 30 epoch reward = {:.4f}, best reward = {:.4f}'.format(epoch, t_end - t_start, t1 - t0, t2 - t1, eps, avg_loss, total_reward, avg_reward, best_reward))

            fp_log = open(os.path.join(args.save_dir, 'log.txt'), 'a')
            fp_log.write('eps = {:.4f}, loss = {:.4f}, reward = {:.4f}, avg_reward = {:.4f}\n'.format(eps, avg_loss, total_reward, avg_reward))
            fp_log.close()

        save_path = os.path.join(args.save_dir, 'model_state_dict_final.pt')
        torch.save(V.state_dict(), save_path)
    else:
        import IPython
        IPython.embed()

        # test
        V.eval()
        print('Start testing')
        test_epoch = 30
        y0 = []
        y1 = []
        x = []
        for ii in range(10):
            eps = 1.0 - 0.1 * ii

            print('------------------------------------------')
            print('eps = ', eps)

            reward_sum = 0.
            best_reward = -np.inf
            for epoch in range(test_epoch):
                t0 = time.time()

                # use e-greedy to sample a design within the maximum number of steps
                done = False
                while not done:
                    state = env.reset()
                    rule_seq = []
                    state_seq = [state]
                    total_reward = 0.
                    for _ in range(args.depth):
                        action = select_action(env, V, state, eps)
                        if action is None:
                            break
                        rule_seq.append(action)
                        next_state, reward, done = env.step(action)
                        total_reward += reward
                        state_seq.append(next_state)
                        state = next_state
                        if done:
                            break

                reward_sum += total_reward
                best_reward = max(best_reward, total_reward)
                print(f'design {epoch}: reward = {total_reward}, time = {time.time() - t0}')

            print('test avg reward = ', reward_sum / test_epoch)
            print('best reward found = ', best_reward)
            x.append(eps)
            y0.append(reward_sum / test_epoch)
            y1.append(best_reward)

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        ax[0].plot(x, y0)
        ax[1].plot(x, y1)
        plt.show()