def learn_Maze_HER(n):
    """Train a DQN+HER model on an ``n`` x ``n`` goal maze, then evaluate it.

    The maze environment is built with a fixed seed, discrete observations,
    sparse rewards and a 100-step limit. Training runs for ``200 * 100 * n``
    timesteps and may be cut short with Ctrl-C, in which case evaluation still
    runs on the partially trained model.

    Args:
        n: Width and height passed to the maze environment; also scales the
            training budget.
    """
    print("Maze({0}), DQN+HER".format(n))

    maze_settings = dict(
        maze_generator=RandomMazeGenerator,
        width=n,
        height=n,
        complexity=.05,
        density=.1,
        seed=17,
        obs_type='discrete',
        reward_type='sparse',
        step_limit=100,
    )
    env = make_env_GoalMaze(**maze_settings)

    agent_settings = dict(
        policy=partial(DQN_Policy, layers=[1024]),
        env=env,
        hindsight=3,
        learning_rate=1e-4,
        exploration_fraction=0.1,
        exploration_final_eps=0.05,
    )
    model = HER(**agent_settings)

    print("Initial distance: {0}".format(env._distance_diameter()))

    step_budget = 200 * 100 * n
    try:
        model.learn(
            total_timesteps=step_budget,
            callback=maze_callback,
            log_interval=20,
        )
    except KeyboardInterrupt:
        # A manual interrupt only stops training early; fall through to
        # evaluation.
        pass

    # env._set_live_display(True)
    evaluate(model, env)
def learn_BitFlipper_HER(n):
    """Train a DQN+HER model on the ``n``-bit BitFlipper task, then evaluate it.

    Training runs for ``250 * 100 * n`` timesteps and may be cut short with
    Ctrl-C, in which case evaluation still runs on the partially trained
    model.

    Args:
        n: Size passed to the BitFlipper environment; also scales the
            training budget.
    """
    print("BitFlipper({0}), DQN+HER".format(n))

    env = make_env_GoalBitFlipper(n=n, space_seed=None)

    agent_settings = dict(
        policy=partial(DQN_Policy, layers=[1024]),
        env=env,
        hindsight=7,
        learning_rate=1e-4,
        exploration_fraction=0.1,
        exploration_final_eps=0.05,
    )
    model = HER(**agent_settings)

    step_budget = 250 * 100 * n
    try:
        model.learn(
            total_timesteps=step_budget,
            callback=bitflipper_callback,
        )
    except KeyboardInterrupt:
        # A manual interrupt only stops training early; fall through to
        # evaluation.
        pass

    evaluate(model, env)
def find_optimal_TBS(env):
    """Search for the ``(r, S)`` pair with the best simulated average reward.

    For every candidate ``r`` in ``range(env.Lambda)`` the function

    1. builds a truncated (200-state) transition matrix from Poisson
       probabilities with rate ``env.Lambda``,
    2. obtains a stationary distribution via ``power_iteration``,
    3. picks ``S`` as the first index at which the cumulative stationary
       probability reaches ``env.b / (env.b + env.h)``, and
    4. scores the pair with ``utility.evaluate(env, 100, 5000, TBS, env, r, S)``.

    Args:
        env: Object exposing ``Lambda`` (used both as the Poisson rate and as
            the exclusive upper bound for ``r``), ``b`` and ``h``.

    Returns:
        Tuple ``(opt_r, opt_S)`` of the best-scoring candidate. On ties the
        smallest ``r`` wins.

    Raises:
        ValueError: If ``env.Lambda`` is smaller than 1, so there is no
            candidate ``r`` to evaluate.
    """
    lam = env.Lambda
    if lam < 1:
        # range(lam) would be empty and no optimum could be reported.
        raise ValueError(
            "env.Lambda must be at least 1 to search for r, got {0!r}".format(lam)
        )

    size = 200  # number of states kept in the truncated chain
    critical_ratio = env.b / (env.b + env.h)  # loop-invariant threshold for S

    max_reward = None
    opt_r = opt_S = None
    for r in range(lam):
        # Build the transition matrix for this r.
        trans_prob = np.zeros(shape=(size, size))
        for i in range(1, size):
            # Moving from i to j >= 1 corresponds to a Poisson count of
            # i + r - j. scipy's signature is pmf(k, mu): count first, rate
            # second.
            # NOTE(review): j == i + r (a count of zero) is not covered by
            # this range, exactly as in the previous implementation; confirm
            # whether that omission is intentional.
            cols = np.arange(1, min(i + r, size))
            trans_prob[i, cols] = poisson.pmf(i + r - cols, lam)
            # Every count >= i + r lands in state 0: P(X >= i + r) = sf(i + r - 1).
            trans_prob[i, 0] = poisson.sf(i + r - 1, lam)

        stat_dist = power_iteration(trans_prob)

        # Smallest S whose cumulative stationary mass reaches the threshold.
        cum_prob = 0
        S = 0
        while cum_prob < critical_ratio:
            cum_prob = cum_prob + stat_dist[S]
            S = S + 1

        # Keep the candidate only if it is strictly better than the best so far.
        average_reward = utility.evaluate(env, 100, 5000, TBS, env, r, S)[0]
        if max_reward is None or max_reward < average_reward:
            max_reward = average_reward
            opt_r = r
            opt_S = S

    return opt_r, opt_S
from utility import Data, evaluate
import svm_models
import time

if __name__ == "__main__":
    # Time the whole pipeline: data preparation plus one SVC fit/score per
    # split yielded by Data.load().
    started = time.time()

    dataset = Data()
    dataset.lda_transform()

    # One score per (train, test) split.
    fold_scores = [
        svm_models.SklearnSVC(train_x, train_y).score(test_x, test_y)
        for train_x, test_x, train_y, test_y in dataset.load()
    ]

    finished = time.time()

    evaluate(fold_scores)
    print('total time: %.4fs' % (finished - started))
# Restore pretrained parameters when requested; otherwise start from freshly
# initialised variables with a baseline score of 0.
if args.pretrain != 1:
    sess.run(tf.global_variables_initializer())
    cur_best_pre_0 = 0.
    print('without pretraining.')
else:
    # NOTE(review): eval() executes whatever string args.regs holds; this is
    # only acceptable while that value comes from a trusted command line.
    regs_tag = '-'.join([str(r) for r in eval(args.regs)])
    pretrain_path = '%sweights/%s/%s/l%s_r%s' % (
        args.proj_path, args.dataset, model.model_type, str(args.lr), regs_tag)

    ckpt = tf.train.get_checkpoint_state(
        os.path.dirname(pretrain_path + '/checkpoint'))

    if not (ckpt and ckpt.model_checkpoint_path):
        # No usable checkpoint: behave exactly like the non-pretrained case.
        sess.run(tf.global_variables_initializer())
        cur_best_pre_0 = 0.
        print('without pretraining.')
    else:
        # Initialise first, then overwrite the variables from the checkpoint.
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('load the pretrained model parameters from: ', pretrain_path)

        # Score the restored model on every test user.
        users_to_test = list(dataset.test_set.keys())
        ret = evaluate(sess, model, users_to_test, drop_flag=False)
        cur_best_pre_0 = ret[5]

        reported = tuple(ret[k] for k in (0, 4, 5, 9, 10, 14, 15))
        pretrain_ret = ('pretrained model recall=[%.5f, %.5f],'
                        'map=[%.5f, %.5f], ndcg=[%.5f, %.5f], auc=[%.5f]') % reported
        print(pretrain_ret)
# Restore pretrained parameters when args.pretrain == 1. The checkpoint
# directory is derived from the project path, dataset, model type, layer
# sizes, learning rate and regularisation terms.
if args.pretrain == 1:
    # NOTE(review): eval() executes whatever strings args.layer_size and
    # args.regs hold; acceptable only while they come from a trusted source.
    layer = '-'.join([str(l) for l in eval(args.layer_size)])
    pretrain_path = '%sweights/%s/%s/%s/l%s_r%s' % (
        args.proj_path, args.dataset, model.model_type, layer, str(
            args.lr), '-'.join([str(r) for r in eval(args.regs)]))
    ckpt = tf.train.get_checkpoint_state(
        os.path.dirname(pretrain_path + '/checkpoint'))
    if ckpt and ckpt.model_checkpoint_path:
        # Initialise every variable first, then overwrite them from the
        # checkpoint.
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('load the pretrained model parameters from: ', pretrain_path)
        # *********************************************************
        # get the performance from pretrained model.
        users_to_test = list(dataset.test_set.keys())
        ret = evaluate(sess, model, users_to_test, drop_flag=True)
        # The first recall entry becomes the baseline score.
        cur_best_pre_0 = ret['recall'][0]
        # Report the first and last entry of each list-valued metric.
        pretrain_ret = 'pretrained model recall=[%.5f, %.5f], precision=[%.5f, %.5f], hit=[%.5f, %.5f],' \
                       'ndcg=[%.5f, %.5f], auc=[%.5f]' % \
                       (ret['recall'][0], ret['recall'][-1], ret['precision'][0], ret['precision'][-1], ret['hit_ratio'][0], ret['hit_ratio'][-1], ret['ndcg'][0], ret['ndcg'][-1], ret['auc'])
        print(pretrain_ret)
    else:
        # Pretraining was requested but no checkpoint was found: start from
        # freshly initialised variables.
        sess.run(tf.global_variables_initializer())
        cur_best_pre_0 = 0.
        print('without pretraining.')
else:
Reload the pretrained model parameters. """
# When pretraining is requested a checkpoint under weights_save_path is
# mandatory: a missing checkpoint raises instead of falling back to fresh
# variables.
if args.pretrain == 1:
    ckpt = tf.train.get_checkpoint_state(weights_save_path)
    if ckpt and ckpt.model_checkpoint_path:
        # Initialise every variable first, then overwrite them from the
        # checkpoint.
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('load the pretrained model parameters from: ', weights_save_path)
        # *********************************************************
        # get the performance from pretrained model.
        # NOTE(review): when args.report == 1 this evaluation is skipped and
        # cur_best_pre is not assigned anywhere in this span; confirm that
        # later code does not read it in that mode.
        if args.report != 1:
            res = evaluate(sess, model, dataset.test_users, dataset, args.batch_size, args.K, drop_flag=True, batch_test_flag=False)
            # Recall of the restored model becomes the baseline score.
            cur_best_pre = res['recall']
            pretrain_ret = 'pretrained model recall= %.5f, precision= %.5f, hit= %.5f,' \
                           'ndcg= %.5f' % \
                           (res['recall'], res['precision'], res['hit_ratio'], res['ndcg'])
            print(pretrain_ret)
    else:
        raise RuntimeError("Store variables not found.")
else:
    # No pretraining: start from freshly initialised variables and a zero
    # baseline.
    sess.run(tf.global_variables_initializer())
    cur_best_pre = 0.
from utility import Data, evaluate, convert_to_ovr
from svm_models import LinearSVM
import time

if __name__ == "__main__":
    # For every split yielded by Data.load(), train one LinearSVM per
    # one-vs-rest label pair produced by convert_to_ovr and aggregate the
    # per-classifier results.
    start_time = time.time()
    data = Data()
    acc, correct = [], []
    for X_train, X_test, y_train, y_test in data.load():
        correct_sum, acc_sum = 0, 0
        # Count the splits actually produced instead of assuming exactly ten,
        # so the averages stay right if the number of labels changes.
        n_splits = 0
        for n_splits, (z_train, z_test) in enumerate(
                convert_to_ovr(y_train, y_test), start=1):
            clf = LinearSVM()
            clf.fit(X_train, z_train)
            correct_cnt, single_acc = clf.score(X_test, z_test)
            correct_sum += correct_cnt
            acc_sum += single_acc
        if n_splits == 0:
            raise ValueError("convert_to_ovr produced no one-vs-rest splits")
        # Mean of the per-classifier accuracies.
        acc.append(acc_sum / n_splits)
        # Pooled correct count over all classifiers and test samples.
        correct.append(correct_sum / (n_splits * len(y_test)))
    end_time = time.time()
    print('Overall Accuracy:')
    evaluate(correct)
    print('Average Accuracy:')
    evaluate(acc)
    print('total time: %.4fs' % (end_time - start_time))