def main():
    """Schedule DDPG Leo experiments: balancing (with replay-buffer save),
    zero-shot walking over a grid of reward weights, and walking warm-started
    from the balancing policy/replay buffer; then execute them in a pool.
    """
    alg = 'ddpg'
    args = parse_args()
    # Cap the worker count by the machine's core count (default cap: 32).
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    # Parameters
    runs = range(3)
    steps = [50]        # thousands of steps (scaled inside rl_run)
    reassess_for = ['']

    # Balancing: save the replay buffer for reuse by the third batch.
    options = list(itertools.product(steps, reassess_for, runs))
    configs = {
        "balancing": "cfg/leo_balancing.yaml",
    }
    L0 = rl_run(configs, alg, options, rb_save=True)

    ## Zero-shot walking over a grid of reward weights.
    steps = [300]
    grid_size = 5
    rwForward = np.linspace(100, 500, grid_size)  # nominal 300
    rwTime = np.linspace(-2.9, -0.1, grid_size)   # nominal -1.5
    rwWork = np.linspace(-3.9, -0.1, grid_size)   # nominal -2.0
    options = list(itertools.product(steps, reassess_for, rwForward, rwTime,
                                     rwWork, runs))
    configs = {
        "walking": "cfg/leo_walking.yaml",
    }
    L1 = rl_run(configs, alg, options)

    ## Walking warm-started from the saved balancing policy + replay buffer.
    steps = [250]
    reassess_for = ['']
    options = list(itertools.product(steps, reassess_for, rwForward, rwTime,
                                     rwWork, runs))
    configs = {
        "walking_after_balancing": "cfg/leo_walking.yaml",
    }
    L2 = rl_run(configs, alg, options,
                load_file="ddpg-balancing-5000000-1010",
                rb_load="ddpg-balancing-5000000-1010")

    # Execute learning: balancing first (its replay buffers feed L2's runs),
    # then the remaining configurations in shuffled order.
    do_multiprocessing_pool(arg_cores, L0)
    L = L1 + L2
    random.shuffle(L)
    do_multiprocessing_pool(arg_cores, L)
def main():
    """Extend previously generated '*-02_walking' configs by 200k steps and
    rerun them in a multiprocessing pool."""
    args = parse_args()
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    # Load each walking config and extend its step budget.
    L = []
    for f in glob.glob('cl/*-02_walking.yaml'):
        with open(f, 'r') as file:
            # FullLoader keeps old yaml.load() semantics for these plain
            # configs while refusing arbitrary-object construction.
            config = yaml.load(file, Loader=yaml.FullLoader)
        config['steps'] = config['steps'] + 200000
        L.append(config)

    # Write each extended config next to its output location (with a '_'
    # prefix) and collect the filenames for the worker pool.
    M = []
    for config in L:
        a, b = config['output'].split('/')
        fname = "{}/_{}.yaml".format(a, b)
        with open(fname, 'w', encoding='utf8') as file:
            yaml.dump(config, file, default_flow_style=False, allow_unicode=True)
        M.append(fname)
    do_multiprocessing_pool(arg_cores, M)
def main():
    """Schedule short DDPG balancing runs warm-started from a crouching
    policy (saving the replay buffer) and execute them in a process pool."""
    alg = 'ddpg'
    args = parse_args()
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    # Parameters
    runs = range(3)
    steps = [50]        # thousands of steps (scaled inside rl_run)
    reassess_for = ['']
    options = list(itertools.product(steps, reassess_for, runs))
    configs = {
        # "balancing" : "cfg/leo_balancing.yaml",
        "balancing_after_crouching": "cfg/leo_balancing.yaml",
    }
    L0 = rl_run(configs, alg, options, rb_save=True,
                load_file="ddpg-crouching-5000000-1010")
    do_multiprocessing_pool(arg_cores, L0)
def rl_run(dict_of_cfgs, alg, options, save=True, load_file='', rb_save=False, rb_load=''):
    """Generate one YAML configuration file per (config, option) pair.

    dict_of_cfgs maps experiment keys to base config paths; each option tuple
    carries (steps_in_thousands, reassess_for, ..., run_index).  The flags
    select policy saving/loading and replay-buffer export/import.  Returns
    the list of generated config filenames (also printed).
    """
    list_of_new_cfgs = []
    loc = "tmp"
    os.makedirs(loc, exist_ok=True)

    for key, cfg in dict_of_cfgs.items():
        args = parse_args()
        for o in options:
            # Encode option values plus the save/load/rb flags in the name.
            str_o = opt_to_str(o)
            str_o += '-' + boolList2BinString(
                [save, bool(load_file), rb_save, bool(rb_load)])
            # str_o always contains the flag suffix, so it is never empty
            # here and the run index is appended with a separator.
            str_o += "-mp{}".format(o[-1])
            print("Generating parameters: {}".format(str_o))

            # create local filename
            list_of_new_cfgs.append("{}/{}-{}-{}.yaml".format(
                loc, alg, key, str_o))

            args['cfg'] = cfg
            args['steps'] = o[0] * 1000  # options carry steps in thousands
            args['rb_max_size'] = args['steps']
            args['reassess_for'] = o[1]
            args['save'] = save
            if load_file:
                args['load_file'] = "{}-mp{}".format(load_file, o[-1])
            args['output'] = "{}-{}-{}".format(alg, key, str_o)
            if rb_save:
                args['rb_save_filename'] = args['output']
            if rb_load:
                args['rb_load_filename'] = "{}-mp{}".format(rb_load, o[-1])

            # Threads start at the same time; to prevent identical RNG
            # streams we write an explicit per-config seed.
            args['seed'] = int.from_bytes(
                os.urandom(4), byteorder='big', signed=False) // 2

            with open(list_of_new_cfgs[-1], 'w', encoding='utf8') as file:
                yaml.dump(args, file, default_flow_style=False,
                          allow_unicode=True)

    print(list_of_new_cfgs)
    return list_of_new_cfgs
def main():
    """Schedule two-stage Leo experiments: the walking budget is fixed at
    250k steps while the preceding balancing stage is swept over multiples
    of 25k steps (mul 0 = walking only)."""
    args = parse_args()
    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(16)
    exp_name = "ddpg-exp2_two_stage"
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml'
    }
    starting_task = 'balancing_tf'
    misc = {'tasks': tasks, 'starting_task': starting_task, 'runs': runs}
    mp_cfgs = []

    # In this experiment we keep walking steps at 250000, but additionally do
    # balancing steps of various durations
    wsteps = 250000
    for bsteps_mul in range(9):
        name = "{}-mul{}".format(exp_name, bsteps_mul)
        if bsteps_mul > 0:
            bsteps = 25000 * bsteps_mul
            # NOTE(review): 'args' is mutated between iterations; this
            # assumes do_steps_based copies it rather than holding a
            # reference -- confirm in its implementation.
            args['rb_max_size'] = wsteps
            # 'nnload_rbload': walking warm-starts from the balancing
            # network and replay buffer.
            options = {
                'balancing_tf': '',
                'balancing': '',
                'walking': 'nnload_rbload'
            }
            mp_cfgs += do_steps_based(args, cores, name=name,
                                      steps=(-1, bsteps, wsteps),
                                      options=options, **misc)
        else:
            # mul 0: walking only; note rb_max_size keeps whatever default
            # parse_args provided for this first iteration.
            options = {'balancing_tf': '', 'balancing': '', 'walking': ''}
            mp_cfgs += do_steps_based(args, cores, name=name,
                                      steps=(-1, -1, wsteps),
                                      options=options, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
def main():
    """Schedule the exp1 two-stage Leo study: pure walking (300k steps) plus
    balancing(50k)+walking(250k) variants that differ in what is transferred
    between stages (network weights, replay buffer, reassessed buffer)."""
    args = parse_args()
    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(16)
    keep_samples = False  # if True, the walking buffer also keeps balancing samples
    exp_name = "ddpg-exp1_two_stage"
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml'
    }
    starting_task = 'balancing_tf'
    misc = {'tasks':tasks, 'starting_task':starting_task, 'runs':runs}
    mp_cfgs = []

    # Baseline: walking only for the full budget.
    steps = 300000
    args['rb_max_size'] = steps
    options = {'balancing_tf': '', 'balancing': '', 'walking': ''}
    mp_cfgs += do_steps_based(args, cores, name=exp_name, steps=(-1, -1, steps), options=options, **misc)

    # Two-stage variants: 50k balancing followed by 250k walking.
    bsteps = 50000
    wsteps = steps - bsteps
    # NOTE(review): 'args' is mutated between do_steps_based calls; this
    # assumes do_steps_based copies it rather than holding a reference.
    args['rb_max_size'] = steps if keep_samples else wsteps

    # Transfer network weights only.
    options = {'balancing_tf': '', 'balancing': '', 'walking': 'nnload'}
    mp_cfgs += do_steps_based(args, cores, name=exp_name, steps=(-1, bsteps, wsteps), options=options, **misc)

    # Transfer network weights and replay buffer.
    options = {'balancing_tf': '', 'balancing': '', 'walking': 'nnload_rbload'}
    mp_cfgs += do_steps_based(args, cores, name=exp_name, steps=(-1, bsteps, wsteps), options=options, **misc)

    # As above, with the buffer reassessed for the walking reward.
    options = {'balancing_tf': '', 'balancing': '', 'walking': 'nnload_rbload_re_walking_300_-1.5'}
    mp_cfgs += do_steps_based(args, cores, name=exp_name, steps=(-1, bsteps, wsteps), options=options, **misc)

    # Transfer replay buffer only.
    options = {'balancing_tf': '', 'balancing': '', 'walking': 'rbload'}
    mp_cfgs += do_steps_based(args, cores, name=exp_name, steps=(-1, bsteps, wsteps), options=options, **misc)

    # Transfer reassessed replay buffer only.
    options = {'balancing_tf': '', 'balancing': '', 'walking': 'rbload_re_walking_300_-1.5'}
    mp_cfgs += do_steps_based(args, cores, name=exp_name, steps=(-1, bsteps, wsteps), options=options, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
def main():
    """Grid-search DDPG hyper-parameters (noise, normalizations, layer norm,
    tau, architecture) for zero-shot balancing and walking on the RBDL Leo
    model, producing a shuffled list of run configurations."""
    args = parse_args()
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    # Parameters
    runs = range(5)
    noise_type = [1, 0]
    normalize_observations = [1, 0]
    normalize_returns = [1, 0]
    layer_norm = [1, 0]
    tau = [0.001]  #, 0.01]
    architecture = [0]  # 0: 'Divyam'
    alg = 'ddpg'

    ### Balancing, 100k timesteps
    nb_timesteps = [100]
    options = [flatten(tupl) for tupl in itertools.product(
        nb_timesteps, noise_type, normalize_observations, normalize_returns,
        layer_norm, tau, architecture, runs)]
    configs = [
        "cfg/rbdl_py_balancing.yaml",
    ]
    L1 = rl_run_zero_shot(configs, alg, args, options)

    ### Walking, 300k timesteps
    nb_timesteps = [300]
    options = [flatten(tupl) for tupl in itertools.product(
        nb_timesteps, noise_type, normalize_observations, normalize_returns,
        layer_norm, tau, architecture, runs)]
    configs = [
        "cfg/rbdl_py_walking.yaml",
    ]
    L2 = rl_run_zero_shot(configs, alg, args, options)

    L = L1 + L2
    random.shuffle(L)
    # NOTE(review): the shuffled list is never submitted to a worker pool
    # here (no do_multiprocessing_pool call) -- confirm this is intentional.
def main():
    """Sweep the balancing-stage duration (10k..150k steps) and schedule the
    complementary walking stage warm-started from each balancing run."""
    prepare_multiprocessing()
    alg = 'ddpg'
    args = parse_args()
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    # Parameters
    runs = range(10, 20)
    bsteps = range(10, 160, 10)  # balancing budget, thousands of steps

    options = list(itertools.product(bsteps, runs))
    configs = {
        "balancing": "cfg/leo_balancing.yaml",
    }
    L0 = rl_run(configs, alg, options, rb_save=True)

    ## Walking for the remainder of the 300k-step budget.
    wsteps = [300 - s for s in bsteps]
    options = list(itertools.product(wsteps, runs))
    configs = {
        "walking_after_balancing": "cfg/leo_walking.yaml",
    }
    # NOTE(review): the '{}' placeholder in these names is not filled here;
    # presumably rl_run (or a downstream consumer) substitutes the balancing
    # step count -- verify against that implementation.
    L1 = rl_run(configs, alg, options,
                load_file="ddpg-balancing-{}-1010",
                rb_load="ddpg-balancing-{}-1010")

    # Execute learning; reverse so longer runs start first, which balances
    # the pool load.
    L0 = L0[::-1]
    L1 = L1[::-1]
    #print(L0)
    #print(L1)
    do_multiprocessing_pool(arg_cores, L0)
    do_multiprocessing_pool(arg_cores, L1)
def mujoco_export(env, mp, task='Walking', policies='', trajectories='', misc=''):
    """Replay a trained two-stage Roboschool policy for run index `mp` and
    export its trajectory; performs no learning and no saving."""
    args = parse_args()

    env_lc, task_lc = env.lower(), task.lower()
    # Only balancing has a dedicated environment suffix.
    balancing_suffix = task if task == 'Balancing' else ''
    args['cfg'] = "Roboschool{}-v1".format(env + balancing_suffix + 'GRL')

    # Pure playback: zero training steps, one trial, rendering on.
    args.update({
        'steps': 0,
        'trials': 1,
        'test_interval': 0,
        'normalize_observations': False,
        'normalize_returns': False,
        'batch_norm': True,
        'render': True,
    })
    args['output'] = misc + '{}_{}_play-mp{}'.format(env_lc, task_lc, mp)

    # Map the task's first letter to its curriculum stage name.
    t = task[0].lower()
    if t not in ('b', 'w'):
        raise ValueError('incorrect task ' + task)
    stage = stage_names[1] if t == 'b' else stage_names[2]

    args['load_file'] = policies + 'ddpg-exp1_two_stage_{env}_ga_{task}-g0001-mp{mp}-{stage}'.format(
        env=env_lc, task=t, mp=mp, stage=stage)
    args['compare_with'] = policies + 'ddpg-exp1_two_stage_{env}_ga_b-g0001-mp{mp}-01_balancing-last'.format(
        env=env_lc, mp=mp)
    args['trajectory'] = trajectories + '{}_{}-mp{}'.format(env_lc, task_lc, mp)
    args['env_timestep'] = 0.0165

    # Run actual script.
    args['save'] = False
    cfg_run(**args)
def leo_export(mp, policies='', task='walking', trajectories='', misc=''):
    """Replay a trained Leo policy for run index `mp` and export its
    trajectory; performs no learning and no saving."""
    args = parse_args()
    env = 'leo'
    args['cfg'] = 'cfg/{}_{}_play.yaml'.format(env, task)
    args['steps'] = 0           # playback only, no learning
    args['trials'] = 1
    args['test_interval'] = 0
    args['normalize_observations'] = False
    args['normalize_returns'] = False
    args['batch_norm'] = True
    args['output'] = misc + '{}_{}_play-mp{}'.format(env, task, mp)
    # Map the task's first letter to its curriculum stage name
    # ('b...' -> stage 1, 'w...' -> stage 2).
    t = task[0].lower()
    if t == 'b':
        stage = stage_names[1]
    elif t == 'w':
        stage = stage_names[2]
    else:
        raise ValueError('incorrect task ' + task)
    args[
        'load_file'] = policies + 'ddpg-exp1_two_stage_leo_ga_{task}-g0001-mp{mp}-{stage}'.format(
            task=t, mp=mp, stage=stage)
    args[
        'compare_with'] = policies + 'ddpg-exp1_two_stage_leo_ga_b-g0001-mp{mp}-01_balancing-last'.format(
            mp=mp)
    args['trajectory'] = trajectories + '{}_{}-mp{}'.format(env, task, mp)
    args['env_timestep'] = 0.03  # Leo control timestep in seconds
    # Run actual script.
    args['save'] = False
    cfg_run(**args)
    # Walking playback also writes auxiliary data; tag it with the run index.
    if task == 'walking':
        os.rename('aux_leo.csv', 'aux_leo-mp{}.csv'.format(mp))
def main():
    """Schedule balancing runs (with replay-buffer export) for the Roboschool
    HalfCheetah and Walker2d balancing environments."""
    alg = 'ddpg'
    args = parse_args()
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    # Parameters
    runs = range(2)
    reassess_for = ['']

    #### HalfCheetah balancing, 200k steps
    steps = [200]
    options = list(itertools.product(steps, reassess_for, runs))
    configs = {
        "HalfCheetahBalancing" : "RoboschoolHalfCheetahBalancing-v1",
    }
    L0 = rl_run(configs, alg, options, rb_save=True)

    #### Walker2d balancing, 300k steps
    steps = [300]
    options = list(itertools.product(steps, reassess_for, runs))
    configs = {
        "Walker2dBalancing" : "RoboschoolWalker2dBalancing-v1",
    }
    L1 = rl_run(configs, alg, options, rb_save=True)

    #### Execute everything in one pool.
    do_multiprocessing_pool(arg_cores, L0+L1)
def main():
    """Train an RNN damage-regression curriculum network on logged Leo data
    and report per-sequence prediction error, R-squared, and a comparison of
    predicted vs. true accumulated damage."""
    params = {}
    params['steps_of_history'] = 3
    # NOTE(review): key intentionally kept as-is ('damamge_threshold') --
    # downstream consumers (e.g. clean_dataset) presumably read the same
    # misspelled key; renaming it here would silently break them.
    params['damamge_threshold'] = 10000.0
    params['normalize'] = True
    zero_padding_after = 1
    dim = 4
    config = parse_args()
    config["cl_lr"] = 0.001
    config['cl_structure'] = 'rnnr:rnn_tanh_8_dropout;fc_linear_1'
    # config['cl_structure'] = 'rnnr:rnn_tanh_4_dropout;rnn_tanh_4_dropout;fc_linear_1'
    config['cl_dropout_keep'] = 0.7
    config['cl_depth'] = 1
    config['default_damage'] = 4035.00
    # Toggle: rebuild datasets from raw logs, or reuse the pickled split.
    prepare_datasets = True
    if prepare_datasets:
        dd = load_data('leo_supervised_learning_regression/')
        # Rule-based processing before splitting
        dd = clean_dataset(dd, params)
        #dd = process_data(dd, config, params, zero_padding_after)
        # split into training and testing sets
        test_percentage = 0.3
        idx = np.arange(len(dd))
        np.random.shuffle(idx)
        test_idx = int(len(dd) * test_percentage)
        dd_train = [d for i, d in enumerate(dd) if i in idx[test_idx:]]
        dd_test = [d for i, d in enumerate(dd) if i in idx[:test_idx]]
        # normalize training dataset and use its moments to normalize the
        # test dataset (avoids test-set leakage into the statistics)
        dd_train, data_norm, damage_norm = normalize_data(
            dd_train, config, params, zero_padding_after)
        dd_test, _, _ = normalize_data(dd_test, config, params,
                                       zero_padding_after, data_norm,
                                       damage_norm)
        # cut data into sequences of fixed history length
        dd_train = seq_cut(dd_train, params, dim)
        dd_test = seq_cut(dd_test, params, dim)
        with open('data.pkl', 'wb') as f:
            pickle.dump((dd_train, dd_test), f)
    else:
        with open('data.pkl', 'rb') as f:
            dd_train, dd_test = pickle.load(f)

    # prepare for training
    pt = PerformanceTracker(depth=config['cl_depth'],
                            input_norm=config["cl_running_norm"], dim=dim)
    cl_nn = CurriculumNetwork((params['steps_of_history'], pt.get_v_size()),
                              config)
    cl_nn.train(None, dd_train['seq_data'], dd_train['seq_damage'], n_epoch=2,
                validation_set=0.1, show_metric=True, batch_size=64)

    ### ACCURACY
    data = dd_test['data']
    # All-zero rows mark sequence boundaries in the flattened test data.
    seq_end_idx = np.where(~data.any(axis=1))[0]
    true_stage = dd_test['stage']
    true_stage = np.asarray(np.rint(true_stage), dtype=int)
    true_stage[seq_end_idx] = 0
    true_stage_damage = dd_test['damage']
    # Predict damage for each of the three candidate stages by overwriting
    # the stage feature (last channel) with (i+1)/10.
    damage = []
    for i in range(3):
        # NOTE(review): this mutates dd_test['seq_data'] in place (seq_data
        # is a view, not a copy), so the stage channel retains the last
        # written value for the later prediction below -- confirm intended.
        seq_data = dd_test['seq_data']
        seq_data[:, :, -1] = (i + 1) / 10
        seq_data[seq_end_idx, -1, -1] = 0
        damage.append(cl_nn.predict_(None, seq_data))
        #verify = cl_nn.predict_(None, seq_data)
    # Rows: samples, columns: candidate stages; pick the least-damage stage.
    damage_ = np.transpose(np.reshape(damage, [3, -1]))
    cl_predict = np.argmin(damage_, axis=1)
    #cl_predict_damage_min = np.min(damage_, axis=1)
    cl_predict_damage = damage_[range(len(true_stage)), true_stage]
    # Prepend -1 so each slice below covers one full sequence.
    seq_end_idx_ = np.concatenate((-1 * np.ones((1, )), seq_end_idx))
    seq_end_idx_ = np.asarray(seq_end_idx_, dtype=int)
    seq_end_idx_range = range(len(seq_end_idx_) - 1)
    predict_damage = np.array([
        sum(cl_predict_damage[seq_end_idx_[i] + 1:seq_end_idx_[i + 1]])
        for i in seq_end_idx_range
    ])
    true_damage = np.array([
        sum(true_stage_damage[seq_end_idx_[i] + 1:seq_end_idx_[i + 1]])
        for i in seq_end_idx_range
    ])
    true_damage_sorted_idx = np.argsort(true_damage, axis=0)
    predict_damage_sorted = predict_damage[true_damage_sorted_idx]
    print(predict_damage_sorted)
    error = np.linalg.norm(predict_damage - true_damage)
    coefficient_of_dermination = r2_score(true_damage, predict_damage)
    print(error, coefficient_of_dermination)

    # true_stage = data[:, -1]*10 - 1
    # true_stage = np.asarray(np.rint(true_stage), dtype=int)
    # diff = true_stage-cl_predict
    # diff[seq_end_idx] = 0
    # accuracy = 1 - np.count_nonzero(diff)/(len(true_stage)-len(seq_end_idx))
    # print("Accuracy {}".format(accuracy))

    ### ERROR and R-SQUARED over the raw test sequences
    damage = cl_nn.predict_(None, dd_test['seq_data'])
    error = np.linalg.norm(damage - dd_test['seq_damage'])
    coefficient_of_dermination = r2_score(dd_test['seq_damage'], damage)
    print(error, coefficient_of_dermination)
def main():
    """Schedule exp1 two-stage (balancing -> walking) runs with buffer
    reassessment for three Roboschool environments: Hopper, HalfCheetah,
    and Walker2d."""
    args = parse_args()
    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(7, 16)
    exp_name = "ddpg-exp1_two_stage"
    starting_task = 'balancing_tf'
    misc = {'starting_task': starting_task, 'runs': runs}
    mp_cfgs = []
    keep_samples = False  # if True, buffer also keeps balancing samples

    # Hopper: 100k balancing out of a 600k total budget.
    tasks = {
        'balancing_tf': 'RoboschoolHopperBalancingGRL-v1',
        'balancing': 'RoboschoolHopperBalancingGRL-v1',
        'walking': 'RoboschoolHopperGRL-v1'
    }
    bsteps = 100000
    steps = 600000
    reassess_for = 'walking_3_-1.5'
    # NOTE(review): 'args' is mutated before each create_tasks call; this
    # assumes create_tasks copies it rather than holding a reference.
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    mp_cfgs += create_tasks(args, cores, exp_name + '_hopper', bsteps, steps,
                            reassess_for, tasks, **misc)

    # HalfCheetah: 100k balancing out of 600k.
    tasks = {
        'balancing_tf': 'RoboschoolHalfCheetahBalancingGRL-v1',
        'balancing': 'RoboschoolHalfCheetahBalancingGRL-v1',
        'walking': 'RoboschoolHalfCheetahGRL-v1'
    }
    bsteps = 100000
    steps = 600000
    reassess_for = 'walking_3_-1.5'
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    mp_cfgs += create_tasks(args, cores, exp_name + '_halfcheetah', bsteps,
                            steps, reassess_for, tasks, **misc)

    # Walker2d: 200k balancing out of 700k.
    tasks = {
        'balancing_tf': 'RoboschoolWalker2dBalancingGRL-v1',
        'balancing': 'RoboschoolWalker2dBalancingGRL-v1',
        'walking': 'RoboschoolWalker2dGRL-v1'
    }
    bsteps = 200000
    steps = 700000
    reassess_for = 'walking_3_-1.5'
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    mp_cfgs += create_tasks(args, cores, exp_name + '_walker2d', bsteps,
                            steps, reassess_for, tasks, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
def main():
    """Run a cross-entropy optimization over two stage-duration parameters
    of the Leo curriculum: each generation samples duration combinations,
    evaluates the resulting learning runs in parallel, and updates the
    sampler with the observed damage."""
    prepare_multiprocessing()
    alg = 'ddpg'
    args = parse_args()
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    args['mp_debug'] = True
    #args['reach_return'] = 1422.66
    #args['default_damage'] = 4035.00
    args['reach_return'] = 526.0
    args['default_damage'] = 4132.00
    args['perf_td_error'] = True
    args['perf_l2_reg'] = True
    args['rb_min_size'] = 1000
    args['cl_l2_reg'] = 0
    steps = 400000                # total step budget per evaluation
    args['rb_max_size'] = steps
    steps_delta_a = 1000          # granularity of the first stage duration
    steps_delta_b = 4000          # granularity of the second stage duration
    popsize = 16 * 6              # CE population size per generation
    G = 100                       # maximum number of generations
    use_mp = True

    # ### For debugging
    # args['mp_debug'] = False
    # steps = 3000
    # steps_delta_a = 50
    # steps_delta_b = 50
    # G = 100
    # popsize = 3
    # use_mp = False
    # ###

    # Tasks
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml'
    }
    starting_task = 'balancing_tf'
    options = {
        'balancing_tf': '',
        'balancing': 'nnload_rbload',
        'walking': 'nnload_rbload'
    }

    root = "cl"
    if not os.path.exists(root):
        os.makedirs(root)

    categories = range(26)
    #balancing_tf = np.array(categories)/max(categories)
    #balancing_tf = [int(steps_ub*(math.exp(3*x)-1)/(math.exp(3)-1)) for x in balancing_tf]

    # To ensure fair sampling, enumerate all step combinations and keep only
    # those that map to unique duration solutions.
    step_combinations, step_solutions = [], []
    for a in categories:
        for b in categories:
            sol = comb_to_sol((a, b), steps, steps_delta_a, steps_delta_b)
            if sol not in step_solutions:
                step_combinations.append((a, b))
                step_solutions.append(sol)

    opt = opt_ce(popsize, step_combinations, categories)
    g = 1
    # To resume from a checkpoint instead, load the optimizer state:
    #opt = opt_ce.load(root, 'opt.pkl')
    #g = 2
    hp = Helper(args, root, alg, tasks, starting_task, arg_cores,
                use_mp=use_mp)

    # Main CE loop: one generation per iteration until convergence or G.
    while not opt.stop() and g <= G:
        if args['mp_debug']:
            # Redirect stdout of this generation to a per-generation log.
            sys.stdout = Logger(root + "/stdout-g{:04}.log".format(g))
            print("Should work")

        combinations = opt.ask()

        # convert sampled combinations to step-duration solutions
        solutions = []
        for comb in combinations:
            solutions.append(
                comb_to_sol(comb, steps, steps_delta_a, steps_delta_b))

        # preparation: generate configs, randomizing the test interval
        mp_cfgs = hp.gen_cfg_steps(solutions, g, options=options)
        for cfg in mp_cfgs:
            cfg[0]['test_interval'] = 1 + randint(0, 29)

        # evaluate and back up results immediately
        damage = hp.run(mp_cfgs)
        with open(root + '/damage.pkl', 'wb') as f:
            pickle.dump(damage, f, 2)

        # remove None elements (failed evaluations) before the CE update;
        # element-wise '!= None' on the array is intentional here
        notnonel = np.where(np.array(damage) != None)[0]
        damage = [d for i, d in enumerate(damage) if i in notnonel]
        combinations = [d for i, d in enumerate(combinations) if i in notnonel]

        # update using *original* solutions == combinations
        best = opt.tell(combinations, damage)

        # back-project to array indices including the None elements
        best = [notnonel[i] for i in best]

        # logging
        opt.log(root, alg, g, hp.damage_info, hp.reeval_damage_info, best)
        opt.save(root, 'opt.pkl')

        # new iteration
        g += 1
#!/usr/bin/env python3 # -*- coding: utf-8 -*- from ddpg import parse_args, cfg_run args = parse_args() env = 'Walker2d' #env = 'HalfCheetah' #env = 'Hopper' task = 'Balancing' #task = 'Walking' tf = True #tf = False #env = 'Atlas' #task = 'ForwardWalk' if task == 'Balancing': task_balancing = task else: task_balancing = '' if tf: tfstr = '_TF' else: tfstr = '' args['cfg'] = "Roboschool{}-v1".format(env + task_balancing + 'GRL' + tfstr) #args['cfg'] = "Roboschool{}-v1".format(env+task_balancing) #args['cfg'] = "Roboschool{}-v1".format(env+task)
def main():
    """Train a softmax switch classifier on logged positive/negative Leo
    balancing sequences and report sensitivity/specificity plus the
    resulting network parameters.

    Fixes vs. the previous revision: the CurriculumNetwork construction was
    commented out although cl_nn is used below; the negatives loop appended
    the stale `neg_data` left over from the positives loop instead of its
    own rows; negatives used the raw fall counter `fl` where positives use
    the fall rate `fl_rate`.
    """
    config = parse_args()
    with open("leo_supervised_learning/positive_keys.txt", 'r') as f:
        fpositives = f.readlines()
    fpositives = [x.strip() for x in fpositives]
    with open("leo_supervised_learning/negative_keys.txt", 'r') as f:
        fnegatives = f.readlines()
    fnegatives = [x.strip() for x in fnegatives]

    # now create two lists of positive and negative training samples
    config['reach_return'] = 1422.66
    config['cl_structure'] = 'softmax_2'  # softmax classifier for a number of classes
    config['cl_depth'] = 2
    config["cl_l2_reg"] = 0.01
    pt = PerformanceTracker(depth=config['cl_depth'], dim=4,
                            input_norm=config["cl_running_norm"])

    with tf.Graph().as_default() as sa:
        cl_nn = CurriculumNetwork(pt.get_v_size(), config)
        N = None  # negative samples (keep balancing)
        P = None  # positive samples (switch to walking)

        def _sequence_features(fbalancing):
            """Load one test log and return (normalized_data, labels_base).

            Columns 1, 3, 4 of the log are return, duration and cumulative
            falls; falls are differentiated into a per-test fall rate.
            """
            data = np.loadtxt(fbalancing, skiprows=2, usecols=(1, 3, 4))
            rw = data[:, 0, np.newaxis] / config['reach_return']   # return
            tt = data[:, 1, np.newaxis] / config['env_timeout']    # duration
            fl = data[:, 2, np.newaxis]                            # falls
            fl_rate = np.diff(np.vstack(([0], fl)), axis=0) / config["test_interval"]
            # One-hot labels; every row starts as the "negative" class [0, 1].
            labels = np.hstack((np.zeros(rw.shape), np.ones(rw.shape)))
            return np.hstack((rw, tt, fl_rate)), labels

        def _stack_pairs(normalized_data, labels):
            """Prefix two zero rows (empty PerformanceTracker states) and pair
            consecutive rows into (prev, next, next_label) training rows."""
            normalized_data = np.vstack((np.zeros(shape=(2, 3)), normalized_data))
            labels = np.vstack((np.zeros(shape=(2, 2)), labels))
            return np.hstack((normalized_data[:-1, :], normalized_data[1:, :],
                              labels[1:, :]))

        for fps in fpositives:
            fbalancing, fwalking = fps.split()
            normalized_data, labels = _sequence_features(fbalancing)
            labels[-1, :] = np.array([1, 0])  # positive label at sequence end
            reshaped_data = _stack_pairs(normalized_data, labels)
            neg_data = reshaped_data[:-1, :]
            pos_data = reshaped_data[-1, :]
            N = np.vstack((N, neg_data)) if N is not None else neg_data
            P = np.vstack((P, pos_data)) if P is not None else pos_data

        for fps in fnegatives:
            fbalancing, fwalking = fps.split()
            normalized_data, labels = _sequence_features(fbalancing)
            # Whole sequence is negative: append every paired row.
            reshaped_data = _stack_pairs(normalized_data, labels)
            N = np.vstack((N, reshaped_data)) if N is not None else reshaped_data

        # divide into training and testing sets
        testing_percentage = 0.7
        np.random.shuffle(N)
        np.random.shuffle(P)
        trN = N[0:int(N.shape[0] * testing_percentage), :]
        tsN = N[int(N.shape[0] * testing_percentage):, :]
        trP = P[0:int(P.shape[0] * testing_percentage), :]
        tsP = P[int(P.shape[0] * testing_percentage):, :]

        training_epochs = 10000
        batch_size = 10
        display_step = 10
        with tf.Session(graph=sa) as sess:
            # random initialization of variables
            sess.run(tf.global_variables_initializer())

            # Training cycle; rebalance classes by subsampling negatives.
            for epoch in range(training_epochs):
                np.random.shuffle(trN)
                trN_sub = trN[0:trP.shape[0]]
                tr = np.vstack((trN_sub, trP))
                np.random.shuffle(tr)
                batches_num = tr.shape[0] // batch_size  # integer sections
                avg_cost = 0.
                # Loop over all batches
                for batch in np.array_split(tr, batches_num):
                    # Run optimization op (backprop) and cost op
                    _, c = cl_nn.train(sess, batch[:, 0:6], batch[:, 6:8])
                    # Accumulate per-sample loss
                    # NOTE(review): dividing by batch size, not batches_num --
                    # confirm this is the intended normalization.
                    avg_cost += c / batch.shape[0]
                # Display logs per epoch step
                if epoch % display_step == 0:
                    print("Epoch:", '%04d' % (epoch+1), "cost=",
                          "{:.9f}".format(avg_cost))
            print("Optimization Finished!")

            # Sensitivity / Specificity check
            nlabels, plabels = [], []
            for v in tsP:
                plabels.append(cl_nn.predict(sess, v[np.newaxis, 0:6]))  # expect 1
            for v in tsN:
                nlabels.append(cl_nn.predict(sess, v[np.newaxis, 0:6]))  # expect 0
            #fp = [tsN[i, :] for i, l in enumerate(nlabels) if l == 1]
            TP = sum(plabels) / len(plabels)
            FN = 1 - TP
            FP = sum(nlabels) / len(nlabels)
            TN = 1 - FP
            sensitivity = TP / (TP + FN)
            specificity = TN / (TN + FP)
            print('sensitivity = {}, specificity = {}'.format(sensitivity,
                                                              specificity))

            params = cl_nn.get_params(sess)
            print('params = {}'.format(params))
            cmaes_params = params[::2] - params[1::2]
            print('cmaes_param = {}'.format(cmaes_params))


######################################################################################
if __name__ == "__main__":
    main()
def main():
    """Build network-based curriculum configurations for Leo (idealized or
    perturbed model) and execute only the first shuffled configuration
    in-process (the pool call is commented out -- debug mode)."""
    args = parse_args()
    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(1)
    #model = 'perturbed'
    model = 'idealized'
    if model == 'idealized':
        tasks = {
            'balancing_tf': 'cfg/leo_balancing_tf.yaml',
            'balancing': 'cfg/leo_balancing.yaml',
            'walking': 'cfg/leo_walking.yaml'
        }
    else:
        tasks = {
            'balancing_tf': 'cfg/leo_perturbed_balancing_tf.yaml',
            'balancing': 'cfg/leo_perturbed_balancing.yaml',
            'walking': 'cfg/leo_perturbed_walking.yaml'
        }
    starting_task = 'balancing_tf'
    misc = {'tasks': tasks, 'starting_task': starting_task, 'runs': runs}
    mp_cfgs = []

    # nn_params=("long_curriculum_network", "long_curriculum_network_stat.pkl")
    # mp_cfgs += do_network_based(args, cores, name='ddpg-cl_long', nn_params=nn_params, **misc)

    # Curriculum driven by the pre-trained "short" switching network.
    nn_params = ("short_curriculum_network",
                 "short_curriculum_network_stat.pkl")
    mp_cfgs += do_network_based_leo(args, cores, name='ddpg-cl_short',
                                    nn_params=nn_params, **misc)

    # mp_cfgs += do_steps_based(args, cores, name='ddpg-bbw', steps=(20000, 30000, 250000), **misc)
    # mp_cfgs += do_steps_based(args, cores, name='ddpg-bw', steps=( -1, 50000, 250000), **misc)
    # mp_cfgs += do_steps_based(args, cores, name='ddpg-w', steps=( -1, -1, 300000), **misc)
    #
    # # naive switching after achieving the balancing for n number of seconds
    # # happening twice. 0 means not used
    # mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb55', reach_timeout=(5.0, 5.0, 0.0), **misc)
    # mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb5', reach_timeout=(-1.0, 5.0, 0.0), **misc)
    #
    # mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb2020', reach_timeout=(20.0, 20.0, 0.0), **misc)
    # mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb20', reach_timeout=(-1.0, 20.0, 0.0), **misc)

    # # walker2d
    # tasks = {
    #     'balancing_tf': 'RoboschoolWalker2dBalancingGRL_TF-v1',
    #     'balancing': 'RoboschoolWalker2dBalancingGRL-v1',
    #     'walking': 'RoboschoolWalker2dGRL-v1'
    # }
    # misc = {'tasks':tasks, 'starting_task':starting_task, 'runs':runs}
    # mp_cfgs += do_network_based_mujoco(args, cores, name='ddpg-cl_short_walker2d', nn_params=nn_params, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    #do_multiprocessing_pool(cores, mp_cfgs)
    # Debug path: run a single (random, due to the shuffle) config directly.
    config, tasks, starting_task = mp_cfgs[0]
    cl_run(tasks, starting_task, **config)
def main():
    """Schedule exp1 two-stage runs across Leo, Hopper, HalfCheetah and
    Walker2d, exporting only the learning trajectory of each run."""
    args = parse_args()
    args[
        "test_interval"] = -1  # this ensures that only learning trajectory is exported
    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(6)
    exp_name = "ddpg-exp1_two_stage"
    starting_task = 'balancing_tf'
    misc = {'starting_task': starting_task, 'runs': runs}
    mp_cfgs = []
    keep_samples = False  # if True, buffer also keeps balancing samples

    # Leo: 50k balancing out of a 300k total budget.
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml'
    }
    bsteps = 50000
    steps = 300000
    reassess_for = 'walking_300_-1.5'
    # NOTE(review): 'args' is mutated before each create_tasks call; this
    # assumes create_tasks copies it rather than holding a reference.
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    args['env_timestep'] = 0.03
    cl_options = {
        'balancing_tf': '',
        'balancing': '',
        'walking': 'nnload_rbload'
    }
    mp_cfgs += create_tasks(args, cores, exp_name + '_leo', bsteps, steps,
                            reassess_for, tasks, cl_options, **misc)

    # Hopper: 100k balancing out of 600k.
    tasks = {
        'balancing_tf': 'RoboschoolHopperBalancingGRL-v1',
        'balancing': 'RoboschoolHopperBalancingGRL-v1',
        'walking': 'RoboschoolHopperGRL-v1'
    }
    bsteps = 100000
    steps = 600000
    reassess_for = 'walking_3_-1.5'
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    args['env_timestep'] = 0.0165
    cl_options = {'balancing_tf': '', 'balancing': '', 'walking': 'nnload'}
    mp_cfgs += create_tasks(args, cores, exp_name + '_hopper', bsteps, steps,
                            reassess_for, tasks, cl_options, **misc)

    # HalfCheetah: 100k balancing out of 600k.
    tasks = {
        'balancing_tf': 'RoboschoolHalfCheetahBalancingGRL-v1',
        'balancing': 'RoboschoolHalfCheetahBalancingGRL-v1',
        'walking': 'RoboschoolHalfCheetahGRL-v1'
    }
    bsteps = 100000
    steps = 600000
    reassess_for = 'walking_3_-1.5'
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    args['env_timestep'] = 0.0165
    cl_options = {'balancing_tf': '', 'balancing': '', 'walking': 'nnload'}
    mp_cfgs += create_tasks(args, cores, exp_name + '_halfcheetah', bsteps,
                            steps, reassess_for, tasks, cl_options, **misc)

    # Walker2d: 200k balancing out of 700k.
    tasks = {
        'balancing_tf': 'RoboschoolWalker2dBalancingGRL-v1',
        'balancing': 'RoboschoolWalker2dBalancingGRL-v1',
        'walking': 'RoboschoolWalker2dGRL-v1'
    }
    bsteps = 200000
    steps = 700000
    reassess_for = 'walking_3_-1.5'
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    args['env_timestep'] = 0.0165
    cl_options = {'balancing_tf': '', 'balancing': '', 'walking': 'nnload'}
    mp_cfgs += create_tasks(args, cores, exp_name + '_walker2d', bsteps,
                            steps, reassess_for, tasks, cl_options, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
def main():
    """Schedule the Walker2d2 curriculum study: direct walking plus, for four
    balancing budgets (100k..250k steps), balancing with replay-buffer export
    and walking warm-started from the saved policy (with and without the
    saved replay buffer)."""
    alg = 'ddpg'
    args = parse_args()
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    # Parameters
    runs = range(10)
    reassess_for = ['']
    env = 'Walker2d2'
    e100 = "{}100".format(env)
    e150 = "{}150".format(env)
    e200 = "{}200".format(env)
    e250 = "{}250".format(env)

    ##### Curriculum: per-key balancing and total budgets, thousands of steps.
    keys = (e100, e150, e200, e250)
    bsteps = {e100: 100, e150: 150, e200: 200, e250: 250}
    steps = {e100: 700, e150: 700, e200: 700, e250: 700}
    # Replay-buffer / policy names produced by the balancing stage below.
    rb_names = {
        key: "ddpg-{}_balancing-{:06d}-1010".format(
            key, int(round(100000 * bsteps[key])))
        for key in bsteps
    }
    wsteps = {key: steps[key] - bsteps[key] for key in bsteps}
    L0, L1, L2, L3 = [], [], [], []

    # Direct walking baseline over the full budget.
    options = list(itertools.product([700], reassess_for, runs))
    configs = {
        "{}_walking".format(env): "Roboschool{}GRL-v1".format(env),
    }
    L1 += rl_run(configs, alg, options)

    #####
    for key in keys:
        ## Balancing stage; saves the replay buffer for reuse below.
        options = list(itertools.product([bsteps[key]], reassess_for, runs))
        configs = {
            "{}_balancing".format(key):
            "Roboschool{}BalancingGRL-v1".format(env),
        }
        L0 += rl_run(configs, alg, options, rb_save=True)

        #### Walking warm-started from the balancing policy only.
        options = list(itertools.product([wsteps[key]], reassess_for, runs))
        configs = {
            "{}_walking_after_balancing".format(key):
            "Roboschool{}GRL-v1".format(env),
        }
        L2 += rl_run(configs, alg, options, load_file=rb_names[key])

        #### Walking warm-started from the policy AND the replay buffer.
        reassess_for = ['']
        options = list(itertools.product([wsteps[key]], reassess_for, runs))
        configs = {
            "{}_walking_after_balancing".format(key):
            "Roboschool{}GRL-v1".format(env),
        }
        L3 += rl_run(configs, alg, options, load_file=rb_names[key],
                     rb_load=rb_names[key])

    #### Execute: balancing first (it produces the buffers), then the rest.
    do_multiprocessing_pool(arg_cores, L0)
    L = L1 + L2 + L3
    random.shuffle(L)
    do_multiprocessing_pool(arg_cores, L)
def rl_run(dict_of_cfgs, alg, options, save=True, load_file='', rb_save=False, rb_load=''):
    """Generate one YAML experiment file per (config key, option tuple) pair.

    :param dict_of_cfgs: mapping of experiment key -> base config (a yaml
        path or a gym environment id, as used by the callers in this file)
    :param alg: algorithm name used in file/output naming (e.g. 'ddpg')
    :param options: iterable of option tuples; by caller convention
        o[0] = steps in thousands, o[1] = reassess_for, o[-1] = run index,
        and tuples with more than 5 elements carry a reward-weight grid
        (rwForward, rwTime, rwWork) in o[2:5]
    :param save: stored verbatim into the config as args['save']
    :param load_file: base name of a network to load; '-mp<run>' is appended
    :param rb_save: if True, save the replay buffer under the output name
    :param rb_load: base name of a replay buffer to load; '-mp<run>' appended
    :return: list of paths of the generated YAML files (all under 'tmp/')
    """
    list_of_new_cfgs = []

    loc = "tmp"
    if not os.path.exists(loc):
        os.makedirs(loc)

    for key in dict_of_cfgs:
        args = parse_args()
        cfg = dict_of_cfgs[key]

        for o in options:
            # Encode the option tuple and the save/load flags into a suffix
            # that makes every generated file/output name unique.
            str_o = opt_to_str(o)
            str_o += '-' + boolList2BinString(
                [save, bool(load_file), rb_save, bool(rb_load)])
            # NOTE(review): str_o cannot be empty here because the flag
            # suffix was just appended, so the first branch looks dead;
            # presumably the check was meant to precede the append — confirm
            # before changing, since it would alter generated file names.
            if not str_o:
                str_o += "mp{}".format(o[-1])
            else:
                str_o += "-mp{}".format(o[-1])
            print("Generating parameters: {}".format(str_o))

            # create local filename
            list_of_new_cfgs.append("{}/{}-{}-{}.yaml".format(
                loc, alg, key, str_o))

            args['cfg'] = cfg
            args['steps'] = o[0] * 1000  # options carry steps in thousands
            args['reassess_for'] = o[1]
            if len(o) > 5:
                # Reward-weight grid-search tuples carry three extra values.
                args['curriculum'] = 'rwForward_{};rwTime_{};rwWork_{}'.format(
                    o[2], o[3], o[4])
            args['save'] = save
            if 'curriculum' in key:
                # Fixed reward curriculum for dedicated 'curriculum' configs;
                # overrides any grid value set above.
                args['curriculum'] = 'rwForward_50_300_10'
            if load_file:
                args['load_file'] = "{}-mp{}".format(load_file, o[-1])
            args['output'] = "{}-{}-{}".format(alg, key, str_o)
            if rb_save:
                args['rb_save_filename'] = args['output']
            if rb_load:
                args['rb_load_filename'] = "{}-mp{}".format(rb_load, o[-1])

            # Dump the complete config; it is consumed later by the
            # multiprocessing pool runner.
            with io.open(list_of_new_cfgs[-1], 'w', encoding='utf8') as file:
                yaml.dump(args, file, default_flow_style=False,
                          allow_unicode=True)

    print(list_of_new_cfgs)
    return list_of_new_cfgs
def main():
    """Generate curriculum configs for perturbed Leo models and run the
    FIRST one in-process (debug mode — the pool launch is commented out)."""
    args = parse_args()
    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # for working with yaml files: round-trip OrderedDict <-> yaml mapping
    _mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    yaml.add_representer(collections.OrderedDict, dict_representer)
    yaml.add_constructor(_mapping_tag, dict_constructor)

    # Parameters
    runs = range(16)

    # create perturbed models of leo
    # NOTE(review): two alternative locations are passed; how create_models
    # resolves them is not visible here — TODO confirm.
    model_paths = (
        '/home/ivan/work/Project/Software/grl/src/grl/addons/rbdl/cfg/leo_vc',
        '/grl/src/grl/addons/rbdl/cfg/leo_vc',
    )
    models, names = create_models(model_paths)
    tasks, names = create_tasks(models, names)

    args['cl_depth'] = 2
    options = {
        'balancing_tf': '',
        'balancing': 'nnload_rbload',
        'walking': 'nnload_rbload'
    }
    starting_task = 'balancing_tf'
    mp_cfgs = []
    for task, name in zip(tasks, names):
        misc = {'tasks': task, 'starting_task': starting_task, 'runs': runs}
        # NOTE(review): export_names/nn_params are loop-invariant and could
        # be hoisted out of the loop; left in place to keep code unchanged.
        export_names = "eq_curriculum_network_depth_" + str(args['cl_depth'])
        nn_params = (export_names, "{}_stat.pkl".format(export_names))
        mp_cfgs += do_network_based_leo(args, cores,
                                        name='ddpg-cl_short_' + name,
                                        nn_params=nn_params,
                                        options=options, **misc)
        # Alternative schedules kept for reference:
        # mp_cfgs += do_steps_based(args, cores, name='ddpg-bbw', steps=(20000, 30000, 250000), **misc)
        # mp_cfgs += do_steps_based(args, cores, name='ddpg-bw', steps=( -1, 50000, 250000), **misc)
        # mp_cfgs += do_steps_based(args, cores, name='ddpg-w', steps=( -1, -1, 300000), **misc)
        #
        # naive switching after achieving the balancing for n number of
        # seconds happening twice. 0 means not used
        # mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb55', reach_timeout=(5.0, 5.0, 0.0), **misc)
        # mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb5', reach_timeout=(-1.0, 5.0, 0.0), **misc)
        # mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb2020', reach_timeout=(20.0, 20.0, 0.0), **misc)
        # mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb20', reach_timeout=(-1.0, 20.0, 0.0), **misc)

    # walker2d variant kept for reference:
    # tasks = {
    #     'balancing_tf': 'RoboschoolWalker2dBalancingGRL_TF-v1',
    #     'balancing': 'RoboschoolWalker2dBalancingGRL-v1',
    #     'walking': 'RoboschoolWalker2dGRL-v1'
    # }
    # misc = {'tasks':tasks, 'starting_task':starting_task, 'runs':runs}
    # mp_cfgs += do_network_based_mujoco(args, cores, name='ddpg-cl_short_walker2d', nn_params=nn_params, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    # do_multiprocessing_pool(cores, mp_cfgs)

    # Debug: execute only the first generated configuration in this process.
    config, tasks, starting_task = mp_cfgs[0]
    cl_run(tasks, starting_task, **config)
def main():
    """Train an RNN stage classifier on logged curriculum data.

    Loads damage/stage logs, cleans and normalizes them, splits into
    train/test sets, then trains `cl_rnn_classification` with per-sequence
    class weighting, plotting train/test error every 100 epochs.

    Bug fixed: the per-epoch average loss was computed as
    ``avg_loss += avg_loss / len(seq_train)`` (inflating the accumulated
    sum) instead of dividing the accumulated sum by the sequence count.
    """
    params = {}
    params['steps_of_history'] = 2
    params['zero_padding_after'] = 1
    # NOTE(review): key spelled 'damamge_threshold' — consumers (load_data/
    # clean_data) presumably use the same spelling; do not rename unilaterally.
    params['damamge_threshold'] = 10000.0
    params['indi_norm'] = True
    params['damage_norm'] = 'to_reward'
    params['stage_norm'] = ''  #'cetered'
    params['neg_damage'] = True

    config = parse_args()
    config['default_damage'] = 4132.00
    config["cl_stages"] = "balancing_tf;balancing;walking:monotonic"
    config["cl_tau"] = 0.001
    config['cl_dropout_keep'] = 0.7  #0.7
    config["cl_l2_reg"] = 0.001
    config["minibatch_size"] = 128
    random_shuffle = True
    test_percentage = 0.3

    # config["cl_lr"] = 0.001
    # training_epochs = 10000
    # export_names = "long_curriculum_network"
    config["cl_lr"] = 0.01
    training_epochs = 10000
    export_names = "eq_curriculum_network_depth_" + str(
        params['steps_of_history'])
    nn_params = (export_names, "{}_stat.pkl".format(export_names))

    # debug options
    #training_epochs = 100
    #random_shuffle = False #True

    #dd = load_data('leo_supervised_learning_regression2/', params, gens = [2])
    dd = load_data('leo_supervised_learning_regression_new/', params,
                   gens=range(1, 7))

    # Rule-based processing before splitting
    dd = clean_data(dd, params)
    #dd = process_data(dd, config)
    dd = select_good(dd, config, 0.3)
    dd = calc_weights(dd)
    # plot_data(dd)

    # split into training and testing sets
    idx = np.arange(len(dd))
    if random_shuffle:
        np.random.shuffle(idx)
    test_idx = int(len(dd) * test_percentage)
    dd_train = [d for i, d in enumerate(dd) if i in idx[test_idx:]]
    dd_test = [d for i, d in enumerate(dd) if i in idx[:test_idx]]

    # normalize training dataset and use moments to normalizing test dataset
    dd_train, data_norm, damage_norm = normalize_data(dd_train, config, params)
    dd_test, _, _ = normalize_data(dd_test, config, params, data_norm,
                                   damage_norm)

    # save means and std for usage in RL
    config['cl_depth'] = params['steps_of_history']
    # NOTE(review): 'dim' is not defined in this function — presumably a
    # module-level constant (the sibling script passes dim=3); confirm.
    config['cl_pt_shape'] = (params['steps_of_history'], dim)
    pt = PerformanceTracker(config, input_norm=data_norm,
                            output_norm=damage_norm)
    pt.save(nn_params[1])

    # get stat
    seq_train = seq_cut(dd_train, params)
    seq_test = seq_cut(dd_test, params)
    print(len(seq_train), len(seq_test))

    plot_train, plot_test = [], []
    plt.gca().set_color_cycle(['red', 'green', 'blue'])
    tf.reset_default_graph()
    with tf.Graph().as_default() as sl:
        #cl_nn = cl_critic(pt.get_v_size(), config)
        #cl_nn = cl_critic_target(pt.get_v_size(), config)
        cl_nn = cl_rnn_classification(pt.get_v_size(), config)
        display_step = 10
        with tf.Session(graph=sl) as sess:
            # random initialization of variables
            sess.run(tf.global_variables_initializer())

            # Training cycle
            for epoch in range(training_epochs):
                if random_shuffle:
                    np.random.shuffle(seq_train)
                avg_loss = 0.
                # Loop over all sequences
                for seq in seq_train:
                    # Run optimization op (backprop) and cost op (loss value)
                    x = np.reshape(seq['seq_data'],
                                   [-1, params['steps_of_history'], 3])
                    y = np.reshape(seq['seq_softmax_stage'], [-1, 3])
                    # Per-sequence inverse-frequency class weights.
                    class_counter = np.sum(y, axis=0)
                    class_counter = np.reshape(class_counter, [-1, 1])
                    class_weight = (sum(class_counter) -
                                    class_counter) / sum(class_counter)
                    weights = y.dot(class_weight)
                    if np.any(weights):
                        weights = weights / np.linalg.norm(weights)
                        # weights = weights * seq['seq_weight']
                        _, loss = cl_nn.train(sess, x, y, class_weight=weights)
                        avg_loss += loss
                    else:
                        # All weights zero: sequence carries no usable labels.
                        print("Oops!" + str(class_counter))

                # Compute average loss (fixed: was avg_loss += avg_loss / N)
                avg_loss /= len(seq_train)

                # Display logs per epoch step
                if epoch % display_step == 0:
                    print("Epoch:", '%04d' % (epoch + 1), "loss=",
                          "{:.9f}".format(avg_loss))

                if epoch % 100 == 0:
                    plot_train.append(
                        calc_error(sess, cl_nn, seq_train, params))
                    plot_test.append(calc_error(sess, cl_nn, seq_test, params))
                    plot_train_ = np.reshape(plot_train, (-1, 3))
                    plot_test_ = np.reshape(plot_test, (-1, 3))
                    plt.plot(plot_train_, linestyle='-')
                    plt.plot(plot_test_, linestyle=':')
                    plt.pause(0.05)

            print("Optimization Finished!")
            cl_nn.save(sess, nn_params[0])

    plt.show(block=True)
def main():
    """Optimize a curriculum-switching policy's weights with a black-box
    optimizer (Bayesian optimization; the CMA-ES line is commented out),
    evaluating candidate weight vectors on the Leo task set."""
    prepare_multiprocessing()
    alg = 'ddpg'
    args = parse_args()
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    # important defaults
    # 2-stage curriculum
    # args['cl_structure'] = 'cl:_1'
    # starting_task = 'balancing'
    # 3-stage curriculum
    args['cl_structure'] = 'cl:fc__2'
    starting_task = 'balancing_tf'
    args['mp_debug'] = True
    args['perf_td_error'] = True
    args['perf_l2_reg'] = True
    args['rb_min_size'] = 1000
    args['reach_return'] = 1422.66
    args['default_damage'] = 4035.00
    args['steps'] = 300000
    args['cl_depth'] = 1
    args['cl_l2_reg'] = 1000  # well-posing problem
    args['cl_cmaes_sigma0'] = 1.0
    popsize = 4
    resample = 4
    reeval_num0 = 5  # NOTE(review): only used by the commented-out opt_cmaes
    G = 500          # maximum number of optimizer generations
    use_mp = True
    reeval = True

    # debug overrides kept for reference:
    # args['mp_debug'] = False
    # args['steps'] = 1500
    # popsize = 4
    # resample = 4
    # #reeval_num0 = 2
    # #args['seed'] = 1
    # G = 10
    # use_mp = False
    # reeval = False

    # Tasks
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml'
    }

    root = "cl"
    if not os.path.exists(root):
        os.makedirs(root)

    # calulate number of weights of the fully-connected switcher network
    pt = PerformanceTracker(depth=args['cl_depth'],
                            input_norm=args["cl_running_norm"])
    input_dim = pt.get_v_size()
    w_num = 0
    fan_in = input_dim
    for layer in args["cl_structure"].split(";"):
        # NOTE(review): 'cl:fc__2'.split('_') yields THREE elements
        # (['cl:fc', '', '2']), so this 2-tuple unpack would raise
        # ValueError for the active cl_structure — verify the expected
        # format (the commented 'cl:_1' variant splits into two).
        _, size = layer.split("_")
        w_num += fan_in * int(size) + int(size)  # weights + biases
        fan_in = int(size)

    #opt = opt_cmaes(args, w_num, popsize, reeval_num0)
    search_space = (-1.0, 1.0)
    opt = opt_bo(args, w_num, popsize, resample, search_space)

    hp = Helper(args, root, alg, tasks, starting_task, arg_cores,
                use_mp=use_mp)

    g = 1
    while not opt.stop() and g <= G:
        if args['mp_debug']:
            # Redirect stdout of this generation to its own log file.
            sys.stdout = Logger(root + "/stdout-g{:04}.log".format(g))
            print("Should work")

        solutions = opt.ask()
        # NOTE(review): 'resol' stays unbound if cl_reparam is neither
        # 'spherical' nor 'cartesian' — confirm parse_args guarantees one.
        if args["cl_reparam"] == "spherical":
            resol = []
            for s in solutions:
                resol.append(cart2sph(s))
        elif args["cl_reparam"] == "cartesian":
            resol = solutions

        # preparation
        mp_cfgs = hp.gen_cfg(resol, g)

        # evaluating
        damage = hp.run(mp_cfgs)

        # update using *original* solutions
        _, rejected = opt.tell(solutions, damage)

        # reevaluation to prevent prepature convergence
        if reeval:
            opt.reeval(g, solutions, damage, hp)

        # logging
        opt.log(root, alg, g, hp.damage_info, hp.reeval_damage_info, rejected)
        opt.save(root, 'opt.pkl')

        # new iteration
        g += 1
def main():
    """Generate and launch balancing->walking transfer experiments for
    Hopper, HalfCheetah and Walker2d.

    Refactored: the original repeated the identical five-stage setup once
    per environment; a single loop over the environments generates the same
    configurations (the combined list is shuffled before execution, so the
    pre-shuffle ordering difference is immaterial).
    """
    alg = 'ddpg'
    args = parse_args()
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    # Parameters
    runs = range(5)
    envs = ("Hopper", "HalfCheetah", "Walker2d")

    #####
    # Curriculum (step counts in thousands; rl_run multiplies by 1000)
    bsteps = {env: 100 for env in envs}
    steps = {env: 500 for env in envs}
    # File names the balancing stage will produce (see rl_run's naming).
    rb_names = {env: "ddpg-{}_balancing-{:06d}-1010".format(
        env, int(round(100000 * bsteps[env]))) for env in envs}
    # Remaining steps for walking after the balancing stage.
    wsteps = {env: steps[env] - bsteps[env] for env in envs}

    L0, L1, L2, L3, L4 = [], [], [], [], []
    for env in envs:
        ## Balancing stage; replay buffer is saved for reuse.
        options = list(itertools.product([bsteps[env]], [''], runs))
        configs = {
            "{}_balancing".format(env):
                "Roboschool{}BalancingGRL-v1".format(env),
        }
        L0 += rl_run(configs, alg, options, rb_save=True)

        ## Zero-shot walking baseline.
        options = list(itertools.product([steps[env]], [''], runs))
        configs = {
            "{}_walking".format(env): "Roboschool{}GRL-v1".format(env),
        }
        L1 += rl_run(configs, alg, options)

        ## Only neural network without replay buffer.
        options = list(itertools.product([wsteps[env]], [''], runs))
        configs = {
            "{}_walking_after_balancing".format(env):
                "Roboschool{}GRL-v1".format(env),
        }
        L2 += rl_run(configs, alg, options, load_file=rb_names[env])

        ## Replay buffer transfer, with and without reassessment and with
        ## or without the network (same configs as the stage above).
        options = list(itertools.product([wsteps[env]],
                                         ['walking_3_-1.5', ''], runs))
        L3 += rl_run(configs, alg, options, rb_load=rb_names[env])
        L4 += rl_run(configs, alg, options, load_file=rb_names[env],
                     rb_load=rb_names[env])

    ####
    # Balancing runs must finish first: later stages load their outputs.
    do_multiprocessing_pool(arg_cores, L0)
    L = L1 + L2 + L3 + L4
    random.shuffle(L)
    do_multiprocessing_pool(arg_cores, L)
def main():
    """Generate and launch the Leo three-stage curriculum:
    balancing_tf -> balancing -> walking with transferred network + buffer.

    The zero-shot-walking and network-only stages are disabled (their config
    dicts are empty), matching the original commented-out keys.
    """
    alg = 'ddpg'
    args = parse_args()
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    # Parameters
    runs = range(4)
    rb_min_size = [1000]
    reassess_for = ['']

    # Torso balancing rehab (fixed torso); saves its replay buffer.
    options = list(itertools.product([20], rb_min_size, reassess_for, runs))
    configs = {
        "balancing_tf": "cfg/leo_balancing_tf.yaml",
    }
    L00 = rl_run(configs, alg, options, rb_save=True)

    # torso balancing, warm-started from the balancing_tf network + buffer.
    options = list(itertools.product([30], rb_min_size, reassess_for, runs))
    configs = {
        "balancing": "cfg/leo_balancing.yaml",
    }
    L02 = rl_run(configs, alg, options, rb_save=True,
                 load_file="ddpg-balancing_tf-2000000-{}-1010",
                 rb_load="ddpg-balancing_tf-2000000-{}-1010")

    ## Zero-shot walking — disabled (empty configs yield no tasks).
    options = list(itertools.product([300], rb_min_size, reassess_for, runs))
    L1 = rl_run({}, alg, options)

    ## Only neural network without replay buffer — disabled likewise.
    options = list(itertools.product([250], rb_min_size, reassess_for, runs))
    L2 = rl_run({}, alg, options,
                load_file="ddpg-balancing-3000000-{}-1111")

    ## Replay buffer + network transfer into walking.
    options = list(itertools.product([250], rb_min_size, reassess_for, runs))
    configs = {
        "walking_after_balancing": "cfg/leo_walking.yaml",
    }
    L4 = rl_run(configs, alg, options,
                load_file="ddpg-balancing-3000000-{}-1111",
                rb_load="ddpg-balancing-3000000-{}-1111")

    # Execute learning: stages run sequentially because each later stage
    # loads files produced by the previous one.
    do_multiprocessing_pool(arg_cores, L00)
    do_multiprocessing_pool(arg_cores, L02)
    L = L4
    random.shuffle(L)
    do_multiprocessing_pool(arg_cores, L)
def main():
    """Generate network-based curriculum configs for leo, perturbed leo and
    walker2d, then run them all in one multiprocessing pool."""
    args = parse_args()
    requested = args['cores'] if args['cores'] else 16
    cores = min(cpu_count(), requested)
    print('Using {} cores.'.format(cores))

    # yaml round-tripping of OrderedDict
    yaml.add_representer(collections.OrderedDict, dict_representer)
    yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
                         dict_constructor)

    # Parameters
    runs = range(16)
    args['cl_depth'] = 2
    options = {
        'balancing_tf': '',
        'balancing': 'nnload_rbload',
        'walking': 'nnload_rbload',
    }
    export_names = "eq_curriculum_network_depth_" + str(args['cl_depth'])
    nn_params = (export_names, "{}_stat.pkl".format(export_names))
    starting_task = 'balancing_tf'

    # One entry per experiment: (generator function, name, task set).
    experiments = (
        (do_network_based_leo, 'ddpg-leo', {
            'balancing_tf': 'cfg/leo_balancing_tf.yaml',
            'balancing': 'cfg/leo_balancing.yaml',
            'walking': 'cfg/leo_walking.yaml',
        }),
        (do_network_based_leo, 'ddpg-leo_perturbed', {
            'balancing_tf': 'cfg/leo_perturbed_balancing_tf.yaml',
            'balancing': 'cfg/leo_perturbed_balancing.yaml',
            'walking': 'cfg/leo_perturbed_walking.yaml',
        }),
        (do_network_based_mujoco, 'ddpg-walker2d', {
            'balancing_tf': 'RoboschoolWalker2dBalancingGRL_TF-v1',
            'balancing': 'RoboschoolWalker2dBalancingGRL-v1',
            'walking': 'RoboschoolWalker2dGRL-v1',
        }),
    )

    mp_cfgs = []
    for make_cfgs, exp_name, task_set in experiments:
        mp_cfgs += make_cfgs(args, cores, name=exp_name, nn_params=nn_params,
                             options=options, tasks=task_set,
                             starting_task=starting_task, runs=runs)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
def main():
    """Train a feed-forward Q-style damage regressor on logged curriculum
    data (TF1-style fitted-Q loop over 3 discrete stage actions), plotting
    TD errors and periodically checkpointing the network."""
    params = {}
    params['steps_of_history'] = 1
    params['zero_padding_after'] = 0
    # NOTE(review): key spelled 'damamge_threshold' — consumers presumably
    # use the same spelling; do not rename unilaterally.
    params['damamge_threshold'] = 10000.0
    params['indi_norm'] = True
    params['damage_norm'] = 'to_reward'
    params['stage_norm'] = 'cetered'
    params['neg_damage'] = True
    gamma = 1.0  # undiscounted bootstrapped targets

    config = parse_args()
    config['default_damage'] = 4035.00
    config["cl_lr"] = 0.01
    config["cl_tau"] = 0.001
    #config['cl_structure'] = 'ffcritic:fc_relu_2;fc_relu_2;fc_relu_1'
    # config["cl_batch_norm"] = True
    config['cl_dropout_keep'] = 0.7
    config["cl_l2_reg"] = 0.000001
    config["minibatch_size"] = 128

    #dd = load_data('leo_supervised_learning_regression2/', params, gens = [2])
    dd = load_data('leo_supervised_learning_regression/', params,
                   gens=range(1, 7))

    # Rule-based processing before splitting
    dd = clean_data(dd, params)
    dd = process_data(dd, config)

    # split into training and testing sets
    # (test_percentage = 0.0 => everything goes into the training set)
    test_percentage = 0.0
    idx = np.arange(len(dd))
    np.random.shuffle(idx)
    test_idx = int(len(dd) * test_percentage)
    dd_train = [d for i, d in enumerate(dd) if i in idx[test_idx:]]
    dd_test = [d for i, d in enumerate(dd) if i in idx[:test_idx]]

    # normalize training dataset and use moments to normalizing test dataset
    dd_train, data_norm, damage_norm = normalize_data(dd_train, config,
                                                      params)
    dd_test, _, _ = normalize_data(dd_test, config, params, data_norm,
                                   damage_norm)

    # save means and std for usage in RL
    pt = PerformanceTracker(depth=config['cl_depth'],
                            running_norm=config["cl_running_norm"],
                            input_norm=data_norm, output_norm=damage_norm,
                            dim=3)
    pt.save('data_damage_norms.pkl')

    # get stat
    # NOTE(review): return values are discarded — presumably seq_cut mutates
    # its argument or is called for statistics only; confirm.
    seq_cut(dd_train, params)
    seq_cut(dd_test, params)

    # calculate class weights
    #class_weight = calc_class_weight(dd_train, params)

    # fill in replay beuffer
    rb_train = fill_replay_buffer(dd_train, config)
    rb_test = fill_replay_buffer(dd_test, config)
    #config["minibatch_size"] = rb_train.replay_buffer_count

    with tf.Graph().as_default() as ddpg_graph:
        #cl_nn = cl_critic(pt.get_v_size(), config)
        #cl_nn = cl_critic_target(pt.get_v_size(), config)
        cl_nn = cl_ff_regression(pt.get_v_size(), config)
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.15)

        x, td_error_, mb_td_error_, train_td_error_, test_td_error_ = \
            [], [], [], [], []
        plt.ion()
        with tf.Session(graph=ddpg_graph,
                        config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            # random initialization of variables
            sess.run(tf.global_variables_initializer())
            minibatch_size = config["minibatch_size"]
            for i in range(200000):
                s_batch, a_batch, r_batch, t_batch, s2_batch = \
                    rb_train.sample_batch(minibatch_size)

                # Calculate targets: evaluate all three stage actions
                # (a_max in {-1, 0, 1}) and bootstrap from the max Q.
                qq_val = []
                for stage in range(0, 3):
                    a_max = (stage - 1) * np.ones((minibatch_size, 1))
                    qq_val.append(cl_nn.predict(sess, s2_batch,
                                                a_batch=a_max))
                q_val = np.concatenate(qq_val, axis=1)
                q_max = np.max(q_val, axis=1)
                q_max = np.reshape(q_max, newshape=(minibatch_size, 1))

                y_i = []
                for k in range(minibatch_size):
                    if t_batch[k]:
                        # terminal transition: no bootstrap
                        y_i.append(r_batch[k])
                    else:
                        y_i.append(r_batch[k] + gamma * q_max[k][0])

                # target_q: list -> float
                if i % 500 == 0:
                    q_i = cl_nn.predict(sess, s_batch, a_batch=a_batch)
                    td_error = np.sum(
                        np.abs(q_i - np.reshape(
                            y_i, newshape=(minibatch_size, 1)))) \
                        / minibatch_size

                cl_nn.train(sess, s_batch,
                            np.reshape(y_i, (minibatch_size, 1)),
                            a_batch=a_batch)

                # testing / diagnostics every 500 iterations
                if i % 500 == 0:
                    not_biases = [
                        v for v in cl_nn.network_params()
                        if '/b:' not in v.name
                    ]
                    print(sess.run(not_biases))
                    print(min(q_max), max(q_max))
                    mb_td_error = calc_td_error(sess, cl_nn, config, s_batch,
                                                a_batch, r_batch, t_batch,
                                                s2_batch, minibatch_size)
                    # NOTE(review): the minibatch variables are re-bound to
                    # whole-buffer samples below — intentional, the training
                    # step above has already consumed them.
                    s_batch, a_batch, r_batch, t_batch, s2_batch = \
                        rb_train.sample_batch(rb_train.replay_buffer_count)
                    train_td_error = calc_td_error(
                        sess, cl_nn, config, s_batch, a_batch, r_batch,
                        t_batch, s2_batch, rb_train.replay_buffer_count)
                    s_batch, a_batch, r_batch, t_batch, s2_batch = \
                        rb_test.sample_batch(rb_test.replay_buffer_count)
                    test_td_error = calc_td_error(
                        sess, cl_nn, config, s_batch, a_batch, r_batch,
                        t_batch, s2_batch, rb_test.replay_buffer_count)
                    print(td_error, mb_td_error, train_td_error,
                          test_td_error)

                    # Append the four error curves and refresh the live plot.
                    x.append(i)
                    td_error_.append(td_error)
                    mb_td_error_.append(mb_td_error)
                    train_td_error_.append(train_td_error)
                    test_td_error_.append(test_td_error)
                    plt.plot(x, td_error_, 'r')
                    plt.plot(x, mb_td_error_, 'g')
                    plt.plot(x, train_td_error_, 'b')
                    plt.plot(x, test_td_error_, 'k')
                    plt.pause(0.05)

                if i % 5000 == 0:
                    cl_nn.save(sess, 'cl_network', global_step=i)

    plt.show(block=True)
def main():
    """Generate reach-timeout-based curriculum experiments for Leo and run
    them in a multiprocessing pool."""
    args = parse_args()
    requested = args['cores'] if args['cores'] else 16
    cores = min(cpu_count(), requested)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(16)
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml',
    }
    misc = {'tasks': tasks, 'starting_task': 'balancing_tf', 'runs': runs}

    args['cl_keep_samples'] = True
    options = {
        'balancing_tf': '',
        'balancing': 'nnload',
        'walking': 'nnload_rbload',
    }

    # Naive switching after balancing is held for 5 seconds, allowed to
    # trigger up to 5 times; a -1.0 entry disables that stage's timeout.
    args['reach_timeout_num'] = 5
    variants = (
        ('ddpg-exp3-three_stage-rb55555', (5.0, 5.0, 0.0)),
        ('ddpg-exp3-two_stage-rb55555', (-1.0, 5.0, 0.0)),
    )
    mp_cfgs = []
    for exp_name, timeouts in variants:
        mp_cfgs += do_reach_timeout_based(args, cores, name=exp_name,
                                          reach_timeout=timeouts,
                                          options=options, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)