fig, axes = plt.subplots(nrows=MAX_EXP, ncols=2)
if MAX_EXP == 1:
    axes = np.expand_dims(axes, axis=0)

colors = [(0, 0.43, 0.73), (0, 0.73, 0.43)]
use_entropy_cost = [False, True]

nn_structure = {'d_feat': 20,
                'feat_layers': [40],
                'mean_layers': [40],
                'scale_layers': [40],
                'mixing_layers': [40]}

mdnmp = MDNMP(n_comps=nmodel, d_input=2, d_output=20, nn_structure=nn_structure)

for expID in range(MAX_EXP):
    print('=========== Exp: %1d ============' % (expID))
    for kid in range(len(use_entropy_cost)):
        if use_entropy_cost[kid]:
            mdnmp.lratio['entropy'] = 1.0
            print("===> train entropy MDN")
        else:
            mdnmp.lratio['entropy'] = 0.0
            print("===> train original MDN")

        mdnmp.build_mdn(learning_rate=0.00005)
        mdnmp.init_train()
        mdnmp.train(train_goals,
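# ---------------------------------------------------------------------------
# The excerpt above is cut off inside mdnmp.train(...). The prepared figure
# (one row per experiment, one column per model, drawn in colors[kid]) is
# presumably filled in afterwards; a minimal sketch of that step, using only
# names defined above, might look like the following (illustrative only):
for kid in range(len(use_entropy_cost)):
    ax = axes[expID, kid]
    ax.set_title("entropy MDN" if use_entropy_cost[kid] else "original MDN")
    # e.g. scatter the sampled predictions for this model in colors[kid] here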
vmps = vmps * 100
if options.is_grid_samples:
    _, ids = get_training_data_from_2d_grid(options.ntrain, queries=queries)
    trdata = data[ids, :]
    trvmps = vmps[ids, :]

rstates = np.random.randint(0, 100, size=options.expnum)
d_input = np.shape(queries)[-1]
d_output = np.shape(vmps)[1]

mdnmp_struct = {'d_feat': 20,
                'feat_layers': [40],
                'mean_layers': [60],
                'scale_layers': [60],
                'mixing_layers': [20]}
mdnmp = MDNMP(n_comps=options.nmodel, d_input=d_input, d_output=d_output,
              nn_structure=mdnmp_struct, scaling=1.0, var_init=VAR_INIT)

nn_structure = {'d_feat': 20,
                'feat_layers': [40],
                'mean_layers': [60],
                'scale_layers': [60],
                'mixing_layers': [10],
                'discriminator': [20],
                'lambda': [10],
                'd_response': [40, 5],
                'd_context': [20, 5]}
gmgan = GMGAN(n_comps=options.nmodel, context_dim=d_input, response_dim=d_output,
              nn_structure=nn_structure, scaling=1, var_init=VAR_INIT,
              var_init_dis=VAR_INIT_DIS, batch_size=100)

# start experiment
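# Illustrative sketch of the experiment loop that would follow the
# "start experiment" comment. The split ratio and learning rate are
# assumptions; the build_mdn / init_train / train call pattern matches the
# other experiments in this section. The analogous GMGAN branch is omitted
# because its training API is not shown in this excerpt.
for expId in range(options.expnum):
    trdata, tdata, trvmps, tvmps = train_test_split(
        data, vmps, test_size=0.9, random_state=rstates[expId])
    trqueries = trdata[:, 0:d_input]
    is_pos = np.ones(shape=(np.shape(trvmps)[0], 1))

    mdnmp.build_mdn(learning_rate=0.0001)  # assumed learning rate
    mdnmp.init_train()
    mdnmp.train(trqueries, trvmps, is_pos, max_epochs=10000,
                is_load=False, is_save=False)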
parser = OptionParser()
parser.add_option("-m", "--nmodel", dest="nmodel", type="int", default=2)
parser.add_option("-n", "--num_exp", dest="expnum", type="int", default=1)
(options, args) = parser.parse_args(sys.argv)

mdnmp_struct = {'d_feat': 10,
                'feat_layers': [20],
                'mean_layers': [20],
                'scale_layers': [20],
                'mixing_layers': [10]}
mdnmp = MDNMP(n_comps=options.nmodel, d_input=1, d_output=2,
              nn_structure=mdnmp_struct, scaling=1, var_init=VAR_INIT)
mdnmp.lratio = {'likelihood': 1, 'mce': 0, 'regularization': 0,
                'failure': 0, 'eub': 0}

max_epochs = 30000
lrate = 0.00005
num_train = [60]
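# Hypothetical continuation showing how max_epochs, lrate, and num_train
# would be consumed; the toy-data names (trqueries, trvmps) are placeholders,
# but the build_mdn / init_train / train sequence mirrors the API usage in
# the experiments below.
for n in num_train:
    mdnmp.build_mdn(learning_rate=lrate)
    mdnmp.init_train()
    weights = np.ones(shape=(n, 1))
    mdnmp.train(trqueries[:n, :], trvmps[:n, :], weights,
                max_epochs=max_epochs, is_load=False, is_save=False)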
def run_mdnmp_for_hitball(nmodel=3, MAX_EXPNUM=20, use_entropy_cost=[False, True],
                          model_names=["Original MDN", "Entropy MDN"],
                          nsamples=[1, 10, 30, 50, 70],
                          env_file="hitball_exp_v0.xml", data_dir="hitball_mpdata_v0",
                          isvel=False, EXP=Armar6HitBallExpV0):
    # prepare data
    data_dir = os.environ['MPGEN_DIR'] + EXP_DIR + data_dir
    queries = np.loadtxt(data_dir + '/hitball_queries.csv', delimiter=',')
    vmps = np.loadtxt(data_dir + '/hitball_weights.csv', delimiter=',')
    starts = np.loadtxt(data_dir + '/hitball_starts.csv', delimiter=',')
    goals = np.loadtxt(data_dir + '/hitball_goals.csv', delimiter=',')

    if np.shape(queries)[-1] == np.shape(goals)[0]:
        queries = np.expand_dims(queries, axis=-1)

    inputs = np.concatenate([queries, starts, goals], axis=1)

    # prepare model
    nn_structure = {'d_feat': 20,
                    'feat_layers': [40],
                    'mean_layers': [60],
                    'scale_layers': [60],
                    'mixing_layers': [10]}
    d_input = np.shape(queries)[-1]
    d_output = np.shape(vmps)[1]
    mp = VMP(dim=2, kernel_num=10)
    mdnmp = MDNMP(n_comps=nmodel, d_input=d_input, d_output=d_output,
                  nn_structure=nn_structure, var_init=VAR_INIT)

    rstates = np.random.randint(0, 100, size=MAX_EXPNUM)
    n_test = 100
    srates = {}
    allres = np.zeros(shape=(len(model_names), MAX_EXPNUM, len(nsamples)))
    for modelId in range(len(model_names)):
        if use_entropy_cost[modelId]:
            mdnmp.lratio['entropy'] = 20
        else:
            mdnmp.lratio['entropy'] = 0

        csrates = np.zeros(shape=(MAX_EXPNUM, len(nsamples)))
        for expId in range(MAX_EXPNUM):
            mdnmp.build_mdn(learning_rate=0.0001)
            mdnmp.init_train()
            trdata, tdata, trvmps, tvmps = train_test_split(
                inputs, vmps, test_size=0.9, random_state=rstates[expId])
            print("use {} data for training and {} data for testing".format(
                np.shape(trdata)[0], np.shape(tdata)[0]))
            print("======== Exp: {} with {} ========".format(expId, model_names[modelId]))

            is_pos = np.ones(shape=(np.shape(trvmps)[0], 1))
            trqueries = trdata[:, 0:d_input]
            mdnmp.train(trqueries, trvmps, is_pos, max_epochs=10000,
                        is_load=False, is_save=False)

            tqueries = tdata[:n_test, 0:d_input]
            tstarts = tdata[:n_test, d_input:d_input + 2]
            tgoals = tdata[:n_test, d_input + 2:]

            for sampleId in range(len(nsamples)):
                wout, _ = mdnmp.predict(tqueries, nsamples[sampleId])
                if isvel:
                    srate = evaluate_hitball(wout, tqueries, tstarts, tgoals,
                                             low_ctrl=TaskSpaceVelocityController,
                                             high_ctrl=TaskSpacePositionVMPController(mp),
                                             env_path=ENV_DIR + env_file, EXP=EXP)
                else:
                    srate = evaluate_hitball(wout, tqueries, tstarts, tgoals,
                                             low_ctrl=TaskSpaceImpedanceController,
                                             high_ctrl=TaskSpacePositionVMPController(mp),
                                             env_path=ENV_DIR + env_file, EXP=EXP)
                csrates[expId, sampleId] = srate
                allres[modelId, expId, sampleId] = srate

        srates[model_names[modelId]] = np.mean(csrates, axis=0)

    return srates, allres
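# Minimal invocation sketch (not part of the original script). It assumes
# MPGEN_DIR is set and the hitball data directory is laid out as the
# function above expects; the reduced MAX_EXPNUM/nsamples values are
# arbitrary choices for a quick run.
srates, allres = run_mdnmp_for_hitball(nmodel=3, MAX_EXPNUM=5,
                                       nsamples=[1, 10, 30])
print(srates["Original MDN"], srates["Entropy MDN"])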
def run_mdnmp_for_balanceball(nmodel=3, MAX_EXPNUM=20, mce_vals=[0, 0.5, 1, 5, 10],
                              nsamples=[10, 30, 50],
                              env_file="balanceball_exp_v1.xml",
                              data_dir="balanceball_mpdata",
                              isdraw=False, isRecordSuccess=False, dirname='result'):
    # prepare data
    data_dir = os.environ['MPGEN_DIR'] + EXP_DIR + data_dir
    queries = np.loadtxt(data_dir + '/balanceball_queries.csv', delimiter=',')
    vmps = np.loadtxt(data_dir + '/balanceball_weights.csv', delimiter=',')
    starts = np.loadtxt(data_dir + '/balanceball_starts.csv', delimiter=',')
    goals = np.loadtxt(data_dir + '/balanceball_goals.csv', delimiter=',')

    if np.shape(queries)[-1] == np.shape(goals)[0]:
        queries = np.expand_dims(queries, axis=-1)

    inputs = np.concatenate([queries, starts, goals], axis=1)

    # prepare model
    nn_structure = {'d_feat': 40,
                    'feat_layers': [20],
                    'mean_layers': [60],
                    'scale_layers': [60],
                    'mixing_layers': [20]}
    d_input = np.shape(queries)[-1]
    d_output = np.shape(vmps)[1]
    mp = QVMP(kernel_num=10, elementary_type='minjerk')

    rstates = np.random.randint(0, 100, size=MAX_EXPNUM)
    n_test = 50
    for expId in range(MAX_EXPNUM):
        trdata, tdata, trvmps, tvmps = train_test_split(
            inputs, vmps, test_size=0.95, random_state=rstates[expId])
        print("use {} data for training and {} data for testing".format(
            np.shape(trdata)[0], np.shape(tdata)[0]))

        for modelId in range(len(mce_vals)):
            print("======== Exp: {} with {} ========".format(expId, mce_vals[modelId]))
            mdnmp = MDNMP(n_comps=nmodel, d_input=d_input, d_output=d_output,
                          nn_structure=nn_structure, var_init=VAR_INIT, scaling=1.0)
            mdnmp.lratio['entropy'] = mce_vals[modelId]
            mdnmp.build_mdn(learning_rate=0.0002)
            mdnmp.init_train()

            is_pos = np.ones(shape=(np.shape(trvmps)[0], 1))
            trqueries = trdata[:, 0:d_input]
            mdnmp.train(trqueries, trvmps, is_pos, max_epochs=30000,
                        is_load=False, is_save=False)

            tqueries = tdata[:n_test, 0:d_input]
            starts = tdata[:n_test, d_input:d_input + 4]
            goals = tdata[:n_test, d_input + 4:]

            res = np.zeros(shape=(1, len(nsamples)))
            for sampleId in range(len(nsamples)):
                wout, outdict = mdnmp.predict(tqueries, nsamples[sampleId])
                srate = evaluate_balanceball(wout, tqueries, starts, goals,
                                             low_ctrl=TaskSpaceVelocityController,
                                             high_ctrl=TaskSpacePositionVMPController(qvmp=mp),
                                             env_path=ENV_DIR + env_file,
                                             isdraw=isdraw, isRecordSuccess=isRecordSuccess)
                res[0, sampleId] = srate

            with open(dirname + "/" + "mdn_" + str(mce_vals[modelId]), "a") as f:
                np.savetxt(f, np.array(res), delimiter=',', fmt='%.3f')
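# Usage sketch (illustrative only): sweep a few MCE weighting values and
# append the per-sample-count success rates to files named
# result/mdn_<mce_val>, as the function above does by default.
run_mdnmp_for_balanceball(nmodel=3, MAX_EXPNUM=5, mce_vals=[0, 1, 10],
                          nsamples=[10, 30], dirname='result')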
nn_structure = {'d_feat': 100,
                'feat_layers': [1000, 400],
                'mean_layers': [200, 100],
                'scale_layers': [200, 100],
                'mixing_layers': [200, 20]}

d_input = np.shape(queries)[-1]
d_output = np.shape(weights)[1]
trqueries = queries
trweights = weights

mdnmp = MDNMP(n_comps=options.nmodel, d_input=d_input, d_output=d_output,
              nn_structure=nn_structure, var_init=VAR_INIT, scaling=1.0)

lrate = 0.00002
if options.model_name == "omce":
    mdnmp.lratio['entropy'] = 10
    mdnmp.is_orthogonal_cost = True
    mdnmp.is_mce_only = True
    mdnmp.is_normalized_grad = False
    mdnmp.cross_train = True
    mdnmp.nll_lrate = lrate
    mdnmp.ent_lrate = 10 * lrate
elif options.model_name == "elk":
    mdnmp.lratio['entropy'] = 10
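# Plausible continuation (assumed, not part of the excerpt above, which is
# cut off inside the "elk" branch): build the configured network and train
# on the full query/weight set, following the call pattern used by the other
# experiments in this section. The epoch count is an arbitrary placeholder.
mdnmp.build_mdn(learning_rate=lrate)
mdnmp.init_train()
is_pos = np.ones(shape=(np.shape(trweights)[0], 1))
mdnmp.train(trqueries, trweights, is_pos, max_epochs=20000,  # assumed
            is_load=False, is_save=False)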
def run_mdnmp_for_docking(nmodel=3, MAX_EXPNUM=20, use_entropy_cost=[False, True],
                          model_names=["Original MDN", "Entropy MDN"],
                          nsamples=[1, 10, 30, 50, 70]):
    queries = np.loadtxt('data/docking_queries.csv', delimiter=',')
    vmps = np.loadtxt('data/docking_weights.csv', delimiter=',')
    starts = np.loadtxt('data/docking_starts.csv', delimiter=',')
    goals = np.loadtxt('data/docking_goals.csv', delimiter=',')

    # clean the data
    wtest = np.expand_dims(vmps, axis=1)
    cc, successId = evaluate_docking(wtest, queries, starts, goals)
    data = np.concatenate([queries, starts, goals], axis=1)
    data = data[successId, :]
    vmps = vmps[successId, :]
    knum = np.shape(vmps)[1]

    rstates = np.random.randint(0, 100, size=MAX_EXPNUM)
    srates = {}
    nn_structure = {'d_feat': 20,
                    'feat_layers': [40],
                    'mean_layers': [60],
                    'scale_layers': [60],
                    'mixing_layers': [60]}

    for k in range(len(use_entropy_cost)):
        mdnmp = MDNMP(n_comps=nmodel, d_input=6, d_output=knum,
                      nn_structure=nn_structure, scaling=1)
        if use_entropy_cost[k]:
            mdnmp.lratio['entropy'] = 1
        else:
            mdnmp.lratio['entropy'] = 0

        csrates = np.zeros(shape=(MAX_EXPNUM, len(nsamples)))
        for expId in range(MAX_EXPNUM):
            trdata, tdata, trvmps, tvmps = train_test_split(
                data, vmps, test_size=0.3, random_state=rstates[expId])
            trdata, _, trvmps, _ = train_test_split(
                trdata, trvmps, test_size=0.3, random_state=rstates[expId])
            print("use {} data for training and {} data for testing".format(
                np.shape(trdata)[0], np.shape(tdata)[0]))
            print("======== Exp: {} with {} ========".format(expId, model_names[k]))

            weights = np.ones(shape=(np.shape(trvmps)[0], 1))
            train_weights = np.copy(weights)
            trqueries = trdata[:, 0:6]
            mdnmp.build_mdn(learning_rate=0.00003)
            mdnmp.init_train()
            mdnmp.train(trqueries, trvmps, train_weights, max_epochs=20000,
                        is_load=False, is_save=False)

            tqueries = tdata[:, 0:6]
            for i in range(len(nsamples)):
                wout, _ = mdnmp.predict(tqueries, nsamples[i])
                starts = tdata[:, 6:8]
                goals = tdata[:, 8:10]
                srate, _ = evaluate_docking(wout, tqueries, starts, goals)
                csrates[expId, i] = srate

        srates[model_names[k]] = np.mean(csrates, axis=0)

    return srates
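# Usage sketch (illustrative only): the docking data is read from the
# relative 'data/' directory, so this assumes the script runs from the
# repository root. srates maps each model name to its mean success rate
# per sample count.
srates = run_mdnmp_for_docking(nmodel=3, MAX_EXPNUM=5, nsamples=[1, 10, 30])
for name, rates in srates.items():
    print(name, rates)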