def atari_learn(env, num_timesteps,
                model_input_dir="./data/dqn_Pong_double_dqn_PongNoFrameskip-v4_04-02-2020_17-14-18/2333/"):
    """Configure and run ``poison_dqn.learn`` on ``env``, then close ``env``.

    Args:
        env: Wrapped environment. It must carry a wrapper that
            ``get_wrapper_by_name(env, "Monitor")`` can find, because the
            stopping criterion reads ``get_total_steps()`` from it.
        num_timesteps: Step budget. Training stops once the Monitor wrapper
            reports at least this many total steps. It also scales the
            learning-rate and exploration schedules.
        model_input_dir: Directory forwarded to ``poison_dqn.learn`` as
            ``model_input_dir``. Defaults to the checkpoint path that used to
            be hard-coded here, so existing callers behave as before.

    The environment is closed even when ``poison_dqn.learn`` raises.
    """
    # This is just a rough estimate
    num_iterations = float(num_timesteps) / 4.0

    lr_multiplier = 1.0
    lr_schedule = PiecewiseSchedule([
        (0, 1e-4 * lr_multiplier),
        (num_iterations / 10, 1e-4 * lr_multiplier),
        (num_iterations / 2, 5e-5 * lr_multiplier),
    ], outside_value=5e-5 * lr_multiplier)

    # NOTE(review): how OptimizerSpec consumes ``lr_lambda`` (absolute rate vs.
    # multiplier on the optimizer's base rate) is not visible here -- confirm
    # against poison_dqn before changing the schedule values.
    optimizer = poison_dqn.OptimizerSpec(
        constructor=torch.optim.Adam,
        kwargs=dict(eps=1e-4),
        lr_lambda=lambda t: lr_schedule.value(t),
    )

    def stopping_criterion(env, t):
        # notice that here t is the number of steps of the wrapped env,
        # which is different from the number of steps in the underlying env
        return get_wrapper_by_name(
            env, "Monitor").get_total_steps() >= num_timesteps

    exploration_schedule = PiecewiseSchedule([
        (0, 1.0),
        (1e6, 0.1),
        (num_iterations / 2, 0.01),
    ], outside_value=0.01)

    try:
        poison_dqn.learn(
            env=env,
            q_func=DQN,
            optimizer_spec=optimizer,
            model_input_dir=model_input_dir,
            exploration=exploration_schedule,
            stopping_criterion=stopping_criterion,
            replay_buffer_size=1000000,
            batch_size=32,
            gamma=0.99,
            learning_starts=50000,
            learning_freq=4,
            frame_history_len=4,
            target_update_freq=10000,
            grad_norm_clipping=10,
            double_q=True,
        )
    finally:
        # Release the environment on the failure path too.
        env.close()
def main(layers,t_hor,ind,nrolls,bts,ler_r,mom,teps,renew,imp,q):
    """Train or evaluate paired control/disturbance policies for a 6-D system.

    Two classifier networks are built with ``TransDef`` under the scopes
    "Control" and "Disturbance".  Each maps a normalised 6-D state
    (three positions, three velocities) to one of ``2**3`` bang-bang actions
    (every combination of per-axis min/max).  ``imp`` selects the mode:

    * ``imp == 1.0``: load stored policies and loop forever, prompting for an
      initial state and rollout options and scatter-plotting the trajectory.
    * ``imp == 2.0``: load stored policies, roll out ``nrolls`` random states
      placed on a sphere of user-chosen radius, derive a tracking-error bound
      and write it with the policies to "TESTpolicies6D_PT_h40_h40.pkl".
    * ``imp == 3.0``: load stored policies, evaluate value and actions on a
      100x100 (x, vx) grid and write the result to "figures_info.pkl".
    * anything else: train.  Every ``renew`` iterations the current weights
      are pushed to the front of the policy lists and a fresh labelled data
      set is generated with ``getPI``; the resulting policy lists are written
      to "policies6D_P&Tcoupled_h40_h40.pkl".

    The interactive prompts use ``input()`` and pass the result straight to
    ``range``/``plt.pause``/NumPy, i.e. they rely on Python 2 semantics where
    ``input()`` evaluates what is typed.  Only run this on trusted input; the
    same holds for the pickle files that are loaded.

    Args:
        layers: Layer sizes handed to ``TransDef``; ``layers[0]`` is also used
            as the number of state columns when sampling random states.
        t_hor: Horizon; the number of training iterations is
            ``int(abs(t_hor)/dt)*renew + 1`` with ``dt = 0.1``.
        ind: Worker index, only printed.
        nrolls: Number of random states per training set (the hold-out set
            uses ``nrolls // 100``) and per ``imp == 2.0`` evaluation.
        bts: Mini-batch size.
        ler_r: Unused; kept for interface compatibility.
        mom: Momentum passed to both RMSProp optimizers.
        teps: Unused; kept for interface compatibility.
        renew: Number of training iterations between policy snapshots.
        imp: Mode selector, see above.
        q: Unused; kept for interface compatibility.
    """
    # Quad Params: per-axis control bounds and disturbance bounds (trailing _).
    max_list = [0.1,0.1,11.81]  # w=1
    min_list = [-0.1,-0.1,7.81]
    max_list_ = [0.5,0.5,0.5]
    min_list_ = [-0.5,-0.5,-0.5]
    g = 9.81

    print('Starting worker-' + str(ind))

    # Calculate number of parameters of the policy
    nofparams = 0
    for i in xrange(len(layers)-1):
        nofparams += layers[i]*layers[i+1] + layers[i+1]
    print('Number of Params is: ' + str(nofparams))

    center = np.array([[0.0,0.0,0.0,0.0,0.0,0.0]])
    depth = 2.0
    incl = 1.0

    ##################### DEFINITIONS #####################
    dt = 0.1
    num_ac = 3   # number of control channels
    dist_ac = 3  # number of disturbance channels
    iters = int(np.abs(t_hor)/dt)*renew + 1

    ##################### INSTANTIATIONS #################
    states,y,Tt,L,_,_,_,_ = TransDef("Control",False,layers,depth,incl,center)
    states_,y_,Tt_,L_,_,_,_,_ = TransDef("Disturbance",False,layers,depth,incl,center)

    accuracy = tf.reduce_mean(tf.cast(
        tf.equal(tf.argmax(Tt,dimension=1),tf.argmax(y,dimension=1)),tf.float32))
    accuracy_ = tf.reduce_mean(tf.cast(
        tf.equal(tf.argmax(Tt_,dimension=1),tf.argmax(y_,dimension=1)),tf.float32))

    # Collected BEFORE the optimizers are created, so these lists do not pick
    # up any variables the optimizers add afterwards.
    C_func_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='Control')
    D_func_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='Disturbance')

    # DEFINE OPTIMIZER
    nu = tf.placeholder(tf.float32, shape=[])  # learning rate fed per step
    lr_schedule = PiecewiseSchedule([
        (0, 0.1),
        (10000, 0.01),
        (20000, 0.001),
        (30000, 0.0001),
    ], outside_value=0.0001)
    train_step = tf.train.RMSPropOptimizer(learning_rate=nu,momentum=mom).minimize(L)
    train_step_ = tf.train.RMSPropOptimizer(learning_rate=nu,momentum=mom).minimize(L_)

    # One-hot encoders for action indices; each player gets its own depth.
    hot_input = tf.placeholder(tf.int64,shape=(None))
    make_hot = tf.one_hot(hot_input, 2**num_ac, on_value=1, off_value=0)
    make_hot_ = tf.one_hot(hot_input, 2**dist_ac, on_value=1, off_value=0)

    # INITIALIZE GRAPH
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    # Enumerate every min/max corner of the control and disturbance boxes.
    perms = list(itertools.product([-1,1], repeat=num_ac))
    true_ac_list = [
        [hi if sign == 1 else lo for sign,lo,hi in zip(corner,min_list,max_list)]
        for corner in perms]
    perms_ = list(itertools.product([-1,1], repeat=dist_ac))
    true_ac_list_ = [
        [hi if sign == 1 else lo for sign,lo,hi in zip(corner,min_list_,max_list_)]
        for corner in perms_]

    def V_0(x):
        """Terminal cost: Euclidean norm of each row minus 1.0, shape (N, 1)."""
        return np.linalg.norm(x,axis=1,keepdims=True) - 1.0

    def F(ALL_x,opt_a,opt_b):
        """State derivative for states ``ALL_x`` under control/disturbance."""
        col1 = ALL_x[:,3,None] - opt_b[:,0,None]
        col2 = ALL_x[:,4,None] - opt_b[:,1,None]
        col3 = ALL_x[:,5,None] - opt_b[:,2,None]
        col4 = g*opt_a[:,0,None]
        col5 = -g*opt_a[:,1,None]
        col6 = opt_a[:,2,None] - g
        return np.concatenate((col1,col2,col3,col4,col5,col6),axis=1)

    def RK4(ALL_x,dtt,opt_a,opt_b):
        """One classical Runge-Kutta step of size ``dtt`` with inputs held fixed."""
        k1 = F(ALL_x,opt_a,opt_b)
        k2 = F(ALL_x + np.multiply(dtt/2.0,k1),opt_a,opt_b)
        k3 = F(ALL_x + np.multiply(dtt/2.0,k2),opt_a,opt_b)
        k4 = F(ALL_x + np.multiply(dtt,k3),opt_a,opt_b)
        return ALL_x + np.multiply((dtt/6.0),(k1 + 2.0*k2 + 2.0*k3 + k4))

    def ConvCosSin(ALL_x):
        """Normalise raw states for the networks: positions /5, velocities /10."""
        pos = ALL_x[:,[0,1,2]]/5.0
        vel = ALL_x[:,[3,4,5]]/10.0
        return np.concatenate((pos,vel),axis=1)

    def Hot_to_Cold(hots,ac_list):
        """Map per-row scores to the action vector of the arg-max entry."""
        return np.asarray([ac_list[i] for i in hots.argmax(axis=1)])

    def set_params(variables,values):
        """Assign ``values[j]`` to ``variables[j]`` for every entry of ``values``."""
        # NOTE(review): every ``assign`` call here builds a new graph op, so
        # the graph presumably keeps growing over a long run -- consider
        # pre-built assign ops fed through placeholders.
        for j in range(len(values)):
            sess.run(variables[j].assign(values[j]))

    def policy_step(x,sub_dt,Noise):
        """Advance ``x`` by ``sub_dt`` under the currently loaded policies.

        With ``Noise`` truthy the disturbance is one random corner shared by
        all rows; otherwise the disturbance network picks it per row.
        Returns the next states and the raw control-network outputs.
        """
        tmp = ConvCosSin(x)
        hots = sess.run(Tt,{states:tmp})
        opt_a = Hot_to_Cold(hots,true_ac_list)
        if Noise:
            hots_ = np.zeros((1,2**dist_ac))
            hots_[0][np.random.randint(2**dist_ac)] = 1
        else:
            hots_ = sess.run(Tt_,{states_:tmp})
        opt_b = Hot_to_Cold(hots_,true_ac_list_)
        return RK4(x,sub_dt,opt_a,opt_b),hots

    def getPI(ALL_x,F_PI=(),F_PI_=(),subSamples=1):
        """Label each state with the min-max optimal control/disturbance.

        Every (control, disturbance) corner pair is applied for one ``dt``;
        the resulting states are then rolled forward through the stored
        policies ``F_PI``/``F_PI_`` (in list order) while tracking the largest
        ``V_0`` of the position seen.  The control minimises the worst case
        over disturbances; the disturbance label is the maximiser for that
        control.  Returns two one-hot arrays (control, disturbance).  The
        session's current network weights are restored before returning.
        """
        current_params = sess.run(C_func_vars)
        current_params_ = sess.run(D_func_vars)
        n_samples = ALL_x.shape[0]
        n_ctrl = len(true_ac_list)
        n_dist = len(true_ac_list_)
        sub_dt = dt/float(subSamples)
        ones = np.ones([n_samples,1])

        # Row blocks are ordered control-major, disturbance-minor.
        blocks = []
        for ctrl in true_ac_list:
            opt_a = np.asarray(ctrl)*ones
            for dist in true_ac_list_:
                opt_b = np.asarray(dist)*ones
                Snx = ALL_x
                for _ in range(subSamples):
                    Snx = RK4(Snx,sub_dt,opt_a,opt_b)
                blocks.append(Snx)
        next_states_ = np.concatenate(blocks,axis=0)

        values = V_0(next_states_[:,[0,1,2]])
        for params,params_ in zip(F_PI,F_PI_):
            # Reload pi*(x,t+dt) parameters
            set_params(C_func_vars,params)
            set_params(D_func_vars,params_)
            tmp = ConvCosSin(next_states_)
            opt_a = Hot_to_Cold(sess.run(Tt,{states:tmp}),true_ac_list)
            opt_b = Hot_to_Cold(sess.run(Tt_,{states_:tmp}),true_ac_list_)
            for _ in range(subSamples):
                next_states_ = RK4(next_states_,sub_dt,opt_a,opt_b)
            # NOTE(review): the flattened source does not show whether this
            # update sat inside the sub-step loop; both readings agree for
            # subSamples == 1, which is the only value used in this function.
            values = np.maximum(values,V_0(next_states_[:,[0,1,2]]))

        # (N, n_ctrl*n_dist) -> (N, n_ctrl, n_dist)
        per_ctrl = values.reshape([-1,n_samples]).T.reshape(n_samples,n_ctrl,n_dist)
        worst_dist = np.argmax(per_ctrl,axis=2)
        worst_val = np.max(per_ctrl,axis=2)
        index_best_a_ = np.argmin(worst_val,axis=1)
        index_best_b_ = worst_dist[np.arange(n_samples),index_best_a_]

        set_params(C_func_vars,current_params)
        set_params(D_func_vars,current_params_)
        return (sess.run(make_hot,{hot_input:index_best_a_}),
                sess.run(make_hot_,{hot_input:index_best_b_}))

    def getActions(ALL_x,F_PI=(),F_PI_=(),StepsLeft=None):
        """Return arg-max (control, disturbance) action indices for ``ALL_x``.

        Uses the policy ``StepsLeft`` entries from the end of the lists
        (the first entry when ``StepsLeft`` is None).  The session's current
        network weights are restored before returning.
        """
        current_params = sess.run(C_func_vars)
        current_params_ = sess.run(D_func_vars)
        if StepsLeft is None:
            StepsLeft = len(F_PI)
        set_params(C_func_vars,F_PI[len(F_PI)-StepsLeft])
        set_params(D_func_vars,F_PI_[len(F_PI_)-StepsLeft])

        tmp = ConvCosSin(ALL_x)
        actions = np.argmax(sess.run(Tt,{states:tmp}),axis=1)
        actions_ = np.argmax(sess.run(Tt_,{states_:tmp}),axis=1)

        set_params(C_func_vars,current_params)
        set_params(D_func_vars,current_params_)
        return actions,actions_

    def getTraj(ALL_x,F_PI=(),F_PI_=(),subSamples=1,StepsLeft=None,Noise=False, Static=False, justV=False):
        """Roll ``ALL_x`` forward under the stored policies.

        Args:
            ALL_x: Initial states, one per row.
            F_PI, F_PI_: Stored control / disturbance weight lists.
            subSamples: Number of integration sub-steps per ``dt``.
            StepsLeft: How many policies from the end of the lists to use
                (all of them when None).
            Noise: If truthy, replace the disturbance policy by a random
                corner (see ``policy_step``).
            Static: If truthy, keep applying the single policy at index
                ``len(F_PI)-StepsLeft`` for a prompted number of steps
                instead of walking through the list.
            justV: If truthy, do not record states/actions, only values.

        Returns:
            ``(traj, actions, values)``: recorded states (starting with
            ``ALL_x``), the first row's control index per recorded step, and
            the running maximum of ``V_0`` over positions.
        """
        current_params = sess.run(C_func_vars)
        current_params_ = sess.run(D_func_vars)
        if StepsLeft is None:
            StepsLeft = len(F_PI)
        sub_dt = dt/float(subSamples)
        traj = [ALL_x]
        actions = []

        def advance(x,vals):
            # NOTE(review): nesting inferred from the flattened source --
            # recording and value update are taken to happen per sub-step.
            for _ in range(subSamples):
                x,hots = policy_step(x,sub_dt,Noise)
                if not justV:
                    traj.append(x)
                    actions.append(hots.argmax(axis=1)[0])
                vals = np.maximum(vals,V_0(x[:,[0,1,2]]))
            return x,vals

        next_states_ = ALL_x
        values = V_0(next_states_[:,[0,1,2]])
        if Static:
            steps = input("How Many Steps? ")
            set_params(C_func_vars,F_PI[len(F_PI)-StepsLeft])
            set_params(D_func_vars,F_PI_[len(F_PI_)-StepsLeft])
            for i in range(steps):
                next_states_,values = advance(next_states_,values)
                if i % 20 == 0:
                    print(i)
        else:
            for params,params_ in zip(F_PI[len(F_PI)-StepsLeft:],F_PI_[len(F_PI_)-StepsLeft:]):
                # Reload pi*(x,t+dt) parameters
                set_params(C_func_vars,params)
                set_params(D_func_vars,params_)
                next_states_,values = advance(next_states_,values)

        set_params(C_func_vars,current_params)
        set_params(D_func_vars,current_params_)
        return traj,actions,values

    def load_policies():
        """Load the stored (control, disturbance) weight lists."""
        # SECURITY: pickle executes arbitrary code on load; trusted files only.
        with open("policies6D_P&Tc_h40_h40.pkl", "rb") as fh:
            return pickle.load(fh)

    def make_dataset(n_samples,F_PI,F_PI_):
        """Sample random states and label them with ``getPI``.

        Returns ``(raw_states, normalised_states, control_labels,
        disturbance_labels)``.
        """
        raw = np.random.uniform(-5.0,5.0,(n_samples,layers[0]))
        raw[:,[3,4,5]] = raw[:,[3,4,5]]*2.0  # velocities span [-10, 10]
        labels_c,labels_d = getPI(raw,F_PI,F_PI_,subSamples=1)
        return raw,ConvCosSin(raw),labels_c,labels_d

    # *****************************************************************************
    #
    # ============================= MAIN LOOP ====================================
    #
    # *****************************************************************************
    ALL_PI = []
    ALL_PI_ = []
    act_color = ['r','g','b','y']

    if(imp == 1.0):
        ALL_PI,ALL_PI_ = load_policies()
        cc = 0
        while True:
            state_get = input('State: ')
            sub_smpl = input('SUBSAMPLING: ')
            pause_len = input('Pause: ')
            s_left = input("How many steps left to go (max. "+str(len(ALL_PI))+")? -> ")
            noise = input("Noise? (0/1): ")
            stat = input("Static? (0/1): ")
            traj,_,_ = getTraj(state_get,F_PI=ALL_PI,F_PI_=ALL_PI_,subSamples=sub_smpl,StepsLeft=s_left,Noise=noise,Static=stat)
            all_to = np.concatenate(traj)
            plt.scatter(all_to[:,[0]],all_to[:,[2]],color=act_color[cc % len(act_color)])
            plt.pause(pause_len)
            cc = cc + 1

    elif(imp == 2.0):
        ALL_PI,ALL_PI_ = load_policies()
        dist_bound = input("Distance: ")
        state_get = np.random.uniform(-5.0,5.0,(nrolls,layers[0]))
        # Put the position part of every sample on a sphere of radius dist_bound.
        state_get[:,:3] = dist_bound*state_get[:,:3]/np.linalg.norm(state_get[:,:3],axis=1,keepdims=True)
        sub_smpl = input('SUBSAMPLING: ')
        s_left = input("How many steps left to go (max. "+str(len(ALL_PI))+")? -> ")
        noise = input("Noise? (0/1): ")
        stat = input("Static? (0/1): ")
        _,_,values = getTraj(state_get,F_PI=ALL_PI,F_PI_=ALL_PI_,subSamples=sub_smpl,StepsLeft=s_left,Noise=noise,Static=stat,justV=True)
        values = values + 1.0  # undo the "- 1.0" in V_0: largest position norm seen
        print(values.shape)
        filt = (values < dist_bound).T[0]
        print(filt.shape)
        subset = state_get[filt]
        print(len(subset))
        plt.hist(values,bins=100)
        plt.pause(10)
        tracking_error_bound = np.max(abs(subset[:,:3]),axis=0)
        print(tracking_error_bound)
        print(subset)
        save_dict = {}
        save_dict["weights"]=(ALL_PI,ALL_PI_)
        # The networks that consume these weights were built from ``layers``.
        save_dict["c_layers"]=layers
        save_dict["d_layers"]=layers
        save_dict["control_bounds_upper"]= max_list
        save_dict["control_bounds_lower"]= min_list
        save_dict["tracking_error_bound"]= tracking_error_bound
        save_dict["planner_params"]={"max_speed":[0.5,0.5,0.5],"max_vel_dist":[0.0,0.0,0.0],"max_acc_dist":[0.0,0.0,0.0]}
        save_dict["normalization_args"] = [5.0,5.0,5.0,10.0,10.0,10.0]
        with open("TESTpolicies6D_PT_h40_h40.pkl", "wb") as fh:
            pickle.dump(save_dict,fh)

    elif(imp==3.0):
        ALL_PI,ALL_PI_ = load_policies()
        plot_dict = {}
        plot_dict["upper_bound"] = np.array([2.8,2.8])
        # Both grid axes below span -2.8..2.8.
        plot_dict["lower_bound"] = np.array([-2.8,-2.8])
        plot_dict["spacing_x_vx"] = np.array([100,100])
        plot_dict["slice_points_y_vy_z_vz"] = np.array([0.0,0.0,0.0,0.0])
        grid_x,grid_vx = np.meshgrid(np.linspace(-2.8,2.8,100),np.linspace(-2.8,2.8,100))
        flat_x = np.reshape(grid_x,[-1,1])
        flat_vx = np.reshape(grid_vx,[-1,1])
        zeros2 = np.zeros(shape=(len(flat_x),2))
        # Columns: x, y=0, z=0, vx, vy=0, vz=0
        state_get = np.concatenate((flat_x,zeros2,flat_vx,zeros2),axis=1)
        sub_smpl = input('SUBSAMPLING: ')
        s_left = input("How many steps left to go (max. "+str(len(ALL_PI))+")? -> ")
        noise = input("Noise? (0/1): ")
        stat = input("Static? (0/1): ")
        _,_,values = getTraj(state_get,F_PI=ALL_PI,F_PI_=ALL_PI_,subSamples=sub_smpl,StepsLeft=s_left,Noise=noise,Static=stat,justV=True)
        values = values + 1.0
        plot_dict["Value"] = np.reshape(values,grid_x.shape)
        A,A_ = getActions(state_get,F_PI=ALL_PI,F_PI_=ALL_PI_,StepsLeft=s_left)
        plot_dict["Control_Actions"] = np.reshape(A,grid_x.shape)
        plot_dict["Disturb_Actions"] = np.reshape(A_,grid_x.shape)
        with open("figures_info.pkl", "wb") as fh:
            pickle.dump(plot_dict,fh)

    else:
        # The very first progress line reports this schedule value; every
        # training step below runs with the fixed rate 0.001.
        nunu = lr_schedule.value(0)
        for i in xrange(iters):
            if np.mod(i,renew) == 0:
                if i != 0:
                    # Snapshot the policies just trained; newest goes first.
                    ALL_PI.insert(0,sess.run(C_func_vars))
                    ALL_PI_.insert(0,sess.run(D_func_vars))
                tic = time.time()
                ALL_x,pre_ALL_x,PI_c,PI_d = make_dataset(nrolls,ALL_PI,ALL_PI_)
                if i == 0:
                    print("Compute Data Time = "+str(time.time() - tic))
                _,pre_ALL_x_,PI_c_,PI_d_ = make_dataset(nrolls // 100,ALL_PI,ALL_PI_)
                if i != 0:
                    print('Again.')

            # |||||||||||| ---- PRINT ----- ||||||||||||
            if(np.mod(i,200) == 0):
                train_acc = sess.run(accuracy,{states:pre_ALL_x,y:PI_c})
                test_acc = sess.run(accuracy,{states:pre_ALL_x_,y:PI_c_})
                train_acc_ = sess.run(accuracy_,{states_:pre_ALL_x,y_:PI_d})
                test_acc_ = sess.run(accuracy_,{states_:pre_ALL_x_,y_:PI_d_})
                print(str(i) + ") control | TR_ACC = " + str(train_acc) + " | TE_ACC = " + str(test_acc) + " | Learning Rate = " + str(nunu))
                print(str(i) + ") disturb | TR_ACC = " + str(train_acc_) + " | TE_ACC = " + str(test_acc_) + " | Learning Rate = " + str(nunu))

            nunu = 0.001
            tmp = np.random.randint(len(ALL_x), size=bts)
            sess.run(train_step, feed_dict={states:pre_ALL_x[tmp],y:PI_c[tmp],nu:nunu})
            sess.run(train_step_, feed_dict={states_:pre_ALL_x[tmp],y_:PI_d[tmp],nu:nunu})

        # NOTE(review): the flattened source does not show whether this dump
        # sat inside the training branch or at function level.  It is kept in
        # the training branch so the evaluation modes can never overwrite a
        # trained policy file with the policies they merely loaded.
        with open("policies6D_P&Tcoupled_h40_h40.pkl", "wb") as fh:
            pickle.dump([ALL_PI,ALL_PI_],fh)
def main(layers, t_hor, ind, nrolls, bts, ler_r, mom, teps, renew, imp, q,
         num_ac):
    """Train a min/max ("bang-bang") policy network for a 10D quadrotor model.

    Every ``renew`` iterations the current network weights are pushed to the
    front of ``ALL_PI`` and a fresh set of states is sampled.  Each sampled
    state is labelled by ``getPI`` with the first action whose rollout (first
    action fixed, later steps driven by the stored policies) ends with the
    smallest ``V_0``.  Between resamplings the network is trained on random
    minibatches of those labels.  The list of weight snapshots is pickled to
    ``policies6D_L2_h20_h20.pkl`` when the loop finishes.

    The file targets Python 2 (``xrange``, evaluating ``input``).

    Args:
        layers: layer sizes handed to ``TransDef``; ``layers[0] - 2`` is used
            as the state dimension when sampling.
        t_hor: time horizon; the loop runs
            ``int(abs(t_hor) / dt) * renew + 1`` iterations.
        ind: worker index, only printed.
        nrolls: number of training states per resampling (the held-out set
            has ``nrolls // 100`` states).
        bts: minibatch size.
        ler_r: unused.
        mom: momentum passed to the RMSProp optimizer.
        teps: unused.
        renew: number of iterations between snapshots / resamplings.
        imp: when equal to ``1.0`` the function enters an interactive replay
            loop that never returns.
        q: unused.
        num_ac: number of control inputs; there are ``2 ** num_ac`` discrete
            actions.  ``F`` reads three control columns, so this must be 3.

    Returns:
        None.
    """
    # ---- Model constants -------------------------------------------------
    # The choices of n0, d1, d0 actually result in a very large steady state
    # error in the pitch/roll; this seems to be expected according to Pat's
    # report.
    n0 = 10  # Angular dynamics parameters
    d1 = 8
    d0 = 10
    kT = 0.91  # Thrust coefficient (vertical direction)
    grav = 9.81  # Acceleration due to gravity

    # Upper / lower bound of each control input (u1, u2, u3).
    max_list = [10 * np.pi / 180.0, 10 * np.pi / 180.0, 2.0 * grav]
    min_list = [-10 * np.pi / 180.0, -10 * np.pi / 180.0, 0.0]

    print('Starting worker-' + str(ind))

    # ---- Grid used only for plotting the learned set ---------------------
    refine = 1
    Nx = 100 * refine + 1
    minn = [-5.0, -10.0, 0.0, -10.0, -5.0, -10.0, 0.0, -10.0, -5.0, -10.0]
    maxx = [5.0, 10.0, 2 * np.pi, 10.0, 5.0, 10.0, 2 * np.pi, 10.0, 5.0, 10.0]
    # Positions live at state indices 0, 4 and 8.  The grid was previously
    # built from indices 0, 2, 4 (left over from a lower-dimensional layout),
    # which made the y axis span the angle range [0, 2*pi].
    axis_x = np.linspace(minn[0], maxx[0], Nx)
    axis_y = np.linspace(minn[4], maxx[4], Nx)
    axis_z = np.linspace(minn[8], maxx[8], Nx)
    X_, Y_, Z_ = np.meshgrid(axis_x, axis_y, axis_z)
    XX_ = np.reshape(X_, [-1, 1])
    YY_ = np.reshape(Y_, [-1, 1])
    ZZ_ = np.reshape(Z_, [-1, 1])
    col_ones = np.ones(XX_.shape)
    col_zeros = np.zeros(XX_.shape)
    # NOTE(review): the last column was written as ``1.2 * np.zeros(...)``,
    # i.e. all zeros; ``np.ones`` may have been intended -- confirm.  The
    # value is kept at zero here.
    grid_check = np.concatenate(
        (XX_, -1.5 * col_ones, col_zeros, col_zeros,
         YY_, -1.8 * col_ones, col_zeros, col_zeros,
         ZZ_, col_zeros),
        axis=1)

    # Number of weights + biases of a fully connected net with these sizes.
    nofparams = sum(n_in * n_out + n_out
                    for n_in, n_out in zip(layers[:-1], layers[1:]))
    print('Number of Params is: ' + str(nofparams))

    center = np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])
    depth = 2.0
    incl = 1.0

    # ---- Definitions -----------------------------------------------------
    dt = 0.05
    iters = int(np.abs(t_hor) / dt) * renew + 1
    # Sampled states have two columns fewer than the network input.
    # ConvCosSin emits a sin and a cos column for each of the two angle
    # columns it reads.
    state_dim = layers[0] - 2

    # ---- Graph -----------------------------------------------------------
    states, y, Tt, L, _l_r, _lb, _reg, _cross_entropy = TransDef(
        "Critic", False, layers, depth, incl, center)

    # Fraction of rows whose predicted arg-max action matches the label.
    predicted = tf.argmax(Tt, dimension=1)
    labelled = tf.argmax(y, dimension=1)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(predicted, labelled), tf.float32))

    nu = tf.placeholder(tf.float32, shape=[])  # learning rate fed per step
    train_step = tf.train.RMSPropOptimizer(learning_rate=nu,
                                           momentum=mom).minimize(L)

    hot_input = tf.placeholder(tf.int64, shape=None)
    make_hot = tf.one_hot(hot_input, 2**num_ac, on_value=1, off_value=0)

    theta = tf.trainable_variables()
    # Build the weight-reload ops once.  Calling ``var.assign(value)`` inside
    # the rollout loops would add new nodes to the graph on every call.
    theta_in = [
        tf.placeholder(var.dtype.base_dtype, shape=var.get_shape())
        for var in theta
    ]
    load_theta = tf.group(
        *[var.assign(new) for var, new in zip(theta, theta_in)])

    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    def set_params(params):
        """Overwrite the trainable variables with the arrays in ``params``."""
        sess.run(load_theta, feed_dict=dict(zip(theta_in, params)))

    def V_0(x):
        """Row-wise infinity norm of ``x`` minus 1, as a column vector."""
        return np.linalg.norm(x, ord=np.inf, axis=1, keepdims=True) - 1.0

    def p_corr(ALL_x):
        """Wrap angles into [0, 2*pi)."""
        return np.mod(ALL_x, 2.0 * np.pi)

    def F(ALL_x, opt_a, opt_b):
        """Time derivative of each state row under control ``opt_a``.

        Dynamics of the 10D quadrotor:
            x_dot    \\dot x_1  = x_2
            vx_dot   \\dot x_2  = g * tan(x_3)
            tx_dot   \\dot x_3  = -d1 * x_3 + x_4
            wx_dot   \\dot x_4  = -d0 * x_3 + n0 * u1
            y_dot    \\dot x_5  = x_6
            vy_dot   \\dot x_6  = g * tan(x_7)
            ty_dot   \\dot x_7  = -d1 * x_7 + x_8
            wy_dot   \\dot x_8  = -d0 * x_7 + n0 * u2
            z_dot    \\dot x_9  = x_10
            vz_dot   \\dot x_10 = kT * u3 - g
            uMin <= [u1; u2; u3] <= uMax

        ``opt_b`` (disturbance) is accepted for signature compatibility and
        is not used.
        """
        col1 = ALL_x[:, [1]]
        col2 = grav * np.tan(ALL_x[:, [2]])
        col3 = -d1 * ALL_x[:, [2]] + ALL_x[:, [3]]
        col4 = -d0 * ALL_x[:, [2]] + n0 * opt_a[:, [0]]
        col5 = ALL_x[:, [5]]
        # Was ``grav * cos(x_7)``; the model above (and the x axis) use tan.
        col6 = grav * np.tan(ALL_x[:, [6]])
        col7 = -d1 * ALL_x[:, [6]] + ALL_x[:, [7]]
        col8 = -d0 * ALL_x[:, [6]] + n0 * opt_a[:, [1]]
        col9 = ALL_x[:, [9]]
        # The gravity term was missing from the vertical acceleration.
        col10 = kT * opt_a[:, [2]] - grav
        return np.concatenate(
            (col1, col2, col3, col4, col5, col6, col7, col8, col9, col10),
            axis=1)

    def RK4(ALL_x, dtt, opt_a, opt_b):
        """One classical Runge-Kutta step of length ``dtt``.

        The control is held constant over the step and the angle columns
        (2 and 6) are wrapped after every intermediate state.  ``ALL_x`` is
        not modified.
        """

        def slope_at(state):
            # ``state`` is always a freshly allocated array, so wrapping it
            # in place cannot touch the caller's data.
            state[:, [2, 6]] = p_corr(state[:, [2, 6]])
            return F(state, opt_a, opt_b)

        k1 = F(ALL_x, opt_a, opt_b)
        k2 = slope_at(ALL_x + (dtt / 2.0) * k1)
        k3 = slope_at(ALL_x + (dtt / 2.0) * k2)
        k4 = slope_at(ALL_x + dtt * k3)

        Snx = ALL_x + (dtt / 6.0) * (k1 + 2.0 * k2 + 2.0 * k3 + k4)
        Snx[:, [2, 6]] = p_corr(Snx[:, [2, 6]])
        return Snx

    # Discrete action set: every combination of (min, max) per control input.
    perms = list(itertools.product([-1, 1], repeat=num_ac))
    true_ac_list = [[
        upper if sign == 1 else lower
        for sign, lower, upper in zip(signs, min_list, max_list)
    ] for signs in perms]

    def Hot_to_Cold(hots, ac_list):
        """Map each row of scores to the control vector of its arg-max."""
        return np.asarray([ac_list[k] for k in hots.argmax(axis=1)])

    def ConvCosSin(ALL_x):
        """Build the network input: scaled pos/vel/rates plus sin/cos."""
        angles = ALL_x[:, [2, 6]]
        pos = ALL_x[:, [0, 4, 8]] / 5.0
        vel = ALL_x[:, [1, 5, 9]] / 10.0
        arate = ALL_x[:, [3, 7]] / 30.0
        return np.concatenate(
            (pos, vel, arate, np.sin(angles), np.cos(angles)), axis=1)

    def getPI(ALL_x, F_PI=None, subSamples=1):
        """Label every state with the best first action.

        For each discrete action the states are advanced by ``dt`` under that
        action, then rolled forward one ``dt`` per weight set in ``F_PI``
        using the network's arg-max action.  The action whose rollout ends
        with the smallest ``V_0`` of the position columns is selected.

        Returns:
            (one-hot best action per state, that minimum value as a column).
        """
        F_PI = [] if F_PI is None else F_PI
        current_params = sess.run(theta)
        step = dt / float(subSamples)
        n_states = ALL_x.shape[0]
        try:
            branches = []
            for action in true_ac_list:
                opt_a = np.asarray(action) * np.ones([n_states, 1])
                Snx = ALL_x
                for _ in range(subSamples):
                    Snx = RK4(Snx, step, opt_a, None)
                branches.append(Snx)
            next_states = np.concatenate(branches, axis=0)

            for params in F_PI:
                set_params(params)
                hots = sess.run(Tt, {states: ConvCosSin(next_states)})
                opt_a = Hot_to_Cold(hots, true_ac_list)
                for _ in range(subSamples):
                    next_states = RK4(next_states, step, opt_a, None)
        finally:
            # Put the weights being trained back, even if a rollout failed.
            set_params(current_params)

        values_ = V_0(next_states[:, [0, 4, 8]])
        # One column per first action, one row per input state.
        compare_vals_ = values_.reshape([-1, n_states]).T
        index_best_a_ = compare_vals_.argmin(axis=1)
        values_ = np.min(compare_vals_, axis=1, keepdims=True)
        return sess.run(make_hot, {hot_input: index_best_a_}), values_

    def getTraj(ALL_x, F_PI=None, subSamples=1, Noise=False):
        """Roll ``ALL_x`` forward, one ``dt`` per weight set in ``F_PI``.

        Returns:
            (list of visited state arrays including the start, ``V_0`` of the
            final positions, arg-max action index of the first row at each
            step).
        """
        F_PI = [] if F_PI is None else F_PI
        current_params = sess.run(theta)
        step = dt / float(subSamples)
        next_states = ALL_x
        traj = [next_states]
        actions = []
        try:
            for params in F_PI:
                set_params(params)
                hots = sess.run(Tt, {states: ConvCosSin(next_states)})
                opt_a = Hot_to_Cold(hots, true_ac_list)
                for _ in range(subSamples):
                    next_states = RK4(next_states, step, opt_a, None)
                    if Noise:
                        next_states = next_states + np.random.normal(
                            size=next_states.shape) * 0.01
                traj.append(next_states)
                actions.append(hots.argmax(axis=1)[0])
        finally:
            set_params(current_params)
        return traj, V_0(next_states[:, [0, 4, 8]]), actions

    def sample_labelled(count, policies):
        """Draw ``count`` random states; return (states, net input, labels)."""
        batch = np.random.uniform(-5.0, 5.0, (count, state_dim))
        batch[:, [1, 5, 9]] = batch[:, [1, 5, 9]] * 2.0
        batch[:, [2, 6]] = batch[:, [2, 6]] * np.pi / 5.0 + np.pi
        batch[:, [3, 7]] = batch[:, [3, 7]] * 6.0
        labels, _ = getPI(batch, F_PI=policies, subSamples=3)
        return batch, ConvCosSin(batch), labels

    # *********************************************************************
    # ============================ MAIN LOOP ==============================
    # *********************************************************************
    ALL_PI = []
    nunu = 0.001  # learning rate fed to every training step

    if imp == 1.0:
        # Interactive replay of stored policies; this loop never exits.
        with open("policies6D_h20_h20.pkl", "rb") as fh:
            ALL_PI = pickle.load(fh)
        while True:
            # NOTE(review): under Python 2 ``input`` evaluates whatever is
            # typed; only use this mode with trusted input.
            state_get = input('State: ')
            sub_smpl = input('SUBSAMPLING: ')
            pause_len = input('Pause: ')
            traj, VAL, act = getTraj(state_get,
                                     F_PI=ALL_PI,
                                     subSamples=sub_smpl,
                                     Noise=False)
            act.append(act[-1])
            all_to = np.concatenate(traj)
            plt.scatter(all_to[:, [0]], all_to[:, [2]], c=act)
            plt.pause(pause_len)
            print(str(VAL))

    for i in xrange(iters):
        if np.mod(i, renew) == 0:
            if i != 0:
                # Freeze the policy just trained; newest snapshot goes first.
                ALL_PI.insert(0, sess.run(theta))

                fig = plt.figure(1)
                plt.clf()
                _, nn_vals, _ = getTraj(grid_check, ALL_PI, 20)
                near_zero = np.abs(nn_vals[:, 0]) < 0.05
                mini_reach_ = grid_check[near_zero]
                ax = fig.add_subplot(111, projection='3d')
                ax.scatter(mini_reach_[:, 0], mini_reach_[:, 4],
                           mini_reach_[:, 8])
                plt.pause(0.25)

            # ALL_PI is still empty at i == 0, so the first labels come from
            # a single-step lookahead.
            started = time.time()
            ALL_x, pre_ALL_x, PI = sample_labelled(nrolls, ALL_PI)
            print("Compute Data Time = " + str(time.time() - started))
            _, pre_ALL_x_, PI_ = sample_labelled(nrolls // 100, ALL_PI)

            if i != 0:
                print('Again.')

        # ---- Progress report ----
        if np.mod(i, 200) == 0:
            train_acc = sess.run(accuracy, {states: pre_ALL_x, y: PI})
            test_acc = sess.run(accuracy, {states: pre_ALL_x_, y: PI_})
            print(str(i) + ") | TR_ACC = " + str(train_acc) +
                  " | TE_ACC = " + str(test_acc) + " | Lerning Rate = " +
                  str(nunu))

        # ---- One minibatch step ----
        tmp = np.random.randint(len(ALL_x), size=bts)
        sess.run(train_step,
                 feed_dict={
                     states: pre_ALL_x[tmp],
                     y: PI[tmp],
                     nu: nunu
                 })

    with open("policies6D_L2_h20_h20.pkl", "wb") as fh:
        pickle.dump(ALL_PI, fh)
def main(layers, t_hor, ind, nrolls, bts, ler_r, mom, teps, renew, imp, q): # Quad Params aMax = 3.0 aMin = -1.0 * aMax wMax = 2 * np.pi / 10.0 wMin = -1.0 * wMax max_list = [wMax, aMax] print 'Starting worker-' + str(ind) f = 1 Nx = 100 * f + 1 minn = [-5.0, -5.0, 0.0, 6.0] maxx = [5.0, 5.0, 2 * np.pi, 12.0] X = np.linspace(minn[0], maxx[0], Nx) Y = np.linspace(minn[1], maxx[1], Nx) Z = np.linspace(minn[2], maxx[2], Nx) X_, Y_, Z_ = np.meshgrid(X, Y, Z) X, Y = np.meshgrid(X, Y) XX = np.reshape(X, [-1, 1]) YY = np.reshape(Y, [-1, 1]) XX_ = np.reshape(X_, [-1, 1]) YY_ = np.reshape(Y_, [-1, 1]) ZZ_ = np.reshape(Z_, [-1, 1]) grid_check = np.concatenate((XX_, YY_, ZZ_, 6.0 * np.ones(XX_.shape)), axis=1) grid_eval = np.concatenate( (XX, YY, 0.0 * np.ones(XX.shape), 6.0 * np.ones(XX.shape)), axis=1) grid_eval_ = np.concatenate( (XX, YY, (2.0 / 3.0) * np.pi * np.ones(XX.shape), 6.0 * np.ones(XX.shape)), axis=1) grid_eval__ = np.concatenate( (XX, YY, (4.0 / 3.0) * np.pi * np.ones(XX.shape), 6.0 * np.ones(XX.shape)), axis=1) grid_evall = np.concatenate( (XX, YY, 0.0 * np.ones(XX.shape), 12.0 * np.ones(XX.shape)), axis=1) grid_evall_ = np.concatenate( (XX, YY, (2.0 / 3.0) * np.pi * np.ones(XX.shape), 12.0 * np.ones(XX.shape)), axis=1) grid_evall__ = np.concatenate( (XX, YY, (4.0 / 3.0) * np.pi * np.ones(XX.shape), 12.0 * np.ones(XX.shape)), axis=1) reach100s = sio.loadmat('flat_1s.mat') reach100s = reach100s["M"] reach100s[:, [1, 2]] = reach100s[:, [2, 1]] reach100s[:, 2] = np.mod(reach100s[:, 2], 2.0 * np.pi) fi = (np.abs(reach100s[:, -1]) < 0.05) mini_reach = reach100s[fi] fi = (mini_reach[:, 3] == 6.0) mini_reach = mini_reach[fi] #mean_data = np.mean(reach100s[:,:-1],axis=0); #std_data = np.std(reach100s[:,:-1],axis=0); nofparams = 0 for i in xrange(len(layers) - 1): nofparams += layers[i] * layers[i + 1] + layers[i + 1] print 'Number of Params is: ' + str(nofparams) H_length = t_hor #-1.0; #Has to be negative #VA #VAR #center = np.array([[0.0,0.0]]) center = 
np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]) depth = 2.0 incl = 1.0 ##################### DEFINITIONS ##################### #layers = [2 + 1,10,1]; #VAR #ssize = layers[0] - 1; dt = 0.05 #VAR num_ac = 2 iters = int(np.abs(t_hor) / dt) * renew + 1 ##################### INSTANTIATIONS ################# states, y, Tt, L, l_r, lb, reg, cross_entropy = TransDef( "Critic", False, layers, depth, incl, center) ola1 = tf.argmax(Tt, dimension=1) ola2 = tf.argmax(y, dimension=1) ola3 = tf.equal(ola1, ola2) accuracy = tf.reduce_mean(tf.cast(ola3, tf.float32)) #a_layers = layers; #a_layers[-1] = 2; #We have two actions #states_,y_,Tt_,l_r_,lb_,reg_ = TransDef("Actor",False,a_layers,depth,incl,center,outp=True); V_func_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='Critic') #A_func_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='Actor'); #var_grad = tf.gradients(Tt_,states_)[0] var_grad_ = tf.gradients(Tt, states)[0] grad_x = tf.slice(var_grad_, [0, 0], [-1, layers[0] - 1]) #theta = tf.trainable_variables(); set_to_zero = [] for var in sorted(V_func_vars, key=lambda v: v.name): set_to_zero.append(var.assign(tf.zeros(tf.shape(var)))) set_to_zero = tf.group(*set_to_zero) set_to_not_zero = [] for var in sorted(V_func_vars, key=lambda v: v.name): set_to_not_zero.append( var.assign( tf.random_uniform(tf.shape(var), minval=-0.1, maxval=0.1))) set_to_not_zero = tf.group(*set_to_not_zero) # DEFINE LOSS lmbda = 0.0 #1.0**(-3.5);#0.01; beta = 0.00 #L = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(tf.sub(y,Tt)),1,keep_dims=True))) + beta*tf.reduce_mean(tf.reduce_max(tf.abs(grad_x),reduction_indices=1,keep_dims=True)); #L = tf.reduce_mean(tf.mul(tf.exp(imp*t_vec),tf.abs(tf.sub(y,Tt)))) + lmbda*reg; #L = tf.reduce_mean(tf.abs(tf.sub(y,Tt))) + lmbda*reg; # DEFINE OPTIMIZER #nu = 5.01; #nunu = ler_r;#0.00005; nu = tf.placeholder(tf.float32, shape=[]) #VAR #lr_multiplier = ler_r lr_schedule = PiecewiseSchedule([ (0, 0.1), (10000, 0.01), (20000, 0.001), (30000, 0.0001), ], 
outside_value=0.0001) #optimizer = tf.train.GradientDescentOptimizer(nu) #optimizer #train_step = tf.train.MomentumOptimizer(learning_rate=nu,momentum=mom).minimize(L) #optimizer #train_step = tf.train.AdamOptimizer(learning_rate=nu).minimize(L); train_step = tf.train.RMSPropOptimizer(learning_rate=nu, momentum=mom).minimize(L) #optimizer = tf.train.RMSPropOptimizer(learning_rate=nu,momentum=mom); #gvs = optimizer.compute_gradients(L,theta); #capped_gvs = [(tf.clip_by_value(grad, -3., 3.), var) for grad, var in gvs]; #train_step = optimizer.apply_gradients(gvs); #train_step = tf.train.AdagradOptimizer(learning_rate=nu,initial_accumulator_value=0.5).minimize(L); hot_input = tf.placeholder(tf.int64, shape=(None)) make_hot = tf.one_hot(hot_input, 4, on_value=1, off_value=0) # INITIALIZE GRAPH theta = tf.trainable_variables() sess = tf.Session() init = tf.initialize_all_variables() sess.run(init) def V_0(x): return np.linalg.norm(x, ord=np.inf, axis=1, keepdims=True) - 2.0 #return np.linalg.norm(x,axis=1,keepdims=True) - 2.0 def p_corr(ALL_x): ALL_x = np.mod(ALL_x, 2.0 * np.pi) return ALL_x def F(ALL_x, opt_a, opt_b): sin_phi = np.around(np.sin(ALL_x[:, 2, None]), 5) #<+++++ ROUNDOFF ERROR!!! cos_phi = np.around(np.cos(ALL_x[:, 2, None]), 5) col1 = np.multiply(ALL_x[:, 3, None], cos_phi) col2 = np.multiply(ALL_x[:, 3, None], sin_phi) col3 = opt_a[:, 0, None] col4 = opt_a[:, 1, None] return np.concatenate((col1, col2, col3, col4), axis=1) ####################### RECURSIVE FUNC #################### def RK4(ALL_x, dtt, opt_a, opt_b): k1 = F(ALL_x, opt_a, opt_b) #### !!! # ~~~~ Compute optimal input (k2) ALL_tmp = ALL_x + np.multiply(dtt / 2.0, k1) ALL_tmp[:, 2] = p_corr(ALL_tmp[:, 2]) k2 = F(ALL_tmp, opt_a, opt_b) #### !!! # ~~~~ Compute optimal input (k3) ALL_tmp = ALL_x + np.multiply(dtt / 2.0, k2) ALL_tmp[:, 2] = p_corr(ALL_tmp[:, 2]) k3 = F(ALL_tmp, opt_a, opt_b) #### !!! 
# ~~~~ Compute optimal input (k4) ALL_tmp = ALL_x + np.multiply(dtt, k3) ALL_tmp[:, 2] = p_corr(ALL_tmp[:, 2]) k4 = F(ALL_tmp, opt_a, opt_b) #### !!! Snx = ALL_x + np.multiply((dtt / 6.0), (k1 + 2.0 * k2 + 2.0 * k3 + k4)) #np.multiply(dtt,k1) Snx[:, 2] = p_corr(Snx[:, 2]) return Snx perms = list(itertools.product([-1, 1], repeat=num_ac)) true_ac_list = [] for i in range(len(perms)): #2**num_actions ac_tuple = perms[i] ac_list = [tmp1 * tmp2 for tmp1, tmp2 in zip(ac_tuple, max_list)] #ASSUMING: aMax = -aMin true_ac_list.append(ac_list) def Hot_to_Cold(hots, ac_list): a = hots.argmax(axis=1) a = np.asarray([ac_list[i] for i in a]) return a def Normalize(ALL_x): ALL_x[:, [0, 1]] = ALL_x[:, [0, 1]] / 5.0 ALL_x[:, [2]] = (ALL_x[:, [2]] - np.pi) / np.pi ALL_x[:, [3]] = (ALL_x[:, [3]] - 9.0) / 3.0 return ALL_x def getPI( ALL_x, F_PI=[], ret_traj=False, subSamples=1 ): #Things to keep in MIND: You want the returned value to be the minimum accross a trajectory. current_params = sess.run(theta) if (ret_traj): traj = [] #perms = list(itertools.product([-1,1], repeat=num_ac)) next_states = [] for i in range(len(perms)): opt_a = np.asarray(true_ac_list[i]) * np.ones([ALL_x.shape[0], 1]) Snx = ALL_x for _ in range(subSamples): Snx = RK4(Snx, dt / float(subSamples), opt_a, None) next_states.append(Snx) next_states = np.concatenate(next_states, axis=0) if (ret_traj): traj.append(next_states) values = V_0(next_states[:, [0, 1]]) for params in F_PI: for ind in range(len(params)): #Reload pi*(x,t+dt) parameters sess.run(theta[ind].assign(params[ind])) hots = sess.run(Tt, {states: ConvCosSin(next_states)}) opt_a = Hot_to_Cold(hots, true_ac_list) for _ in range(subSamples): next_states = RK4(next_states, dt / float(subSamples), opt_a, None) if (ret_traj): traj.append(next_states) values = np.min((values, V_0(next_states[:, [0, 1]])), axis=0) compare_vals = values.reshape([-1, ALL_x.shape[0]]).T index_best_a = compare_vals.argmin( axis=1) #.reshape([-1,1]); #Changed to ARGMAX values = 
np.min(compare_vals, axis=1, keepdims=True) #Changed to MAX final_values = np.min((values, V_0(ALL_x[:, [0, 1]])), axis=0) values_ = V_0(next_states[:, [0, 1]]) compare_vals_ = values_.reshape([-1, ALL_x.shape[0]]).T index_best_a_ = compare_vals_.argmin(axis=1) values_ = np.min(compare_vals_, axis=1, keepdims=True) for ind in range(len(current_params)): #Reload pi*(x,t+dt) parameters sess.run(theta[ind].assign(current_params[ind])) #return index_best_a,final_values if (ret_traj): return sess.run(make_hot, {hot_input: index_best_a_}), values_, traj #return sess.run(make_hot,{hot_input:index_best_a}),final_values return sess.run(make_hot, {hot_input: index_best_a_}), values_ def getTraj(ALL_x, F_PI=[], subSamples=1): current_params = sess.run(theta) next_states = ALL_x traj = [next_states] actions = [] for params in F_PI: for ind in range(len(params)): #Reload pi*(x,t+dt) parameters sess.run(theta[ind].assign(params[ind])) hots = sess.run(Tt, {states: ConvCosSin(next_states)}) opt_a = Hot_to_Cold(hots, true_ac_list) for _ in range(subSamples): next_states = RK4(next_states, dt / float(subSamples), opt_a, None) traj.append(next_states) actions.append(hots.argmax(axis=1)[0]) #values = np.min((values,V_0(next_states[:,[0,1]])),axis=0); for ind in range(len(current_params)): #Reload pi*(x,t+dt) parameters sess.run(theta[ind].assign(current_params[ind])) return traj, V_0(next_states[:, [0, 1]]), actions def ConvCosSin(ALL_x): sin_phi = np.sin(ALL_x[:, 2, None]) cos_phi = np.cos(ALL_x[:, 2, None]) insertion = np.concatenate((sin_phi, cos_phi), axis=1) ret_val = np.insert(ALL_x[:, [0, 1, 3]], 2, insertion.T, axis=1) return ret_val # ***************************************************************************** # # ============================= MAIN LOOP ==================================== # ( ) # ***************************************************************************** t1 = time.time() t = 0.0 mse = np.inf k = 0 kk = 0 beta = 3.0 batch_size = bts tau = 1000.0 steps = 
teps ALL_PI = [] nunu = lr_schedule.value(k) if (imp == 1.0): ALL_PI = pickle.load(open("policies8.pkl", "rb")) while (imp == 1.0): state_get = input('State: ') sub_smpl = input('SUBSAMPLING: ') pause_len = input('Pause: ') traj, VAL, act = getTraj(state_get, ALL_PI, sub_smpl) act.append(act[-1]) all_to = np.concatenate(traj) plt.scatter(all_to[:, [0]], all_to[:, [1]], c=act) plt.pause(pause_len) print(str(VAL)) # print(str(traj)); for i in xrange(iters): if (np.mod(i, renew) == 0 and i is not 0): ALL_PI.insert(0, sess.run(theta)) # plt.figure(1) # plt.clf(); # letsee = sess.run(ola3,{states:pre_ALL_x,y:PI}) # letsee = np.array([not(i) for i in letsee]) # ALL_xx = pre_ALL_x[letsee] # mhm = ALL_x[letsee] # cc = (ALL_xx[:,-1]-6.0)/6.0 # #plt.scatter(ALL_xx[:,0],ALL_xx[:,1],c=cc) # plt.scatter(mhm[:,2],ALL_xx[:,0],c=cc) # plt.pause(0.01); fig = plt.figure(1) plt.clf() ax = fig.add_subplot(121, projection='3d') ax.scatter(mini_reach[:, 0], mini_reach[:, 1], mini_reach[:, 2]) _, nn_vals = getPI(grid_check, ALL_PI) fi = (np.abs(nn_vals) < 0.05) mini_reach_ = grid_check[fi[:, 0]] ax = fig.add_subplot(122, projection='3d') ax.scatter(mini_reach_[:, 0], mini_reach_[:, 1], mini_reach_[:, 2]) plt.pause(0.01) plt.figure(3) d = 0.1 plt.clf() plt.title( str([ str(i) + " : " + str(perms[i]) for i in range(len(perms)) ])) ALL_xp = np.random.uniform(-6.0, 6.0, (nrolls / 100, layers[0] - 1)) plt.subplot(2, 3, 1) #SUBPLOT ALL_xp[:, 2] = 0.0 + d #np.pi/2.0; ALL_xp[:, 3] = 9.0 letsee_ = sess.run(Tt, {states: ConvCosSin(ALL_xp)}) letsee_ = letsee_.argmax(axis=1) plt.scatter(ALL_xp[:, 0], ALL_xp[:, 1], c=letsee_) plt.colorbar() plt.subplot(2, 3, 2) #SUBPLOT ALL_xp[:, 2] = np.pi / 2 + d #2*np.pi/3.0; ALL_xp[:, 3] = 9.0 letsee_ = sess.run(Tt, {states: ConvCosSin(ALL_xp)}) letsee_ = letsee_.argmax(axis=1) plt.scatter(ALL_xp[:, 0], ALL_xp[:, 1], c=letsee_) plt.colorbar() plt.subplot(2, 3, 3) #SUBPLOT ALL_xp[:, 2] = np.pi + d #4*np.pi/3.0; ALL_xp[:, 3] = 9.0 letsee_ = sess.run(Tt, {states: 
ConvCosSin(ALL_xp)}) letsee_ = letsee_.argmax(axis=1) plt.scatter(ALL_xp[:, 0], ALL_xp[:, 1], c=letsee_) plt.colorbar() plt.subplot(2, 3, 4) #SUBPLOT ALL_xp[:, 2] = 0.0 - d #np.pi/2.0; ALL_xp[:, 3] = 9.0 letsee_ = sess.run(Tt, {states: ConvCosSin(ALL_xp)}) letsee_ = letsee_.argmax(axis=1) plt.scatter(ALL_xp[:, 0], ALL_xp[:, 1], c=letsee_) plt.colorbar() plt.subplot(2, 3, 5) #SUBPLOT ALL_xp[:, 2] = np.pi / 2 - d #2*np.pi/3.0; ALL_xp[:, 3] = 9.0 letsee_ = sess.run(Tt, {states: ConvCosSin(ALL_xp)}) letsee_ = letsee_.argmax(axis=1) plt.scatter(ALL_xp[:, 0], ALL_xp[:, 1], c=letsee_) plt.colorbar() plt.subplot(2, 3, 6) #SUBPLOT ALL_xp[:, 2] = np.pi - d #4*np.pi/3.0; ALL_xp[:, 3] = 9.0 letsee_ = sess.run(Tt, {states: ConvCosSin(ALL_xp)}) letsee_ = letsee_.argmax(axis=1) plt.scatter(ALL_xp[:, 0], ALL_xp[:, 1], c=letsee_) plt.colorbar() plt.pause(0.01) k = 0 ALL_x = np.random.uniform(-6.0, 6.0, (nrolls, layers[0] - 1)) ALL_x[:, 2] = ALL_x[:, 2] * np.pi / 6.0 + np.pi ALL_x[:, 3] = ALL_x[:, 3] * 3.0 / 6.0 + 9.0 PI, _ = getPI(ALL_x, ALL_PI) pre_ALL_x = ConvCosSin(ALL_x) ALL_x_ = np.random.uniform(-6.0, 6.0, (nrolls / 100, layers[0] - 1)) ALL_x_[:, 2] = ALL_x_[:, 2] * np.pi / 6.0 + np.pi ALL_x_[:, 3] = ALL_x_[:, 3] * 3.0 / 6.0 + 9.0 PI_, _ = getPI(ALL_x_, ALL_PI) pre_ALL_x_ = ConvCosSin(ALL_x_) tmp = np.random.randint(len(reach100s[:, :-1]), size=12000) _, ZR = getPI(reach100s[tmp, :-1], ALL_PI) #ZR = sess.run(Tt,{states:reach100s[:,:-1]}); error1 = ZR - reach100s[tmp, -1, None] plt.figure(2) _, Z000 = getPI(grid_eval, ALL_PI) _, Z001 = getPI(grid_eval_, ALL_PI) _, Z002 = getPI(grid_eval__, ALL_PI) Z000 = np.reshape(Z000, X.shape) Z001 = np.reshape(Z001, X.shape) Z002 = np.reshape(Z002, X.shape) #filter_in = (Z000 <= 0.05) #& (Z000 >= 0.05); filter_out = (Z000 > 0.00) #| (Z000 < -0.05); filter_out_ = (Z001 > 0.00) #| (Z000 < -0.05); filter_out__ = (Z002 > 0.00) #| (Z000 < -0.05); #Z000[filter_in] = 1.0; Z000[filter_out] = 0.0 Z001[filter_out_] = 0.0 Z002[filter_out__] = 0.0 _, 
Z000l = getPI(grid_evall, ALL_PI) _, Z001l = getPI(grid_evall_, ALL_PI) _, Z002l = getPI(grid_evall__, ALL_PI) Z000l = np.reshape(Z000l, X.shape) Z001l = np.reshape(Z001l, X.shape) Z002l = np.reshape(Z002l, X.shape) #filter_in = (Z000 <= 0.05) #& (Z000 >= 0.05); filter_outl = (Z000l > 0.00) #| (Z000 < -0.05); filter_out_l = (Z001l > 0.00) #| (Z000 < -0.05); filter_out__l = (Z002l > 0.00) #| (Z000 < -0.05); #Z000[filter_in] = 1.0; Z000l[filter_outl] = 0.0 Z001l[filter_out_l] = 0.0 Z002l[filter_out__l] = 0.0 plt.clf() #plt.plot(ALL_t_, np.abs(allE), 'ro'); #plt.axis([-1.0, 0.0, 0.0, 10.0]) plt.subplot(2, 3, 1) plt.imshow(Z000, cmap='gray') plt.plot([30 * f, 30 * f], [30 * f, 70 * f], 'r-', lw=1) plt.plot([30 * f, 70 * f], [70 * f, 70 * f], 'r-', lw=1) plt.plot([70 * f, 70 * f], [70 * f, 30 * f], 'r-', lw=1) plt.plot([70 * f, 30 * f], [30 * f, 30 * f], 'r-', lw=1) plt.subplot(2, 3, 2) plt.imshow(Z001, cmap='gray') plt.plot([30 * f, 30 * f], [30 * f, 70 * f], 'r-', lw=1) plt.plot([30 * f, 70 * f], [70 * f, 70 * f], 'r-', lw=1) plt.plot([70 * f, 70 * f], [70 * f, 30 * f], 'r-', lw=1) plt.plot([70 * f, 30 * f], [30 * f, 30 * f], 'r-', lw=1) plt.subplot(2, 3, 3) plt.imshow(Z002, cmap='gray') plt.plot([30 * f, 30 * f], [30 * f, 70 * f], 'r-', lw=1) plt.plot([30 * f, 70 * f], [70 * f, 70 * f], 'r-', lw=1) plt.plot([70 * f, 70 * f], [70 * f, 30 * f], 'r-', lw=1) plt.plot([70 * f, 30 * f], [30 * f, 30 * f], 'r-', lw=1) plt.subplot(2, 3, 4) plt.imshow(Z000l, cmap='gray') plt.plot([30 * f, 30 * f], [30 * f, 70 * f], 'r-', lw=1) plt.plot([30 * f, 70 * f], [70 * f, 70 * f], 'r-', lw=1) plt.plot([70 * f, 70 * f], [70 * f, 30 * f], 'r-', lw=1) plt.plot([70 * f, 30 * f], [30 * f, 30 * f], 'r-', lw=1) plt.subplot(2, 3, 5) plt.imshow(Z001l, cmap='gray') plt.plot([30 * f, 30 * f], [30 * f, 70 * f], 'r-', lw=1) plt.plot([30 * f, 70 * f], [70 * f, 70 * f], 'r-', lw=1) plt.plot([70 * f, 70 * f], [70 * f, 30 * f], 'r-', lw=1) plt.plot([70 * f, 30 * f], [30 * f, 30 * f], 'r-', lw=1) 
plt.subplot(2, 3, 6) plt.imshow(Z002l, cmap='gray') plt.plot([30 * f, 30 * f], [30 * f, 70 * f], 'r-', lw=1) plt.plot([30 * f, 70 * f], [70 * f, 70 * f], 'r-', lw=1) plt.plot([70 * f, 70 * f], [70 * f, 30 * f], 'r-', lw=1) plt.plot([70 * f, 30 * f], [30 * f, 30 * f], 'r-', lw=1) plt.pause(0.01) t = t - dt #sess.run(set_to_not_zero); print str(t) + " || " + str(np.max(np.abs(error1))) + " , " + str( np.mean(np.abs(error1))) + "|ITR=" + str(i) #VAR # plt.figure(4) # plt.clf(); # plt.title(str([str(i)+" : "+str(perms[i]) for i in range(len(perms))])) # b_sele = (ALL_x[:,-1] < 6.1); # ALL_xp = ALL_x[b_sele]; # letsee_ = PI[b_sele]; # b_sele = (np.abs(ALL_xp[:,2]-np.pi/2.0 + 0.1) < 0.1); # ALL_xp = ALL_xp[b_sele]; # letsee_ = letsee_[b_sele]; # _,_ = getPI(ALL_xp); # #plt.subplot(2,3,1) #SUBPLOT # letsee_ = letsee_.argmax(axis=1); # plt.scatter(ALL_xp[:,0],ALL_xp[:,1],c=letsee_) # plt.colorbar() # plt.pause(0.01) # woot = np.array([[-0.15023694, -4.03420314, 1.56425333, 6.02741677], # [ 0.10373495, -4.34956515, 1.50186123, 6.08060291], # [ 0.13439703, -5.47363893, 1.60820922, 6.0519111 ], # [ 0.07739933, -4.93777028, 1.57579839, 6.00117299]]) # _,_ = getPI(woot,ALL_PI); #elif(i is 0): elif (np.mod(i, renew) == 0 and i is 0): k = 0 # sess.run(set_to_zero); ALL_x = np.random.uniform(-6.0, 6.0, (nrolls, layers[0] - 1)) ALL_x[:, 2] = ALL_x[:, 2] * np.pi / 6.0 + np.pi ALL_x[:, 3] = ALL_x[:, 3] * 3.0 / 6.0 + 9.0 PI, _ = getPI(ALL_x) pre_ALL_x = ConvCosSin(ALL_x) # load = sio.loadmat('opt_a_0.5.mat'); # load = load['data_s'] # ALL_x = load[:,:-1] # ALL_x[:,2] = ALL_x[:,2] + np.pi; # pre_ALL_x = ConvCosSin(ALL_x); # acts = load[:,-1,None] # PI = sess.run(make_hot,{hot_input:acts[:,0]}) ALL_x_ = np.random.uniform(-6.0, 6.0, (nrolls / 100, layers[0] - 1)) ALL_x_[:, 2] = ALL_x_[:, 2] * np.pi / 6.0 + np.pi ALL_x_[:, 3] = ALL_x_[:, 3] * 3.0 / 6.0 + 9.0 PI_, _ = getPI(ALL_x_) pre_ALL_x_ = ConvCosSin(ALL_x_) # sess.run(set_to_not_zero); plt.figure(4) #plt.clf(); plt.title( str([ 
str(i) + " : " + str(perms[i]) for i in range(len(perms)) ])) b_sele = (np.abs(ALL_x[:, -1] - 7.5) < 0.1) ALL_xp = ALL_x[b_sele] letsee_ = PI[b_sele] b_sele = (ALL_xp[:, 2] < 0.1) ALL_xp = ALL_xp[b_sele] letsee_ = letsee_[b_sele] #plt.subplot(2,3,1) #SUBPLOT letsee_ = letsee_.argmax(axis=1) plt.scatter(ALL_xp[:, 0], ALL_xp[:, 1], c=letsee_) plt.colorbar() # plt.subplot(2,3,2) #SUBPLOT # ALL_xp[:,2] = np.pi/2.0; # ALL_xp[:,3] = 6.0; # letsee_ = sess.run(Tt,{states:ALL_xp}); # letsee_ = letsee_.argmax(axis=1); # plt.scatter(ALL_xp[:,0],ALL_xp[:,1],c=letsee_) # plt.colorbar() # plt.subplot(2,3,3) #SUBPLOT # ALL_xp[:,2] = np.pi; # ALL_xp[:,3] = 6.0; # letsee_ = sess.run(Tt,{states:ALL_xp}); # letsee_ = letsee_.argmax(axis=1); # plt.scatter(ALL_xp[:,0],ALL_xp[:,1],c=letsee_) # plt.colorbar() # plt.subplot(2,3,4) #SUBPLOT # ALL_xp[:,2] = 0.0#np.pi/2.0; # ALL_xp[:,3] = 12.0; # letsee_ = sess.run(Tt,{states:ALL_xp}); # letsee_ = letsee_.argmax(axis=1); # plt.scatter(ALL_xp[:,0],ALL_xp[:,1],c=letsee_) # plt.colorbar() # plt.subplot(2,3,5) #SUBPLOT # ALL_xp[:,2] = np.pi/2.0; # ALL_xp[:,3] = 12.0; # letsee_ = sess.run(Tt,{states:ALL_xp}); # letsee_ = letsee_.argmax(axis=1); # plt.scatter(ALL_xp[:,0],ALL_xp[:,1],c=letsee_) # plt.colorbar() # plt.subplot(2,3,6) #SUBPLOT # ALL_xp[:,2] = np.pi; # ALL_xp[:,3] = 12.0; # letsee_ = sess.run(Tt,{states:ALL_xp}); # letsee_ = letsee_.argmax(axis=1); # plt.scatter(ALL_xp[:,0],ALL_xp[:,1],c=letsee_) # plt.colorbar() plt.pause(0.01) # |||||||||||| ---- PRINT ----- |||||||||||| if (np.mod(i, 200) == 0): #xel = sess.run(L,{states:ALL_x,y:PI}); #test_e = sess.run(L,{states:ALL_x_,y:PI_}); train_acc = sess.run(accuracy, { states: pre_ALL_x, y: PI }) test_acc = sess.run(accuracy, { states: pre_ALL_x_, y: PI_ }) #o = np.random.randint(len(ALL_x)); print str(i) + ") | TR_ACC = " + str( train_acc) + " | TE_ACC = " + str( test_acc) + " | Lerning Rate = " + str(nunu) #print str(i) + ") | XEL = " + str(xel) + " | Test_E = " + str(test_e) + " | 
Lerning Rate = " + str(nunu) #print str(PI[[o],:]) + " || " + str(sess.run(l_r[-1],{states:ALL_x[[o],:]})) #+ " || " + str(sess.run(gvs[-1],{states:ALL_x,y:PI})) nunu = 0.001 #/(np.sqrt(np.mod(i,renew))+1.0)#lr_schedule.value(i); #nunu = ler_r/(np.mod(i,renew)+1.0); tmp = np.random.randint(len(ALL_x), size=bts) sess.run(train_step, feed_dict={ states: pre_ALL_x[tmp], y: PI[tmp], nu: nunu }) #tmp = np.random.randint(len(reach100s), size=bts); #sess.run(train_step, feed_dict={states:reach100s[tmp,:-1],y:reach100s[tmp,-1,None],nu:nunu}); pickle.dump(ALL_PI, open("policies4D_reach_h20_h20.pkl", "wb"))
def main(layers, t_hor, ind, nrolls, bts, ler_r, mom, teps, renew, imp, q):
    """Train a stack of time-indexed action classifiers for the 6-state model.

    Every ``renew`` iterations the current network weights are pushed to the
    front of ``ALL_PI`` and a fresh labelled dataset is generated with
    ``getPI`` (which rolls the dynamics forward under each extreme input and
    then under the stored policies).  Between refreshes the network is trained
    with RMSProp on random minibatches.  When the loop ends, ``ALL_PI`` is
    pickled to ``policies6Dreach_h50.pkl``.

    Args:
        layers: layer sizes; passed to ``TransDef`` and used to count the
            parameters.  ``layers[0] - 1`` is the width of the sampled states.
        t_hor: time horizon; the loop runs ``int(|t_hor| / dt) * renew + 1``
            iterations.
        ind: worker index, only used in the start-up log line.
        nrolls: number of training states sampled per refresh (the test set
            uses ``nrolls // 100``).
        bts: minibatch size.
        ler_r, teps, imp, q: accepted for interface compatibility; not used.
        mom: momentum passed to ``tf.train.RMSPropOptimizer``.
        renew: number of training iterations between dataset refreshes.
    """
    # Quad params: per-channel lower / upper bound of the control input.
    max_list = [0.1, 0.1, 11.81]
    min_list = [-0.1, -0.1, 7.81]
    g = 9.81

    print('Starting worker-' + str(ind))

    # Calculate number of parameters of the policy (weights + biases).
    nofparams = sum(n_in * n_out + n_out
                    for n_in, n_out in zip(layers, layers[1:]))
    print('Number of Params is: ' + str(nofparams))

    center = np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])
    depth = 2.0
    incl = 1.0

    ##################### DEFINITIONS #####################
    dt = 0.05  #VAR
    num_ac = 3
    iters = int(np.abs(t_hor) / dt) * renew + 1

    ##################### INSTANTIATIONS #################
    states, y, Tt, L, l_r, lb, reg, cross_entropy = TransDef(
        "Critic", False, layers, depth, incl, center)
    predicted = tf.argmax(Tt, dimension=1)
    expected = tf.argmax(y, dimension=1)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(predicted, expected), tf.float32))

    # DEFINE OPTIMIZER
    nu = tf.placeholder(tf.float32, shape=[])  #VAR
    lr_schedule = PiecewiseSchedule([
        (0, 0.1),
        (10000, 0.01),
        (20000, 0.001),
        (30000, 0.0001),
    ], outside_value=0.0001)
    train_step = tf.train.RMSPropOptimizer(learning_rate=nu,
                                           momentum=mom).minimize(L)

    hot_input = tf.placeholder(tf.int64, shape=(None))
    make_hot = tf.one_hot(hot_input, 2**num_ac, on_value=1, off_value=0)

    # INITIALIZE GRAPH
    theta = tf.trainable_variables()
    # Build the "load these parameters" op once.  Calling ``var.assign(...)``
    # inside getPI would add new nodes to the graph on every call.
    theta_feed = [
        tf.placeholder(v.dtype.base_dtype, shape=v.get_shape()) for v in theta
    ]
    load_theta = tf.group(
        *[v.assign(p) for v, p in zip(theta, theta_feed)])

    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)

    def set_theta(params):
        """Overwrite the trainable variables with the arrays in *params*."""
        sess.run(load_theta, dict(zip(theta_feed, params)))

    def V_0(x):
        """Terminal value: infinity norm of each row minus 1."""
        return np.linalg.norm(x, ord=np.inf, axis=1, keepdims=True) - 1.0

    def F(ALL_x, opt_a, opt_b):
        """Right-hand side of the dynamics for a batch of states.

        ``opt_a`` is the control (three columns), ``opt_b`` a three-column
        term subtracted from the first three derivatives.  Every caller in
        this function passes ``opt_b=None``; that used to raise a TypeError
        on the first subscript, so ``None`` is treated as an all-zero term.
        """
        if opt_b is None:
            opt_b = np.zeros((ALL_x.shape[0], 3))
        # NOTE(review): these derivatives read state columns 1..3, while V_0
        # and ConvCosSin treat columns 0..2 as one group and 3..5 as the
        # other -- confirm the intended state ordering.
        col1 = ALL_x[:, 1, None] - opt_b[:, 0, None]
        col2 = ALL_x[:, 2, None] - opt_b[:, 1, None]
        col3 = ALL_x[:, 3, None] - opt_b[:, 2, None]
        col4 = g * opt_a[:, 0, None]
        col5 = -g * opt_a[:, 1, None]
        col6 = opt_a[:, 2, None] - g
        return np.concatenate((col1, col2, col3, col4, col5, col6), axis=1)

    ####################### RECURSIVE FUNC ####################
    def RK4(ALL_x, dtt, opt_a, opt_b):
        """Advance *ALL_x* by one classic Runge-Kutta step of length *dtt*.

        The inputs ``opt_a`` / ``opt_b`` are held constant over the step.
        """
        k1 = F(ALL_x, opt_a, opt_b)
        k2 = F(ALL_x + np.multiply(dtt / 2.0, k1), opt_a, opt_b)
        k3 = F(ALL_x + np.multiply(dtt / 2.0, k2), opt_a, opt_b)
        k4 = F(ALL_x + np.multiply(dtt, k3), opt_a, opt_b)
        return ALL_x + np.multiply((dtt / 6.0),
                                   (k1 + 2.0 * k2 + 2.0 * k3 + k4))

    # All 2**num_ac combinations of "lower bound" (-1) / "upper bound" (+1)
    # per control channel, and the corresponding numeric control vectors.
    perms = list(itertools.product([-1, 1], repeat=num_ac))
    true_ac_list = [[
        hi if sign == 1 else lo
        for sign, lo, hi in zip(signs, min_list, max_list)
    ] for signs in perms]

    def Hot_to_Cold(hots, ac_list):
        """Map each row of scores to the control vector of its argmax."""
        chosen = hots.argmax(axis=1)
        return np.asarray([ac_list[j] for j in chosen])

    def getPI(ALL_x, F_PI=None, subSamples=1):
        """Label states with the best first action.

        For each state every control in ``true_ac_list`` is applied for one
        ``dt`` (split into ``subSamples`` RK4 sub-steps); the resulting
        states are then propagated one ``dt`` per entry of ``F_PI`` using the
        network loaded with that entry's parameters.  The action whose final
        state has the smallest ``V_0`` wins.

        Returns:
            (one-hot labels, best final values, boolean row mask).  Labels
            and values only contain the rows where the mask is True, i.e.
            where the largest final value over all actions exceeds -1.0.
        """
        F_PI = [] if F_PI is None else F_PI
        current_params = sess.run(theta)
        sub_dt = dt / float(subSamples)

        first_steps = []
        for ac in true_ac_list:
            opt_a = np.asarray(ac) * np.ones([ALL_x.shape[0], 1])
            Snx = ALL_x
            for _ in range(subSamples):
                Snx = RK4(Snx, sub_dt, opt_a, None)
            first_steps.append(Snx)
        next_states = np.concatenate(first_steps, axis=0)

        # NOTE(review): ``values`` tracks the minimum of V_0 along the
        # trajectory but the labels below are derived from the final state
        # only -- confirm which one is intended.
        values = V_0(next_states[:, [0, 1, 2]])
        for params in F_PI:
            set_theta(params)  # load pi*(x, t + dt)
            hots = sess.run(Tt, {states: ConvCosSin(next_states)})
            opt_a = Hot_to_Cold(hots, true_ac_list)
            for _ in range(subSamples):
                next_states = RK4(next_states, sub_dt, opt_a, None)
            values = np.min((values, V_0(next_states[:, [0, 1, 2]])), axis=0)

        values_ = V_0(next_states[:, [0, 1, 2]])
        # One row per state, one column per first action.
        compare_vals_ = values_.reshape([-1, ALL_x.shape[0]]).T
        index_best_a_ = compare_vals_.argmin(axis=1)
        values_ = np.min(compare_vals_, axis=1, keepdims=True)
        filterr = np.max(compare_vals_, axis=1) > -1.0
        index_best_a_ = index_best_a_[filterr]
        values_ = values_[filterr]
        print("States filtered out: " + str(len(filterr) - np.sum(filterr)))

        set_theta(current_params)  # restore the network being trained
        return sess.run(make_hot, {hot_input: index_best_a_}), values_, filterr

    def ConvCosSin(ALL_x):
        """Scale columns 0..2 by 1/5 and columns 3..5 by 1/10."""
        pos = ALL_x[:, [0, 1, 2]] / 5.0
        vel = ALL_x[:, [3, 4, 5]] / 10.0
        return np.concatenate((pos, vel), axis=1)

    def make_dataset(n_samples, F_PI):
        """Sample states, label them with getPI and keep the unfiltered rows.

        Returns the network inputs (after ConvCosSin) and one-hot labels.
        """
        xs = np.random.uniform(-5.0, 5.0, (n_samples, layers[0] - 1))
        xs[:, [3, 4, 5]] = xs[:, [3, 4, 5]] * 2.0
        labels, _, keep = getPI(xs, F_PI, subSamples=3)
        return ConvCosSin(xs[keep]), labels

    # *************************************************************************
    # ============================= MAIN LOOP ================================
    # *************************************************************************
    ALL_PI = []
    nunu = lr_schedule.value(0)

    for i in xrange(iters):
        if np.mod(i, renew) == 0:
            if i != 0:
                # Freeze the policy just trained and relabel against it.
                ALL_PI.insert(0, sess.run(theta))
                pre_ALL_x, PI = make_dataset(nrolls, ALL_PI)
                pre_ALL_x_, PI_ = make_dataset(nrolls // 100, ALL_PI)
                print('Again.')
            else:
                started = time.time()
                pre_ALL_x, PI = make_dataset(nrolls, [])
                print("Compute Data Time = " + str(time.time() - started))
                pre_ALL_x_, PI_ = make_dataset(nrolls // 100, [])

        # |||||||||||| ---- PRINT ----- ||||||||||||
        if np.mod(i, 200) == 0:
            train_acc = sess.run(accuracy, {states: pre_ALL_x, y: PI})
            test_acc = sess.run(accuracy, {states: pre_ALL_x_, y: PI_})
            print(str(i) + ") | TR_ACC = " + str(train_acc) +
                  " | TE_ACC = " + str(test_acc) +
                  " | Lerning Rate = " + str(nunu))

        nunu = 0.01
        tmp = np.random.randint(len(pre_ALL_x), size=bts)
        sess.run(train_step,
                 feed_dict={
                     states: pre_ALL_x[tmp],
                     y: PI[tmp],
                     nu: nunu
                 })

    with open("policies6Dreach_h50.pkl", "wb") as out_file:
        pickle.dump(ALL_PI, out_file)
def main(layers, t_hor, ind, nrolls, bts, ler_r, mom, teps, renew, imp, q):
    """Fit a value network for the 4-state model by repeated one-step backup.

    Every ``renew`` iterations a new batch of states is sampled and its
    regression targets are recomputed with ``V_ret`` (best network value over
    the four extreme inputs after one RK4 step).  The network is trained with
    RMSProp on random minibatches in between.  At each refresh the function
    also prints how often the gradient-derived action agrees with the
    enumerated one, the error against the reference data in ``flat_1s.mat``,
    and draws six slices of the current value function.

    Args:
        layers: layer sizes; passed to ``TransDef`` and used to count the
            parameters.  ``layers[0]`` is the width of the sampled states.
        t_hor, ler_r, teps, imp, q: accepted for interface compatibility;
            not used by the computation.
        ind: worker index, only used in the start-up log line.
        nrolls: number of training states per refresh (the test set uses
            ``nrolls // 1000``).
        bts: minibatch size.
        mom: momentum passed to ``tf.train.RMSPropOptimizer``.
        renew: number of training iterations between refreshes.
    """
    # Quad params: symmetric bounds on the two control channels.
    wMax = 3.0
    wMin = -1.0 * wMax
    aMax = 2 * np.pi / 10.0
    aMin = -1.0 * aMax
    max_list = [wMax, aMax]

    print('Starting worker-' + str(ind))

    Nx = 101
    minn = [-5.0, -5.0, 0.0, 6.0]
    maxx = [5.0, 5.0, 2 * np.pi, 12.0]

    X = np.linspace(minn[0], maxx[0], Nx)
    Y = np.linspace(minn[1], maxx[1], Nx)
    X, Y = np.meshgrid(X, Y)
    XX = np.reshape(X, [-1, 1])
    YY = np.reshape(Y, [-1, 1])

    def grid_slice(third, fourth):
        """x/y evaluation grid with constant third and fourth state columns."""
        return np.concatenate(
            (XX, YY, third * np.ones(XX.shape), fourth * np.ones(XX.shape)),
            axis=1)

    # Plot order: three values of state column 2 at column 3 == 6.0, then
    # the same three at column 3 == 12.0.
    eval_grids = [
        grid_slice(0.0, 6.0),
        grid_slice((2.0 / 3.0) * np.pi, 6.0),
        grid_slice((4.0 / 3.0) * np.pi, 6.0),
        grid_slice(0.0, 12.0),
        grid_slice((2.0 / 3.0) * np.pi, 12.0),
        grid_slice((4.0 / 3.0) * np.pi, 12.0),
    ]

    # Reference data: last column is the target value, the rest the state.
    reach100s = sio.loadmat('flat_1s.mat')
    reach100s = reach100s["M"]
    reach100s[:, [1, 2]] = reach100s[:, [2, 1]]
    reach100s[:, 2] = np.mod(reach100s[:, 2], 2.0 * np.pi)

    nofparams = sum(n_in * n_out + n_out
                    for n_in, n_out in zip(layers, layers[1:]))
    print('Number of Params is: ' + str(nofparams))

    iters = 1000000  #VAR
    center = np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])
    depth = 2.0
    incl = 1.0

    ##################### DEFINITIONS #####################
    dt = 0.05  #VAR
    num_ac = 2

    ##################### INSTANTIATIONS #################
    states, y, Tt, l_r, lb, reg = TransDef("Critic", False, layers, depth,
                                           incl, center)

    V_func_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='Critic')
    var_grad_ = tf.gradients(Tt, states)[0]
    grad_x = tf.slice(var_grad_, [0, 0], [-1, layers[0] - 1])

    ordered_vars = sorted(V_func_vars, key=lambda v: v.name)
    set_to_zero = tf.group(
        *[var.assign(tf.zeros(tf.shape(var))) for var in ordered_vars])
    set_to_not_zero = tf.group(*[
        var.assign(tf.random_uniform(tf.shape(var), minval=-0.1, maxval=0.1))
        for var in ordered_vars
    ])

    # DEFINE LOSS: RMS error plus a (currently zero-weighted) gradient term.
    beta = 0.00
    L = tf.sqrt(
        tf.reduce_mean(
            tf.reduce_sum(tf.square(tf.sub(y, Tt)), 1, keep_dims=True))
    ) + beta * tf.reduce_mean(
        tf.reduce_max(tf.abs(grad_x), reduction_indices=1, keep_dims=True))

    # DEFINE OPTIMIZER
    nu = tf.placeholder(tf.float32, shape=[])  #VAR
    lr_schedule = PiecewiseSchedule([
        (0, 0.01),
        (renew * 2 / 4, 0.007),
        (renew * 3 / 4, 0.005),
        (renew * 4 / 4, 0.002),
    ], outside_value=0.001)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=nu, momentum=mom)
    gvs = optimizer.compute_gradients(L, V_func_vars)
    # NOTE(review): the clipped gradients are built but the unclipped ``gvs``
    # are what gets applied -- confirm whether clipping should be active.
    capped_gvs = [(tf.clip_by_value(grad, -3., 3.), var) for grad, var in gvs]
    train_step = optimizer.apply_gradients(gvs)

    # INITIALIZE GRAPH
    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)

    def p_corr(ALL_x):
        """Wrap angles into [0, 2*pi)."""
        return np.mod(ALL_x, 2.0 * np.pi)

    def F(ALL_x, opt_a, opt_b):
        """Dynamics: column 3 is a speed along the angle in column 2; the two
        controls drive columns 2 and 3 directly.  ``opt_b`` is ignored."""
        sin_phi = np.sin(ALL_x[:, 2, None])
        cos_phi = np.cos(ALL_x[:, 2, None])
        col1 = np.multiply(ALL_x[:, 3, None], cos_phi)
        col2 = np.multiply(ALL_x[:, 3, None], sin_phi)
        col3 = opt_a[:, 0, None]
        col4 = opt_a[:, 1, None]
        return np.concatenate((col1, col2, col3, col4), axis=1)

    ####################### RECURSIVE FUNC ####################
    def RK4(ALL_x, dtt, opt_a, opt_b):
        """One classic Runge-Kutta step of length *dtt*; the angle column is
        wrapped at every stage and in the result."""

        def stage(scale, slope):
            probe = ALL_x + np.multiply(scale, slope)
            probe[:, 2] = p_corr(probe[:, 2])
            return F(probe, opt_a, opt_b)

        k1 = F(ALL_x, opt_a, opt_b)
        k2 = stage(dtt / 2.0, k1)
        k3 = stage(dtt / 2.0, k2)
        k4 = stage(dtt, k3)
        Snx = ALL_x + np.multiply((dtt / 6.0),
                                  (k1 + 2.0 * k2 + 2.0 * k3 + k4))
        Snx[:, 2] = p_corr(Snx[:, 2])
        return Snx

    def opt_ac(grad):
        """Bang-bang control from the sign of gradient columns 2 and 3.

        Each sign is multiplied by the negative bound (``wMin`` / ``aMin``).
        Returns ``(controls, None)``.
        """
        opt_dir_1_ = np.sign(grad[:, 2, None]) * wMin
        opt_dir_2_ = np.sign(grad[:, 3, None]) * aMin
        return np.concatenate((opt_dir_1_, opt_dir_2_), axis=1), None

    def V_ret(ALL_x):
        """One-step backup by enumeration.

        Applies each of the ``2**num_ac`` extreme controls for one ``dt`` and
        evaluates the network at the successor.  Returns the smallest
        successor value per state and the control that first achieved it.
        """
        n_states = ALL_x.shape[0]
        due = np.inf * np.ones([n_states, 1])
        opt_actions = np.zeros([n_states, num_ac])
        for signs in itertools.product([-1, 1], repeat=num_ac):
            ac_list = [s * bound for s, bound in zip(signs, max_list)]
            opt_a = np.asarray(ac_list) * np.ones([n_states, 1])
            Snx = RK4(ALL_x, dt, opt_a, None)
            due_tmp = np.min(np.concatenate(
                (due, sess.run(Tt, {states: Snx})), axis=1),
                             axis=1,
                             keepdims=True)
            b_indexes = (due_tmp < due)[:, 0]  # strictly better than so far
            opt_actions[b_indexes] = opt_a[b_indexes]
            due = due_tmp
        return due, opt_actions

    def sample_states(n_samples):
        """Uniform states; column 2 mapped to [0, 2*pi), column 3 to [6, 12)."""
        xs = np.random.uniform(-5.0, 5.0, (n_samples, layers[0]))
        xs[:, 2] = xs[:, 2] * np.pi / 5.0 + np.pi
        xs[:, 3] = xs[:, 3] * 3.0 / 5.0 + 9.0
        return xs

    def action_agreement(xs, ref_actions):
        """Fraction of rows where the gradient-derived control equals
        *ref_actions* (compared in float32, as before)."""
        greedy, _ = opt_ac(sess.run(var_grad_, {states: xs}))
        same = np.all(
            np.float32(ref_actions[:, :2]) == np.float32(greedy[:, :2]),
            axis=1)
        return float(np.mean(same))

    # *************************************************************************
    # ============================= MAIN LOOP ================================
    # *************************************************************************
    t = 0.0
    mse = np.inf
    nunu = lr_schedule.value(0)

    for i in xrange(iters):
        if np.mod(i, renew) == 0 and i != 0:
            # Agreement is measured on the datasets that were just trained
            # on, against the enumerated actions stored for those datasets.
            # (The old comprehension reused ``i`` as its index, which under
            # Python 2 overwrote the loop counter for the rest of the
            # iteration, and divided by fixed 1e6 / 1e3.)
            train_agree = action_agreement(ALL_x, opa)
            test_agree = action_agreement(ALL_x_, opa_)
            print("Train Accuracy = " + str(train_agree) +
                  " Test Accuracy = " + str(test_agree))

            ALL_x = sample_states(nrolls)
            V, opa = V_ret(ALL_x)
            ALL_x_ = sample_states(nrolls // 1000)
            V_, opa_ = V_ret(ALL_x_)

            ZR = sess.run(Tt, {states: reach100s[:, :-1]})
            error1 = ZR - reach100s[:, -1, None]

            # Value slices with everything above zero blanked out.
            slices = []
            for grid in eval_grids:
                Z = np.reshape(sess.run(Tt, {states: grid}), X.shape)
                Z[Z > 0.00] = 0.0
                slices.append(Z)

            plt.clf()
            for panel, Z in enumerate(slices, 1):
                plt.subplot(2, 3, panel)
                plt.imshow(Z, cmap='gray')
            plt.pause(0.01)

            print(str(t) + " || " + str(np.max(np.abs(error1))) + " , " +
                  str(np.mean(np.abs(error1))) + " REG = " +
                  str(sess.run(reg)) + ") | MSE = " + str(mse) + "|ITR=" +
                  str(i))  #VAR
            t = t - dt
        elif np.mod(i, renew) == 0 and i == 0:
            # First targets come from the all-zero network; the weights are
            # randomised again before training starts.
            sess.run(set_to_zero)
            ALL_x = sample_states(nrolls)
            V, opa = V_ret(ALL_x)
            ALL_x_ = sample_states(nrolls // 1000)
            V_, opa_ = V_ret(ALL_x_)
            sess.run(set_to_not_zero)

        # |||||||||||| ---- PRINT ----- ||||||||||||
        if np.mod(i, 200) == 0:
            mse = sess.run(L, {states: ALL_x, y: V})
            test_e = sess.run(L, {states: ALL_x_, y: V_})
            print(str(i) + ") | MSE = " + str(mse) + " | Test_E = " +
                  str(test_e) + " | Lerning Rate = " + str(nunu))

        nunu = 0.001
        tmp = np.random.randint(len(ALL_x), size=bts)
        sess.run(train_step,
                 feed_dict={
                     states: ALL_x[tmp],
                     y: V[tmp],
                     nu: nunu
                 })
def main(layers,t_hor,ind,nrolls,bts,ler_r,mom,teps,renew,imp,q):
    """Train a sequence of bang-bang policy classifiers for a 6-state system.

    The state is [position, angle1, angle2, velocity, rate1, rate2] and the
    single control takes one of the two values in ``min_list``/``max_list``.
    The constants below (three masses, two link lengths) presumably describe
    a cart carrying a two-link pendulum -- confirm against the model notes.

    Every ``renew`` iterations the current network weights are pushed to the
    front of ``ALL_PI`` and a fresh labelled data set is generated by
    ``getPI``; in between, the network is trained with RMSProp on random
    mini-batches.  When the loop finishes, ``ALL_PI`` is pickled to
    ``policies6Dreach_h50.pkl``.

    Args:
        layers: layer sizes handed to ``TransDef``; ``layers[0] - 2`` is used
            as the raw state dimension (``ConvCosSin`` turns 6 raw states
            into 8 network inputs).
        t_hor: horizon; ``int(abs(t_hor) / dt) * renew + 1`` iterations run.
        ind: worker index, only printed.
        nrolls: number of states sampled for each data set.
        bts: mini-batch size.
        ler_r: unused here.
        mom: momentum passed to ``tf.train.RMSPropOptimizer``.
        teps: unused here.
        renew: number of training iterations between policy snapshots.
        imp: if equal to 1.0, load the pickled policies and enter an
            interactive replay loop that never returns.
        q: unused here.

    Changes relative to the previous revision:
        * ``F`` used ``cos(angle)`` where the angle itself was required in
          the ``cos(t1 - t2)`` / ``sin(t1 - t2)`` coupling terms.
        * ``getTraj`` injected noise when ``Noise`` was false.
        * ``grid_check`` referenced names whose definitions were commented
          out (NameError) and was never used; it is removed.
        * ``i is 0`` / ``== None`` identity misuse, mutable default
          arguments, unclosed pickle files and the ``t`` variable being
          overwritten by a wall-clock timestamp are fixed.
    """
    # ---- Physical constants -------------------------------------------
    m0 = 1.5
    m1 = 0.5
    m2 = 0.75
    L1 = 0.5
    l1 = L1/2.0
    L2 = 0.75
    l2 = L2/2.0
    I2 = m2*L2**2 / 12.0

    # Inertia-matrix coefficients.  With l1 = L1/2 and I1 = m1*L1**2/12:
    #   d2 = m1*l1 + m2*L1,  d4 = m1*l1**2 + m2*L1**2 + I1
    d1 = m0+m1+m2
    d2 = (m1/2.0 + m2)*L1
    d3 = m2*l2
    d4 = (m1/3.0 + m2)*L1**2
    d5 = m2*L1*l2
    d6 = m2*l2**2 + I2

    g = 9.81
    f1 = (m1*l1 + m2*L1)*g
    f2 = m2*l2*g

    # Lower / upper bound of the single control input.
    min_list = [-1.0]
    max_list = [1.0]

    print('Starting worker-' + str(ind))

    # Calculate number of parameters of the policy
    nofparams = 0
    for i in xrange(len(layers)-1):
        nofparams += layers[i]*layers[i+1] + layers[i+1]
    print('Number of Params is: ' + str(nofparams))

    center = np.array([[0.0,0.0,0.0,0.0,0.0,0.0]])
    depth = 2.0
    incl = 1.0

    ##################### DEFINITIONS #####################
    dt = 0.1
    num_ac = 1
    iters = int(np.abs(t_hor)/dt)*renew + 1

    ##################### INSTANTIATIONS #################
    states,y,Tt,L,l_r,lb,reg, cross_entropy = TransDef("Critic",False,layers,depth,incl,center)

    # Fraction of samples whose arg-max output matches the arg-max label.
    ola1 = tf.argmax(Tt,dimension=1)
    ola2 = tf.argmax(y,dimension=1)
    ola3 = tf.equal(ola1,ola2)
    accuracy = tf.reduce_mean(tf.cast(ola3, tf.float32))

    V_func_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='Critic')
    var_grad_ = tf.gradients(Tt,states)[0]
    grad_x = tf.slice(var_grad_,[0,0],[-1,layers[0]-1])

    # Re-initialisation ops.  Nothing in this function runs them at the
    # moment; they are kept so the constructed graph is unchanged.
    sorted_vars = sorted(V_func_vars, key=lambda v: v.name)
    set_to_zero = tf.group(
        *[var.assign(tf.zeros(tf.shape(var))) for var in sorted_vars])
    set_to_not_zero = tf.group(
        *[var.assign(tf.random_uniform(tf.shape(var),minval=-0.1,maxval=0.1))
          for var in sorted_vars])

    # DEFINE OPTIMIZER (the loss L is the one returned by TransDef)
    nu = tf.placeholder(tf.float32, shape=[])
    lr_schedule = PiecewiseSchedule([
                                         (0, 0.1),
                                         (10000, 0.01 ),
                                         (20000, 0.001 ),
                                         (30000, 0.0001 ),
                                    ],
                                    outside_value=0.0001)
    train_step = tf.train.RMSPropOptimizer(learning_rate=nu,momentum=mom).minimize(L)

    hot_input = tf.placeholder(tf.int64,shape=(None))
    make_hot = tf.one_hot(hot_input, 2**num_ac, on_value=1, off_value=0)

    # INITIALIZE GRAPH
    theta = tf.trainable_variables()
    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)

    def load_params(params):
        """Assign the arrays in ``params`` to ``theta``, position by position."""
        for var, value in zip(theta, params):
            sess.run(var.assign(value))

    def V_0(x):
        """Row-wise Euclidean norm of ``x``, returned as a column."""
        return np.linalg.norm(x,axis=1,keepdims=True)

    def p_corr(ALL_x):
        """Wrap angles into [-pi, pi)."""
        return np.mod(ALL_x + np.pi,2.0*np.pi) - np.pi

    def F(ALL_x,opt_a,opt_b):
        """Time derivative of every state row under control ``opt_a``.

        ``opt_b`` is accepted for signature compatibility and ignored.
        """
        # FIX: t1/t2 must be the angles themselves.  They were previously
        # np.cos(angle), which corrupted every cos(t1-t2)/sin(t1-t2) term.
        t1 = ALL_x[:,1,None]
        t2 = ALL_x[:,2,None]
        v1 = ALL_x[:,3,None]
        w1 = ALL_x[:,4,None]
        w2 = ALL_x[:,5,None]

        cos_t1 = np.cos(t1)
        sin_t1 = np.sin(t1)
        cos_t2 = np.cos(t2)
        sin_t2 = np.sin(t2)
        cos_12 = np.cos(t1-t2)
        sin_12 = np.sin(t1-t2)

        # Cofactors of the symmetric inertia matrix
        #   [[d1,        d2*cos_t1, d3*cos_t2],
        #    [d2*cos_t1, d4,        d5*cos_12],
        #    [d3*cos_t2, d5*cos_12, d6       ]]
        D11 = (d4*d6 - (cos_12*d5)**2.0)
        D12 = (d3*d5*cos_t2*cos_12 - d2*d6*cos_t1)
        D13 = (d2*d5*cos_12*cos_t1 - d3*d4*cos_t2)
        D21 = D12
        D22 = (d1*d6 - (d3*cos_t2)**2)
        D23 = (d2*d3*cos_t2*cos_t1 - d1*d5*cos_12)
        D31 = D13
        D32 = D23
        D33 = (d1*d4 - (d2*cos_t1)**2.0)

        # Normaliser used for all three accelerations; strictly positive
        # because of the m0*m1 term, so the divisions below are safe.
        n_c_ = L1**2*L2**2*m2*(m0*m1 + m1**2*sin_t1**2 + m1*m2*sin_t1**2 + m0*m2*sin_12**2)

        # Velocity-dependent terms (first column and diagonal are zero).
        C12 = -d2*sin_t1*w1
        C13 = -d3*sin_t2*w2
        C23 = d5*sin_12*w2
        C32 = -d5*sin_12*w1

        # Gravity terms.
        G1 = 0.0
        G2 = -f1*sin_t1
        G3 = -f2*sin_t2

        DC11 = 0.0
        DC12 = D11*C12 + D13*C32
        DC13 = D11*C13 + D12*C23
        DC21 = 0.0
        DC22 = D21*C12 + D23*C32
        DC23 = D21*C13 + D22*C23
        DC31 = 0.0
        DC32 = D31*C12 + D33*C32
        DC33 = D31*C13 + D32*C23

        DG1 = D11*G1 + D12*G2 + D13*G3
        DG2 = D21*G1 + D22*G2 + D23*G3
        DG3 = D31*G1 + D32*G2 + D33*G3

        col1 = v1
        col2 = w1
        col3 = w2
        # The 0.1 factor acts as a damping coefficient on each velocity.
        col4 = ( -(DC11*v1 + DC12*w1 + DC13*w2) - 0.1*v1 - DG1 + D11*opt_a)/n_c_
        col5 = ( -(DC21*v1 + DC22*w1 + DC23*w2) - 0.1*w1 - DG2 + D21*opt_a)/n_c_
        col6 = ( -(DC31*v1 + DC32*w1 + DC33*w2) - 0.1*w2 - DG3 + D31*opt_a)/n_c_

        return np.concatenate((col1,col2,col3,col4,col5,col6),axis=1)

    ####################### RECURSIVE FUNC ####################
    def RK4(ALL_x,dtt,opt_a,opt_b):
        """One classical Runge-Kutta step of length ``dtt``; angles re-wrapped."""
        def shifted(slope, scale):
            # Intermediate state with both angle columns wrapped again.
            tmp = ALL_x + np.multiply(scale, slope)
            tmp[:,[1,2]] = p_corr(tmp[:,[1,2]])
            return tmp

        k1 = F(ALL_x,opt_a,opt_b)
        k2 = F(shifted(k1, dtt/2.0),opt_a,opt_b)
        k3 = F(shifted(k2, dtt/2.0),opt_a,opt_b)
        k4 = F(shifted(k3, dtt),opt_a,opt_b)

        Snx = ALL_x + np.multiply((dtt/6.0),(k1 + 2.0*k2 + 2.0*k3 + k4))
        Snx[:,[1,2]] = p_corr(Snx[:,[1,2]])
        return Snx

    # Every combination of lower (-1) / upper (+1) bound per control channel.
    perms = list(itertools.product([-1,1], repeat=num_ac))
    true_ac_list = []
    for ac_tuple in perms:
        ac_list = [(tmp1==1)*tmp3 + (tmp1==-1)*tmp2 for tmp1,tmp2,tmp3 in zip(ac_tuple,min_list,max_list)]
        true_ac_list.append(ac_list)

    def Hot_to_Cold(hots,ac_list):
        """Map per-row class scores to the control of the arg-max class."""
        a = hots.argmax(axis=1)
        return np.asarray([ac_list[j] for j in a])

    def ConvCosSin(ALL_x):
        """Build network inputs: scaled pos/vel/rates plus sin/cos of angles."""
        sin_phi = np.sin(ALL_x[:,[1,2]])
        cos_phi = np.cos(ALL_x[:,[1,2]])
        pos = ALL_x[:,[0]]/5.0
        vel = ALL_x[:,[3]]/10.0
        arate = ALL_x[:,[4,5]]/5.0
        return np.concatenate((pos,vel,arate,sin_phi,cos_phi),axis=1)

    def getPI(ALL_x,F_PI=(),subSamples=1):
        """Label each state with the first action giving the lowest cost.

        For each candidate action the state is advanced by ``dt`` and then
        rolled through the stored policies in ``F_PI`` (front to back).  The
        cost of a roll-out is the largest ``V_0`` of columns [1, 2, 3] seen
        along it; the action with the smallest such cost wins.

        Returns:
            (one-hot labels, best cost per state as a column, 0).  The last
            element is a placeholder for a filter that is not implemented.
        """
        current_params = sess.run(theta)
        n_states = ALL_x.shape[0]

        next_states = []
        for ac in true_ac_list:
            opt_a = np.asarray(ac)*np.ones([n_states,1])
            Snx = ALL_x
            for _ in range(subSamples):
                Snx = RK4(Snx,dt/float(subSamples),opt_a,None)
            next_states.append(Snx)
        # Rows are grouped by action: all states for action 0, then action 1...
        next_states = np.concatenate(next_states,axis=0)
        values = V_0(next_states[:,[1,2,3]])

        try:
            for params in F_PI:
                load_params(params)  # Reload pi*(x,t+dt) parameters
                for _ in range(subSamples):
                    hots = sess.run(Tt,{states:ConvCosSin(next_states)})
                    opt_a = Hot_to_Cold(hots,true_ac_list)
                    next_states = RK4(next_states,dt/float(subSamples),opt_a,None)
                    # NOTE(review): the running max is taken after every
                    # sub-step; confirm that is the intended granularity.
                    values = np.max((values,V_0(next_states[:,[1,2,3]])),axis=0)
        finally:
            # Always put the weights being trained back.
            load_params(current_params)

        # One column per action, one row per state.
        compare_vals_ = values.reshape([-1,n_states]).T
        index_best_a_ = compare_vals_.argmin(axis=1)
        values_ = np.min(compare_vals_,axis=1,keepdims=True)
        filterr = 0

        return sess.run(make_hot,{hot_input:index_best_a_}),values_,filterr

    def getTraj(ALL_x,F_PI=(),subSamples=1,StepsLeft=None,Noise=False, Static=False):
        """Roll ``ALL_x`` forward under stored policies and record the path.

        Args:
            ALL_x: starting state(s), one row per state.
            F_PI: stored policy parameter sets.
            subSamples: integrator sub-steps per ``dt``.
            StepsLeft: how many of the last entries of ``F_PI`` to use
                (defaults to all of them).
            Noise: if true, perturb the state with N(0, 0.01**2) noise
                before every integration sub-step.
            Static: if true, prompt for a step count and apply the single
                policy ``F_PI[len(F_PI) - StepsLeft]`` for that many steps.

        Returns:
            (list of visited states, list of first-row action indices,
            ``V_0`` of columns [0, 2] of the final state).
        """
        current_params = sess.run(theta)
        if StepsLeft is None:
            StepsLeft = len(F_PI)

        # One-element list so the nested helper can rebind the state.
        cursor = [ALL_x]
        traj = [ALL_x]
        actions = []

        def advance():
            # Apply the currently loaded policy for one dt (subSamples sub-steps).
            for _ in range(subSamples):
                hots = sess.run(Tt,{states:ConvCosSin(cursor[0])})
                opt_a = Hot_to_Cold(hots,true_ac_list)
                # FIX: noise is injected only when requested (was inverted).
                if Noise:
                    cursor[0] = cursor[0] + np.random.normal(size=cursor[0].shape)*0.01
                cursor[0] = RK4(cursor[0],dt/float(subSamples),opt_a,None)
                traj.append(cursor[0])
                actions.append(hots.argmax(axis=1)[0])

        try:
            if Static:
                # NOTE: Python 2 input() evaluates what is typed; only use
                # this at a trusted interactive console.
                steps = input("How Many Steps? ")
                load_params(F_PI[len(F_PI)-StepsLeft])
                for _ in range(steps):
                    advance()
            else:
                for params in F_PI[len(F_PI)-StepsLeft:]:
                    load_params(params)  # Reload pi*(x,t+dt) parameters
                    advance()
        finally:
            load_params(current_params)

        # NOTE(review): columns [0, 2] here differ from the [1, 2, 3] used in
        # getPI; callers in this function discard the value -- confirm intent.
        return traj,actions,V_0(cursor[0][:,[0,2]])

    def sample_dataset(policies):
        """Draw ``nrolls`` random states and label them with ``getPI``."""
        pts = np.random.uniform(-5.0,5.0,(nrolls,layers[0]-2))
        pts[:,[1,2]] = pts[:,[1,2]]*np.pi/5.0   # angles in [-pi, pi]
        pts[:,[3]] = pts[:,[3]]*2.0             # velocity in [-10, 10]
        labels,_,_ = getPI(pts,policies,subSamples=3)
        return pts,ConvCosSin(pts),labels

    # *****************************************************************************
    #
    # ============================= MAIN LOOP ====================================
    #
    # *****************************************************************************
    t = 0.0          # horizon time, stepped backwards by dt at every renewal
    ALL_PI = []      # stored policies, most recent first
    nunu = lr_schedule.value(0)

    if imp == 1.0:
        # Interactive replay of previously trained policies.  This loop has
        # no exit, so training below is never reached in this mode.
        # SECURITY NOTE: pickle.load runs arbitrary code from the file and
        # Python 2 input() evaluates typed text; trusted use only.
        with open("policies6Dreach_h50.pkl", "rb") as fh:
            ALL_PI = pickle.load(fh)
        while True:
            state_get = input('State: ')
            sub_smpl = input('SUBSAMPLING: ')
            pause_len = input('Pause: ')
            s_left = input("How many steps left to go (max. "+str(len(ALL_PI))+")? -> ")
            noise = input("Noise? (0/1): ")
            stat = input("Static? (0/1): ")
            traj,act,_ = getTraj(state_get,F_PI=ALL_PI,subSamples=sub_smpl,StepsLeft=s_left,Noise=noise,Static=stat)
            all_to = np.concatenate(traj)
            plt.scatter(all_to[:,[1]],all_to[:,[2]])
            plt.pause(pause_len)

    for i in xrange(iters):
        if np.mod(i,renew) == 0 and i != 0:
            # Freeze the policy just trained and start on the next time step.
            ALL_PI.insert(0,sess.run(theta))

            # Plot the two angles along roll-outs from a few fixed states.
            plt.figure(2)
            plt.clf()
            ALL_xx = np.array([[0.0,0.1,0.1,0.0,0.0,0.0],
                               [0.0,np.pi,np.pi,0.0,0.0,0.0],
                               [0.5,0.0,0.0,0.0,0.0,0.0],
                               [0.0,-np.pi/2,np.pi/2,0.0,0.0,0.0]])
            for tmmp in range(ALL_xx.shape[0]):
                traj,act,_ = getTraj(ALL_xx[[tmmp],:],F_PI=ALL_PI,subSamples=10)
                all_to = np.concatenate(traj)
                plt.scatter(all_to[:,[1]],all_to[:,[2]])
            plt.pause(0.25)

            ALL_x,pre_ALL_x,PI = sample_dataset(ALL_PI)
            ALL_x_,pre_ALL_x_,PI_ = sample_dataset(ALL_PI)

            t = t - dt
            print('Again.')
        elif np.mod(i,renew) == 0 and i == 0:
            # First data set: no stored policies yet.  A separate name is
            # used for the stopwatch so that `t` keeps its meaning.
            t_start = time.time()
            ALL_x,pre_ALL_x,PI = sample_dataset([])
            elapsed = time.time() - t_start
            print("Compute Data Time = "+str(elapsed))

            ALL_x_,pre_ALL_x_,PI_ = sample_dataset([])

        # |||||||||||| ---- PRINT ----- ||||||||||||
        if np.mod(i,200) == 0:
            train_acc = sess.run(accuracy,{states:pre_ALL_x,y:PI})
            test_acc = sess.run(accuracy,{states:pre_ALL_x_,y:PI_})
            print(str(i) + ") | TR_ACC = " + str(train_acc) + " | TE_ACC = " + str(test_acc) + " | Lerning Rate = " + str(nunu))

        # Fixed learning rate; lr_schedule only supplies the first printed value.
        nunu = 0.01
        tmp = np.random.randint(len(ALL_x), size=bts)
        sess.run(train_step, feed_dict={states:pre_ALL_x[tmp],y:PI[tmp],nu:nunu})

    # Persist all stored policies; `with` guarantees the file is flushed/closed.
    with open("policies6Dreach_h50.pkl", "wb") as fh:
        pickle.dump(ALL_PI,fh)
def main(layers, t_hor, ind, nrolls, bts, ler_r, mom, teps, renew, imp, q):
    """Train a sequence of policy networks for a 4-state vehicle model.

    The state is [x, y, heading, speed]; the two controls are the heading
    rate (bounded by ``wMax``) and the acceleration (bounded by ``aMax``),
    as can be read off ``F`` below.  Every ``renew`` iterations the current
    weights are pushed to the front of ``ALL_PI`` and new training targets
    are produced by ``getPI``; in between, the network is fitted to those
    targets with RMSProp on random mini-batches.

    Args:
        layers: layer sizes handed to ``TransDef``; ``layers[0]`` is used as
            the number of state columns to sample.
        t_hor: horizon; unused here because ``iters`` is hard-coded.
        ind: worker index, only printed.
        nrolls: number of training states per data set (the test set uses
            ``nrolls // 100``).
        bts: mini-batch size.
        ler_r: unused here.
        mom: momentum passed to ``tf.train.RMSPropOptimizer``.
        teps: unused here.
        renew: number of training iterations between policy snapshots.
        imp: unused here.
        q: unused here.

    Changes relative to the previous revision (all in ``getPI`` unless
    noted):
        * stored parameters were reloaded with a stale index
          (``params[i]`` instead of ``params[ind]``);
        * roll-outs restarted from ``ALL_x`` instead of continuing from
          ``next_states`` (row-count mismatch with the controls);
        * ``np.min((a, b), axis=1)`` collapsed the sample axis where an
          element-wise minimum was needed;
        * ``values.reshape([N, -1])`` mixed different states into one row;
          the layout is action-major, so ``reshape([-1, N]).T`` is required;
        * ``Hot_to_Cold`` used ``not`` on an array and a mis-shaped mask;
        * ``i is 0`` identity comparisons and the mutable default argument.
    """
    # Control bounds: heading rate and acceleration, symmetric about zero.
    wMax = 3.0
    wMin = -1.0 * wMax
    aMax = 2 * np.pi / 10.0
    aMin = -1.0 * aMax
    max_list = [wMax, aMax]
    min_list = [wMin, aMin]

    print('Starting worker-' + str(ind))

    # Reference data set.  It is only consumed by evaluation code that is
    # currently disabled; the load is kept so a missing file still fails
    # up front.  NOTE(review): drop if the evaluation is gone for good.
    reach100s = sio.loadmat('flat_1s.mat')
    reach100s = reach100s["M"]
    reach100s[:, [1, 2]] = reach100s[:, [2, 1]]
    reach100s[:, 2] = np.mod(reach100s[:, 2], 2.0 * np.pi)

    nofparams = 0
    for i in xrange(len(layers) - 1):
        nofparams += layers[i] * layers[i + 1] + layers[i + 1]
    print('Number of Params is: ' + str(nofparams))

    iters = 1000000
    center = np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])
    depth = 2.0
    incl = 1.0

    ##################### DEFINITIONS #####################
    dt = 0.05
    num_ac = 2

    ##################### INSTANTIATIONS #################
    states, y, Tt, L, l_r, lb, reg, cross_entropy = TransDef(
        "Critic", False, layers, depth, incl, center)

    # Fraction of samples whose arg-max output matches the arg-max target.
    ola1 = tf.argmax(Tt, dimension=1)
    ola2 = tf.argmax(y, dimension=1)
    ola3 = tf.equal(ola1, ola2)
    accuracy = tf.reduce_mean(tf.cast(ola3, tf.float32))

    var_grad_ = tf.gradients(Tt, states)[0]
    grad_x = tf.slice(var_grad_, [0, 0], [-1, layers[0] - 1])

    # DEFINE LOSS: RMS regression error (replaces the L from TransDef) plus
    # a gradient penalty that is switched off while beta == 0.
    beta = 0.00
    L = tf.sqrt(
        tf.reduce_mean(
            tf.reduce_sum(tf.square(tf.sub(y, Tt)), 1, keep_dims=True))
    ) + beta * tf.reduce_mean(
        tf.reduce_max(tf.abs(grad_x), reduction_indices=1, keep_dims=True))

    # DEFINE OPTIMIZER
    nu = tf.placeholder(tf.float32, shape=[])
    lr_schedule = PiecewiseSchedule([
        (0, 0.1),
        (10000, 0.01),
        (20000, 0.001),
        (30000, 0.0001),
    ], outside_value=0.0001)
    train_step = tf.train.RMSPropOptimizer(learning_rate=nu,
                                           momentum=mom).minimize(L)

    hot_input = tf.placeholder(tf.int64, shape=(None))
    make_hot = tf.one_hot(hot_input, 4, on_value=1, off_value=0)

    # INITIALIZE GRAPH
    theta = tf.trainable_variables()
    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)

    def load_params(params):
        """Assign the arrays in ``params`` to ``theta``, position by position."""
        for var, value in zip(theta, params):
            sess.run(var.assign(value))

    def V_0(x):
        """Row-wise infinity norm minus one, returned as a column."""
        return np.linalg.norm(x, ord=np.inf, axis=1, keepdims=True) - 1.0

    def p_corr(ALL_x):
        """Wrap angles into [0, 2*pi)."""
        return np.mod(ALL_x, 2.0 * np.pi)

    def F(ALL_x, opt_a, opt_b):
        """Time derivative of every state row; ``opt_b`` is ignored."""
        sin_phi = np.sin(ALL_x[:, 2, None])
        cos_phi = np.cos(ALL_x[:, 2, None])
        col1 = np.multiply(ALL_x[:, 3, None], cos_phi)
        col2 = np.multiply(ALL_x[:, 3, None], sin_phi)
        col3 = opt_a[:, 0, None]
        col4 = opt_a[:, 1, None]
        return np.concatenate((col1, col2, col3, col4), axis=1)

    ####################### RECURSIVE FUNC ####################
    def RK4(ALL_x, dtt, opt_a, opt_b):
        """One classical Runge-Kutta step of length ``dtt``; heading re-wrapped."""
        def shifted(slope, scale):
            # Intermediate state with the heading column wrapped again.
            tmp = ALL_x + np.multiply(scale, slope)
            tmp[:, 2] = p_corr(tmp[:, 2])
            return tmp

        k1 = F(ALL_x, opt_a, opt_b)
        k2 = F(shifted(k1, dtt / 2.0), opt_a, opt_b)
        k3 = F(shifted(k2, dtt / 2.0), opt_a, opt_b)
        k4 = F(shifted(k3, dtt), opt_a, opt_b)

        Snx = ALL_x + np.multiply((dtt / 6.0),
                                  (k1 + 2.0 * k2 + 2.0 * k3 + k4))
        Snx[:, 2] = p_corr(Snx[:, 2])
        return Snx

    # Every combination of lower (-1) / upper (+1) bound per control channel.
    perms = list(itertools.product([-1, 1], repeat=num_ac))

    def Hot_to_Cold(opt_a):
        """Snap each control column to its upper/lower bound by sign.

        Modifies ``opt_a`` in place and returns it.  Not called anywhere in
        this function at present.
        """
        for k in range(len(max_list)):
            # FIX: `not array` raises for arrays, and the old (N, 1) mask
            # did not match opt_a's shape; select per column instead.
            positive = opt_a[:, k] > 0.0
            opt_a[:, k] = np.where(positive, max_list[k], min_list[k])
        return opt_a

    def getPI(ALL_x, F_PI=()):
        """Pick, for every state, the extreme control with the lowest value.

        Each candidate control is applied for one ``dt``; the resulting
        states are then rolled through the stored policies in ``F_PI``
        (front to back).  The value of a roll-out is the minimum of ``V_0``
        of columns [0, 1] along it.

        Returns:
            (best control per state, best value per state additionally
            lower-bounded by ``V_0`` at the starting state).
        """
        current_params = sess.run(theta)
        n_states = ALL_x.shape[0]

        next_states = []
        true_ac_list = []
        for ac_tuple in perms:
            # ASSUMING: aMax = -aMin (signs are applied to max_list only).
            ac_list = [sgn * bound for sgn, bound in zip(ac_tuple, max_list)]
            true_ac_list.append(ac_list)
            opt_a = np.asarray(ac_list) * np.ones([n_states, 1])
            next_states.append(RK4(ALL_x, dt, opt_a, None))
        # Rows are grouped by action: all states for action 0, then action 1...
        next_states = np.concatenate(next_states, axis=0)
        values = V_0(next_states[:, [0, 1]])

        try:
            for params in F_PI:
                load_params(params)  # Reload pi*(x,t+dt) parameters
                # NOTE(review): the raw network output is used as the
                # control; confirm whether it should go through Hot_to_Cold.
                opt_a = sess.run(Tt, {states: next_states})
                # FIX: continue from next_states (was ALL_x).
                next_states = RK4(next_states, dt, opt_a, None)
                # FIX: element-wise minimum (was a reduction over samples).
                values = np.minimum(values, V_0(next_states[:, [0, 1]]))
        finally:
            # Always put the weights being trained back.
            load_params(current_params)

        # FIX: one column per action, one row per state.
        compare_vals = values.reshape([-1, n_states]).T
        index_best_a = compare_vals.argmin(axis=1)
        values = np.min(compare_vals, axis=1, keepdims=True)
        best_actions = np.asarray([true_ac_list[j] for j in index_best_a])
        final_values = np.minimum(values, V_0(ALL_x[:, [0, 1]]))

        return best_actions, final_values

    def sample_states(count):
        """Draw ``count`` uniform states: x, y in [-5, 5], heading in
        [0, 2*pi], speed in [6, 12]."""
        pts = np.random.uniform(-5.0, 5.0, (count, layers[0]))
        pts[:, 2] = pts[:, 2] * np.pi / 5.0 + np.pi
        pts[:, 3] = pts[:, 3] * 3.0 / 5.0 + 9.0
        return pts

    # *****************************************************************************
    #
    # ============================= MAIN LOOP ====================================
    #
    # *****************************************************************************
    t = 0.0          # horizon time, stepped backwards by dt at every renewal
    ALL_PI = []      # stored policies, most recent first
    nunu = lr_schedule.value(0)

    for i in xrange(iters):
        if np.mod(i, renew) == 0:
            if i != 0:
                # Freeze the policy just trained and move one step back.
                ALL_PI.insert(0, sess.run(theta))
                t = t - dt

            # ALL_PI is still empty at i == 0, matching the old first-pass
            # branch that called getPI without stored policies.
            ALL_x = sample_states(nrolls)
            PI, _ = getPI(ALL_x, ALL_PI)

            # `//` keeps the count an integer on every Python version.
            ALL_x_ = sample_states(nrolls // 100)
            PI_, _ = getPI(ALL_x_, ALL_PI)

        # |||||||||||| ---- PRINT ----- ||||||||||||
        if np.mod(i, 200) == 0:
            train_acc = sess.run(accuracy, {states: ALL_x, y: PI})
            test_acc = sess.run(accuracy, {states: ALL_x_, y: PI_})
            print(str(i) + ") | TR_ACC = " + str(train_acc) +
                  " | TE_ACC = " + str(test_acc) +
                  " | Lerning Rate = " + str(nunu))

        # Fixed learning rate; lr_schedule only supplies the first printed value.
        nunu = 0.001
        tmp = np.random.randint(len(ALL_x), size=bts)
        sess.run(train_step,
                 feed_dict={
                     states: ALL_x[tmp],
                     y: PI[tmp],
                     nu: nunu
                 })
def main(layers, t_hor, ind, nrolls, bts, ler_r, mom, teps, renew, imp, q):
    """Train or replay control/disturbance policy classifiers for a 7-state model.

    Two classifiers are built with ``TransDef`` ("Control" and "Disturbance").
    Depending on ``imp`` the function either replays stored policies or trains
    new ones and pickles them.

    This is Python 2 code: ``input()`` evaluates what the user types and
    ``xrange`` is used for the training loop.

    Args:
        layers: Layer sizes of the control network. ``layers[0] - 1`` is the
            dimension of the sampled state; the last entry is replaced by
            ``2**dist_ac`` for the disturbance network.
        t_hor: Time horizon; ``int(abs(t_hor) / dt) * renew + 1`` training
            iterations are run.
        ind: Worker index, only printed.
        nrolls: Number of training states sampled per horizon step
            (``nrolls // 100`` test states are sampled as well).
        bts: Mini-batch size.
        ler_r: Unused.
        mom: Momentum passed to both RMSProp optimizers.
        teps: Unused.
        renew: Number of iterations between policy snapshots / re-labelling.
        imp: Mode selector. ``1.0`` interactive trajectory plotting, ``2.0``
            tracking-error-bound export, ``3.0`` control-vs-disturbance policy
            sweep, anything else trains.
        q: Unused.

    NOTE(review): the source this was reconstructed from had lost all
    indentation; loop nesting marked below with NOTE(review) should be
    confirmed against the author's intent.
    """
    # Quad params. Change to limit control in pitch or roll.
    max_list = [0.1, 0.1, 11.81, 1.0]
    min_list = [-0.1, -0.1, 7.81, -1.0]
    # Disturbance bounds.
    max_list_ = [0.5, 0.5, 0.5]
    min_list_ = [-0.5, -0.5, -0.5]
    g = 9.81

    print('Starting worker-' + str(ind))

    # Number of parameters of the policy: weights plus biases per layer.
    nofparams = sum(n_in * n_out + n_out
                    for n_in, n_out in zip(layers[:-1], layers[1:]))
    print('Number of Params is: ' + str(nofparams))

    center = np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])
    depth = 2.0
    incl = 1.0

    ##################### DEFINITIONS #####################
    dt = 0.1
    num_ac = 4   # control inputs  -> 2**num_ac bang-bang control classes
    dist_ac = 3  # disturbance inputs -> 2**dist_ac disturbance classes
    iters = int(np.abs(t_hor) / dt) * renew + 1

    ##################### INSTANTIATIONS #################
    states, y, Tt, L, _, _, _, _ = TransDef(
        "Control", False, layers, depth, incl, center)
    layers_ = layers[:]
    layers_[-1] = 2**dist_ac
    states_, y_, Tt_, L_, _, _, _, _ = TransDef(
        "Disturbance", False, layers_, depth, incl, center)

    # Fraction of samples whose predicted class matches the label.
    accuracy = tf.reduce_mean(tf.cast(
        tf.equal(tf.argmax(Tt, dimension=1), tf.argmax(y, dimension=1)),
        tf.float32))
    accuracy_ = tf.reduce_mean(tf.cast(
        tf.equal(tf.argmax(Tt_, dimension=1), tf.argmax(y_, dimension=1)),
        tf.float32))

    C_func_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='Control')
    D_func_vars = tf.get_collection(tf.GraphKeys.VARIABLES,
                                    scope='Disturbance')

    # DEFINE OPTIMIZER
    nu = tf.placeholder(tf.float32, shape=[])  # learning rate fed per step
    lr_schedule = PiecewiseSchedule([
        (0, 0.1),
        (10000, 0.01),
        (20000, 0.001),
        (30000, 0.0001),
    ], outside_value=0.0001)

    train_step = tf.train.RMSPropOptimizer(
        learning_rate=nu, momentum=mom).minimize(L)
    train_step_ = tf.train.RMSPropOptimizer(
        learning_rate=nu, momentum=mom).minimize(L_)

    hot_input = tf.placeholder(tf.int64, shape=(None))
    make_hot = tf.one_hot(hot_input, 2**num_ac, on_value=1, off_value=0)
    make_hot_ = tf.one_hot(hot_input, 2**dist_ac, on_value=1, off_value=0)

    # INITIALIZE GRAPH
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    def _assign(variables, values):
        """Load `values` into `variables` with a single session call.

        Each call adds fresh assign ops to the graph, as the original
        per-variable loop did.
        """
        ops = [var.assign(val) for var, val in zip(variables, values)]
        if ops:
            sess.run(ops)

    def _load_policies(path):
        """Unpickle a stored policy file and close the handle afterwards."""
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(path, "rb") as fh:
            return pickle.load(fh)

    def V_0(x):
        """Return the Euclidean norm of each row minus 1, as a column."""
        return np.linalg.norm(x, axis=1, keepdims=True) - 1.0

    def p_corr(ALL_x):
        """Wrap angles into [0, 2*pi)."""
        return np.mod(ALL_x, 2.0 * np.pi)

    def F(ALL_x, opt_a, opt_b):
        """Return the time derivative of every state row.

        Columns 0-2 of the result are columns 3-5 of the state minus the
        disturbance; columns 3-5 depend on the control and on the last state
        column (used as an angle); column 6 is the fourth control input.
        """
        psi = ALL_x[:, -1, None]
        cos_psi = np.cos(psi)
        sin_psi = np.sin(psi)
        a0 = opt_a[:, 0, None]
        a1 = opt_a[:, 1, None]
        thrust = opt_a[:, 2, None]

        d_pos = ALL_x[:, 3:6] - opt_b[:, 0:3]
        d_v0 = thrust * (cos_psi * a0 + sin_psi * a1)
        d_v1 = thrust * (-cos_psi * a1 + sin_psi * a0)
        d_v2 = thrust * (np.cos(a0) * np.cos(a1)) - g
        d_psi = opt_a[:, 3, None]
        return np.concatenate((d_pos, d_v0, d_v1, d_v2, d_psi), axis=1)

    ####################### RECURSIVE FUNC ####################
    def RK4(ALL_x, dtt, opt_a, opt_b):
        """Advance the states by `dtt` with one classical Runge-Kutta step.

        Controls and disturbances are held constant over the step. The angle
        in the last column is wrapped at every stage and in the result.
        """
        def _slope_at(step, slope):
            probe = ALL_x + np.multiply(step, slope)
            probe[:, -1] = p_corr(probe[:, -1])
            return F(probe, opt_a, opt_b)

        k1 = F(ALL_x, opt_a, opt_b)
        k2 = _slope_at(dtt / 2.0, k1)
        k3 = _slope_at(dtt / 2.0, k2)
        k4 = _slope_at(dtt, k3)

        Snx = ALL_x + np.multiply(dtt / 6.0, k1 + 2.0 * k2 + 2.0 * k3 + k4)
        # The original wrapped the scratch array here, leaving the returned
        # angle unwrapped; wrap the result itself.
        Snx[:, -1] = p_corr(Snx[:, -1])
        return Snx

    def _bang_bang(signs_list, lows, highs):
        """Map each tuple of -1/+1 signs to the matching lower/upper bounds."""
        return [[hi if sign == 1 else lo
                 for sign, lo, hi in zip(signs, lows, highs)]
                for signs in signs_list]

    # Every combination of extreme inputs is one class of the classifier.
    perms = list(itertools.product([-1, 1], repeat=num_ac))
    true_ac_list = _bang_bang(perms, min_list, max_list)
    perms_ = list(itertools.product([-1, 1], repeat=dist_ac))
    true_ac_list_ = _bang_bang(perms_, min_list_, max_list_)

    def Hot_to_Cold(hots, ac_list):
        """Return, per row, the `ac_list` entry of the highest-scoring class."""
        return np.asarray(ac_list)[hots.argmax(axis=1)]

    def ConvCosSin(ALL_x):
        """Build network features from raw states.

        Positions (columns 0-2) are divided by 5, velocities (columns 3-5) by
        10, and column 6 is replaced by its sine and cosine.
        """
        sin_psi = np.sin(ALL_x[:, [6]])
        cos_psi = np.cos(ALL_x[:, [6]])
        pos = ALL_x[:, [0, 1, 2]] / 5.0
        vel = ALL_x[:, [3, 4, 5]] / 10.0
        return np.concatenate((pos, vel, sin_psi, cos_psi), axis=1)

    def getPI(ALL_x, F_PI=(), F_PI_=(), subSamples=1):
        """Label every state with its min-max optimal control and disturbance.

        Each state is first propagated one step under every control /
        disturbance pair, then rolled forward with the stored policies
        (`F_PI`, `F_PI_`). The cost of a pair is the largest `V_0` seen along
        that roll-out. The control minimising the worst-case cost and the
        disturbance maximising the cost against that control are returned as
        one-hot arrays.

        The live network weights are restored before returning.
        """
        current_params = sess.run(C_func_vars)
        current_params_ = sess.run(D_func_vars)
        n_states = ALL_x.shape[0]
        ones = np.ones([n_states, 1])
        sub_dt = dt / float(subSamples)

        # Row order of the stacked result: control-major, then disturbance,
        # then state index.
        blocks = []
        for ctrl in true_ac_list:
            opt_a = np.asarray(ctrl) * ones
            for dist in true_ac_list_:
                opt_b = np.asarray(dist) * ones
                Snx = ALL_x
                for _ in range(subSamples):
                    Snx = RK4(Snx, sub_dt, opt_a, opt_b)
                blocks.append(Snx)
        next_states_ = np.concatenate(blocks, axis=0)

        values = V_0(next_states_[:, [0, 1, 2]])
        for params, params_ in zip(F_PI, F_PI_):
            # Reload pi*(x, t+dt) parameters.
            _assign(C_func_vars, params)
            _assign(D_func_vars, params_)
            tmp = ConvCosSin(next_states_)
            opt_a = Hot_to_Cold(sess.run(Tt, {states: tmp}), true_ac_list)
            opt_b = Hot_to_Cold(sess.run(Tt_, {states_: tmp}), true_ac_list_)
            for _ in range(subSamples):
                next_states_ = RK4(next_states_, sub_dt, opt_a, opt_b)
                # NOTE(review): running max taken per sub-step; identical to
                # per-policy-step for subSamples == 1 (the only use here).
                values = np.max(
                    (values, V_0(next_states_[:, [0, 1, 2]])), axis=0)

        # table[k, i, n]: cost for control k, disturbance i, state n.
        table = values.reshape(len(perms), len(perms_), n_states)
        worst_dist = table.argmax(axis=1)
        worst_cost = table.max(axis=1)
        index_best_a_ = worst_cost.argmin(axis=0)
        index_best_b_ = worst_dist[index_best_a_, np.arange(n_states)]

        _assign(C_func_vars, current_params)
        _assign(D_func_vars, current_params_)

        return (sess.run(make_hot, {hot_input: index_best_a_}),
                sess.run(make_hot_, {hot_input: index_best_b_}))

    def getTraj(ALL_x, F_PI=(), F_PI_=(), subSamples=1, StepsLeft=None,
                Noise=False, Static=False, justV=False, disturb=-1,
                steps=-1):
        """Roll states forward under stored policies.

        With `Static` a single control policy (`StepsLeft` from the end of
        `F_PI`) and a single disturbance policy (`disturb` from the end of
        `F_PI_`) are applied for `steps` steps; both are prompted for when
        `steps` is negative. Otherwise the last `StepsLeft` policy pairs are
        applied one after another. With `Noise` the disturbance is drawn
        uniformly at random instead of from the disturbance network.

        Returns:
            (traj, actions, values): visited states, the control class index
            of the first row at each recorded step, and the running maximum
            of `V_0` (only updated on the `Static` path).
        """
        current_params = sess.run(C_func_vars)
        current_params_ = sess.run(D_func_vars)

        if StepsLeft is None:
            StepsLeft = len(F_PI)

        next_states_ = ALL_x
        traj = [next_states_]
        actions = []
        values = V_0(next_states_[:, [0, 1, 2]])
        sub_dt = dt / float(subSamples)

        def _controls(x):
            """Query both networks (or sample noise) for the inputs at `x`."""
            tmp = ConvCosSin(x)
            hots = sess.run(Tt, {states: tmp})
            opt_a = Hot_to_Cold(hots, true_ac_list)
            if not Noise:
                hots_ = sess.run(Tt_, {states_: tmp})
            else:
                # One random disturbance class, broadcast to every row.
                hots_ = np.zeros((1, 2**dist_ac))
                hots_[0][np.random.randint(2**dist_ac)] = 1
            opt_b = Hot_to_Cold(hots_, true_ac_list_)
            return hots, opt_a, opt_b

        if Static:
            if steps < 0:
                # Python 2 input() evaluates the typed expression; only for
                # trusted interactive use.
                disturb = input("Disturbance Policy = ")
                steps = input("How Many Steps? ")
            _assign(C_func_vars, F_PI[len(F_PI) - StepsLeft])
            _assign(D_func_vars, F_PI_[len(F_PI_) - disturb])
            for i in range(steps):
                # NOTE(review): networks are re-queried at every sub-step.
                for _ in range(subSamples):
                    hots, opt_a, opt_b = _controls(next_states_)
                    next_states_ = RK4(next_states_, sub_dt, opt_a, opt_b)
                    if not justV:
                        traj.append(next_states_)
                        actions.append(hots.argmax(axis=1)[0])
                    values = np.max(
                        (values, V_0(next_states_[:, [0, 1, 2]])), axis=0)
                if i % 20 == 0:
                    print(i)
        else:
            pairs = zip(F_PI[len(F_PI) - StepsLeft:],
                        F_PI_[len(F_PI_) - StepsLeft:])
            for params, params_ in pairs:
                # Reload pi*(x, t+dt) parameters.
                _assign(C_func_vars, params)
                _assign(D_func_vars, params_)
                hots, opt_a, opt_b = _controls(next_states_)
                # NOTE(review): every sub-step is recorded in the trajectory.
                for _ in range(subSamples):
                    next_states_ = RK4(next_states_, sub_dt, opt_a, opt_b)
                    traj.append(next_states_)
                    actions.append(hots.argmax(axis=1)[0])

        _assign(C_func_vars, current_params)
        _assign(D_func_vars, current_params_)

        print(str(next_states_))
        return traj, actions, values

    # *************************************************************************
    # ============================= MAIN LOOP =================================
    # *************************************************************************
    ALL_PI = []   # control policy snapshots, newest first
    ALL_PI_ = []  # disturbance policy snapshots, newest first
    nunu = lr_schedule.value(0)
    act_color = ['r', 'g', 'b', 'y']

    if imp == 1.0:
        # Interactive replay: plot one trajectory per prompt, forever.
        ALL_PI, ALL_PI_ = _load_policies(
            "policies7D_P&Tcoupled_h60_h60_h60.pkl")
        cc = 0
        while True:
            state_get = input('State: ')
            sub_smpl = input('SUBSAMPLING: ')
            pause_len = input('Pause: ')
            s_left = input("How many steps left to go (max. "
                           + str(len(ALL_PI)) + ")? -> ")
            noise = input("Noise? (0/1): ")
            stat = input("Static? (0/1): ")
            traj, act, value = getTraj(
                state_get, F_PI=ALL_PI, F_PI_=ALL_PI_, subSamples=sub_smpl,
                StepsLeft=s_left, Noise=noise, Static=stat)
            print(value)
            act.append(act[-1])
            all_to = np.concatenate(traj)
            plt.scatter(all_to[:, [0]], all_to[:, [1]],
                        color=act_color[cc % len(act_color)])
            plt.pause(pause_len)
            cc = cc + 1

    elif imp == 2.0:
        # Estimate a tracking error bound and export it with the policies.
        ALL_PI, ALL_PI_ = _load_policies(
            "policies7D_P&Tcoupled_h60_h60_h60.pkl")
        dist_bound = input("Distance: ")
        state_get = np.random.uniform(-5.0, 5.0, (nrolls, layers[0] - 1))
        # Project the sampled positions onto the sphere of radius dist_bound.
        state_get[:, :3] = dist_bound * state_get[:, :3] / np.linalg.norm(
            state_get[:, :3], axis=1, keepdims=True)
        sub_smpl = input('SUBSAMPLING: ')
        s_left = input("How many steps left to go (max. "
                       + str(len(ALL_PI)) + ")? -> ")
        noise = input("Noise? (0/1): ")
        stat = input("Static? (0/1): ")
        traj, act, values = getTraj(
            state_get, F_PI=ALL_PI, F_PI_=ALL_PI_, subSamples=sub_smpl,
            StepsLeft=s_left, Noise=noise, Static=stat, justV=True)
        values = values + 1.0  # undo the "- 1.0" in V_0 to get a distance
        print(values.shape)
        filt = (values < dist_bound).T[0]
        print(filt.shape)
        subset = state_get[filt]
        print(len(subset))
        plt.hist(values, bins=100)
        plt.pause(10)
        tracking_error_bound = np.max(abs(subset[:, :3]), axis=0)
        print(tracking_error_bound)
        print(subset)

        save_dict = {}
        save_dict["weights"] = (ALL_PI, ALL_PI_)
        # The original read an undefined `layers1`; record the layer sizes
        # the control network above was actually built with.
        save_dict["c_layers"] = layers
        save_dict["d_layers"] = layers_
        save_dict["control_bounds_upper"] = max_list
        save_dict["control_bounds_lower"] = min_list
        save_dict["tracking_error_bound"] = tracking_error_bound
        save_dict["planner_params"] = {
            "max_speed": [0.5, 0.5, 0.5],
            "max_vel_dist": [0.0, 0.0, 0.0],
            "max_acc_dist": [0.0, 0.0, 0.0],
        }
        save_dict["normalization_args"] = [
            5.0, 5.0, 5.0, 10.0, 10.0, 10.0, -1]
        with open("TESTpolicies7Dubins_PT_h100_h100.pkl", "wb") as fh:
            pickle.dump(save_dict, fh)

    elif imp == 3.0:
        # Sweep every other control policy against every other disturbance
        # policy from the origin and record the worst value reached.
        ALL_PI, ALL_PI_ = _load_policies("policies7D_P&Tcoupled_h100_h100.pkl")
        fig = plt.figure()
        n_rows = len(ALL_PI) // 2    # == len(range(1, len(ALL_PI), 2))
        n_cols = len(ALL_PI_) // 2   # == len(range(1, len(ALL_PI_), 2))
        tmp = 1
        vals = []
        for i in range(1, len(ALL_PI), 2):
            for j in range(1, len(ALL_PI_), 2):
                state_get = np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])
                traj, act, v = getTraj(
                    state_get, F_PI=ALL_PI, F_PI_=ALL_PI_, subSamples=2,
                    StepsLeft=i, Noise=0, Static=1, disturb=j, steps=1000)
                vals.append(v[0][0])
                all_to = np.concatenate(traj)
                ax = fig.add_subplot(n_rows, n_cols, tmp)
                tmp = tmp + 1
                ax.scatter(all_to[:, [0]], all_to[:, [2]])
                # NOTE(review): pause assumed to follow every subplot.
                plt.pause(1.0)
        # Shape follows the sweep instead of a hard-coded (10, 10).
        vals = np.array(vals).reshape((n_rows, n_cols))
        with open("avore.pkl", "wb") as fh:
            pickle.dump(vals, fh)
        plt.pause(1000.0)

    else:
        def _make_dataset(n_samples):
            """Sample states and return (features, control, disturbance labels)."""
            x = np.random.uniform(-5.0, 5.0, (n_samples, layers[0] - 1))
            x[:, [3, 4, 5]] = x[:, [3, 4, 5]] * 2.0
            x[:, [6]] = np.mod(x[:, [6]] * np.pi / 5.0, 2.0 * np.pi)
            labels_c, labels_d = getPI(x, ALL_PI, ALL_PI_, subSamples=1)
            return ConvCosSin(x), labels_c, labels_d

        for i in xrange(iters):
            if np.mod(i, renew) == 0:
                if i != 0:
                    # Freeze the networks trained for this horizon step; they
                    # become the first policies applied in later roll-outs.
                    ALL_PI.insert(0, sess.run(C_func_vars))
                    ALL_PI_.insert(0, sess.run(D_func_vars))
                started = time.time()
                pre_ALL_x, PI_c, PI_d = _make_dataset(nrolls)
                if i == 0:
                    print("Compute Data Time = "
                          + str(time.time() - started))
                pre_ALL_x_, PI_c_, PI_d_ = _make_dataset(nrolls // 100)
                if i != 0:
                    print('Again.')

            # |||||||||||| ---- PRINT ----- ||||||||||||
            if np.mod(i, 200) == 0:
                train_acc = sess.run(accuracy, {states: pre_ALL_x, y: PI_c})
                test_acc = sess.run(accuracy, {states: pre_ALL_x_, y: PI_c_})
                train_acc_ = sess.run(
                    accuracy_, {states_: pre_ALL_x, y_: PI_d})
                test_acc_ = sess.run(
                    accuracy_, {states_: pre_ALL_x_, y_: PI_d_})
                print(str(i) + ") control | TR_ACC = " + str(train_acc)
                      + " | TE_ACC = " + str(test_acc)
                      + " | Learning Rate = " + str(nunu))
                print(str(i) + ") disturb | TR_ACC = " + str(train_acc_)
                      + " | TE_ACC = " + str(test_acc_)
                      + " | Learning Rate = " + str(nunu))

            nunu = 0.001
            tmp = np.random.randint(len(pre_ALL_x), size=bts)
            sess.run(train_step,
                     feed_dict={states: pre_ALL_x[tmp], y: PI_c[tmp],
                                nu: nunu})
            sess.run(train_step_,
                     feed_dict={states_: pre_ALL_x[tmp], y_: PI_d[tmp],
                                nu: nunu})

        # NOTE(review): assumed to belong to the training branch only, so
        # replay modes never overwrite the trained policy file.
        with open("policies7D_P&Tcoupled_h100_h100.pkl", "wb") as fh:
            pickle.dump([ALL_PI, ALL_PI_], fh)
def main(layers, t_hor, ind, nrolls, bts, ler_r, mom, teps, renew, imp, q): # Constants # The choices of n0, d1, d0 actually results in a very large # steady state error in the pitch/roll; this seems to be # expected according to Pat's report n0 = 10 # Angular dynamics parameters d1 = 8 d0 = 10 kT = 1.0 #0.91 # Thrust coefficient (vertical direction) grav = 9.81 # Acceleration due to gravity (for convenience) m = 1.3 # Mass # Quad Params max_list = [1, 1, 2.0 * grav] min_list = [-1, -1, 0.0] max_list_ = [0.5, 0.5, 0.5] min_list_ = [-0.5, -0.5, -0.5] g = 9.81 print 'Starting worker-' + str(ind) f = 1 Nx = 100 * f + 1 minn = [-5.0, -10.0, -5.0, -10.0, 0.0, -10.0] maxx = [5.0, 10.0, 5.0, 10.0, 2 * np.pi, 10.0] X = np.linspace(minn[0], maxx[0], Nx) Y = np.linspace(minn[2], maxx[2], Nx) Z = np.linspace(minn[4], maxx[4], Nx) X_, Y_, Z_ = np.meshgrid(X, Y, Z) X, Y = np.meshgrid(X, Y) XX = np.reshape(X, [-1, 1]) YY = np.reshape(Y, [-1, 1]) XX_ = np.reshape(X_, [-1, 1]) YY_ = np.reshape(Y_, [-1, 1]) ZZ_ = np.reshape(Z_, [-1, 1]) grid_check = np.concatenate((XX_, np.ones( XX_.shape), YY_, np.ones(XX_.shape), ZZ_, np.zeros(XX_.shape)), axis=1) grid_eval = np.concatenate( (XX, YY, 0.0 * np.ones(XX.shape), 6.0 * np.ones(XX.shape)), axis=1) grid_eval_ = np.concatenate( (XX, YY, (2.0 / 3.0) * np.pi * np.ones(XX.shape), 6.0 * np.ones(XX.shape)), axis=1) grid_eval__ = np.concatenate( (XX, YY, (4.0 / 3.0) * np.pi * np.ones(XX.shape), 6.0 * np.ones(XX.shape)), axis=1) grid_evall = np.concatenate( (XX, YY, 0.0 * np.ones(XX.shape), 12.0 * np.ones(XX.shape)), axis=1) grid_evall_ = np.concatenate( (XX, YY, (2.0 / 3.0) * np.pi * np.ones(XX.shape), 12.0 * np.ones(XX.shape)), axis=1) grid_evall__ = np.concatenate( (XX, YY, (4.0 / 3.0) * np.pi * np.ones(XX.shape), 12.0 * np.ones(XX.shape)), axis=1) # Calculate number of parameters of the policy nofparams = 0 for i in xrange(len(layers) - 1): nofparams += layers[i] * layers[i + 1] + layers[i + 1] print 'Number of Params is: ' + str(nofparams) 
H_length = t_hor sub_sys = [] ##################### DEFINITIONS ##################### #layers = [2 + 1,10,1]; #VAR #ssize = layers[0] - 1; dt = 0.1 #VAR num_ac = 3 iters = int(np.abs(t_hor) / dt) * renew + 1 ##################### INSTANTIATIONS ################# states, y, Tt, L, l_r, lb, reg, cross_entropy = TransDef( "Control", False, layers) states_, y_, Tt_, L_, l_r_, lb_, reg_, cross_entropy_ = TransDef( "Disturbance", False, layers) ola1 = tf.argmax(Tt, dimension=1) ola2 = tf.argmax(y, dimension=1) ola3 = tf.equal(ola1, ola2) accuracy = tf.reduce_mean(tf.cast(ola3, tf.float32)) ola1_ = tf.argmax(Tt_, dimension=1) ola2_ = tf.argmax(y_, dimension=1) ola3_ = tf.equal(ola1_, ola2_) accuracy_ = tf.reduce_mean(tf.cast(ola3_, tf.float32)) #a_layers = layers; #a_layers[-1] = 2; #We have two actions #states_,y_,Tt_,l_r_,lb_,reg_ = TransDef("Actor",False,a_layers,depth,incl,center,outp=True); C_func_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='Control') D_func_vars = tf.get_collection(tf.GraphKeys.VARIABLES, scope='Disturbance') #var_grad = tf.gradients(Tt_,states_)[0] var_grad_ = tf.gradients(Tt, states)[0] grad_x = tf.slice(var_grad_, [0, 0], [-1, layers[0] - 1]) #theta = tf.trainable_variables(); set_to_zero = [] for var in sorted(C_func_vars, key=lambda v: v.name): set_to_zero.append(var.assign(tf.zeros(tf.shape(var)))) set_to_zero = tf.group(*set_to_zero) set_to_not_zero = [] for var in sorted(C_func_vars, key=lambda v: v.name): set_to_not_zero.append( var.assign( tf.random_uniform(tf.shape(var), minval=-0.1, maxval=0.1))) set_to_not_zero = tf.group(*set_to_not_zero) # DEFINE LOSS lmbda = 0.0 #1.0**(-3.5);#0.01; beta = 0.00 #L = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(tf.sub(y,Tt)),1,keep_dims=True))) + beta*tf.reduce_mean(tf.reduce_max(tf.abs(grad_x),reduction_indices=1,keep_dims=True)); #L = tf.reduce_mean(tf.mul(tf.exp(imp*t_vec),tf.abs(tf.sub(y,Tt)))) + lmbda*reg; #L = tf.reduce_mean(tf.abs(tf.sub(y,Tt))) + lmbda*reg; # DEFINE OPTIMIZER #nu = 
5.01; #nunu = ler_r;#0.00005; nu = tf.placeholder(tf.float32, shape=[]) #VAR #lr_multiplier = ler_r lr_schedule = PiecewiseSchedule([ (0, 0.1), (10000, 0.01), (20000, 0.001), (30000, 0.0001), ], outside_value=0.0001) #optimizer = tf.train.GradientDescentOptimizer(nu) #optimizer #train_step = tf.train.MomentumOptimizer(learning_rate=nu,momentum=mom).minimize(L) #optimizer #train_step = tf.train.AdamOptimizer(learning_rate=nu).minimize(L); train_step = tf.train.RMSPropOptimizer(learning_rate=nu, momentum=mom).minimize(L) train_step_ = tf.train.RMSPropOptimizer(learning_rate=nu, momentum=mom).minimize(L_) #optimizer = tf.train.RMSPropOptimizer(learning_rate=nu,momentum=mom); #gvs = optimizer.compute_gradients(L,theta); #capped_gvs = [(tf.clip_by_value(grad, -3., 3.), var) for grad, var in gvs]; #train_step = optimizer.apply_gradients(gvs); #train_step = tf.train.AdagradOptimizer(learning_rate=nu,initial_accumulator_value=0.5).minimize(L); hot_input = tf.placeholder(tf.int64, shape=(None)) make_hot = tf.one_hot(hot_input, 2**num_ac, on_value=1, off_value=0) # INITIALIZE GRAPH sess = tf.Session() init = tf.initialize_all_variables() sess.run(init) def V_0(x): #return np.linalg.norm(x,ord=1,axis=1,keepdims=True) - 1.0 return np.linalg.norm(x, axis=1, keepdims=True) - 1.0 def p_corr(ALL_x): ALL_x = np.mod(ALL_x, 2.0 * np.pi) return ALL_x def F(ALL_x, opt_a, opt_b): # #Positions # col1 = ALL_x[:,3,None] - opt_b[:,0,None] # col2 = ALL_x[:,4,None] - opt_b[:,1,None] # col3 = ALL_x[:,5,None] - opt_b[:,2,None] # #Velocities # col4 = np.multiply(opt_a[:,2,None],np.tan(ALL_x[:,6,None])) # col5 = -np.multiply(opt_a[:,2,None],np.tan(ALL_x[:,7,None])) # col6 = opt_a[:,2,None] - g # #Angles # col7 = opt_a[:,0,None]; # col8 = opt_a[:,1,None]; col1 = ALL_x[:, 1, None] - opt_b[:, 0, None] #position x col2 = np.multiply(opt_a[:, 2, None], np.tan(ALL_x[:, 2, None])) #velocity x col3 = opt_a[:, 0, None] #-d1*ALL_x[:,1,None] + opt_a[:,0,None]; #angle th_x col5 = ALL_x[:, 4, None] - opt_b[:, 
1, None] #position y col6 = -np.multiply(opt_a[:, 2, None], np.tan( ALL_x[:, 5, None])) #velocity y col7 = opt_a[:, 1, None] #-d1*ALL_x[:,4,None] + opt_a[:,1,None]; #angle th_y col9 = ALL_x[:, 7, None] - opt_b[:, 2, None] #position z col10 = kT * opt_a[:, 2, None] - grav #velocity z return np.concatenate( (col1, col2, col3, col5, col6, col7, col9, col10), axis=1) ####################### RECURSIVE FUNC #################### def RK4(ALL_x, dtt, opt_a, opt_b): #Try Euler k1 = F(ALL_x, opt_a, opt_b) #### !!! # ~~~~ Compute optimal input (k2) ALL_tmp = ALL_x + np.multiply(dtt / 2.0, k1) ALL_tmp[:, [2, 5]] = p_corr(ALL_tmp[:, [2, 5]]) k2 = F(ALL_tmp, opt_a, opt_b) #### !!! # ~~~~ Compute optimal input (k3) ALL_tmp = ALL_x + np.multiply(dtt / 2.0, k2) ALL_tmp[:, [2, 5]] = p_corr(ALL_tmp[:, [2, 5]]) k3 = F(ALL_tmp, opt_a, opt_b) #### !!! # ~~~~ Compute optimal input (k4) ALL_tmp = ALL_x + np.multiply(dtt, k3) ALL_tmp[:, [2, 5]] = p_corr(ALL_tmp[:, [2, 5]]) k4 = F(ALL_tmp, opt_a, opt_b) #### !!! Snx = ALL_x + np.multiply((dtt / 6.0), (k1 + 2.0 * k2 + 2.0 * k3 + k4)) #np.multiply(dtt,k1) ALL_tmp[:, [2, 5]] = p_corr(ALL_tmp[:, [2, 5]]) return Snx perms = list(itertools.product([-1, 1], repeat=num_ac)) true_ac_list = [] for i in range(len(perms)): #2**num_actions ac_tuple = perms[i] ac_list = [(tmp1 == 1) * tmp3 + (tmp1 == -1) * tmp2 for tmp1, tmp2, tmp3 in zip(ac_tuple, min_list, max_list)] true_ac_list.append(ac_list) dist_ac = 3 perms_ = list(itertools.product([-1, 1], repeat=dist_ac)) true_ac_list_ = [] for i in range(len(perms_)): #2**num_actions ac_tuple_ = perms_[i] ac_list_ = [ (tmp1 == 1) * tmp3 + (tmp1 == -1) * tmp2 for tmp1, tmp2, tmp3 in zip(ac_tuple_, min_list_, max_list_) ] #ASSUMING: aMax = -aMin true_ac_list_.append(ac_list_) def Hot_to_Cold(hots, ac_list): a = hots.argmax(axis=1) a = np.asarray([ac_list[i] for i in a]) return a def getPI( ALL_x, F_PI=[], F_PI_=[], subSamples=1 ): #Things to keep in MIND: You want the returned value to be the minimum accross a 
trajectory. current_params = sess.run(C_func_vars) current_params_ = sess.run(D_func_vars) #perms = list(itertools.product([-1,1], repeat=num_ac)) next_states_ = [] for k in range((len(perms))): next_states = [] opt_a = np.asarray(true_ac_list[k]) * np.ones([ALL_x.shape[0], 1]) for i in range(len(perms_)): opt_b = np.asarray(true_ac_list_[i]) * np.ones( [ALL_x.shape[0], 1]) Snx = ALL_x for _ in range(subSamples): Snx = RK4(Snx, dt / float(subSamples), opt_a, opt_b) next_states.append(Snx) next_states_.append(np.concatenate(next_states, axis=0)) next_states_ = np.concatenate(next_states_, axis=0) values = V_0(next_states_[:, [0, 3, 6]]) for params, params_ in zip(F_PI, F_PI_): for ind in range(len(params)): #Reload pi*(x,t+dt) parameters sess.run(C_func_vars[ind].assign(params[ind])) for ind in range(len(params_)): #Reload pi*(x,t+dt) parameters sess.run(D_func_vars[ind].assign(params_[ind])) tmp = ConvCosSin(next_states_) hots = sess.run(Tt, {states: tmp}) opt_a = Hot_to_Cold(hots, true_ac_list) hots = sess.run(Tt_, {states_: tmp}) opt_b = Hot_to_Cold(hots, true_ac_list_) for _ in range(subSamples): next_states_ = RK4(next_states_, dt / float(subSamples), opt_a, opt_b) values = np.max((values, V_0(next_states_[:, [0, 3, 6]])), axis=0) values_ = values #V_0(next_states_[:,[0,1,2]]); pre_compare_vals_ = values_.reshape([-1, ALL_x.shape[0]]).T #Changed to values instead of values_ final_v = [] final_v_ = [] per = len(perms) for k in range(len(perms_)): final_v.append( np.argmax(pre_compare_vals_[:, k * per:(k + 1) * per, None], axis=1)) final_v_.append( np.max(pre_compare_vals_[:, k * per:(k + 1) * per, None], axis=1)) finalF = np.concatenate(final_v_, axis=1) index_best_a_ = np.argmin(finalF, axis=1) finalF_ = np.concatenate(final_v, axis=1) index_best_b_ = np.array( [finalF_[k, index_best_a_[k]] for k in range(len(index_best_a_))]) for ind in range(len(current_params)): #Reload pi*(x,t+dt) parameters sess.run(C_func_vars[ind].assign(current_params[ind])) for ind in 
range(len(current_params_)): #Reload pi*(x,t+dt) parameters sess.run(D_func_vars[ind].assign(current_params_[ind])) return sess.run(make_hot, {hot_input: index_best_a_}), sess.run( make_hot, {hot_input: index_best_b_}) def getTraj(ALL_x, F_PI=[], F_PI_=[], subSamples=1, StepsLeft=None, Noise=False, Static=False, justV=False): current_params = sess.run(C_func_vars) current_params_ = sess.run(D_func_vars) if (StepsLeft == None): StepsLeft = len(F_PI) next_states_ = ALL_x traj = [next_states_] actions = [] values = V_0(next_states_[:, [0, 1, 2]]) if Static: steps = input("How Many Steps? ") for ind in range(len( F_PI[len(F_PI) - StepsLeft])): #Reload pi*(x,t+dt) parameters sess.run(C_func_vars[ind].assign(F_PI[len(F_PI) - StepsLeft][ind])) for ind in range(len( F_PI_[len(F_PI_) - StepsLeft])): #Reload pi*(x,t+dt) parameters sess.run(D_func_vars[ind].assign(F_PI_[len(F_PI_) - StepsLeft][ind])) for i in range(steps): for _ in range(subSamples): tmp = ConvCosSin(next_states_) hots = sess.run(Tt, {states: tmp}) opt_a = Hot_to_Cold(hots, true_ac_list) if Noise == False: hots_ = sess.run(Tt_, {states_: tmp}) opt_b = Hot_to_Cold(hots_, true_ac_list_) else: hots_ = np.zeros((1, 2**dist_ac)) hots_[0][np.random.randint(2**dist_ac)] = 1 opt_b = Hot_to_Cold(hots_, true_ac_list_) next_states_ = RK4(next_states_, dt / float(subSamples), opt_a, opt_b) if not justV: traj.append(next_states_) actions.append(hots.argmax(axis=1)[0]) values = np.max((values, V_0(next_states_[:, [0, 1, 2]])), axis=0) if i % 20 == 0: print(i) else: for params, params_ in zip(F_PI[len(F_PI) - StepsLeft:], F_PI_[len(F_PI_) - StepsLeft:]): for ind in range(len(params)): #Reload pi*(x,t+dt) parameters sess.run(C_func_vars[ind].assign(params[ind])) for ind in range(len(params_)): #Reload pi*(x,t+dt) parameters sess.run(D_func_vars[ind].assign(params_[ind])) tmp = ConvCosSin(next_states_) hots = sess.run(Tt, {states: tmp}) opt_a = Hot_to_Cold(hots, true_ac_list) if Noise == False: hots_ = sess.run(Tt_, {states_: 
tmp}) opt_b = Hot_to_Cold(hots_, true_ac_list_) else: hots_ = np.zeros((1, 2**dist_ac)) hots_[0][np.random.randint(2**dist_ac)] = 1 opt_b = Hot_to_Cold(hots_, true_ac_list_) for _ in range(subSamples): next_states_ = RK4(next_states_, dt / float(subSamples), opt_a, opt_b) traj.append(next_states_) actions.append(hots.argmax(axis=1)[0]) for ind in range(len(current_params)): #Reload pi*(x,t+dt) parameters sess.run(C_func_vars[ind].assign(current_params[ind])) for ind in range(len(current_params_)): #Reload pi*(x,t+dt) parameters sess.run(D_func_vars[ind].assign(current_params_[ind])) print(str(next_states_)) return traj, actions, values def ConvCosSin(ALL_x): sin_phi = np.sin(ALL_x[:, [2, 5]]) cos_phi = np.cos(ALL_x[:, [2, 5]]) pos = ALL_x[:, [0, 3, 6]] vel = ALL_x[:, [1, 4, 7]] ret_val = np.concatenate((pos, vel, sin_phi, cos_phi), axis=1) return ret_val # ***************************************************************************** # # ============================= MAIN LOOP ==================================== # # ***************************************************************************** t1 = time.time() t = 0.0 mse = np.inf k = 0 kk = 0 beta = 3.0 batch_size = bts tau = 1000.0 steps = teps ALL_PI = [] ALL_PI_ = [] nunu = lr_schedule.value(k) act_color = ['r', 'g', 'b', 'y'] if (imp == 1.0): ALL_PI, ALL_PI_ = pickle.load( open("policies6D_P&T.9769 | TE_ACC = 0.94 | Lc_h40_h40.pkl", "rb")) cc = 0 while True: state_get = input('State: ') sub_smpl = input('SUBSAMPLING: ') pause_len = input('Pause: ') s_left = input("How many steps left to go (max. " + str(len(ALL_PI)) + ")? -> ") noise = input("Noise? (0/1): ") stat = input("Static? 
(0/1): ") traj, act, _ = getTraj(state_get, F_PI=ALL_PI, F_PI_=ALL_PI_, subSamples=sub_smpl, StepsLeft=s_left, Noise=noise, Static=stat) act.append(act[-1]) all_to = np.concatenate(traj) plt.scatter(all_to[:, [0]], all_to[:, [2]], color=act_color[cc % len(act_color)]) plt.pause(pause_len) cc = cc + 1 #plt.colorbar() elif (imp == 2.0): ALL_PI, ALL_PI_ = pickle.loadsess.run(set_to_not_zero) (open("policies6D_P&Tc_h40_h40.pkl", "rb")) cc = 0 dist_bound = input("Distance: ") state_get = np.random.uniform(-5.0, 5.0, (nrolls, layers[0])) state_get[:, :3] = dist_bound * state_get[:, :3] / np.linalg.norm( state_get[:, :3], axis=1, keepdims=True) sub_smpl = input('SUBSAMPLING: ') s_left = input("How many steps left to go (max. " + str(len(ALL_PI)) + ")? -> ") noise = input("Noise? (0/1): ") stat = input("Static? (0/1): ") traj, act, values = getTraj(state_get, F_PI=ALL_PI, F_PI_=ALL_PI_, subSamples=sub_smpl, StepsLeft=s_left, Noise=noise, Static=stat, justV=True) values = values + 1.0 print(values.shape) filt = (values < dist_bound).T[0] print(filt.shape) subset = state_get[filt] print(len(subset)) plt.hist(values, bins=100) plt.pause(10) tracking_error_bound = np.max(abs(subset[:, :3]), axis=0) print(tracking_error_bound) print(subset) save_dict = {} save_dict["weights"] = (ALL_PI, ALL_PI_) save_dict["layers"] = layers1 save_dict["control_bounds_upper"] = max_list save_dict["control_bounds_lower"] = min_list save_dict["tracking_error_bound"] = tracking_error_bound save_dict["planner_params"] = { "max_speed": [0.5, 0.5, 0.5], "max_vel_dist": [0.0, 0.0, 0.0], "max_acc_dist": [0.0, 0.0, 0.0] } pickle.dump(save_dict, open("policies6D_PT_h40_h40.pkl", "wb")) else: for i in xrange(iters): if (np.mod(i, renew) == 0 and i is not 0): ALL_PI.insert(0, sess.run(C_func_vars)) ALL_PI_.insert(0, sess.run(D_func_vars)) # plt.figure(2) #TODO: Figure out why facing up vs facing down has same action... 
-> Solved: colors in a scatter plot only depend on the labels # plt.clf(); # ALL_xx = np.array([[-1.0,0.0,1.0,0.0,0.0,0.0], # [1.0,0.0,1.0,0.0,0.0,0.0], # [1.0,0.0,-1.0,0.0,0.0,0.0], # [-1.0,0.0,-1.0,0.0,0.0,0.0]]); # for tmmp in range(ALL_xx.shape[0]): # traj,act,_ = getTraj(ALL_xx[[tmmp],:],F_PI=ALL_PI,F_PI_=ALL_PI_,subSamples=10); # #act.append(act[-1]); # all_to = np.concatenate(traj); # plt.scatter(all_to[:,[0]],all_to[:,[2]])#c=[act_color[ii] for ii in act]); # plt.pause(0.25) t = time.time() ALL_x = np.random.uniform(-5.0, 5.0, (nrolls, layers[0] - 2)) ALL_x[:, [1, 4, 7]] = ALL_x[:, [1, 4, 7]] ALL_x[:, [2, 5]] = np.mod(ALL_x[:, [2, 5]] * np.pi / 20.0, 2.0 * np.pi) PI_c, PI_d = getPI(ALL_x, ALL_PI, ALL_PI_, subSamples=1) pre_ALL_x = ConvCosSin(ALL_x) elapsed = time.time() - t print("Compute Data Time = " + str(elapsed)) ALL_x_ = np.random.uniform(-5.0, 5.0, (nrolls / 100, layers[0] - 2)) ALL_x_[:, [1, 4, 7]] = ALL_x_[:, [1, 4, 7]] ALL_x_[:, [2, 5]] = np.mod(ALL_x_[:, [2, 5]] * np.pi / 20.0, 2.0 * np.pi) PI_c_, PI_d_ = getPI(ALL_x_, ALL_PI, ALL_PI_, subSamples=1) pre_ALL_x_ = ConvCosSin(ALL_x_) #sess.run(set_to_not_zero); t = t - dt print('Again.') elif (np.mod(i, renew) == 0 and i is 0): # sess.run(set_to_zero); t = time.time() ALL_x = np.random.uniform(-5.0, 5.0, (nrolls, layers[0] - 2)) ALL_x[:, [1, 4, 7]] = ALL_x[:, [1, 4, 7]] ALL_x[:, [2, 5]] = np.mod(ALL_x[:, [2, 5]] * np.pi / 20.0, 2.0 * np.pi) PI_c, PI_d = getPI(ALL_x, F_PI=[], F_PI_=[], subSamples=1) pre_ALL_x = ConvCosSin(ALL_x) elapsed = time.time() - t print("Compute Data Time = " + str(elapsed)) ALL_x_ = np.random.uniform(-5.0, 5.0, (nrolls / 100, layers[0] - 2)) ALL_x_[:, [1, 4, 7]] = ALL_x_[:, [1, 4, 7]] ALL_x_[:, [2, 5]] = np.mod(ALL_x_[:, [2, 5]] * np.pi / 20.0, 2.0 * np.pi) PI_c_, PI_d_ = getPI(ALL_x_, F_PI=[], F_PI_=[], subSamples=1) pre_ALL_x_ = ConvCosSin(ALL_x_) # sess.run(set_to_not_zero); # |||||||||||| ---- PRINT ----- |||||||||||| if (np.mod(i, 200) == 0): #xel = 
sess.run(L,{states:ALL_x,y:PI}); #test_e = sess.run(L,{states:ALL_x_,y:PI_}); train_acc = sess.run(accuracy, { states: pre_ALL_x, y: PI_c }) test_acc = sess.run(accuracy, { states: pre_ALL_x_, y: PI_c_ }) train_acc_ = sess.run(accuracy_, { states_: pre_ALL_x, y_: PI_d }) test_acc_ = sess.run(accuracy_, { states_: pre_ALL_x_, y_: PI_d_ }) #o = np.random.randint(len(ALL_x)); print str(i) + ") control | TR_ACC = " + str( train_acc) + " | TE_ACC = " + str( test_acc) + " | Learning Rate = " + str(nunu) print str(i) + ") disturb | TR_ACC = " + str( train_acc_) + " | TE_ACC = " + str( test_acc_) + " | Learning Rate = " + str(nunu) #print str(i) + ") | XEL = " + str(xel) + " | Test_E = " + str(test_e) + " | Lerning Rate = " + str(nunu) #print str(PI[[o],:]) + " || " + str(sess.run(l_r[-1],{states:ALL_x[[o],:]})) #+ " || " + str(sess.run(gvs[-1],{states:ALL_x,y:PI})) nunu = 0.001 #/(np.sqrt(np.mod(i,renew))+1.0)#lr_schedule.value(i); #nunu = ler_r/(np.mod(i,renew)+1.0); tmp = np.random.randint(len(ALL_x), size=bts) sess.run(train_step, feed_dict={ states: pre_ALL_x[tmp], y: PI_c[tmp], nu: nunu }) sess.run(train_step_, feed_dict={ states_: pre_ALL_x[tmp], y_: PI_d[tmp], nu: nunu }) #tmp = np.random.randint(len(reach100s), size=bts); #sess.run(train_step, feed_dict={states:reach100s[tmp,:-1],y:reach100s[tmp,-1,None],nu:nunu});vs pickle.dump([ALL_PI, ALL_PI_], open("policies10D_P&Tc_h170_h170.pkl", "wb"))
def main(layers, t_hor, ind, nrolls, bts, ler_r, mom, teps, renew, imp, q):
    """Train a sequence of bang-bang control policies for a 12-state quadrotor.

    The state layout used throughout (see ``F`` and ``ConvCosSin``) is
    columns 0-2 position, 3-5 linear velocity, 6-8 Euler angles
    (phi, theta, psi) and 9-11 angular rates.

    Every ``renew`` iterations the current network weights are pushed to the
    front of ``ALL_PI`` (except at iteration 0), fresh train/test states are
    sampled, and ``getPI`` labels each state with the one-hot index of the
    best of the ``2**num_ac`` min/max control combinations.  Between
    refreshes the network returned by ``TransDef`` is trained on random
    minibatches with RMSProp.  When the loop finishes, ``ALL_PI`` is pickled
    to ``policies6Dreach_h50.pkl``.

    Args:
        layers: layer sizes of the network; ``layers[0] - 3`` is the number
            of raw state columns that get sampled.
        t_hor: time horizon; ``abs(t_hor) / dt`` policies are trained.
        ind: worker index, only used in the start-up message.
        nrolls: number of training states per refresh (the test set uses
            ``nrolls // 100``).
        bts: minibatch size.
        ler_r: unused in this version.
        mom: momentum passed to the RMSProp optimizer.
        teps: unused in this version.
        renew: number of training iterations per stored policy.
        imp: unused in this version.
        q: unused in this version.
    """
    # Quad params: per-channel control bounds.
    u1_max = 0.17
    u1_min = 0
    u2_max = 0.017
    u2_min = 0
    u3_max = 0.017
    u3_min = 0
    u4_max = 0.017
    u4_min = 0
    max_list = [u1_max, u2_max, u3_max, u4_max]
    min_list = [u1_min, u2_min, u3_min, u4_min]

    # Inertia, mass, arm length and gravity used by the dynamics in F.
    I = [0.0224, 0.0224, 0.0436]
    Ix = I[0]
    Iy = I[1]
    Iz = I[2]
    m = 0.65
    L_ = 0.156
    g = 9.8

    print('Starting worker-' + str(ind))

    # Calculate number of parameters of the policy (weights + biases).
    nofparams = sum(n_in * n_out + n_out
                    for n_in, n_out in zip(layers[:-1], layers[1:]))
    print('Number of Params is: ' + str(nofparams))

    center = np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])
    depth = 2.0
    incl = 1.0

    ##################### DEFINITIONS #####################
    dt = 0.05  # VAR
    num_ac = 4
    iters = int(np.abs(t_hor) / dt) * renew + 1

    ##################### INSTANTIATIONS #################
    states, y, Tt, L, l_r, lb, reg, cross_entropy = TransDef(
        "Critic", False, layers, depth, incl, center)

    # Fraction of rows whose arg-max output matches the arg-max label.
    predicted_ac = tf.argmax(Tt, dimension=1)
    labelled_ac = tf.argmax(y, dimension=1)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(predicted_ac, labelled_ac), tf.float32))

    # DEFINE OPTIMIZER
    nu = tf.placeholder(tf.float32, shape=[])  # learning rate, fed per step
    lr_schedule = PiecewiseSchedule([
        (0, 0.1),
        (10000, 0.01),
        (20000, 0.001),
        (30000, 0.0001),
    ], outside_value=0.0001)
    train_step = tf.train.RMSPropOptimizer(learning_rate=nu,
                                           momentum=mom).minimize(L)

    hot_input = tf.placeholder(tf.int64, shape=(None))
    make_hot = tf.one_hot(hot_input, 2**num_ac, on_value=1, off_value=0)

    # INITIALIZE GRAPH
    theta = tf.trainable_variables()

    # Assign ops are built once and fed through placeholders.  Calling
    # ``var.assign(value)`` inside a loop would add new nodes to the graph on
    # every call and make it grow for the whole run.
    theta_feeds = [
        tf.placeholder(var.dtype.base_dtype, shape=var.get_shape())
        for var in theta
    ]
    theta_assigns = [
        var.assign(feed) for var, feed in zip(theta, theta_feeds)
    ]

    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)

    def load_theta(params):
        """Overwrite the first ``len(params)`` trainable variables."""
        count = len(params)
        if count == 0:
            return
        sess.run(theta_assigns[:count],
                 dict(zip(theta_feeds[:count], params)))

    def V_0(x):
        """Terminal cost: infinity-norm distance to the unit box, per row."""
        return np.linalg.norm(x, ord=np.inf, axis=1, keepdims=True) - 1.0

    def p_corr(ALL_x):
        """Wrap angles into [0, 2*pi)."""
        return np.mod(ALL_x, 2.0 * np.pi)

    def F(ALL_x, opt_a, opt_b):
        """Return the state derivative for every row of ``ALL_x``.

        ``opt_a`` holds the four control inputs per row.  ``opt_b`` is
        accepted for call compatibility and is not used.
        """
        # Implemented equations (x_i is 1-based, u = opt_a):
        #   \dot x    = x_4
        #   \dot y    = x_5
        #   \dot z    = x_6
        #   \dot vx   = -(cos x_7 sin x_8 cos x_9 + sin x_7 sin x_9) u_1/m
        #   \dot vy   = -(cos x_7 sin x_8 sin x_9 - sin x_7 cos x_9) u_1/m
        #   \dot vz   = g - (cos x_7 cos x_8) u_1/m
        #   \dot phi  = x_10 + sin x_7 tan(x_8) x_11 + cos x_7 tan(x_8) x_12
        #   \dot the  = cos x_7 x_11 - sin x_7 x_12
        #   \dot psi  = (sin x_7/cos x_8) x_11 + (cos x_7/cos x_8) x_12
        #   \dot wphi = x_11 x_12 (I_y - I_z)/I_x + L/I_x u_2
        #   \dot wthe = x_10 x_12 (I_z - I_x)/I_y + L/I_y u_3
        #   \dot wpsi = x_10 x_11 (I_x - I_y)/I_z + L/I_z u_4
        cos_phi = np.cos(ALL_x[:, 6, None])
        sin_phi = np.sin(ALL_x[:, 6, None])
        cos_the = np.cos(ALL_x[:, 7, None])
        sin_the = np.sin(ALL_x[:, 7, None])
        tan_the = np.tan(ALL_x[:, 7, None])
        cos_psi = np.cos(ALL_x[:, 8, None])
        sin_psi = np.sin(ALL_x[:, 8, None])

        w_phi = ALL_x[:, 9, None]
        w_the = ALL_x[:, 10, None]
        w_psi = ALL_x[:, 11, None]

        u1 = opt_a[:, 0, None]
        u2 = opt_a[:, 1, None]
        u3 = opt_a[:, 2, None]
        u4 = opt_a[:, 3, None]

        col1 = ALL_x[:, 3, None]
        col2 = ALL_x[:, 4, None]
        col3 = ALL_x[:, 5, None]
        col4 = -(cos_phi * sin_the * cos_psi + sin_phi * sin_psi) * u1 / m
        # Fixed: the first product uses sin(psi); it previously used
        # cos(psi), which disagreed with the documented vy equation.
        col5 = -(cos_phi * sin_the * sin_psi - sin_phi * cos_psi) * u1 / m
        col6 = g - (cos_phi * cos_the) * u1 / m
        col7 = w_phi + sin_phi * tan_the * w_the + cos_phi * tan_the * w_psi
        col8 = cos_phi * w_the - sin_phi * w_psi
        # Fixed: divide by cos(theta); it previously divided sin(phi) by
        # cos(phi), which disagreed with the documented psi equation.
        col9 = (sin_phi / cos_the) * w_the + (cos_phi / cos_the) * w_psi
        col10 = w_the * w_psi * ((Iy - Iz) / Ix) + (L_ / Ix) * u2
        col11 = w_phi * w_psi * ((Iz - Ix) / Iy) + (L_ / Iy) * u3
        # NOTE(review): the original equation notes wrote this input term as
        # 1/I_z * u_4 (no arm length); the code has always used L_/Iz.
        # Left unchanged - confirm which one is intended.
        col12 = w_phi * w_the * ((Ix - Iy) / Iz) + (L_ / Iz) * u4

        return np.concatenate((col1, col2, col3, col4, col5, col6, col7,
                               col8, col9, col10, col11, col12), axis=1)

    ####################### RECURSIVE FUNC ####################

    def RK4(ALL_x, dtt, opt_a, opt_b):
        """Advance ``ALL_x`` by ``dtt`` with one classical Runge-Kutta step.

        The controls are held constant over the step and the Euler angles
        (columns 6-8) are wrapped after every intermediate state.
        """

        def advance(slope, step):
            moved = ALL_x + np.multiply(step, slope)
            moved[:, [6, 7, 8]] = p_corr(moved[:, [6, 7, 8]])
            return moved

        k1 = F(ALL_x, opt_a, opt_b)
        k2 = F(advance(k1, dtt / 2.0), opt_a, opt_b)
        k3 = F(advance(k2, dtt / 2.0), opt_a, opt_b)
        k4 = F(advance(k3, dtt), opt_a, opt_b)
        return advance(k1 + 2.0 * k2 + 2.0 * k3 + k4, dtt / 6.0)

    # Every min/max combination of the num_ac control channels, in the order
    # produced by itertools.product; a one-hot index selects one row.
    perms = list(itertools.product([-1, 1], repeat=num_ac))
    true_ac_list = [[
        float(upper if sign == 1 else lower)
        for sign, lower, upper in zip(ac_tuple, min_list, max_list)
    ] for ac_tuple in perms]

    def Hot_to_Cold(hots, ac_list):
        """Map one-hot (or score) rows to the matching control vectors."""
        chosen = hots.argmax(axis=1)
        return np.asarray([ac_list[idx] for idx in chosen])

    def getPI(ALL_x, F_PI=None, subSamples=1):
        """Label every state with the best first action.

        Each of the ``2**num_ac`` actions is applied for one ``dt``; the
        resulting states are then rolled forward through the stored policies
        in ``F_PI`` (in list order) and the action with the smallest terminal
        ``V_0`` of the position is chosen.

        Returns:
            A tuple ``(one_hot_labels, best_values, 0)``; the trailing ``0``
            is a placeholder kept for the callers' three-way unpacking.
        """
        # NOTE(review): the original also accumulated the minimum of V_0
        # along the trajectory but never used it; the labels are based on
        # the terminal value only.  Confirm which one is intended.
        if F_PI is None:
            F_PI = []

        current_params = sess.run(theta)
        n_states = ALL_x.shape[0]
        sub_dt = dt / float(subSamples)

        try:
            next_states = []
            for ac in true_ac_list:
                opt_a = np.asarray(ac) * np.ones([n_states, 1])
                Snx = ALL_x
                for _ in range(subSamples):
                    Snx = RK4(Snx, sub_dt, opt_a, None)
                next_states.append(Snx)
            next_states = np.concatenate(next_states, axis=0)

            for params in F_PI:
                load_theta(params)  # Reload pi*(x,t+dt) parameters
                hots = sess.run(Tt, {states: ConvCosSin(next_states)})
                opt_a = Hot_to_Cold(hots, true_ac_list)
                for _ in range(subSamples):
                    next_states = RK4(next_states, sub_dt, opt_a, None)
        finally:
            # Always put the weights being trained back, even on error.
            load_theta(current_params)

        values_ = V_0(next_states[:, [0, 1, 2]])
        # Rows were stacked action-major; reshape to (state, action).
        compare_vals_ = values_.reshape([-1, n_states]).T
        index_best_a_ = compare_vals_.argmin(axis=1)
        values_ = np.min(compare_vals_, axis=1, keepdims=True)

        return sess.run(make_hot, {hot_input: index_best_a_}), values_, 0

    def ConvCosSin(ALL_x):
        """Build network inputs: scaled pos/vel/rates plus cos/sin of angles."""
        cos_phi = np.cos(ALL_x[:, 6, None])
        sin_phi = np.sin(ALL_x[:, 6, None])
        cos_the = np.cos(ALL_x[:, 7, None])
        sin_the = np.sin(ALL_x[:, 7, None])
        cos_psi = np.cos(ALL_x[:, 8, None])
        sin_psi = np.sin(ALL_x[:, 8, None])
        pos = ALL_x[:, [0, 1, 2]] / 5.0
        vel = ALL_x[:, [3, 4, 5]] / 10.0
        arate = ALL_x[:, [9, 10, 11]] / 30.0
        return np.concatenate((pos, vel, arate, cos_phi, sin_phi, cos_the,
                               sin_the, cos_psi, sin_psi), axis=1)

    def sample_states(count, vel_scale=1.0):
        """Draw ``count`` random states; all columns start uniform in [-5, 5]."""
        xs = np.random.uniform(-5.0, 5.0, (count, layers[0] - 3))
        xs[:, [3, 4, 5]] = xs[:, [3, 4, 5]] * vel_scale
        xs[:, [6, 7]] = np.mod(xs[:, [6, 7]] * np.pi / 20.0, 2.0 * np.pi)
        xs[:, [8]] = xs[:, [8]] * np.pi / 5.0 + np.pi
        return xs

    # *************************************************************************
    # ============================= MAIN LOOP ================================
    # *************************************************************************
    ALL_PI = []
    # Only the first progress line shows this value; the loop then pins the
    # learning rate to a constant.
    nunu = lr_schedule.value(0)

    for i in range(iters):
        if np.mod(i, renew) == 0:
            refresh = i != 0
            if refresh:
                # Freeze the policy just trained; newest goes first.
                ALL_PI.insert(0, sess.run(theta))

            t = time.time()
            ALL_x = sample_states(nrolls)
            PI, _, _ = getPI(ALL_x, ALL_PI, subSamples=1)
            pre_ALL_x = ConvCosSin(ALL_x)
            elapsed = time.time() - t
            print("Compute Data Time = " + str(elapsed))

            # NOTE(review): the original doubled the test-set velocities on
            # every refresh after the first but not at i == 0; preserved
            # as-is - confirm this asymmetry is intended.
            ALL_x_ = sample_states(nrolls // 100, 2.0 if refresh else 1.0)
            PI_, _, _ = getPI(ALL_x_, ALL_PI, subSamples=1)
            pre_ALL_x_ = ConvCosSin(ALL_x_)

            if refresh:
                print('Again.')

        # |||||||||||| ---- PRINT ----- ||||||||||||
        if np.mod(i, 200) == 0:
            train_acc = sess.run(accuracy, {states: pre_ALL_x, y: PI})
            test_acc = sess.run(accuracy, {states: pre_ALL_x_, y: PI_})
            print(str(i) + ") | TR_ACC = " + str(train_acc) +
                  " | TE_ACC = " + str(test_acc) + " | Lerning Rate = " +
                  str(nunu))

        nunu = 0.01
        tmp = np.random.randint(len(ALL_x), size=bts)
        sess.run(train_step,
                 feed_dict={
                     states: pre_ALL_x[tmp],
                     y: PI[tmp],
                     nu: nunu
                 })

    with open("policies6Dreach_h50.pkl", "wb") as out_file:
        pickle.dump(ALL_PI, out_file)