def learn_tran_regression(num_learners, tree_depth):
    """Learn a transition function from data using AdaBoost.

    One regressor is trained per state dimension: each maps the
    concatenated (state_t, action_t) row to state_{t+1}[dimension].

    Args:
        num_learners: number of boosting rounds handed to adaboost_reg.
        tree_depth: depth of each weak decision-tree learner.

    Returns:
        List of fitted regressors, one per state dimension.
    """
    #Load all data
    examples = loadFile2()
    examples2 = loadFile()
    tot_examples = examples + examples2
    #Folds get generated here
    train_examples = tot_examples
    #test_examples = tot_examples[-2::]
    # Dimensionality is taken from the first loaded trajectory.
    dimensions = examples[0].states.shape[1]
    # Build X which is the same for all regressors: each row is the
    # state/action at time t; y holds the states at time t+1.
    X = np.concatenate([
        np.hstack((example.states[:-1, :], example.actions[:-1, :]))
        for example in train_examples
    ], axis=0)
    y = np.concatenate([example.states[1:, :] for example in train_examples],
                       axis=0)
    # Fix: dropped the dead `estimators = []` assignment that was
    # immediately overwritten by this comprehension.
    return [
        adaboost_reg(X, y[:, i], num_learners, tree_depth)
        for i in range(dimensions)
    ]
def learn_correction(num_learners, tree_depth):
    """Learn a correction model from data using AdaBoost.

    Fits one regressor per state dimension on the step differences
    produced by get_dataset, and returns the list of regressors.
    """

    def plot_fit():
        # Debug helper: overlay predicted (red) vs. observed test
        # differences for the first two output dimensions. Reads
        # diff_test and fit from the enclosing scope.
        f, axarr = plt.subplots(2, sharex=True)
        x = range(diff_test.shape[0])
        axarr[0].plot(x, diff_test[:, 0])
        axarr[0].plot(x, fit[0, :], color='red', alpha=0.6)
        axarr[0].set_xlabel("Data Sample")
        axarr[0].set_ylabel("sin(angle difference)")
        axarr[1].plot(x, diff_test[:, 1])
        axarr[1].plot(x, fit[1, :], color='red', alpha=0.6)
        axarr[1].set_xlabel("Data Sample")
        axarr[1].set_ylabel("Distance Difference")

    #learns a transition function from data using adaboost.
    #------------------------------------------------------
    #Load all data
    examples = loadFile3("data/UPO/Experiments Folder/2014-11-17 11.08.31 AM/")
    examples2 = loadFile3(
        "data/UPO/Experiments Folder/2014-11-28 01.22.03 PM/")
    # Only the first 12 runs of each experiment are used.
    tot_examples = examples[0:12] + examples2[0:12]
    #Folds get generated here
    train_examples = tot_examples
    # NOTE(review): the test fold is a subset of the training fold
    # (first 5 of the same runs) — confirm this overlap is intentional.
    test_examples = tot_examples[0:5]
    X_train, diff_train = get_dataset(train_examples)
    X_test, diff_test = get_dataset(test_examples)
    #plt.show()
    #test_examples = tot_examples[-2::]
    dimensions = examples[0].states.shape[1]
    print 'Dimentions', dimensions
    #Build X which is the same for all regressors
    #y =np.concatenate([example.states[1:,:] for example in train_examples],axis = 0)
    # One AdaBoost regressor per state dimension, fitted on that
    # dimension's differences.
    estimators = [
        adaboost_reg(X_train, diff_train[:, i], num_learners, tree_depth)
        for i in range(dimensions)
    ]
    fit = []
    #for example in train_examples:
    #    for n, step in enumerate(example.states[:-1]):
    #        fit.append(predict_next(step,exax]mple.actions[n+1],estimators))
    # Predict the test-set differences; fit has shape (dimensions, samples).
    fit = np.array([estimator.predict(X_test) for estimator in estimators])
    print fit.shape
    #fit[:,0] =np.arcsin(fit[:,0])*2
    #plot_fit()
    return estimators
def learn_tran_regression(num_learners, tree_depth):
    """Train per-dimension AdaBoost transition regressors.

    Each regressor maps the concatenated (state, action) at time t to
    one coordinate of the state at time t+1.
    """
    # NOTE(review): this duplicates an identical learn_tran_regression
    # definition earlier in this file; the later definition shadows it.
    # Load every recorded trajectory from both data sources.
    first_batch = loadFile2()
    second_batch = loadFile()
    train_examples = first_batch + second_batch
    #test_examples = train_examples[-2::]
    # Output dimensionality comes from the first trajectory.
    dimensions = first_batch[0].states.shape[1]

    # Assemble the shared design matrix (one row per transition) and
    # the matching next-state targets.
    feature_chunks = []
    target_chunks = []
    for demo in train_examples:
        feature_chunks.append(
            np.hstack((demo.states[:-1, :], demo.actions[:-1, :])))
        target_chunks.append(demo.states[1:, :])
    X = np.concatenate(feature_chunks, axis=0)
    y = np.concatenate(target_chunks, axis=0)

    # Fit one boosted regressor per output dimension.
    estimators = []
    for dim in range(dimensions):
        estimators.append(adaboost_reg(X, y[:, dim], num_learners, tree_depth))
    return estimators
def learn_correction(num_learners, tree_depth):
    """Learn a correction model from data using AdaBoost.

    Fits one regressor per state dimension on the step differences
    produced by get_dataset, and returns the list of regressors.
    NOTE(review): this duplicates an identical learn_correction
    definition earlier in this file; the later definition shadows it.
    """

    def plot_fit():
        # Debug helper: overlay predicted (red) vs. observed test
        # differences for the first two output dimensions. Reads
        # diff_test and fit from the enclosing scope.
        f, axarr = plt.subplots(2, sharex=True)
        x = range(diff_test.shape[0])
        axarr[0].plot(x, diff_test[:, 0])
        axarr[0].plot(x, fit[0, :], color='red', alpha=0.6)
        axarr[0].set_xlabel("Data Sample")
        axarr[0].set_ylabel("sin(angle difference)")
        axarr[1].plot(x, diff_test[:, 1])
        axarr[1].plot(x, fit[1, :], color='red', alpha=0.6)
        axarr[1].set_xlabel("Data Sample")
        axarr[1].set_ylabel("Distance Difference")

    #learns a transition function from data using adaboost.
    #------------------------------------------------------
    #Load all data
    examples = loadFile3("data/UPO/Experiments Folder/2014-11-17 11.08.31 AM/")
    examples2 = loadFile3("data/UPO/Experiments Folder/2014-11-28 01.22.03 PM/")
    # Only the first 12 runs of each experiment are used.
    tot_examples = examples[0:12] + examples2[0:12]
    #Folds get generated here
    train_examples = tot_examples
    # NOTE(review): the test fold is a subset of the training fold
    # (first 5 of the same runs) — confirm this overlap is intentional.
    test_examples = tot_examples[0:5]
    X_train, diff_train = get_dataset(train_examples)
    X_test, diff_test = get_dataset(test_examples)
    #plt.show()
    #test_examples = tot_examples[-2::]
    dimensions = examples[0].states.shape[1]
    print 'Dimentions', dimensions
    #Build X which is the same for all regressors
    #y =np.concatenate([example.states[1:,:] for example in train_examples],axis = 0)
    # One AdaBoost regressor per state dimension, fitted on that
    # dimension's differences.
    estimators = [
        adaboost_reg(X_train, diff_train[:, i], num_learners, tree_depth)
        for i in range(dimensions)
    ]
    fit = []
    #for example in train_examples:
    #    for n, step in enumerate(example.states[:-1]):
    #        fit.append(predict_next(step,exax]mple.actions[n+1],estimators))
    # Predict the test-set differences; fit has shape (dimensions, samples).
    fit = np.array([estimator.predict(X_test) for estimator in estimators])
    print fit.shape
    #fit[:,0] =np.arcsin(fit[:,0])*2
    #plot_fit()
    return estimators
# NOTE(review): this chunk appears to be the interior of a larger
# gradient-step routine; model, policy_test, policy_ref, r_initial,
# start_states, steps, states, actions, features and the *_ref
# frequencies are defined above the visible region.
# Roll the test policy forward to get visitation frequencies.
state_freq_test, state_action_frequencies_test = forward_sa(
    policy_test, model.transition, start_states, steps)
# L1 distances between current and reference reward / policy.
reward_diff = np.sum(np.sum(np.absolute(model.reward_f - r_initial)))
policy_diff = np.sum(np.sum(np.absolute(policy_test - policy_ref)))
print "Difference in Reward --->", reward_diff
print "Difference in Policy --->", policy_diff
# Flatten features to one row per (state, action) pair; the target is
# the frequency gap between reference and test policies.
X = np.reshape(model.feature_f,
               (disc_model.tot_states * disc_model.tot_actions, 4))
Y = (state_action_frequencies_ref - state_action_frequencies_test).reshape(
    (disc_model.tot_states * disc_model.tot_actions))
# X = X[np.nonzero(Y)[0]]
# Y = Y[np.nonzero(Y)[0]]
print np.nonzero(Y)[0].shape
print Y.shape
print "starting adaboost"
# Regress the frequency gap on the features (100 learners, depth 10).
ht = adaboost_reg(X, Y, 100, 10)
print "finished adaboost"
print "reducing gradient"
# Multiplicative exponentiated-gradient update of the reward table.
model.reward_f = (
    model.reward_f.reshape(states * actions)
    * np.exp(-1.0 * ht.predict(
        model.feature_f.reshape(states * actions, features)))
).reshape(actions, states)
print "finished gradient"


def sign(x):
    # Return +/-1 (as a float, via copysign) matching the sign of x,
    # or 0 for x == 0.
    if x != 0:
        return math.copysign(1, x)
    else:
        return 0
# NOTE(review): this chunk starts mid-sequence; `example` and the
# initialization of `avg_counts` come from a loop above the visible
# region. Accumulate and average the empirical counts.
avg_counts += example["counts"]
avg_counts /= len(examples)
# Flatten features to one row per (state, action) pair.
X = np.reshape(model.feature_f,
               (disc_model.tot_states * disc_model.tot_actions, 4))
Y = np.zeros([disc_model.tot_states * disc_model.tot_actions])


def sign(x):
    # Return +/-1 (as a float, via copysign) matching the sign of x,
    # or 0 for x == 0.
    if x != 0:
        return math.copysign(1, x)
    else:
        return 0


#Y = np.array(map(int,map(sign,(avg_counts - state_action_frequencies).reshape((disc_model.tot_states*disc_model.tot_actions)))))
# Target: gap between demonstrated counts and model frequencies.
Y = (avg_counts - state_action_frequencies).reshape(
    (disc_model.tot_states * disc_model.tot_actions))
# Keep only pairs with a non-zero gap.
X = X[np.nonzero(Y)[0]]
Y = Y[np.nonzero(Y)[0]]
ht = adaboost_reg(X, Y, 200, 4)
# Multiplicative per-entry update of the reward table, scaled by 0.001.
for i in range(disc_model.tot_states):
    for j in range(disc_model.tot_actions):
        print "Before--->", model.reward_f[j, i]
        model.reward_f[j, i] = model.reward_f[j, i] * np.exp(
            -0.001 * ht.predict([model.feature_f[j, i]]))
        print "After---->", model.reward_f[j, i]
print np.amax(model.reward_f)
policy_test, model.transition, start_states, steps) reward_diff = np.sum(np.sum(np.absolute(model.reward_f - r_initial))) policy_diff = np.sum(np.sum(np.absolute(policy_test - policy_ref))) print "Difference in Reward --->", reward_diff print "Difference in Policy --->", policy_diff X = np.reshape(model.feature_f, (disc_model.tot_states * disc_model.tot_actions, 4)) Y = (state_action_frequencies_ref - state_action_frequencies_test).reshape( (disc_model.tot_states * disc_model.tot_actions)) #X = X[np.nonzero(Y)[0]] #Y = Y[np.nonzero(Y)[0]] print np.nonzero(Y)[0].shape print Y.shape print "starting adaboost" ht = adaboost_reg(X, Y, 100, 10) print "finished adaboost" print "reducing gradient" model.reward_f = (model.reward_f.reshape(states * actions) * np.exp( -1. * ht.predict(model.feature_f.reshape(states * actions, features))) ).reshape(actions, states) print "finished gradient" def sign(x): if x != 0: return math.copysign(1, x) else: return 0
# NOTE(review): this chunk starts mid-sequence; `example` and the
# initialization of `avg_counts` come from a loop above the visible
# region. It duplicates an earlier identical reward-update chunk.
avg_counts += example["counts"]
avg_counts /= len(examples)
# Flatten features to one row per (state, action) pair.
X = np.reshape(model.feature_f,
               (disc_model.tot_states * disc_model.tot_actions, 4))
Y = np.zeros([disc_model.tot_states * disc_model.tot_actions])


def sign(x):
    # Return +/-1 (as a float, via copysign) matching the sign of x,
    # or 0 for x == 0.
    if x != 0:
        return math.copysign(1, x)
    else:
        return 0


#Y = np.array(map(int,map(sign,(avg_counts - state_action_frequencies).reshape((disc_model.tot_states*disc_model.tot_actions)))))
# Target: gap between demonstrated counts and model frequencies.
Y = (avg_counts - state_action_frequencies).reshape(
    (disc_model.tot_states * disc_model.tot_actions))
# Keep only pairs with a non-zero gap.
X = X[np.nonzero(Y)[0]]
Y = Y[np.nonzero(Y)[0]]
ht = adaboost_reg(X, Y, 200, 4)
# Multiplicative per-entry update of the reward table, scaled by 0.001.
for i in range(disc_model.tot_states):
    for j in range(disc_model.tot_actions):
        print "Before--->", model.reward_f[j, i]
        model.reward_f[j, i] = model.reward_f[j, i] * np.exp(
            -0.001 * ht.predict([model.feature_f[j, i]]))
        print "After---->", model.reward_f[j, i]
print np.amax(model.reward_f)