Example #1
def learn_tran_regression(num_learners, tree_depth):
    """Learns a transition function from data using AdaBoost regression."""
    # Load all data
    examples = loadFile2()
    examples2 = loadFile()
    tot_examples = examples + examples2
    # Folds get generated here
    train_examples = tot_examples
    #test_examples = tot_examples[-2::]
    dimensions = examples[0].states.shape[1]
    # Build X, which is shared by all regressors: each row is a state
    # concatenated with the action taken in it; y holds the next states.
    X = np.concatenate([
        np.hstack((example.states[:-1, :], example.actions[:-1, :]))
        for example in train_examples
    ], axis=0)
    y = np.concatenate([example.states[1:, :] for example in train_examples],
                       axis=0)
    # One AdaBoost regressor per state dimension.
    estimators = [
        adaboost_reg(X, y[:, i], num_learners, tree_depth)
        for i in range(dimensions)
    ]
    return estimators
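
Every example on this page calls a helper adaboost_reg(X, y, num_learners, tree_depth) whose definition is not shown. A minimal sketch of what it could look like, assuming scikit-learn's AdaBoostRegressor with a depth-limited regression tree as the weak learner (this reconstruction is a guess, not the original code):

from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor


def adaboost_reg(X, y, num_learners, tree_depth):
    # Hypothetical reconstruction: boost num_learners depth-limited trees.
    reg = AdaBoostRegressor(DecisionTreeRegressor(max_depth=tree_depth),
                            n_estimators=num_learners)
    reg.fit(X, y)
    return reg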
Example #2
def learn_correction(num_learners, tree_depth):
    """Learns a correction model from data using AdaBoost regression."""

    def plot_fit():
        f, axarr = plt.subplots(2, sharex=True)
        x = range(diff_test.shape[0])
        axarr[0].plot(x, diff_test[:, 0])
        axarr[0].plot(x, fit[0, :], color='red', alpha=0.6)
        axarr[0].set_xlabel("Data Sample")
        axarr[0].set_ylabel("sin(angle difference)")
        axarr[1].plot(x, diff_test[:, 1])
        axarr[1].plot(x, fit[1, :], color='red', alpha=0.6)
        axarr[1].set_xlabel("Data Sample")
        axarr[1].set_ylabel("Distance Difference")

    # Load all data
    examples = loadFile3("data/UPO/Experiments Folder/2014-11-17 11.08.31 AM/")
    examples2 = loadFile3(
        "data/UPO/Experiments Folder/2014-11-28 01.22.03 PM/")
    tot_examples = examples[0:12] + examples2[0:12]

    # Folds get generated here
    train_examples = tot_examples
    test_examples = tot_examples[0:5]
    X_train, diff_train = get_dataset(train_examples)
    X_test, diff_test = get_dataset(test_examples)
    dimensions = examples[0].states.shape[1]
    print 'Dimensions', dimensions
    # One AdaBoost regressor per correction dimension.
    estimators = [
        adaboost_reg(X_train, diff_train[:, i], num_learners, tree_depth)
        for i in range(dimensions)
    ]
    fit = []
    #for example in train_examples:
    #    for n, step in enumerate(example.states[:-1]):
    #        fit.append(predict_next(step, example.actions[n + 1], estimators))
    # Predictions on the held-out examples: one row per estimator.
    fit = np.array([estimator.predict(X_test) for estimator in estimators])
    print fit.shape
    #fit[:, 0] = np.arcsin(fit[:, 0]) * 2

    #plot_fit()

    return estimators
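
The commented-out loop above refers to a predict_next helper that is not defined on this page. A minimal sketch of what it might do, assuming one estimator per state dimension, each fed the current state concatenated with the action (this helper is a guess, not the original implementation):

import numpy as np


def predict_next(state, action, estimators):
    # Hypothetical reconstruction: predict each component of the next state
    # from the concatenated (state, action) row.
    x = np.hstack((state, action)).reshape(1, -1)
    return np.array([estimator.predict(x)[0] for estimator in estimators])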
Example #3
    state_freq_test, state_action_frequencies_test = forward_sa(policy_test, model.transition, start_states, steps)
    reward_diff = np.sum(np.sum(np.absolute(model.reward_f - r_initial)))
    policy_diff = np.sum(np.sum(np.absolute(policy_test - policy_ref)))
    print "Difference in Reward --->", reward_diff
    print "Difference in Policy --->", policy_diff
    # One feature row per (state, action) pair; the target is the gap between
    # the reference and test state-action visitation frequencies.
    X = np.reshape(model.feature_f, (disc_model.tot_states * disc_model.tot_actions, 4))
    Y = (state_action_frequencies_ref - state_action_frequencies_test).reshape(
        (disc_model.tot_states * disc_model.tot_actions)
    )
    # X = X[np.nonzero(Y)[0]]
    # Y = Y[np.nonzero(Y)[0]]

    print np.nonzero(Y)[0].shape
    print Y.shape
    print "starting adaboost"
    ht = adaboost_reg(X, Y, 100, 10)
    print "finished adaboost"
    print "reducing gradient"
    # Multiplicative reward update: scale every reward entry by
    # exp(-ht(features)) and restore the (actions, states) shape.
    model.reward_f = (
        model.reward_f.reshape(states * actions)
        * np.exp(-1.0 * ht.predict(model.feature_f.reshape(states * actions, features)))
    ).reshape(actions, states)
    print "finished gradient"


def sign(x):
    """Returns the sign of x as -1, 0, or 1."""
    if x != 0:
        return math.copysign(1, x)
    else:
        return 0
Example #4
    avg_counts += example["counts"]
avg_counts /= len(examples)

# One feature row per (state, action) pair.
X = np.reshape(model.feature_f,
               (disc_model.tot_states * disc_model.tot_actions, 4))
Y = np.zeros([disc_model.tot_states * disc_model.tot_actions])


def sign(x):
    """Returns the sign of x as -1, 0, or 1."""
    if x != 0:
        return math.copysign(1, x)
    else:
        return 0


#Y = np.array(map(int, map(sign, (avg_counts - state_action_frequencies).reshape((disc_model.tot_states*disc_model.tot_actions)))))
# Target: gap between the average demonstrated counts and the current
# state-action visitation frequencies, restricted to the non-zero entries.
Y = (avg_counts - state_action_frequencies).reshape(
    (disc_model.tot_states * disc_model.tot_actions))
X = X[np.nonzero(Y)[0]]
Y = Y[np.nonzero(Y)[0]]

ht = adaboost_reg(X, Y, 200, 4)

# Multiplicative reward update, one state-action entry at a time.
for i in range(disc_model.tot_states):
    for j in range(disc_model.tot_actions):
        print "Before--->", model.reward_f[j, i]
        model.reward_f[j, i] = model.reward_f[j, i] * np.exp(
            -0.001 * ht.predict([model.feature_f[j, i]]))
        print "After---->", model.reward_f[j, i]
print np.amax(model.reward_f)
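
The double loop above applies the same multiplicative reward update as Example #3, one state-action entry at a time and with a smaller step size (0.001 instead of 1.0); the reshape-based expression there is the vectorized equivalent.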