Example #1
def run_in_batches():

    ## load test data
    X_train, y_train, labels = gen_data()
    X_test = gen_data(test=True, labels=labels)

    ## prep test data
    X_test, features, current_products = prep_test_data(
        X_train, y_train, X_test)

    ## setup training
    sess = None
    step = .02
    ## divide training in batches
    for batch in arange(0, 1, step):
        print("{:.2f}-{:.2f}".format(batch, batch + step))
        ## load partial train data
        X_batch, y_batch = gen_batch(X_train, y_train, batch, step, labels)
        ## prep data
        X_batch, y_batch = prep_train_data(X_batch, y_batch, features)
        ## train
        sess = neural_net(sess, X_batch, y_batch)  # ok

    ## once finished, predict
    preds = neural_net(sess, X_test, pred=True)  # ok

    ## finally export
    get_added_products(preds, X_test, labels, current_products)
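Note: the arange loop above walks the interval [0, 1) in fractional windows of width step, so step = .02 yields 50 passes over successive 2% slices of the training data. A standalone sketch of how such a window could map to row indices; gen_batch's actual slicing is not shown above, and n_rows here is a hypothetical dataset size:

import numpy as np

step = .02
n_rows = 10000  # hypothetical dataset size, for illustration only
for batch in np.arange(0, 1, step):
    # Each window covers the fraction [batch, batch + step) of the rows.
    lo = int(round(batch * n_rows))
    hi = int(round((batch + step) * n_rows))
    print("rows %d-%d" % (lo, hi - 1))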
Example #2
def __init__(self, epoch=10, batch_size=10, epsilon=1, gamma=.8):
    self.epoch = epoch
    self.batch_size = batch_size
    self.epsilon = epsilon
    self.gamma = gamma
    self.model = neural_net([15, 16])
    self.experience = []
Example #3
def launch_learn(params, new=True):
    filename = params_to_filename(params)
    print("Trying %s" % filename)
    if not os.path.isfile('results/sonar-frames/loss_data-' + filename +
                          '.csv'):
        open('results/sonar-frames/loss_data-' + filename + '.csv',
             'a').close()
        print("Starting test.")
        # Train.
        if new:
            model = neural_net(NUM_INPUT, params['nn'])
            train_net(model, params)
        else:
            model = neural_net(NUM_INPUT, [128, 128],
                               'saved-models/128-128-64-50000-50000.h5')
            train_net(model, params)
    else:
        print("Already tested.")
Example #4
def IRL_helper(weights, path, trainFrames, i):
    nn_param = [164, 150]
    params = {
        "batchSize": 100,
        "buffer": 50000,
        "nn": nn_param
    }
    model = neural_net(NUM_INPUT, nn_param)
    train_net(model, params, weights, path, trainFrames, i)
Example #5
def IRL_helper(weights, path, trainFrames, i):
    nn_param = [164, 150]
    params = {
        "batchSize": 100,
        "buffer": 50000,
        "nn": nn_param
    }
    saved_model = 'saved-models_red/evaluatedPolicies/2-164-150-100-50000-100000.h5'
    model = neural_net(NUM_INPUT, nn_param, saved_model)
    train_net(model, params, weights, path, trainFrames, i)
Example #6

def __init__(self, params=None):
    # A mutable dict default would be shared across instances, so build it here.
    if params is None:
        params = {"batchSize": 64, "buffer": 50000, "nn": [128, 128]}
    self._filename = str(params['nn'][0]) + '-' + str(params['nn'][1]) + '-' + \
        str(params['batchSize']) + '-' + str(params['buffer'])
    self.observe = 1000  # Number of frames to observe before training.
    self.epsilon = 1
    self.train_frames = 100000  # Number of frames to play.
    self.batchSize = params['batchSize']
    self.buffer = params['buffer']
    self.t = 0  # number of iterations
    self.replay = []  # stores tuples of (S, A, R, S').
    self.loss_log = []
    self.model = neural_net(5, [128, 128])
Example #7
def launch_learn(params):
    filename = params_to_filename(params)
    print("Trying %s" % filename)
    if not os.path.isfile('results/sonar-frames/loss_data-' + filename +
                          '.csv'):
        open('results/sonar-frames/loss_data-' + filename + '.csv',
             'a').close()
        print("Starting test.")
        model = neural_net(NUM_INPUT, params['nn'])
        train_net(model, params)
    else:
        print("Already tested.")
Example #8
def launch_learn(params, new=True):
    filename = params_to_filename(params)
    print("Trying %s" % filename)
    # Make sure we haven't run this one.
    if not os.path.isfile('results/sonar-frames/loss_data-' + filename +
                          '.csv'):
        # Create file so we don't double test when we run multiple
        # instances of the script at the same time.
        open('results/sonar-frames/loss_data-' + filename + '.csv',
             'a').close()
        print("Starting test.")
        # Train.
        if new:
            model = neural_net(NUM_INPUT, params['nn'])
            train_net(model, params)
        else:
            model = neural_net(NUM_INPUT, [128, 128],
                               'saved-models/128-128-64-50000-50000.h5')
            train_net(model, params)
    else:
        print("Already tested.")
Example #9

	def initial_pred(self):
		'''
		Initial prediction to get a sense of accuracy before submitting
		'''

		self.csv_train = csv.reader(open('train.csv'))

		# Split and set data for training and testing
		total_features = []
		total_labels = []

		for row in self.csv_train:
			pclass = row[2]
			age = row[5]
			sex = row[6]
			parch = row[7]
			fare = row[9]

			# handle all null values
			if sex:
				if sex == 'female':
					sex = 1
				else:
					sex = 0
			else:
				sex = row[1]

			if not age:
				age = 0

			if not parch:
				parch = 0

			if not fare:
				fare = 0

			if not pclass:
				pclass = 0

			total_features.append([pclass, sex, age, parch, fare])
			total_labels.append(row[1])


		del total_features[0]
		del total_labels[0]

		# convert to floats
		total_features = [list(map(float, i)) for i in total_features]
		total_labels = list(map(float, total_labels))

		return neural_net(total_features, [total_labels])
Example #10

def launch_learn(params):
    filename = params_to_filename(params)
    print("Trying %s" % filename)
    # Make sure we haven't run this one.
    if not os.path.isfile('results/sonar-frames/loss_data-' + filename + '.csv'):
        # Create file so we don't double test when we run multiple
        # instances of the script at the same time.
        open('results/sonar-frames/loss_data-' + filename + '.csv', 'a').close()
        print("Starting test.")
        # Train.
        best_action_model = neural_net(NUM_INPUT, params['nn'])
        train_net(best_action_model, params)
    else:
        print("Already tested.")
Example #11

def launch_learn(params):
    filename = params_to_filename(params)
    print("Trying %s" % filename)
    # Make sure we haven't run this one.
    if not os.path.isfile('results/logs/loss_data-' + filename +
                          '-simple.csv'):
        # Create file so we don't double test when we run multiple
        # instances of the script at the same time.
        open('results/logs/loss_data-' + filename + '-simple.csv', 'a').close()
        print("Starting test.")
        # Train.
        model = neural_net(NUM_INPUT, params['nn'])
        train(model, params)
    else:
        print("Already tested.")
Example #12
def on_request(ch, method, props, body):
    print(body)
    distMat = str(body).split(",")[1:-1]
    distMat = list(map(int, distMat))
    print(distMat)

    saved_model = 'saved-models/128-128-64-50000-25000.h5'
    num_sensors = 3
    model = neural_net(num_sensors, [128, 128], saved_model)

    action = getAction(distMat)  #get action here
    response = action

    ch.basic_publish(exchange='',
                     routing_key=props.reply_to,
                     properties=pika.BasicProperties(correlation_id = \
                                                         props.correlation_id),
                     body=str(response))
    ch.basic_ack(delivery_tag=method.delivery_tag)
Example #13
import timeit

NUM_INPUT = 11


def play(model):

    game_state = flappy.Game()
    game_state.init_elements()

    # Do nothing to get initial.
    state, _ = game_state.frame_step(0)

    # Move.
    while True:

        # Choose action.
        action = (np.argmax(model.predict(np.array([state]))[0]))

        # Take action.
        state, reward = game_state.frame_step(action)

        if reward == -1000:
            break


if __name__ == "__main__":
    saved_model = 'results/saved-models/256-256-512-50000-ver19-300000.h5'
    model = neural_net(NUM_INPUT, [256, 256], saved_model)
    play(model)
if __name__ == "__main__":
    if TUNING:
        param_list = []
        nn_params = [[164, 150], [256, 256],
                     [512, 512], [1000, 1000]]
        batchSizes = [40, 100, 400]
        buffers = [10000, 50000]

        for nn_param in nn_params:
            for batchSize in batchSizes:
                for buffer in buffers:
                    params = {
                        "batchSize": batchSize,
                        "buffer": buffer,
                        "nn": nn_param
                    }
                    param_list.append(params)

        for param_set in param_list:
            launch_learn(param_set)

    else:
        nn_param = [240, 160, 80]
        params = {
            "batchSize": 100,
            "buffer": 50000,
            "nn": nn_param
        }
        best_action_model = neural_net(NUM_INPUT, nn_param, NUM_OUTPUT)
        train_net(best_action_model, params)
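With TUNING on, the grid above enumerates 4 network sizes x 3 batch sizes x 2 buffer sizes = 24 parameter sets, each handed to launch_learn in turn.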
Example #15

NUM_SENSORS = 19


def play(model):

    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    reward, state = game_state.frame_step((2))

    # Change this to "whilte True" to make it never die.
    while reward != -500:
        car_distance += 1

        # Choose action.
        action = (np.argmax(model.predict(state, batch_size=1)))

        # Take action.
        reward, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)

    print("Made it %d frames." % car_distance)

if __name__ == "__main__":
    model = neural_net(NUM_SENSORS, True)
    play(model)
Example #16
data = pd.read_csv('/home/prathamesh/venv/HAR_MiniProject/final_data1.csv')
#enc = preprocessing.OneHotEncoder()
#e = enc.fit_transform(data.iloc[:,-1].values.reshape(-1,1))
#enc.transform(data.iloc[:,-1].values.reshape(-1,1))
e = np_utils.to_categorical(data.iloc[:,-1].values)
#print(e.shape)
#data.iloc[4,1:7]

X = data.iloc[:,1:7].values
y = e
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)
#print(X_train.shape, X_test.shape, X_val.shape)

model_1 = neural_net([6,9,6])
tr,v = model_1.sgd(X_train, y_train, batch_size=578, epochs=30, eta=0.1, lmbda=0.1, vldt=X_val, vldt_labels=y_val, k_fold=False)

#for i,j in zip(tr,v):
#	print(i, j)

'''
            ftr = train_samples.shape[0]//batch_size
            samples = [train_samples[i:i+batch_size].reshape(batch_size,-1) for i in range(ftr)]
            labels = [train_labels[i:i+batch_size].reshape(batch_size,-1) for i in range(ftr)]
            if train_samples.shape[0]%batch_size != 0:
                t1 = train_labels[ftr*batch_size:].reshape(train_labels.shape[0] - batch_size*ftr,-1).all()
                labels.append(t1)
                samples.append(train_samples[ftr*batch_size:].reshape(train_samples.shape[0] - batch_size*ftr,-1).all())
'''
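A note on the two chained train_test_split calls above: test_size=0.15 is applied twice, so the final proportions are 0.85 * 0.85 = 72.25% train, 0.85 * 0.15 = 12.75% validation, and 15% test.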
Example #17
def load_model(self, filename):
    self.model = neural_net(self.sequence_length, self.number_of_actions,
                            self.params["nn"], filename)
Example #18

    for memory in minibatch:
        # Get stored values.
        old_state_m, action_m, reward_m, new_state_m = memory
        # Get prediction on old state.
        old_qval = model.predict(old_state_m, batch_size=1)
        # Get prediction on new state.
        newQ = model.predict(new_state_m, batch_size=1)
        # Get our best move. I think?
        maxQ = np.max(newQ)
        y = np.zeros((1, 3))
        y[:] = old_qval[:]
        # Check for terminal state.
        if reward_m != -500:  # non-terminal state
            update = (reward_m + (GAMMA * maxQ))
        else:  # terminal state
            update = reward_m
        # Update the value for the action we took.
        y[0][action_m] = update
        X_train.append(old_state_m.reshape(NUM_SENSORS,))
        y_train.append(y.reshape(3,))

    X_train = np.array(X_train)
    y_train = np.array(y_train)

    return X_train, y_train

if __name__ == "__main__":
    # Get the model and train our neural net!
    model = neural_net(NUM_SENSORS)
    train_net(model)
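The minibatch loop above applies the standard Q-learning target: for a non-terminal transition the taken action's value is moved toward reward_m + GAMMA * max(Q(new_state)), while a terminal transition (reward -500 here) uses the raw reward alone. The other two action values are copied from the old prediction, so only the experienced action contributes to the training error.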
Example #19
import keras
import numpy as np
import random
from nn import neural_net
import h5py
import Usonic
import time

SAVED_MODEL = 'saved-models/ex1.h5'
NUM_INPUT = 7
NUM_SENSOR_RESOLUTION = 39
NN_SIZE = np.array([256, 256])
test_cnt = 0

model = neural_net(NUM_INPUT, NN_SIZE, SAVED_MODEL)
#model = neural_net(NUM_INPUT, NN_SIZE)
start = time.time()
while test_cnt < 3601:
    Dist = Usonic.distanceAll() / 10
    #In = np.random.random_integers(NUM_SENSOR_RESOLUTION, size=(NUM_INPUT))
    In = np.array(
        [Dist[2], Dist[2], Dist[0], Dist[0], Dist[0], Dist[1], Dist[1]])
    In = In.astype(int)
    state = np.array([In])
    action = np.argmax(model.predict(state, batch_size=1))
    test_cnt += 1
    #print(In)
    print("Number %d || action = %d || Dist = %d, %d, %d" %
          (test_cnt, action, Dist[0], Dist[1], Dist[2]))
end = time.time()
interval = end - start
Example #20
def train_regressive_nn(d, p=15, epochs=10, alpha=0.5):
    """
    Train a neural-net for regression with:
        p input units
        p/2 hidden units
        1 output unit
    Returns the NN.
    """
    ndat = p*((len(d)-p)/p)-p
    x_train = np.empty((ndat, p))
    for i in xrange(ndat):
        x_train[i,:] = d[i:p+i]
    y_train =  np.atleast_2d(d[p:p+ndat]).T
    assert x_train.shape[0]==y_train.shape[0]

    fname_str = "../data/SS_%d_alpha%0.3f_%s.cp"%(epochs, alpha, 'CE')
    print "Saving results to file : ", fname_str

    start_time = time.time()
    ntrain, n_in = x_train.shape
    arch = [p, p/2, 1] ## neural-net with 1 hidden layer of size p/2.

    print "Training set size = %d"%ntrain

    EPOCHS = epochs
    BATCH_SIZE  = 200
    MIN_GRAD = 1e-4 # if the magnitude of the gradient is smaller than this => stop.
    mag_dW   = 2*MIN_GRAD
    eta    = 1.0
    ETAS   = np.array([eta/(i+1)**alpha for i in xrange(EPOCHS)])
    print "  => learning rates: from %0.3f to %0.3f"%(ETAS[0], ETAS[-1])

    NN = nn.neural_net(arch, [nn.f_tanh(), nn.f_tanh(), nn.f_identity()])

    train_error = []
    obj_error   = []
    run_time    = []

    ## iterate over each epoch:
    for t in xrange(EPOCHS):
      
      if mag_dW < MIN_GRAD:
        print "Stopping at epoch %d due to small gradient = %f"%(t, mag_dW)
        break

      rand_idx = np.arange(ntrain)
      np.random.shuffle(rand_idx)
      n_batch  = int(math.ceil(ntrain / (BATCH_SIZE+0.0))) 
      learn_rate = ETAS[t]

      ## do learning for each mini-batch:
      for i in xrange(n_batch):
        if t!=0:
          sys.stdout.write( "Epoch : % 4d/% 4d | error : train %0.3f | Batch : % 4d/% 4d"%(t+1, EPOCHS, train_error[-1], i+1, n_batch) )
        else:
          sys.stdout.write( "Epoch : %04d/%04d | Batch : %04d/%04d "%(t+1, EPOCHS, i+1, n_batch) )    
        sys.stdout.flush()
        if i != n_batch - 1:  # the last mini-batch takes whatever remains
          idx_batch = rand_idx[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
        else:
          idx_batch = rand_idx[i*BATCH_SIZE:]

        x, y = x_train[idx_batch,:], y_train[idx_batch,:]
        mag_dW, _, obj_err = NN.update_batch(nn.squared_error(), x, y, learn_rate)
        mag_dW = np.mean(mag_dW)

        sys.stdout.write('\r')
        sys.stdout.flush()

      if t%10==0:
        err = 0.0
        for i in xrange(x_train.shape[0]):
            err += np.abs(y_train[i,:]-NN.classify(x_train[i,:],regress=True))
        err /= (x_train.shape[0]+0.0)
        train_error.append(err)
        obj_error.append(obj_err)
        run_time.append(time.time()-start_time)

        cp.dump({'train_error':train_error,
                 'obj_error':obj_error,
                 'time': run_time}, open(fname_str,'w'))
    print
    return NN
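The ETAS schedule above decays the step size as eta / (i + 1)**alpha. A quick standalone check of the endpoints, using the function's default epochs=10 and alpha=0.5 together with the eta = 1.0 set in the body:

eta, alpha, epochs = 1.0, 0.5, 10
etas = [eta / (i + 1) ** alpha for i in range(epochs)]
print(etas[0], etas[-1])  # 1.0 down to 1/sqrt(10), about 0.316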
Example #21
START_DISTANCE = 0


def play(model):

    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    state, _, speed, _, _, _ = game_state.frame_step(START_ACTION, START_SPEED, START_DISTANCE)

    # Move.
    while True:
        car_distance += 1
        
        # Choose action.
        action = (np.argmax(model.predict(state, batch_size=1)))
        
        # Take action.
        state, _, speed, _, _, _ = game_state.frame_step(action, speed, car_distance)
        
        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)


if __name__ == "__main__":
    saved_model = 'saved-best_action_models/240-160-100-50000-100000.h5'
    model = neural_net(NUM_INPUT, [240, 160, 80], NUM_OUTPUT, saved_model)
    play(model)
Example #22
        train_net(best_action_model, params)
    else:
        print("Already tested.")


if __name__ == "__main__":
    if TUNING:
        param_list = []
        nn_params = [[164, 150], [256, 256], [512, 512], [1000, 1000]]
        batchSizes = [40, 100, 400]
        buffers = [10000, 50000]

        for nn_param in nn_params:
            for batchSize in batchSizes:
                for buffer in buffers:
                    params = {
                        "batchSize": batchSize,
                        "buffer": buffer,
                        "nn": nn_param
                    }
                    param_list.append(params)

        for param_set in param_list:
            launch_learn(param_set)

    else:
        nn_param = [240, 160, 80]
        params = {"batchSize": 100, "buffer": 50000, "nn": nn_param}
        best_action_model = neural_net(NUM_INPUT, nn_param, NUM_OUTPUT)
        train_net(best_action_model, params)
Example #23
        # clock.tick(rate)
        print(car_reward)

        # time.sleep(0.1)


if __name__ == "__main__":
    # saved_model = 'saved-models/5x5-82n-75n-100-50000-2000.h5'
    # model = neural_net(25, [82, 75], saved_model)
    # play_grid(model)

    nn = [164, 41]
    num_input = 3

    saved_model = 'saved-models/lane_following-' + \
        str(nn[0]) + 'n-' + str(nn[1]) + \
        'n-10000frames-50-50000buffer-rms-6000.h5'
    params = {
        'nodes1': nn[0],
        'nodes2': nn[1],
        'x_dim': X_DIM,
        'y_dim': Y_DIM,
        "batchSize": 100,
        "buffer": 50000,
        "nn": nn,
        'solver': 'rms',
        'num_actions': 3
    }
    model = neural_net(num_input, params, saved_model)
    play_lane_following(model)
Example #24

NUM_SENSORS = 3


def play(model):

    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    _, state = game_state.frame_step((2))

    # Move.
    while True:
        car_distance += 1

        # Choose action.
        action = (np.argmax(model.predict(state, batch_size=1)))

        # Take action.
        _, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)


if __name__ == "__main__":
    saved_model = 'saved-models/164-150-400-50000-50000.h5'
    model = neural_net(NUM_SENSORS, [164, 150], saved_model)
    play(model)
Example #25

def play(model):

    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    _, state, stuff = game_state.frame_step((2))

    # Move.
    while True:
        #time.sleep(0.05)
        car_distance += 1

        # Choose action.
        action = (np.argmax(model.predict(state, batch_size=1)))

        # Take action.
        _, state, stuff = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("\n Current distance: %d frames." % car_distance)


if __name__ == "__main__":
    saved_model = 'saved-models/BLE/final/FINAL164-150-100-50000-300000.h5'
    model = neural_net(NUM_SENSORS, [164, 150], saved_model)
    play(model)
Example #26

if __name__ == "__main__":
    if TUNING:
        param_list = []
        nn_params = [[164, 150], [256, 256], [512, 512], [1000, 1000]]
        batchSizes = [40, 100, 400]
        buffers = [10000, 50000]

        for nn_param in nn_params:
            for batchSize in batchSizes:
                for buffer in buffers:
                    params = {
                        "batchSize": batchSize,
                        "buffer": buffer,
                        "nn": nn_param
                    }
                    param_list.append(params)

        for param_set in param_list:
            launch_learn(param_set)

    else:
        saved_model = 'saved-models/128-128-64-50000-100000-gen2.h5'
        trained_model = neural_net(NUM_INPUT, [128, 128], saved_model)
        nn_param = [128, 128]
        params = {"batchSize": 64, "buffer": 50000, "nn": nn_param}
        model = neural_net(NUM_INPUT, nn_param)

        train_net(model, trained_model, params)
Example #27
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    _, state = game_state.frame_step((2))
    train_state = np.append(lastState, state[0])
    train_state = np.append(train_state, lastaction)
    train_state = np.expand_dims(train_state, axis=0)
    # Move.
    while True:
        car_distance += 1

        # Choose action.
        action = (np.argmax(model.predict(train_state, batch_size=1)))
        print(action)

        # Take action.
        _, state = game_state.frame_step(action)
        train_state = np.append(lastState, state[0])

        train_state = np.append(train_state, action)
        train_state = np.expand_dims(train_state, axis=0)
        lastState = state[0]
        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)


if __name__ == "__main__":
    saved_model = 'saved-models/128-128-400-100000-70000.h5'
    model = neural_net(NUM_SENSORS, [128, 128], saved_model, dropout=True)
    play(model)

Example #28

def play(model):

    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    reward, state = game_state.frame_step((2))

    # Change this to "whilte True" to make it never die.
    while reward != -500:
        car_distance += 1

        # Choose action.
        action = (np.argmax(model.predict(state, batch_size=1)))

        # Take action.
        reward, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)

    print("Made it %d frames." % car_distance)


if __name__ == "__main__":
    model = neural_net(NUM_SENSORS, True)
    play(model)
Example #29
def getRLAgentFE(self, W, i):  # get the feature expectations of a new policy using the RL agent
    IRL_helper(W, self.behavior, self.num_frames, i)  # train the agent and save the model in a file used below
    saved_model = 'saved-models_' + self.behavior + '/evaluatedPolicies/' + str(i) + \
        '-164-150-100-50000-' + str(self.num_frames) + '.h5'  # use the saved model to get the FE
    model = neural_net(self.num_states, [164, 150], saved_model)
    return play(model, W)  # return feature expectations by executing the learned policy
Example #30
#Control parameters
target_speed = 10
fallback_sec = 1
nn_actPoint = 21

#Sarsa 0 parameters
PUNISH = -1000
GAMMA = 0.975
sarsa0P = (PUNISH, GAMMA)

sensor_h = []  # empty list for sensor handles
sensor_val = np.array([])  # empty array for sensor measurements
sensor_state = np.array([])  # empty array for sensor detection states

#Initialize Neural Network
model = neural_net(NUM_INPUT, nn_param)


#Sensor handlers
sensorList = ('Proximity_sensor1', 'Proximity_sensor2', 'Proximity_sensor3')
sensor_errorCode, sensor_handles = tv.ObjectHandle(clientID, sensorList)

#Read sensor raw data (first time initial)
readval_errorCode, sensor_val, sensor_state = tv.INI_ReadProximitySensor(clientID, sensor_handles)

#for x in range(1,3+1):
    #errorCode,sensor_handle=vrep.simxGetObjectHandle(clientID,'Proximity_sensor'+str(x),vrep.simx_opmode_oneshot_wait)
    #sensor_h.append(sensor_handle) #keep list of handles
    #errorCode,detectionState,detectedPoint,detectedObjectHandle,detectedSurfaceNormalVector=vrep.simxReadProximitySensor(clientID,sensor_handles[x-1],vrep.simx_opmode_streaming)                
    #sensor_val=np.append(sensor_val,np.linalg.norm(detectedPoint)) #get list of values
    #sensor_state=np.append(sensor_state,detectionState) #get list of values
Example #31
def train_net(model_path,
              params,
              weights,
              path,
              trainFrames,
              i,
              FEATSIZE,
              irl=True):

    print "start Training . . ."
    filename = params_to_filename(params)
    curDay = str(datetime.datetime.now().date())
    curtime = str(datetime.datetime.now().time())
    basePath = 'saved-models_' + path + '/evaluatedPoliciesTest/'
    subPath = curDay + '/' + curtime + '/'
    curDir = basePath + subPath
    os.makedirs(curDir)
    if os.path.exists(curDir):
        print "YES"
    observe = 100  # Number of frames to observe before training.
    epsilon = .5
    train_frames = trainFrames  # Number of frames to play.
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    max_car_distance = 0
    car_distance = 0
    t = 0
    data_collect = []
    replay = []  # stores tuples of (S, A, R, S').

    loss_log = []
    loss_plot = []

    #Make changes here - read from a file
    # Create a new game instance.

    game = BE.createBoardIRL(sensor_size=FEATSIZE,
                             display=False,
                             saved_model=model_path,
                             weights=weights,
                             hidden_layers=params['nn'])

    #create the target network
    targetNetwork = neural_net(FEATSIZE, params['nn'], 4)
    stepcounter = 0  # how many steps the learning network has taken ahead of the target network
    UPDATETARGET = 50  # number of steps after which the target network is re-synced
    targetNetwork.load_state_dict(game.agentBrain.state_dict())
    targetNetwork.eval()
    targetNetwork.cuda()

    criterion = NN.SmoothL1Loss()  #huber loss
    #criterion = NN.MSELoss()
    optimizer = optim.RMSprop(game.agentBrain.parameters(), lr=.001)
    # Get initial state by doing nothing and getting the state.
    #_, state, temp1 = game_state.frame_step((2))
    game.reset()

    #after this point state is referred to as the sensor_readings
    state = game.sensor_readings
    # Let's time it.
    start_time = timeit.default_timer()

    # Run the frames.
    while t < train_frames:
        if t % 2000 == 0:
            print t
            plt.figure()
            plt.plot(loss_plot)
            pltfile = 'saved_plots/' + 'smallexperiment/' + 'featsize-' + str(
                FEATSIZE) + str(i) + '-' + 'epoch' + str(t)
            plt.savefig(pltfile + '.png')
            #playing.test_model(weights, )
        t += 1
        car_distance += 1

        # Choose an action. so, as long as t < observe we take random actions?
        if random.random() < epsilon or t < observe:
            actionIndex = np.random.randint(0, 3)  # random #3
            action = game.agent_action_to_WorldAction(actionIndex)
        else:
            # Get Q values for each action.

            actionIndex = game.gen_action_from_agent()

            # *** agent_action_to_WorldAction() **** converts the action(which is basically an index that points to the action with the best qvalue according to the neural net)

            # *** to an action that is actually used in the game environment. An (x,y) tuple, which depicts the movement of the agent in the game environment

            #  action [2]   ---- > action [(3,4)]
            action = game.agent_action_to_WorldAction(actionIndex)
            #qval = model.predict(state, batch_size=1)
            #action = (np.argmax(qval))  # this step is already done in the method : gen_action_from_agent()
            #print ("action under learner ", action)

        # Take action, observe new state and get our treat.
        new_state, reward, done, _ = game.step(action)
        new_state = game.sensor_readings
        # Experience replay storage.
        replay.append((state, actionIndex, reward, new_state))

        # If we're done observing, start training.
        if t > observe:

            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory
            #print len(replay) , batchSize
            minibatch = random.sample(replay, batchSize)

            # Get training values.
            X_train, y_train = process_minibatch(
                minibatch, game.agentBrain,
                targetNetwork)  #instead of the model

            #print 'Xtrain',X_train
            #print 'y_train', y_train

            #print "Printing from train and test in learning.py :"
            #print type(X_train) , X_train.size
            #print type(y_train) , y_train.size
            # Train the model on this batch.
            #history = LossHistory()

            y_train = torch.from_numpy(y_train)

            y_train = y_train.type(torch.cuda.FloatTensor)
            # Changes to be done from here:
            # change the train method from Keras to PyTorch.

            #X_train has to be a tensor of size n x 44 x 1
            #y_train has to be a tensor of size n x 1 x 1 ??

            output = game.agentBrain(X_train)
            loss = criterion(output, y_train)

            optimizer.zero_grad()
            loss.backward()
            #print loss.item()
            #print type(loss.item())
            optimizer.step()
            stepcounter += 1

            if stepcounter == UPDATETARGET:
                targetNetwork.load_state_dict(game.agentBrain.state_dict())
                stepcounter = 0
                #print 'Updated'

            loss_log.append([t, loss.item()])
            loss_plot.append(loss.item())

        # Update the starting state with S'.
        state = new_state

        # Decrement epsilon over time.
        if epsilon > 0.1 and t > observe:
            epsilon -= (1 / train_frames)

        # We died, so update stuff.
        if done:
            # Log the car's distance at this T.
            data_collect.append([
                t,
                car_distance,
            ])

            # Update max.
            if car_distance > max_car_distance:
                max_car_distance = car_distance

            # Time it.
            tot_time = timeit.default_timer() - start_time
            fps = car_distance / tot_time

            # Output some stuff so we can watch.
            #print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
            #(max_car_distance, t, epsilon, car_distance, fps))

            # Reset.
            game.reset()
            car_distance = 0
            start_time = timeit.default_timer()

        # Save the model
        if t % 2000 == 0:

            #game.agentBrain.save_weights('saved-models_'+ path +'/evaluatedPolicies/'+str(i)+'-'+ filename + '-' +
            #                  str(t) + '.h5',
            #                  overwrite=True)
            torch.save(
                game.agentBrain.state_dict(),
                'saved-models_' + path + '/evaluatedPoliciesTest/' + subPath +
                str(i) + '-' + filename + '-' + str(t) + '.h5',
            )
            savedFilename = 'saved-models_' + path + '/evaluatedPoliciesTest/' + subPath + str(
                i) + '-' + filename + '-' + str(t) + '.h5'
            with open('results/model_paths.txt', 'w') as ff:
                ff.write('saved-models_' + path + '/evaluatedPoliciesTest/' +
                         subPath + str(i) + '-' + filename + '-' + str(t) +
                         '.h5\n')
            print("Saving model %s - %d" % (filename, t))

    # Log results after we're done all frames.
    log_results(filename, data_collect, loss_log, i)
    #print "Testing the model :"

    return savedFilename
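Two details in the loop above are easy to miss: the frozen targetNetwork is re-synced from the learning network only every UPDATETARGET = 50 gradient steps, which keeps the bootstrap target from chasing its own updates, and epsilon decays linearly by 1/train_frames per frame once observation ends, so from its 0.5 start it hits the 0.1 floor after roughly 0.4 * train_frames frames.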
Example #32
        if t % 25000 == 0:
            model.save_weights('saved-models/' + filename + '-' + str(t) +
                               '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))
    log_results(filename, data_collect, loss_log)


if __name__ == '__main__':
    if TUNING:
        param_list = []
        nn_params = [[164, 150], [256, 256], [512, 512], [1000, 1000]]
        batchSizes = [40, 100, 400]
        buffers = [10000, 50000]
        for nn_param in nn_params:
            for batchSize in batchSizes:
                for buffer in buffers:
                    params = {
                        "batchSize": batchSize,
                        "buffer": buffer,
                        "nn": nn_param
                    }
                    param_list.append(params)
        for param_set in param_list:
            launch_learn(param_set)
    else:
        nn_param = [164, 150]
        params = {"batchSize": 100, "buffer": 50000, "nn": nn_param}
        model = nn.neural_net(NUM_INPUT, nn_param)
        train_net(model, params)
Example #33
    def __init__(self,
                 params,
                 load_replay_file=None,
                 save_replay_file_prefix="replay",
                 save_model_file_prefix="saved-models/",
                 save_every=500,
                 end_value=-500):
        # This is where the input values are saved so they can be used in other functions within the class

        self.params = params
        # sequence_length specifies the number of commands that form a state
        self.sequence_length = params['sequence_length']
        # number_of_actions specifies the number of possible actions
        self.number_of_actions = params["number_of_actions"]
        # The neural network is built here
        self.model = neural_net(self.sequence_length, self.number_of_actions,
                                params["nn"])
        # The name that will be used when saving the neural network model
        self.filename = params_to_filename(params)
        # Specifies the number of states to be added to the replay buffer before saving
        self.save_every = save_every
        # A prefix to the name of the replay file when saved
        self.save_replay_file_prefix = save_replay_file_prefix
        self.save_model_file_prefix = save_model_file_prefix
        # The value checked for at the end of the "game"
        self.end_value = end_value
        # Forgetting value
        self.GAMMA = params["GAMMA"]

        self.observe = 1000  # Number of frames to observe before training.
        self.epsilon = 1  # Chance to choose random action
        self.train_frames = 10000  # Number of frames to play.
        self.batchSize = params['batchSize']
        self.buffer = params['buffer']

        if isinstance(params["cmd2number_reward"], str):
            #if string load from file
            self.cmd2number_reward = pickle.load(
                open(params["cmd2number_reward"], "rb"))
        else:
            # if dictionary
            self.cmd2number_reward = params["cmd2number_reward"]

        self.state = np.zeros(params["sequence_length"])
        self.state_index = 0

        # Just stuff used below.
        self.max_hacker_cmds = 0
        self.hacker_cmds = 0

        self.data_collect = []
        if load_replay_file is None:
            self.replay = []  # stores tuples of (S, A, R, S').
        else:
            self.replay = pickle.load(open(load_replay_file, "rb"))

        self.t = len(self.replay)
        self.loss_log = []

        # Let's time it.
        self.start_time = timeit.default_timer()
        self.state = np.zeros(10)
        self.lastAction = 0
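Since __init__ above restores self.replay with pickle.load, the save side is presumably symmetric. A minimal sketch under that assumption; the class's real save method is not shown, and the file naming built from save_replay_file_prefix is a guess:

import pickle

def save_replay(replay, prefix, t):
    # Hypothetical counterpart to the pickle.load in __init__: persist the
    # (S, A, R, S') tuples so a later run can resume from them.
    with open("%s_%d.p" % (prefix, t), "wb") as f:
        pickle.dump(replay, f)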
if __name__ == "__main__":
    if TUNING:
        param_list = []
        nn_params = [[20, 20], [164, 150], [256, 256],
                     [512, 512], [1000, 1000]]
        batchSizes = [32, 40, 100, 400]
        buffers = [10000, 50000, 500000]

        for nn_param in nn_params:
            for batchSize in batchSizes:
                for buffer in buffers:
                    params = {
                        "batchSize": batchSize,
                        "buffer": buffer,
                        "nn": nn_param
                    }
                    param_list.append(params)

        for param_set in param_list:
            launch_learn(param_set)

    else:
        nn_param = [1000, 1000]
        params = {
            "batchSize": 40,
            "buffer": 500000,
            "nn": nn_param
        }
        model = neural_net(NUM_SENSORS, nn_param)
        train_net(model, params)
Example #35
        # Take action.
        immediateReward, state, readings = game_state.frame_step(action)
        #print ("immeditate reward:: ", immediateReward)
        #print ("readings :: ", readings)
        #start recording feature expectations only after 100 frames
        if car_distance > 100:
            featureExpectations += (GAMMA**(car_distance -
                                            101)) * np.array(readings)
        #print ("Feature Expectations :: ", featureExpectations)
        # Tell us something.
        if car_distance % 2000 == 0:
            print("Current distance: %d frames." % car_distance)
            break

    return featureExpectations


if __name__ == "__main__":  # ignore
    BEHAVIOR = sys.argv[1]
    ITERATION = sys.argv[2]
    FRAME = sys.argv[3]
    saved_model = 'saved-models_' + BEHAVIOR + '/evaluatedPolicies/' + str(
        ITERATION) + '-164-150-100-50000-' + str(FRAME) + '.h5'
    weights = [
        -0.79380502, 0.00704546, 0.50866139, 0.29466834, -0.07636144,
        0.09153848, -0.02632325, -0.09672041
    ]
    model = neural_net(NUM_STATES, [164, 150], saved_model)
    print(play(model, weights))
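The accumulation above estimates discounted feature expectations, mu(pi) = sum over t of GAMMA**t * phi(s_t), with the clock started 100 frames in so the initial transient is skipped; this is the quantity that apprenticeship-learning IRL compares between the expert and each candidate policy.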
Example #36
    car_distance = 0
    game_state = main.GameState()

    # Do nothing to get initial.
    _, state = game_state.frame_step((2))
    exit = False
    # Move.
    while not exit:
        car_distance += 1

        # Choose action.
        action = (np.argmax(model.predict(state, batch_size=1)))

        # Take action.
        _, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)

        # Event queue
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                exit = True


if __name__ == "__main__":
    saved_model = 'saved-models/320-320-1000-50000-100000.h5'
    model = neural_net(NUM_SENSORS, [320, 320], saved_model)
    play(model)
Example #37
        param_list = []
        nn_params = [[164, 150], [256, 256],
                     [512, 512], [1000, 1000]]
        batchSizes = [40, 100, 400]
        buffers = [10000, 50000]

        for nn_param in nn_params:
            for batchSize in batchSizes:
                for buffer in buffers:
                    params = {
                        "batchSize": batchSize,
                        "buffer": buffer,
                        "nn": nn_param
                    }
                    param_list.append(params)

        for param_set in param_list:
            launch_learn(param_set)

    else:
        nn_param = [128, 128]
        params = {
            "batchSize": 64,
            "buffer": 50000,
            "nn": nn_param
        }
        saved_model = 'saved-models/128-128-64-50000-100000.h5'
        model = neural_net(NUM_INPUT, [128, 128], saved_model)
#        model = neural_net(NUM_INPUT, nn_param)
        train_net(model, params)
Example #38
import keras
import numpy as np
import random
from nn import neural_net
import h5py
import Usonic
import time

SAVED_MODEL = 'saved-models/ex1.h5'
NUM_INPUT = 7
NUM_SENSOR_RESOLUTION = 39
NN_SIZE = np.array([256, 256])
test_cnt = 0

model = neural_net(NUM_INPUT, NN_SIZE, SAVED_MODEL)
#model = neural_net(NUM_INPUT, NN_SIZE)
start = time.time()
while test_cnt < 3601:
    Dist = Usonic.distanceAll() / 10
    #In = np.random.random_integers(NUM_SENSOR_RESOLUTION, size=(NUM_INPUT))
    In = np.array([Dist[2], Dist[2], Dist[0], Dist[0], Dist[0], Dist[1], Dist[1]])
    In = In.astype(int)
    state = np.array([In])
    action = np.argmax(model.predict(state, batch_size=1))
    test_cnt += 1
    #print(In)
    print("Number %d || action = %d || Dist = %d, %d, %d" %
          (test_cnt, action, Dist[0], Dist[1], Dist[2]))
end = time.time()
interval = end - start

print("Time Elaspsed: %d sec" %interval)
Example #39
NUM_SENSORS = 5


def play(model):

    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    _, state = game_state.frame_step((2))

    # Move.
    while True:
        car_distance += 1

        # Choose action.
        action = (np.argmax(model.predict(state, batch_size=1)))

        # Take action.
        _, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)


if __name__ == "__main__":
    saved_model = 'saved-models/128-128-64-50000-50000.h5'
    model = neural_net(NUM_SENSORS, [128, 128], saved_model)
    play(model)
Example #40
    #
    #     for param_set in param_list:
    #         launch_learn(param_set)
    #
    # else:
    nn_param = [164, 150]
    nn_param = [164, 0]
    nn_param = [64, 32]

    nn_param = [12, 8]
    nn_param = [164, 41]

    params = {
        'nodes1': nn_param[0],
        'nodes2': nn_param[1],
        'x_dim': X_DIM,
        'y_dim': Y_DIM,
        "batchSize": 50,
        "buffer": 50000,
        "nn": nn_param,
        'solver': 'rms',
        'num_actions': 3
    }
    model = neural_net(NUM_INPUT, params)
    print('made model')
    train_net(model, params, 'lane_following')

    play_lane_following(model)

    # play(model)