def run_in_batches():
    ## load test data
    X_train, y_train, labels = gen_data()
    X_test = gen_data(test=True, labels=labels)

    ## prep test data
    X_test, features, current_products = prep_test_data(X_train, y_train, X_test)

    ## setup training
    sess = None
    step = .02

    ## divide training in batches
    for batch in arange(0, 1, step):
        print("{:.2f}-{:.2f}".format(batch, batch + step))

        ## load partial train data
        X_batch, y_batch = gen_batch(X_train, y_train, batch, step, labels)

        ## prep data
        X_batch, y_batch = prep_train_data(X_batch, y_batch, features)

        ## train
        sess = neural_net(sess, X_batch, y_batch)

    ## once finished, predict
    preds = neural_net(sess, X_test, pred=True)

    ## finally export
    get_added_products(preds, X_test, labels, current_products)
def __init__(self, epoch=10, batch_size=10, epsilon=1, gamma=.8):
    self.epoch = epoch
    self.batch_size = batch_size
    self.epsilon = epsilon
    self.gamma = gamma
    self.model = neural_net([15, 16])
    self.experience = []
def launch_learn(params, new=True):
    filename = params_to_filename(params)
    print("Trying %s" % filename)
    if not os.path.isfile('results/sonar-frames/loss_data-' + filename + '.csv'):
        open('results/sonar-frames/loss_data-' + filename + '.csv', 'a').close()
        print("Starting test.")
        # Train.
        if new:
            model = neural_net(NUM_INPUT, params['nn'])
            train_net(model, params)
        else:
            model = neural_net(NUM_INPUT, [128, 128],
                               'saved-models/128-128-64-50000-50000.h5')
            train_net(model, params)
    else:
        print("Already tested.")
def IRL_helper(weights, path, trainFrames, i):
    nn_param = [164, 150]
    params = {
        "batchSize": 100,
        "buffer": 50000,
        "nn": nn_param
    }
    model = neural_net(NUM_INPUT, nn_param)
    train_net(model, params, weights, path, trainFrames, i)
def IRL_helper(weights, path, trainFrames, i):
    nn_param = [164, 150]
    params = {
        "batchSize": 100,
        "buffer": 50000,
        "nn": nn_param
    }
    saved_model = 'saved-models_red/evaluatedPolicies/2-164-150-100-50000-100000.h5'
    model = neural_net(NUM_INPUT, nn_param, saved_model)
    train_net(model, params, weights, path, trainFrames, i)
def __init__(self, params={"batchSize": 64, "buffer": 50000, "nn": [128, 128]}):
    self._filename = str(params['nn'][0]) + '-' + str(params['nn'][1]) + '-' + \
        str(params['batchSize']) + '-' + str(params['buffer'])
    self.observe = 1000  # Number of frames to observe before training.
    self.epsilon = 1
    self.train_frames = 100000  # Number of frames to play.
    self.batchSize = params['batchSize']
    self.buffer = params['buffer']  # Replay buffer capacity.
    self.t = 0  # Number of iterations.
    self.replay = []  # Stores tuples of (S, A, R, S').
    self.loss_log = []
    self.model = neural_net(5, [128, 128])
def launch_learn(params):
    filename = params_to_filename(params)
    print("Trying %s" % filename)
    if not os.path.isfile('results/sonar-frames/loss_data-' + filename + '.csv'):
        open('results/sonar-frames/loss_data-' + filename + '.csv', 'a').close()
        print("Starting test.")
        model = neural_net(NUM_INPUT, params['nn'])
        train_net(model, params)
    else:
        print("Already tested.")
def launch_learn(params, new=True):
    filename = params_to_filename(params)
    print("Trying %s" % filename)

    # Make sure we haven't run this one.
    if not os.path.isfile('results/sonar-frames/loss_data-' + filename + '.csv'):
        # Create the file so we don't double-test when we run multiple
        # instances of the script at the same time.
        open('results/sonar-frames/loss_data-' + filename + '.csv', 'a').close()
        print("Starting test.")

        # Train.
        if new:
            model = neural_net(NUM_INPUT, params['nn'])
            train_net(model, params)
        else:
            model = neural_net(NUM_INPUT, [128, 128],
                               'saved-models/128-128-64-50000-50000.h5')
            train_net(model, params)
    else:
        print("Already tested.")
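Most of the snippets in this collection call a project-local neural_net(num_inputs, layer_sizes[, saved_weights]) helper rather than a library API. A minimal sketch of what such a builder could look like in Keras, assuming a Dense/ReLU Q-network with three output actions (the dropout, loss, and three-action output layer are illustrative assumptions, not the projects' actual nn.py):

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import RMSprop


def neural_net(num_inputs, layer_sizes, load=''):
    # Two hidden Dense/ReLU layers sized by layer_sizes, linear Q-value output.
    model = Sequential()
    model.add(Dense(layer_sizes[0], input_shape=(num_inputs,)))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(layer_sizes[1]))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(3))              # One Q-value per action (assumed: 3 actions).
    model.add(Activation('linear'))
    model.compile(optimizer=RMSprop(), loss='mse')
    if load:
        model.load_weights(load)     # Resume from a saved .h5 checkpoint.
    return model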
def initial_pred(self):
    '''Initial prediction to get a sense of accuracy before submitting.'''
    self.csv_train = csv.reader(open('train.csv'))

    # Split and set data for training and testing.
    total_features = []
    total_labels = []
    for row in self.csv_train:
        pclass = row[2]
        age = row[5]
        sex = row[6]
        parch = row[7]
        fare = row[9]

        # Handle all null values.
        if sex:
            sex = 1 if sex == 'female' else 0
        else:
            sex = row[1]
        if not age:
            age = 0
        if not parch:
            parch = 0
        if not fare:
            fare = 0
        if not pclass:
            pclass = 0

        total_features.append([pclass, sex, age, parch, fare])
        total_labels.append(row[1])

    # Drop the CSV header row.
    del total_features[0]
    del total_labels[0]

    # Convert to floats (materialize the map so it isn't a lazy iterator).
    total_features = [list(map(float, i)) for i in total_features]
    total_labels = list(map(float, total_labels))

    return neural_net(total_features, [total_labels])
def launch_learn(params):
    filename = params_to_filename(params)
    print("Trying %s" % filename)

    # Make sure we haven't run this one.
    if not os.path.isfile('results/sonar-frames/loss_data-' + filename + '.csv'):
        # Create the file so we don't double-test when we run multiple
        # instances of the script at the same time.
        open('results/sonar-frames/loss_data-' + filename + '.csv', 'a').close()
        print("Starting test.")

        # Train.
        best_action_model = neural_net(NUM_INPUT, params['nn'])
        train_net(best_action_model, params)
    else:
        print("Already tested.")
def launch_learn(params):
    filename = params_to_filename(params)
    print("Trying %s" % filename)

    # Make sure we haven't run this one.
    if not os.path.isfile('results/logs/loss_data-' + filename + '-simple.csv'):
        # Create the file so we don't double-test when we run multiple
        # instances of the script at the same time.
        open('results/logs/loss_data-' + filename + '-simple.csv', 'a').close()
        print("Starting test.")

        # Train.
        model = neural_net(NUM_INPUT, params['nn'])
        train(model, params)
    else:
        print("Already tested.")
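params_to_filename is another project-local helper. Judging by the checkpoint names used throughout ('164-150-100-50000-...'), it likely joins the two layer sizes, the batch size, and the buffer size; a sketch under that assumption:

def params_to_filename(params):
    # e.g. {"nn": [164, 150], "batchSize": 100, "buffer": 50000} -> "164-150-100-50000"
    return '-'.join(str(v) for v in (params['nn'][0], params['nn'][1],
                                     params['batchSize'], params['buffer']))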
def on_request(ch, method, props, body):
    print(body)
    distMat = str(body).split(",")[1:-1]
    distMat = list(map(int, distMat))
    print(distMat)

    saved_model = 'saved-models/128-128-64-50000-25000.h5'
    model = neural_net(3, [128, 128], saved_model)  # 3 sensor inputs.

    action = getAction(distMat)  # Get action here.
    response = action

    ch.basic_publish(exchange='',
                     routing_key=props.reply_to,
                     properties=pika.BasicProperties(
                         correlation_id=props.correlation_id),
                     body=str(response))
    ch.basic_ack(delivery_tag=method.delivery_tag)
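For context, a handler like on_request is registered as an RPC consumer with pika roughly as follows (a sketch; the queue name rpc_queue is an assumption, not taken from this snippet):

import pika

connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
channel = connection.channel()
channel.queue_declare(queue='rpc_queue')   # Assumed queue name.
channel.basic_qos(prefetch_count=1)        # Handle one unacked message at a time.
channel.basic_consume(queue='rpc_queue', on_message_callback=on_request)
channel.start_consuming()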
import timeit

NUM_INPUT = 11


def play(model):
    game_state = flappy.Game()
    game_state.init_elements()

    # Do nothing to get initial state.
    state, _ = game_state.frame_step(0)

    # Move.
    while True:
        # Choose action.
        action = np.argmax(model.predict(np.array([state]))[0])

        # Take action.
        state, reward = game_state.frame_step(action)
        if reward == -1000:
            break


if __name__ == "__main__":
    saved_model = 'results/saved-models/256-256-512-50000-ver19-300000.h5'
    model = neural_net(NUM_INPUT, [256, 256], saved_model)
    play(model)
if __name__ == "__main__": if TUNING: param_list = [] nn_params = [[164, 150], [256, 256], [512, 512], [1000, 1000]] batchSizes = [40, 100, 400] buffers = [10000, 50000] for nn_param in nn_params: for batchSize in batchSizes: for buffer in buffers: params = { "batchSize": batchSize, "buffer": buffer, "nn": nn_param } param_list.append(params) for param_set in param_list: launch_learn(param_set) else: nn_param = [240, 160, 80] params = { "batchSize": 100, "buffer": 50000, "nn": nn_param } best_action_model = neural_net(NUM_INPUT, nn_param, NUM_OUTPUT) train_net(best_action_model, params)
NUM_SENSORS = 19


def play(model):
    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial state.
    reward, state = game_state.frame_step(2)

    # Change this to "while True" to make it never die.
    while reward != -500:
        car_distance += 1

        # Choose action.
        action = np.argmax(model.predict(state, batch_size=1))

        # Take action.
        reward, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)

    print("Made it %d frames." % car_distance)


if __name__ == "__main__":
    model = neural_net(NUM_SENSORS, True)
    play(model)
data = pd.read_csv('/home/prathamesh/venv/HAR_MiniProject/final_data1.csv')

# One-hot encode the activity labels.
e = np_utils.to_categorical(data.iloc[:, -1].values)

X = data.iloc[:, 1:7].values
y = e
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)

model_1 = neural_net([6, 9, 6])
tr, v = model_1.sgd(X_train, y_train, batch_size=578, epochs=30, eta=0.1, lmbda=0.1,
                    vldt=X_val, vldt_labels=y_val, k_fold=False)
def load_model(self, filename):
    self.model = neural_net(self.sequence_length, self.number_of_actions,
                            self.params["nn"], filename)
    for memory in minibatch:
        # Get stored values.
        old_state_m, action_m, reward_m, new_state_m = memory

        # Get prediction on old state.
        old_qval = model.predict(old_state_m, batch_size=1)

        # Get prediction on new state.
        newQ = model.predict(new_state_m, batch_size=1)

        # Get our best move.
        maxQ = np.max(newQ)

        y = np.zeros((1, 3))
        y[:] = old_qval[:]

        # Check for terminal state.
        if reward_m != -500:  # Non-terminal state.
            update = reward_m + (GAMMA * maxQ)
        else:  # Terminal state.
            update = reward_m

        # Update the value for the action we took.
        y[0][action_m] = update
        X_train.append(old_state_m.reshape(NUM_SENSORS,))
        y_train.append(y.reshape(3,))

    X_train = np.array(X_train)
    y_train = np.array(y_train)
    return X_train, y_train


if __name__ == "__main__":
    # Get the model and train our neural net!
    model = neural_net(NUM_SENSORS)
    train_net(model)
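The loop above calls model.predict twice per transition; the same Bellman targets can be computed with two batched predictions instead. A sketch, assuming the same (S, A, R, S') tuples, 3 actions, and -500 terminal reward used above (process_minibatch_batched is a hypothetical helper, not part of the original code):

import numpy as np

def process_minibatch_batched(minibatch, model, GAMMA, num_sensors):
    # Stack states so one predict call covers the whole minibatch.
    old_states = np.stack([m[0].reshape(num_sensors,) for m in minibatch])
    new_states = np.stack([m[3].reshape(num_sensors,) for m in minibatch])
    actions = np.array([m[1] for m in minibatch])
    rewards = np.array([m[2] for m in minibatch], dtype=float)

    y = model.predict(old_states, batch_size=len(minibatch))            # (n, 3)
    max_q = model.predict(new_states, batch_size=len(minibatch)).max(axis=1)

    # Bellman target: r + gamma * max_a' Q(s', a'), or just r at terminals.
    targets = np.where(rewards != -500, rewards + GAMMA * max_q, rewards)
    y[np.arange(len(minibatch)), actions] = targets
    return old_states, y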
def train_regressive_nn(d, p=15, epochs=10, alpha=0.5):
    """
    Train a neural-net for regression with:
        p   input units
        p/2 hidden units
        1   output unit
    Returns the NN.
    """
    ndat = p*((len(d)-p)/p) - p
    x_train = np.empty((ndat, p))
    for i in xrange(ndat):
        x_train[i,:] = d[i:p+i]
    y_train = np.atleast_2d(d[p:p+ndat]).T
    assert x_train.shape[0] == y_train.shape[0]

    fname_str = "../data/SS_%d_alpha%0.3f_%s.cp" % (epochs, alpha, 'CE')
    print "Saving results to file : ", fname_str

    start_time = time.time()
    ntrain, n_in = x_train.shape
    arch = [p, p/2, 1]  ## Neural-net with one hidden layer of size p/2.
    print "Training set size = %d" % ntrain

    EPOCHS = epochs
    BATCH_SIZE = 200
    MIN_GRAD = 1e-4  # Stop if the magnitude of the gradient falls below this.
    mag_dW = 2*MIN_GRAD
    eta = 1.0
    ETAS = np.array([eta/(i+1)**alpha for i in xrange(EPOCHS)])
    print " => learning rates: from %0.3f to %0.3f" % (ETAS[0], ETAS[-1])

    NN = nn.neural_net(arch, [nn.f_tanh(), nn.f_tanh(), nn.f_identity()])

    train_error = []
    obj_error = []
    run_time = []

    ## Iterate over each epoch:
    for t in xrange(EPOCHS):
        if mag_dW < MIN_GRAD:
            print "Stopping at epoch %d due to small gradient = %f" % (t, mag_dW)
            break

        rand_idx = np.arange(ntrain)
        np.random.shuffle(rand_idx)
        n_batch = int(math.ceil(ntrain / (BATCH_SIZE+0.0)))
        learn_rate = ETAS[t]

        ## Do learning for each mini-batch:
        for i in xrange(n_batch):
            if t != 0:
                sys.stdout.write("Epoch : % 4d/% 4d | error : train %0.3f | Batch : % 4d/% 4d" % (t+1, EPOCHS, train_error[-1], i+1, n_batch))
            else:
                sys.stdout.write("Epoch : %04d/%04d | Batch : %04d/%04d " % (t+1, EPOCHS, i+1, n_batch))
            sys.stdout.flush()

            # The last batch takes whatever remains after the full-size batches.
            if i != n_batch-1:
                idx_batch = rand_idx[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
            else:
                idx_batch = rand_idx[i*BATCH_SIZE:]
            x, y = x_train[idx_batch,:], y_train[idx_batch,:]
            mag_dW, _, obj_err = NN.update_batch(nn.squared_error(), x, y, learn_rate)
            mag_dW = np.mean(mag_dW)
            sys.stdout.write('\r')
            sys.stdout.flush()

        if t % 10 == 0:
            err = 0.0
            for i in xrange(x_train.shape[0]):
                err += np.abs(y_train[i,:] - NN.classify(x_train[i,:], regress=True))
            err /= (x_train.shape[0]+0.0)
            train_error.append(err)
            obj_error.append(obj_err)
            run_time.append(time.time()-start_time)
            cp.dump({'train_error': train_error, 'obj_error': obj_error, 'time': run_time},
                    open(fname_str, 'w'))
    print
    return NN
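A hypothetical call, fitting the regressor to a noisy sine series (synthetic data, not from the original experiments; written in the snippet's Python 2 style):

d = np.sin(np.linspace(0, 40*np.pi, 4000)) + 0.05*np.random.randn(4000)
NN = train_regressive_nn(d, p=16, epochs=50, alpha=0.6)
print NN.classify(d[-16:], regress=True)  # One-step-ahead prediction.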
START_DISTANCE = 0


def play(model):
    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial state.
    state, _, speed, _, _, _ = game_state.frame_step(START_ACTION, START_SPEED, START_DISTANCE)

    # Move.
    while True:
        car_distance += 1

        # Choose action.
        action = np.argmax(model.predict(state, batch_size=1))

        # Take action.
        state, _, speed, _, _, _ = game_state.frame_step(action, speed, car_distance)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)


if __name__ == "__main__":
    saved_model = 'saved-best_action_models/240-160-100-50000-100000.h5'
    model = neural_net(NUM_INPUT, [240, 160, 80], NUM_OUTPUT, saved_model)
    play(model)
        train_net(best_action_model, params)
    else:
        print("Already tested.")


if __name__ == "__main__":
    if TUNING:
        param_list = []
        nn_params = [[164, 150], [256, 256], [512, 512], [1000, 1000]]
        batchSizes = [40, 100, 400]
        buffers = [10000, 50000]

        for nn_param in nn_params:
            for batchSize in batchSizes:
                for buffer in buffers:
                    params = {
                        "batchSize": batchSize,
                        "buffer": buffer,
                        "nn": nn_param
                    }
                    param_list.append(params)

        for param_set in param_list:
            launch_learn(param_set)
    else:
        nn_param = [240, 160, 80]
        params = {"batchSize": 100, "buffer": 50000, "nn": nn_param}
        best_action_model = neural_net(NUM_INPUT, nn_param, NUM_OUTPUT)
        train_net(best_action_model, params)
        # clock.tick(rate)
        print(car_reward)
        # time.sleep(0.1)


if __name__ == "__main__":
    # saved_model = 'saved-models/5x5-82n-75n-100-50000-2000.h5'
    # model = neural_net(25, [82, 75], saved_model)
    # play_grid(model)

    nn = [164, 41]
    num_input = 3
    saved_model = 'saved-models/lane_following-' + \
        str(nn[0]) + 'n-' + str(nn[1]) + \
        'n-10000frames-50-50000buffer-rms-6000.h5'
    params = {
        'nodes1': nn[0],
        'nodes2': nn[1],
        'x_dim': X_DIM,
        'y_dim': Y_DIM,
        "batchSize": 100,
        "buffer": 50000,
        "nn": nn,
        'solver': 'rms',
        'num_actions': 3
    }
    model = neural_net(num_input, params, saved_model)
    play_lane_following(model)
NUM_SENSORS = 3


def play(model):
    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial state.
    _, state = game_state.frame_step(2)

    # Move.
    while True:
        car_distance += 1

        # Choose action.
        action = np.argmax(model.predict(state, batch_size=1))

        # Take action.
        _, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)


if __name__ == "__main__":
    saved_model = 'saved-models/164-150-400-50000-50000.h5'
    model = neural_net(NUM_SENSORS, [164, 150], saved_model)
    play(model)
def play(model):
    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial state.
    _, state, stuff = game_state.frame_step(2)

    # Move.
    while True:
        # time.sleep(0.05)
        car_distance += 1

        # Choose action.
        action = np.argmax(model.predict(state, batch_size=1))

        # Take action.
        _, state, stuff = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("\n Current distance: %d frames." % car_distance)


if __name__ == "__main__":
    saved_model = 'saved-models/BLE/final/FINAL164-150-100-50000-300000.h5'
    model = neural_net(NUM_SENSORS, [164, 150], saved_model)
    play(model)
if __name__ == "__main__": if TUNING: param_list = [] nn_params = [[164, 150], [256, 256], [512, 512], [1000, 1000]] batchSizes = [40, 100, 400] buffers = [10000, 50000] for nn_param in nn_params: for batchSize in batchSizes: for buffer in buffers: params = { "batchSize": batchSize, "buffer": buffer, "nn": nn_param } param_list.append(params) for param_set in param_list: launch_learn(param_set) else: saved_model = 'saved-models/128-128-64-50000-100000-gen2.h5' trained_model = neural_net(NUM_INPUT, [128, 128], saved_model) nn_param = [128, 128] params = {"batchSize": 64, "buffer": 50000, "nn": nn_param} model = neural_net(NUM_INPUT, nn_param) train_net(model, trained_model, params)
    game_state = carmunk.GameState()

    # Do nothing to get initial state.
    _, state = game_state.frame_step(2)

    train_state = np.append(lastState, state[0])
    train_state = np.append(train_state, lastaction)
    train_state = np.expand_dims(train_state, axis=0)

    # Move.
    while True:
        car_distance += 1

        # Choose action.
        action = np.argmax(model.predict(train_state, batch_size=1))
        print(action)

        # Take action.
        _, state = game_state.frame_step(action)

        train_state = np.append(lastState, state[0])
        train_state = np.append(train_state, action)
        train_state = np.expand_dims(train_state, axis=0)
        lastState = state[0]

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)


if __name__ == "__main__":
    saved_model = 'saved-models/128-128-400-100000-70000.h5'
    model = neural_net(NUM_SENSORS, [128, 128], saved_model, dropout=True)
    play(model)
def play(model):
    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial state.
    reward, state = game_state.frame_step(2)

    # Change this to "while True" to make it never die.
    while reward != -500:
        car_distance += 1

        # Choose action.
        action = np.argmax(model.predict(state, batch_size=1))

        # Take action.
        reward, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)

    print("Made it %d frames." % car_distance)


if __name__ == "__main__":
    model = neural_net(NUM_SENSORS, True)
    play(model)
def getRLAgentFE(self, W, i):
    """Get the feature expectations of a new policy using the RL agent."""
    # Train the agent and save the model in the file used below.
    IRL_helper(W, self.behavior, self.num_frames, i)
    # Use the saved model to get the feature expectations.
    saved_model = 'saved-models_' + self.behavior + '/evaluatedPolicies/' + \
        str(i) + '-164-150-100-50000-' + str(self.num_frames) + '.h5'
    model = neural_net(self.num_states, [164, 150], saved_model)
    # Return feature expectations by executing the learned policy.
    return play(model, W)
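In the apprenticeship-learning setup this code appears to follow, the quantity play returns is the empirical discounted feature expectation of the executed policy, \(\hat{\mu}(\pi) = \sum_{t \ge 0} \gamma^{t}\,\phi(s_t)\). The accumulation of (GAMMA ** (car_distance - 101)) * readings in the play snippet further below computes exactly this sum, started after a 100-frame warm-up.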
# Control parameters.
target_speed = 10
fallback_sec = 1
nn_actPoint = 21

# SARSA(0) parameters.
PUNISH = -1000
GAMMA = 0.975
sarsa0P = (PUNISH, GAMMA)

sensor_h = []                # Empty list for sensor handles.
sensor_val = np.array([])    # Empty array for sensor measurements.
sensor_state = np.array([])  # Empty array for sensor detection states.

# Initialize the neural network.
model = neural_net(NUM_INPUT, nn_param)

# Sensor handles.
sensorList = ('Proximity_sensor1', 'Proximity_sensor2', 'Proximity_sensor3')
sensor_errorCode, sensor_handles = tv.ObjectHandle(clientID, sensorList)

# Read raw sensor data (initial read).
readval_errorCode, sensor_val, sensor_state = tv.INI_ReadProximitySensor(clientID, sensor_handles)
def train_net(model_path, params, weights, path, trainFrames, i, FEATSIZE, irl=True):
    print "Start training . . ."
    filename = params_to_filename(params)

    curDay = str(datetime.datetime.now().date())
    curtime = str(datetime.datetime.now().time())
    basePath = 'saved-models_' + path + '/evaluatedPoliciesTest/'
    subPath = curDay + '/' + curtime + '/'
    curDir = basePath + subPath
    os.makedirs(curDir)
    if os.path.exists(curDir):
        print "YES"

    observe = 100  # Number of frames to observe before training.
    epsilon = .5
    train_frames = trainFrames  # Number of frames to play.
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    max_car_distance = 0
    car_distance = 0
    t = 0
    data_collect = []
    replay = []  # Stores tuples of (S, A, R, S').
    loss_log = []
    loss_plot = []

    # Create a new game instance.
    game = BE.createBoardIRL(sensor_size=FEATSIZE,
                             display=False,
                             saved_model=model_path,
                             weights=weights,
                             hidden_layers=params['nn'])

    # Create the target network.
    targetNetwork = neural_net(FEATSIZE, params['nn'], 4)
    stepcounter = 0    # Steps the learning network has taken ahead of the target network.
    UPDATETARGET = 50  # Number of steps after which the target network is updated.
    targetNetwork.load_state_dict(game.agentBrain.state_dict())
    targetNetwork.eval()
    targetNetwork.cuda()

    criterion = NN.SmoothL1Loss()  # Huber loss.
    # criterion = NN.MSELoss()
    optimizer = optim.RMSprop(game.agentBrain.parameters(), lr=.001)

    # Get the initial state by doing nothing and reading the sensors.
    game.reset()
    state = game.sensor_readings

    # Let's time it.
    start_time = timeit.default_timer()

    # Run the frames.
    while t < train_frames:
        if t % 2000 == 0:
            print t
            plt.figure()
            plt.plot(loss_plot)
            pltfile = 'saved_plots/' + 'smallexperiment/' + 'featsize-' + \
                str(FEATSIZE) + str(i) + '-' + 'epoch' + str(t)
            plt.savefig(pltfile + '.png')

        t += 1
        car_distance += 1

        # Choose an action: random while t < observe, epsilon-greedy afterwards.
        if random.random() < epsilon or t < observe:
            actionIndex = np.random.randint(0, 3)  # Random action.
            action = game.agent_action_to_WorldAction(actionIndex)
        else:
            # Get Q values for each action.
            actionIndex = game.gen_action_from_agent()
            # agent_action_to_WorldAction() converts the action index chosen by the
            # neural net into the (x, y) movement actually used in the game
            # environment, e.g. action [2] ---> action [(3, 4)].
            action = game.agent_action_to_WorldAction(actionIndex)

        # Take action, observe new state and get our treat.
        new_state, reward, done, _ = game.step(action)
        new_state = game.sensor_readings

        # Experience replay storage.
        replay.append((state, actionIndex, reward, new_state))

        # If we're done observing, start training.
        if t > observe:
            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory.
            minibatch = random.sample(replay, batchSize)

            # Get training values.
            X_train, y_train = process_minibatch(minibatch, game.agentBrain,
                                                 targetNetwork)  # Instead of the model.

            # Train the model on this batch (PyTorch instead of Keras):
            # X_train is a tensor of size n x FEATSIZE, y_train of size n x 4.
            y_train = torch.from_numpy(y_train)
            y_train = y_train.type(torch.cuda.FloatTensor)

            output = game.agentBrain(X_train)
            loss = criterion(output, y_train)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Sync the target network every UPDATETARGET steps.
            stepcounter += 1
            if stepcounter == UPDATETARGET:
                targetNetwork.load_state_dict(game.agentBrain.state_dict())
                stepcounter = 0

            loss_log.append([t, loss.item()])
            loss_plot.append(loss.item())

        # Update the starting state with S'.
        state = new_state

        # Decrement epsilon over time (float division so it actually decays).
        if epsilon > 0.1 and t > observe:
            epsilon -= (1.0 / train_frames)

        # We died, so update stuff.
        if done:
            # Log the car's distance at this t.
            data_collect.append([t, car_distance])

            # Update max.
            if car_distance > max_car_distance:
                max_car_distance = car_distance

            # Time it.
            tot_time = timeit.default_timer() - start_time
            fps = car_distance / tot_time

            # Reset.
            game.reset()
            car_distance = 0
            start_time = timeit.default_timer()

        # Save the model.
        if t % 2000 == 0:
            savedFilename = 'saved-models_' + path + '/evaluatedPoliciesTest/' + \
                subPath + str(i) + '-' + filename + '-' + str(t) + '.h5'
            torch.save(game.agentBrain.state_dict(), savedFilename)
            with open('results/model_paths.txt', 'w') as ff:
                ff.write(savedFilename + '\n')
            print("Saving model %s - %d" % (filename, t))

    # Log results after we're done all frames.
    log_results(filename, data_collect, loss_log, i)
    return savedFilename
        if t % 25000 == 0:
            model.save_weights('saved-models/' + filename + '-' + str(t) + '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))

    log_results(filename, data_collect, loss_log)


if __name__ == '__main__':
    if TUNING:
        param_list = []
        nn_params = [[164, 150], [256, 256], [512, 512], [1000, 1000]]
        batchSizes = [40, 100, 400]
        buffers = [10000, 50000]

        for nn_param in nn_params:
            for batchSize in batchSizes:
                for buffer in buffers:
                    params = {
                        "batchSize": batchSize,
                        "buffer": buffer,
                        "nn": nn_param
                    }
                    param_list.append(params)

        for param_set in param_list:
            launch_learn(param_set)
    else:
        nn_param = [164, 150]
        params = {"batchSize": 100, "buffer": 50000, "nn": nn_param}
        model = nn.neural_net(NUM_INPUT, nn_param)
        train_net(model, params)
def __init__(self, params, load_replay_file=None, save_replay_file_prefix="replay",
             save_model_file_prefix="saved-models/", save_every=500, end_value=-500):
    # The input values are saved here so they can be used in other methods of the class.
    self.params = params
    # sequence_length specifies the number of commands that form a state.
    self.sequence_length = params['sequence_length']
    # number_of_actions specifies the number of possible actions.
    self.number_of_actions = params["number_of_actions"]
    # The neural network is built here.
    self.model = neural_net(self.sequence_length, self.number_of_actions, params["nn"])
    # The name that will be used when saving the neural network model.
    self.filename = params_to_filename(params)
    # Specifies the number of states to put into replay before saving.
    self.save_every = save_every
    # Prefixes for the replay and model file names when saved.
    self.save_replay_file_prefix = save_replay_file_prefix
    self.save_model_file_prefix = save_model_file_prefix
    # The value checked for at the end of the "game".
    self.end_value = end_value
    # Discount (forgetting) factor.
    self.GAMMA = params["GAMMA"]

    self.observe = 1000  # Number of frames to observe before training.
    self.epsilon = 1     # Chance to choose a random action.
    self.train_frames = 10000  # Number of frames to play.
    self.batchSize = params['batchSize']
    self.buffer = params['buffer']

    if isinstance(params["cmd2number_reward"], str):
        # If a string, load the reward mapping from file.
        self.cmd2number_reward = pickle.load(open(params["cmd2number_reward"], "rb"))
    else:
        # Otherwise it is already a dictionary.
        self.cmd2number_reward = params["cmd2number_reward"]

    self.state = np.zeros(params["sequence_length"])
    self.state_index = 0

    # Just stuff used below.
    self.max_hacker_cmds = 0
    self.hacker_cmds = 0
    self.data_collect = []

    if load_replay_file is None:
        self.replay = []  # Stores tuples of (S, A, R, S').
    else:
        self.replay = pickle.load(open(load_replay_file, "rb"))
    self.t = len(self.replay)
    self.loss_log = []

    # Let's time it.
    self.start_time = timeit.default_timer()

    self.state = np.zeros(10)
    self.lastAction = 0
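Given the fields initialized above (epsilon, observe, model), action selection during training is presumably epsilon-greedy over the network's Q-values; a minimal sketch under that assumption (choose_action is a hypothetical helper, not a method shown here):

import numpy as np

def choose_action(self, state):
    # Random action while still observing, or with probability epsilon.
    if self.t < self.observe or np.random.rand() < self.epsilon:
        return np.random.randint(self.number_of_actions)
    # Otherwise act greedily on the predicted Q-values.
    qvals = self.model.predict(np.array([state]), batch_size=1)[0]
    return int(np.argmax(qvals))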
if __name__ == "__main__": if TUNING: param_list = [] nn_params = [[20, 20], [164, 150], [256, 256], [512, 512], [1000, 1000]] batchSizes = [32, 40, 100, 400] buffers = [10000, 50000, 500000] for nn_param in nn_params: for batchSize in batchSizes: for buffer in buffers: params = { "batchSize": batchSize, "buffer": buffer, "nn": nn_param } param_list.append(params) for param_set in param_list: launch_learn(param_set) else: nn_param = [1000, 1000] params = { "batchSize": 40, "buffer": 500000, "nn": nn_param } model = neural_net(NUM_SENSORS, nn_param) train_net(model, params)
        # Take action.
        immediateReward, state, readings = game_state.frame_step(action)

        # Start recording feature expectations only after 100 frames.
        if car_distance > 100:
            featureExpectations += (GAMMA**(car_distance - 101)) * np.array(readings)

        # Tell us something.
        if car_distance % 2000 == 0:
            print("Current distance: %d frames." % car_distance)
            break

    return featureExpectations


if __name__ == "__main__":
    BEHAVIOR = sys.argv[1]
    ITERATION = sys.argv[2]
    FRAME = sys.argv[3]
    saved_model = 'saved-models_' + BEHAVIOR + '/evaluatedPolicies/' + \
        str(ITERATION) + '-164-150-100-50000-' + str(FRAME) + '.h5'
    weights = [-0.79380502, 0.00704546, 0.50866139, 0.29466834,
               -0.07636144, 0.09153848, -0.02632325, -0.09672041]
    model = neural_net(NUM_STATES, [164, 150], saved_model)
    print(play(model, weights))
    car_distance = 0
    game_state = main.GameState()

    # Do nothing to get initial state.
    _, state = game_state.frame_step(2)

    exit = False

    # Move.
    while not exit:
        car_distance += 1

        # Choose action.
        action = np.argmax(model.predict(state, batch_size=1))

        # Take action.
        _, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)

        # Event queue.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                exit = True


if __name__ == "__main__":
    saved_model = 'saved-models/320-320-1000-50000-100000.h5'
    model = neural_net(NUM_SENSORS, [320, 320], saved_model)
    play(model)
        param_list = []
        nn_params = [[164, 150], [256, 256], [512, 512], [1000, 1000]]
        batchSizes = [40, 100, 400]
        buffers = [10000, 50000]

        for nn_param in nn_params:
            for batchSize in batchSizes:
                for buffer in buffers:
                    params = {
                        "batchSize": batchSize,
                        "buffer": buffer,
                        "nn": nn_param
                    }
                    param_list.append(params)

        for param_set in param_list:
            launch_learn(param_set)
    else:
        nn_param = [128, 128]
        params = {
            "batchSize": 64,
            "buffer": 50000,
            "nn": nn_param
        }
        saved_model = 'saved-models/128-128-64-50000-100000.h5'
        model = neural_net(NUM_INPUT, [128, 128], saved_model)
        # model = neural_net(NUM_INPUT, nn_param)
        train_net(model, params)
import keras
import numpy as np
import random
from nn import neural_net
import h5py
import Usonic
import time

SAVED_MODEL = 'saved-models/ex1.h5'
NUM_INPUT = 7
NUM_SENSOR_RESOLUTION = 39
NN_SIZE = np.array([256, 256])
test_cnt = 0

model = neural_net(NUM_INPUT, NN_SIZE, SAVED_MODEL)
# model = neural_net(NUM_INPUT, NN_SIZE)

start = time.time()
while test_cnt < 3601:
    Dist = Usonic.distanceAll() / 10
    # In = np.random.random_integers(NUM_SENSOR_RESOLUTION, size=(NUM_INPUT))
    In = np.array([Dist[2], Dist[2], Dist[0], Dist[0], Dist[0], Dist[1], Dist[1]])
    In = In.astype(int)
    state = np.array([In])
    action = np.argmax(model.predict(state, batch_size=1))
    test_cnt += 1
    print("Number %d || action = %d || Dist = %d, %d, %d"
          % (test_cnt, action, Dist[0], Dist[1], Dist[2]))
end = time.time()

interval = end - start
print("Time Elapsed: %d sec" % interval)
NUM_SENSORS = 5


def play(model):
    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial state.
    _, state = game_state.frame_step(2)

    # Move.
    while True:
        car_distance += 1

        # Choose action.
        action = np.argmax(model.predict(state, batch_size=1))

        # Take action.
        _, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)


if __name__ == "__main__":
    saved_model = 'saved-models/128-128-64-50000-50000.h5'
    model = neural_net(NUM_SENSORS, [128, 128], saved_model)
    play(model)
# for param_set in param_list:
#     launch_learn(param_set)
# else:

# Earlier layer sizes tried; only the last assignment takes effect.
nn_param = [164, 150]
nn_param = [164, 0]
nn_param = [64, 32]
nn_param = [12, 8]
nn_param = [164, 41]
params = {
    'nodes1': nn_param[0],
    'nodes2': nn_param[1],
    'x_dim': X_DIM,
    'y_dim': Y_DIM,
    "batchSize": 50,
    "buffer": 50000,
    "nn": nn_param,
    'solver': 'rms',
    'num_actions': 3
}
model = neural_net(NUM_INPUT, params)
print('made model')
train_net(model, params, 'lane_following')
play_lane_following(model)
# play(model)