plt.title("Location Graph") for n in range(env.num_vehicles): if (n < env.num_leading_cars): plt.plot(np.array(data_d)[:, n] + start_disp[n * 3 + 1], color='b') elif (n == env.num_leading_cars): plt.plot(np.array(data_d)[:, n] + start_disp[n * 3 + 1], "g") else: plt.plot(np.array(data_d)[:, n] + start_disp[n * 3 + 1], "r") plt.ylabel("Location") plt.xlabel("Time") plt.show() # CAV Simulator (Generates Fake Data now) env = Simulator(num_leading_vehicle, num_following_vehicle) env.normalize = False #env.verbose = True num_episodes = num_eps rewards = [] for i in range(num_episodes): # data_t = [] data_d = [] start_disp = None # s = env.reset() # env.normalize = True start_disp = env.center_state(env.current_states[0])
    torch.save(agent.state_dict(), './cem_cartpole.pth')  # Path to save model to
    print('Episode {}\tBest Average Score: {:.2f}'.format(i_iter, np.mean(scores_deque)))
    print('Episode {}\tAll Average Score: {:.2f}\tAll SE Score: {:.2f}'.format(
        i_iter, np.mean(rewards), np.std(rewards) / (len(rewards) ** 0.5)))
    return agent, scores


if __name__ == "__main__":
    # Variable to designate train or just load from path and test
    train = True
    #
    env = Simulator(num_leading_vehicle, num_following_vehicle)
    print('observation space:', env.observation_space)
    print('action space:', env.action_space)
    agent = Agent(env)
    #
    if start_from_init:
        print("Started from CACC initialization")
        agent.load_state_dict(torch.load('./mimic_cav_90_.pth'))
    #
    if train:
        agent, scores = cem(agent)
    # evaluate
    # load the weights from file
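    # --- Hedged sketch (not in the original file): one greedy evaluation rollout
    # for the trained agent, following the "# evaluate" note above. It assumes the
    # Simulator follows the Gym reset()/step() API returning
    # (next_state, reward, done, info) and that calling the Agent on a float state
    # tensor returns an action tensor; adjust to the project's real interfaces.
    agent.load_state_dict(torch.load('./cem_cartpole.pth'))
    state = env.reset()
    total_reward, done = 0.0, False
    while not done:
        with torch.no_grad():
            action = agent(torch.from_numpy(np.asarray(state, dtype=np.float32)))
        state, reward, done, _ = env.step(action.numpy())
        total_reward += reward
    print('Evaluation return:', total_reward)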
    loss.backward()
    optimizer.step()  # Does the update
    print("Loss", loss)
    acc = (((torch.argmax(F.softmax(output), 1, keepdim=True) == target).float().sum()).numpy() / (len(target))) * 100
    print("Accuracy", str(acc) + "%")
    # print(random.sample([x[1] for x in batch], 10))
    data_loss.append(loss.detach().numpy())
    data_acc.append(acc / 100)
    return loss.detach().numpy(), acc


if __name__ == "__main__":
    env = Simulator(num_leading_vehicle, num_following_vehicle)
    print('observation space:', env.observation_space)
    print('action space:', env.action_space)
    agent = Agent(env)

    # evaluate
    # load the weights from file
    # agent.load_state_dict(torch.load('./cem_cartpole.pth'))
    # agent.load_state_dict(torch.load('./cem_cartpole_5.pth'))  # Path to load model from
    # agent.load_state_dict(torch.load('./cem_cartpole.pth'))

    num_episodes = 1000
    rewards = []
    Replay_Buffer = deque(maxlen=10000)
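    # --- Hedged sketch (not in the original file): filling Replay_Buffer with
    # (state, action) pairs from simulator rollouts to feed the supervised update
    # above. A Gym-style env.step() and env.action_space.sample() are assumptions;
    # the real script likely records actions from a CACC-style controller rather
    # than random ones.
    for episode in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            action = env.action_space.sample()
            next_state, reward, done, _ = env.step(action)
            Replay_Buffer.append((state, action))
            state = next_state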
def create_loc_map(env):
    # Plot each vehicle's location over time: leading vehicles in blue, the
    # controlled CAV in green, following vehicles in red. Relies on the
    # module-level data_d and start_disp built in the episode loop below.
    plt.title("Location Graph")
    for n in range(env.num_vehicles):
        if n < env.num_leading_cars:
            plt.plot(np.array(data_d)[:, n] + start_disp[n * 3 + 1], color='b')
        elif n == env.num_leading_cars:
            plt.plot(np.array(data_d)[:, n] + start_disp[n * 3 + 1], "g")
        else:
            plt.plot(np.array(data_d)[:, n] + start_disp[n * 3 + 1], "r")
    plt.ylabel("Location")
    plt.xlabel("Time")
    plt.show()


env = Simulator(num_leading_vehicle, num_following_vehicle)
env.normalize = False
# env.verbose = True
num_episodes = num_eps
results = []

for i in range(num_episodes):
    #
    data_t = []
    data_d = []
    start_disp = None
    #
    s = env.reset()
    # env.normalize = True
    start_disp = env.center_state(env.current_states[0])
from __future__ import division
# modified from https://github.com/udacity/deep-reinforcement-learning/blob/master/cross-entropy/CEM.ipynb
import numpy as np
import gym
from gym import wrappers
from collections import deque
import torch
import torch.nn as nn

from CAVSimulator0910 import Simulator

env = Simulator(3, 0)

# --- second set of imports: keras-rl DQN script ---
import argparse
import sys
sys.path.append('../../keras-rl')

from PIL import Image
import numpy as np
import gym

from keras.models import Model
from keras.layers import Flatten, Convolution2D, Input, Dense
from keras.optimizers import Adam
import keras.backend as K

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy, LinearAnnealedPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import TrainEpisodeLogger, ModelIntervalCheckpoint
from keras.models import Sequential
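# --- Hedged sketch (not from the original script): how these keras-rl imports
# are typically wired together into a DQN baseline. The network width and all
# hyperparameters are illustrative, 'cav_env' is a placeholder for however the
# project wraps the CAV simulator as a Gym-style environment, and a discrete
# action space (action_space.n) is assumed.
def build_dqn(env):
    nb_actions = env.action_space.n
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=1.0, value_min=0.1,
                                  value_test=0.05, nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=1000, target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn

# Example usage (assuming such an env exists):
# dqn = build_dqn(cav_env)
# dqn.fit(cav_env, nb_steps=50000, visualize=False, verbose=2)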
from __future__ import division
# modified from https://github.com/udacity/deep-reinforcement-learning/blob/master/cross-entropy/CEM.ipynb
import numpy as np
import gym
from gym import wrappers
from collections import deque
import torch
import torch.nn as nn

from CAVSimulator0910 import Simulator

num_leading_vehicle = 3
env = Simulator(num_leading_vehicle, 0)


#!/usr/bin/env python
import pickle
import tensorflow as tf
import numpy as np
import tf_util
import gym


def main():
    #===========================================================================
    # generate expert data
    #===========================================================================
    # param
    envname = 'CAV_Controller'
    render = 0
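    # --- Hedged sketch (not in the original file): the usual shape of an
    # expert-data collection loop for this kind of script. 'policy_fn',
    # 'num_rollouts', and the output path are hypothetical; the real expert
    # policy and environment come from elsewhere in the project (e.g. loaded
    # through tf_util or built from the CAV simulator).
    env = gym.make(envname)  # assumption: envname is registered with gym
    policy_fn = lambda obs: env.action_space.sample()  # hypothetical stand-in expert
    num_rollouts = 20
    observations, actions = [], []
    for _ in range(num_rollouts):
        obs, done = env.reset(), False
        while not done:
            action = policy_fn(obs)
            observations.append(obs)
            actions.append(action)
            obs, _, done, _ = env.step(action)
            if render:
                env.render()
    expert_data = {'observations': np.array(observations),
                   'actions': np.array(actions)}
    with open(envname + '_expert_data.pkl', 'wb') as f:  # illustrative path
        pickle.dump(expert_data, f)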