plt.title("Location Graph")

    for n in range(env.num_vehicles):
        if (n < env.num_leading_cars):
            plt.plot(np.array(data_d)[:, n] + start_disp[n * 3 + 1], color='b')
        elif (n == env.num_leading_cars):
            plt.plot(np.array(data_d)[:, n] + start_disp[n * 3 + 1], "g")
        else:
            plt.plot(np.array(data_d)[:, n] + start_disp[n * 3 + 1], "r")
    plt.ylabel("Location")
    plt.xlabel("Time")
    plt.show()


# CAV Simulator (currently generates fake data)
env = Simulator(num_leading_vehicle, num_following_vehicle)
env.normalize = False
#env.verbose = True
num_episodes = num_eps
rewards = []

for i in range(num_episodes):
    #
    data_t = []
    data_d = []
    start_disp = None
    #
    s = env.reset()
    #
    env.normalize = True
    start_disp = env.center_state(env.current_states[0])
Example #2
        torch.save(agent.state_dict(),
                   './cem_cartpole.pth')  # Path to save model to

        print('Episode {}\tBest Average Score: {:.2f}'.\
              format(i_iter, np.mean(scores_deque)))
        print('Episode {}\tAll Average Score: {:.2f}\tAll SE Score: {:.2f}'.\
              format(i_iter, np.mean(rewards), np.std(rewards)/(len(rewards)**0.5)))

    return agent, scores
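
# For reference, a minimal cross-entropy-method loop in the style of the
# Udacity CEM notebook referenced elsewhere in these examples. This is only a
# sketch, not the exact cem() used here: pop_size, sigma, elite_frac,
# n_iterations, and the agent.get_weights_dim()/agent.evaluate() interface are
# assumptions.
import numpy as np
from collections import deque

def cem_sketch(agent, n_iterations=100, pop_size=50, elite_frac=0.2, sigma=0.5):
    n_elite = int(pop_size * elite_frac)
    scores_deque = deque(maxlen=100)
    scores = []
    best_weight = sigma * np.random.randn(agent.get_weights_dim())
    for i_iter in range(1, n_iterations + 1):
        # Perturb the current best weights to build a population of candidates.
        weights_pop = [best_weight + sigma * np.random.randn(agent.get_weights_dim())
                       for _ in range(pop_size)]
        rewards = np.array([agent.evaluate(w) for w in weights_pop])
        # Average the elite candidates into the new best weight vector.
        elite_idxs = rewards.argsort()[-n_elite:]
        best_weight = np.array([weights_pop[i] for i in elite_idxs]).mean(axis=0)
        scores_deque.append(agent.evaluate(best_weight))
        scores.append(scores_deque[-1])
    return agent, scores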


if __name__ == "__main__":
    # Set train = True to train; set it to False to just load the model from a path and test it
    train = True
    #
    env = Simulator(num_leading_vehicle, num_following_vehicle)

    print('observation space:', env.observation_space)
    print('action space:', env.action_space)

    agent = Agent(env)
    #
    if start_from_init:
        print("Started from CACC initialization")
        agent.load_state_dict(torch.load('./mimic_cav_90_.pth'))
    #
    if train:
        agent, scores = cem(agent)

    # evaluate
    # load the weights from file
Example #3
        loss.backward()
        optimizer.step()  # Does the update
        print("Loss", loss)

        acc = (((torch.argmax(F.softmax(output, dim=1), 1, keepdim=True)
                 == target).float().sum()).numpy() / (len(target))) * 100
        print("Accuracy", str(acc) + "%")
        #print(random.sample([x[1] for x in batch],10))
        data_loss.append(loss.detach().numpy())
        data_acc.append(acc / 100)

    return loss.detach().numpy(), acc
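
# The lines above are only the tail of a supervised training step; a complete
# step of this pattern generally looks like the sketch below. The model,
# criterion, and optimizer names and the (data, target) batch layout are
# assumptions, not this project's exact mimic-training code.
import torch

def train_step_sketch(model, optimizer, criterion, data, target):
    optimizer.zero_grad()             # clear gradients from the previous step
    output = model(data)              # forward pass
    loss = criterion(output, target)  # e.g. nn.CrossEntropyLoss()
    loss.backward()                   # backpropagate
    optimizer.step()                  # update the parameters
    # Accuracy from the predicted class indices (softmax is monotonic, so the
    # argmax of the raw logits gives the same prediction).
    pred = torch.argmax(output, dim=1)
    acc = (pred == target.view(-1)).float().mean().item() * 100
    return loss.item(), acc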


if __name__ == "__main__":
    env = Simulator(num_leading_vehicle, num_following_vehicle)

    print('observation space:', env.observation_space)
    print('action space:', env.action_space)

    agent = Agent(env)

    # evaluate
    # load the weights from file
    #agent.load_state_dict(torch.load('./cem_cartpole.pth'))
    #agent.load_state_dict(torch.load('./cem_cartpole_5.pth')) # Path to load model from
    num_episodes = 1000
    rewards = []

    Replay_Buffer = deque(maxlen=10000)
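    # Sketch of typical replay-buffer usage; the transition layout and batch
    # size here are assumptions rather than this project's exact training loop:
    #   Replay_Buffer.append((s, a, r, s_next, done))   # store one transition
    #   batch = random.sample(Replay_Buffer, 64)        # needs `import random`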
def create_loc_map(env):
    # Plot each vehicle's location trajectory over time; data_d and start_disp
    # are module-level variables set up in the episode loop below.
    plt.title("Location Graph")

    for n in range(env.num_vehicles):
        if (n < env.num_leading_cars):
            plt.plot(np.array(data_d)[:, n] + start_disp[n * 3 + 1], color='b')
        elif (n == env.num_leading_cars):
            plt.plot(np.array(data_d)[:, n] + start_disp[n * 3 + 1], "g")
        else:
            plt.plot(np.array(data_d)[:, n] + start_disp[n * 3 + 1], "r")
    plt.ylabel("Location")
    plt.xlabel("Time")
    plt.show()

env = Simulator(num_leading_vehicle, num_following_vehicle)
env.normalize = False
#env.verbose = True
num_episodes = num_eps
results = []

for i in range(num_episodes):
    #
    data_t = []
    data_d = []
    start_disp = None
    #
    s = env.reset()
    #
    env.normalize = True
    start_disp = env.center_state(env.current_states[0])
from __future__ import division

# adapted from https://github.com/udacity/deep-reinforcement-learning/blob/master/cross-entropy/CEM.ipynb
import numpy as np
import gym
from gym import wrappers
from collections import deque
import torch
import torch.nn as nn

from CAVSimulator0910 import Simulator

env = Simulator(3, 0)

import argparse
import sys
sys.path.append('../../keras-rl')
from PIL import Image
import numpy as np
import gym
from keras.models import Model
from keras.layers import Flatten, Convolution2D, Input, Dense
from keras.optimizers import Adam
import keras.backend as K
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy, LinearAnnealedPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import TrainEpisodeLogger, ModelIntervalCheckpoint

from keras.models import Sequential
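
# Sketch of how these keras-rl pieces are typically wired together; the network
# architecture, nb_actions, and the hyperparameters below are placeholders, not
# the configuration used in this project.
def build_dqn_sketch(observation_shape, nb_actions):
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + observation_shape))  # window_length=1
    model.add(Dense(64, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))  # one Q-value per action
    memory = SequentialMemory(limit=50000, window_length=1)
    # Epsilon-greedy exploration, annealed linearly from 1.0 down to 0.1.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=1.0, value_min=0.1,
                                  value_test=0.05, nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   policy=policy, nb_steps_warmup=1000, target_model_update=1e-2)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn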
Example #6
from __future__ import division

# adapted from https://github.com/udacity/deep-reinforcement-learning/blob/master/cross-entropy/CEM.ipynb
import numpy as np
import gym
from gym import wrappers
from collections import deque
import torch
import torch.nn as nn

from CAVSimulator0910 import Simulator

num_leading_vehicle = 3
env = Simulator(num_leading_vehicle, 0)

#!/usr/bin/env python

import pickle
import tensorflow as tf
import numpy as np
import tf_util
import gym


def main():
    #===========================================================================
    # generate expert data
    #===========================================================================
    # param
    envname = 'CAV_Controller'
    render = 0