Code Example #1
def initialize_data():
    """
    initialize database using actor and movie data from scraped json files
    """
    actor_data = json.load(open("actor.json"))
    movie_data = json.load(open("movie.json"))
    movie_objects = {}
    actor_objects = {}
    #logger.info('load data from json and prepare to construct data structure')

    for movie in movie_data:
        new_movie = Movie(movie["movieName"], movie["movieYear"], normalize_grossing(movie["movieGrossing"]), [])
        movie_objects[new_movie.name] = new_movie
        for actor in movie["movieStaring"]:

            for available_actor in actor_data:
                if available_actor["actorName"] == actor:

                    if available_actor["actorName"] not in actor_objects:
                        actor_objects[available_actor["actorName"]] = Actor(available_actor["actorName"], normalize_age(available_actor["actorAge"]), [], 0)
                    actor_objects[available_actor["actorName"]].act_movie.append(new_movie)
                    if new_movie.grossing is not None:
                        actor_objects[available_actor["actorName"]].total_grossing += new_movie.grossing

                    new_movie.attend_actor.append(actor_objects[available_actor["actorName"]])
                    break

    for actor in actor_data:
        if actor["actorName"] not in actor_objects:
            actor_objects[actor["actorName"]] = Actor(actor["actorName"], normalize_age(actor["actorAge"]), [], 0)

    return movie_objects, actor_objects
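The function returns two dictionaries keyed by name. A minimal usage sketch, assuming the Actor objects store the constructor's first argument as a name attribute (an assumption, not shown on this page), could rank actors by accumulated grossing:

movies, actors = initialize_data()
# Rank actors by the grossing accumulated across the movies they appear in
# (assumes the Actor constructor's first argument is exposed as `name`).
top_actors = sorted(actors.values(), key=lambda a: a.total_grossing, reverse=True)
for actor in top_actors[:5]:
    print(actor.name, actor.total_grossing)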
Code Example #2
File: DDPG.py Project: YoungPeak/Quadcopter_2
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters
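The tau value above is the coefficient for the "soft update of target parameters" mentioned in the comment. As a hedged illustration only (not code from this project), a Keras-style soft update for models exposing get_weights/set_weights as in the snippet might look like:

def soft_update(local_model, target_model, tau):
    # Polyak averaging: w_target <- tau * w_local + (1 - tau) * w_target
    new_weights = [tau * lw + (1.0 - tau) * tw
                   for lw, tw in zip(local_model.model.get_weights(),
                                     target_model.model.get_weights())]
    target_model.model.set_weights(new_weights)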
Code Example #3
    def __init__(self,
                 seed,
                 n_state,
                 n_action,
                 batch_size=64,
                 buffer=1e5,
                 gamma=0.99,
                 lr_actor=1e-4,
                 lr_critic=1e-3,
                 weight_decay=0,
                 tau=1e-3):
        self.batch_size = batch_size

        #init actor
        self.local_actor = Actor(n_state, n_action, seed).to(device)
        self.target_actor = Actor(n_state, n_action, seed).to(device)
        self.optim_actor = torch.optim.Adam(self.local_actor.parameters(),
                                            lr=lr_actor)
        #init critic
        self.local_critic = Critic(n_state, n_action, seed).to(device)
        self.target_critic = Critic(n_state, n_action, seed).to(device)
        self.optim_critic = torch.optim.Adam(self.local_critic.parameters(),
                                             lr=lr_critic,
                                             weight_decay=weight_decay)

        #init memory
        self.memory = memory(int(buffer), device, seed)
        self.tau = tau
        self.gamma = gamma
        self.noise = noise(n_action, seed=seed)
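In the PyTorch-based agents on this page, tau plays the same role. A hedged sketch of the corresponding parameter update (an illustration under that assumption, not code from this project):

def soft_update(local_net, target_net, tau):
    # Shift each target parameter a fraction tau toward the corresponding local parameter.
    for target_param, local_param in zip(target_net.parameters(), local_net.parameters()):
        target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)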
Code Example #4
    def __init__(self, state_size, action_size):
        """
        Initializes Agent object.
        @Param:
        1. state_size: dimension of each state.
        2. action_size: number of actions.
        """
        self.state_size = state_size
        self.action_size = action_size
        
        #Actor network
        self.actor_local = Actor(self.state_size, self.action_size).to(device) #local model
        self.actor_target = Actor(self.state_size, self.action_size).to(device) #target model, TD-target
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR) #initialize Adam optimizer for the Actor network.

        #Critic network
        self.critic_local = Critic(self.state_size, self.action_size).to(device) #local model
        self.critic_target = Critic(self.state_size, self.action_size).to(device) #target model, TD-target
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY) #initialize Adam optimizer (with L2 weight decay) for the Critic network.

        #Noise process
        self.noise = OUNoise(action_size) #define Ornstein-Uhlenbeck process

        #Replay memory
        self.memory = ReplayBuffer(self.action_size, BUFFER_SIZE, MINI_BATCH) #define experience replay buffer object
Code Example #5
File: agent.py Project: bebbo203/DDPG
    def __init__(self, state_size, action_size, max_action, minibatch_size,
                 a_lr, c_lr, gamma, tau):
        self.state_size = state_size
        self.action_size = action_size
        self.max_action = max_action

        self.critic_lr = c_lr
        self.actor_lr = a_lr

        self.actor_network = Actor(self.state_size, self.action_size,
                                   self.max_action, self.actor_lr)
        self.actor_target_network = Actor(self.state_size, self.action_size,
                                          self.max_action, self.actor_lr)
        self.critic_network = Critic(self.state_size, self.action_size,
                                     self.critic_lr)
        self.critic_target_network = Critic(self.state_size, self.action_size,
                                            self.critic_lr)

        self.actor_target_network.set_weights(self.actor_network.get_weights())
        self.critic_target_network.set_weights(
            self.critic_network.get_weights())

        self.critic_optimizer = optimizers.Adam(learning_rate=self.critic_lr)
        self.actor_optimizer = optimizers.Adam(learning_rate=self.actor_lr)

        self.replay_buffer = ReplayBuffer(1e6)
        self.MINIBATCH_SIZE = minibatch_size
        self.GAMMA = tf.cast(gamma, dtype=tf.float64)
        self.TAU = tau
        self.noise = OUNoise(self.action_size)
Code Example #6
    def __init__(self, state_size, batch_size, is_eval=False):
        self.state_size = state_size
        self.action_size = 3  #buy,sell,hold

        #defining replay memory size
        self.buffer_size = 1000000
        self.batch_size = batch_size
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
        self.inventory = []

        #define whether or not training is going on
        self.is_eval = is_eval
        #Discount factor
        self.gamma = 0.99
        # soft update for AC model
        self.tau = 0.001

        #instantiate the local and target actor models for soft updates
        self.actor_local = Actor(self.state_size, self.action_size)
        self.actor_target = Actor(self.state_size, self.action_size)

        #critic model mapping state-action pairs to Q-values
        self.critic_local = Critic(self.state_size, self.action_size)

        #instantiate the local and target critic models for soft updates
        self.critic_target = Critic(self.state_size, self.action_size)
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())

        #set target model parameter to local model parameters
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())
Code Example #7
def test_movie():
    # check_boolean_equality_function
    movie = Movie("Moana", 2009)
    print(movie)

    movie3 = Movie("Moana", 2010)
    print(movie3)

    movie2 = Movie("Inception", 2010)
    print(movie2)

    print(movie > movie2)
    print(movie < movie3)
    print(movie3 == movie3)

    # check_remove_actor_in_list_of_actors
    actors = [Actor("Auli'i Cravalho"), Actor("Dwayne Johnson"), Actor("Rachel House"), Actor("Temuera Morrison")]
    for actor in actors:
        movie.add_actor(actor)
    movie.remove_actor(Actor("Auli'i Cravalho"))
    print(movie.actors)

    # check_for_out_of_range_runtime
    movie.runtime_minutes = 121
    print("Movie runtime: {} minutes".format(movie.runtime_minutes))

    movie.external_rating = 30
    print("votes: {}".format(movie.external_rating))
Code Example #8
    def __init__(self, state_size, action_size, params, seed):
        """Initialize a DDPG agent
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            params (Params): hyperparameters 
            seed (int): random seed
        """

        self.gamma = params.gamma
        self.tau = params.tau
        self.seed = np.random.seed(seed)

        # actor networks
        self.actor_local = Actor(state_size, action_size, params.units_actor,
                                 seed).to(device)
        self.actor_target = Actor(state_size, action_size, params.units_actor,
                                  seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          params.lr_actor)

        # critic networks
        self.critic_local = Critic(state_size, action_size,
                                   params.units_critic, seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    params.units_critic, seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           params.lr_critic)

        # Noise process
        self.noise = OUNoise(action_size, seed, params.mu, params.theta,
                             params.sigma)
Code Example #9
    def __init__(self, state_size=24, action_size=2, random_seed=0):
        """
        Initializes Agent object.
        @Param:
        1. state_size: dimension of each state.
        2. action_size: number of actions.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)

        #Actor network
        self.actor_local = Actor(self.state_size, self.action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        #Critic network
        self.critic_local = Critic(self.state_size, self.action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC)

        #Noise process
        self.noise = OUNoise(action_size,
                             random_seed)  #define Ornstein-Uhlenbeck process

        #Replay memory
        self.memory = ReplayBuffer(
            self.action_size, BUFFER_SIZE, MINI_BATCH,
            random_seed)  #define experience replay buffer object
Code Example #10
File: engine.py Project: Coul33t/iso_tbs
    def init_game(self):
        # self.gamemap.load_map_from_json('res/map/test_map.json')
        self.gamemap.create_default_terrain()

        for i in range(3, 6):
            self.actors.append(Actor('soldier', 's', 0, sprite=0xE100, color=TEAM_COLORS[0], x=i, y=1, movement=1,
                                     stats=Stats(3,3,1)))
        for i in range(0, 10, 2):
            self.actors.append(Actor('barbarian', 'b', 1, sprite=0xE101, color=TEAM_COLORS[1], x=i, y=8, movement=2,
                                     stats=Stats(3,2,0)))


        self.actors.append(Actor('king', 'K', 0, sprite=0xE102, color=TEAM_COLORS[0], x=4, y=0,
                                 movement=2, stats=Stats(5,3,4)))
        self.actors.append(Actor('leader', 'L', 1, sprite=0xE103, color=TEAM_COLORS[1], x=4, y=9,
                                 movement=2, stats=Stats(7,4,2)))

        self.actors.append(Actor('Xander', 'S', 2, sprite=0xE104, color=TEAM_COLORS[2], x=5, y=5,
                                 movement=10, stats=Stats(7,40,2)))

        self.turn_to_take = self.actors.copy()
        self.turn_to_take.sort(key=lambda x: x.stats.mod['agility'], reverse=True)
        self.unit_turn = self.turn_to_take.pop(0)
        self.unit_turn.new_turn()
        self.game_state = 'new_turn'
Code Example #11
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        self.mu = 0
        self.theta = 0.2
        self.sigma = 0.005  # random noise
        self.noise = Noise(self.action_size, self.mu, self.theta, self.sigma)
        self.gamma = 0.9
        self.tau = 0.1
        self.best_score = -np.inf
        self.score = 0

        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
Code Example #12
    def __init__(self, state_size, action_size, random_seed, hyperparams):
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.hyperparams = hyperparams

        self.actor = Actor(state_size, action_size, random_seed).to(device)
        self.actor_noise = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optim = optim.Adam(self.actor.parameters(),
                                      lr=hyperparams.alpha_actor)

        self.critic = Critic(state_size, action_size, random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optim = optim.Adam(
            self.critic.parameters(),
            lr=hyperparams.alpha_critic,
            weight_decay=hyperparams.weight_decay,
        )

        self.replay_buffer = ReplayBuffer(hyperparams.buffer_size,
                                          hyperparams.batch_size, random_seed)

        self.noise = OUNoise(
            action_size,
            random_seed,
            self.hyperparams.mu,
            self.hyperparams.theta,
            self.hyperparams.sigma,
        )
Code Example #13
    def __init__(self, n_agents, state_size, action_size, seed):
        critic_input_size = (state_size + action_size) * n_agents

        self.actor_regular = Actor(state_size, action_size, seed).to(DEVICE)
        self.actor_target = Actor(state_size, action_size, seed).to(DEVICE)

        self.critic_regular = Critic(critic_input_size, seed).to(DEVICE)
        self.critic_target = Critic(critic_input_size, seed).to(DEVICE)
Code Example #14
    def __init__(self,
                 input_dim,
                 action_dim,
                 action_scale,
                 memory_size,
                 gamma,
                 tau,
                 learning_rate_actor=1e-3,
                 learning_rate_critic=1e-3,
                 device_name="cpu:0",
                 checkpoint_directory="ckpt/"):
        super(DDPG, self).__init__()
        self.input_dim = input_dim
        self.action_dim = action_dim
        self.action_scale = action_scale
        self.memory_size = memory_size
        self.replay_memory = ReplayMemory(memory_size)
        self.gamma = gamma
        self.tau = tau
        self.learning_rate_actor = learning_rate_actor
        self.learning_rate_critic = learning_rate_critic
        self.device_name = device_name

        self.checkpoint_directory = checkpoint_directory
        if not os.path.exists(self.checkpoint_directory):
            os.makedirs(self.checkpoint_directory)

        # actor
        self.actor_active = Actor(self.input_dim,
                                  self.action_dim,
                                  self.action_scale,
                                  name="actor_active")
        self.actor_target = Actor(self.input_dim,
                                  self.action_dim,
                                  self.action_scale,
                                  name="actor_target")
        self.actor_target.trainable = False

        # critic
        self.critic_active = Critic(self.input_dim,
                                    self.action_dim,
                                    name="critic_active")
        self.critic_target = Critic(self.input_dim,
                                    self.action_dim,
                                    name="critic_target")
        self.critic_target.trainable = False

        # optimizer
        self.optimizer_actor = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate_actor)
        self.optimizer_critic = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate_critic)

        # logging
        self.global_step = 0
Code Example #15
File: agent.py Project: tahsmith/drlnd-p2-reacher
    def __init__(self, device, state_size, action_size, buffer_size=10,
                 batch_size=10,
                 actor_learning_rate=1e-4,
                 critic_learning_rate=1e-3,
                 discount_rate=0.99,
                 tau=0.1,
                 steps_per_update=4,
                 action_range=None,
                 dropout_p=0.0,
                 weight_decay=0.0001,
                 noise_max=0.2,
                 noise_decay=1.0,
                 n_agents=1
                 ):
        self.device: torch.device = device
        self.state_size = state_size
        self.action_size = action_size

        self.critic_control = Critic(state_size, action_size).to(device)
        self.critic_control.dropout.p = dropout_p
        self.critic_target = Critic(state_size, action_size).to(device)
        self.critic_target.eval()
        self.critic_optimizer = torch.optim.Adam(
            self.critic_control.parameters(),
            weight_decay=weight_decay,
            lr=critic_learning_rate)

        self.actor_control = Actor(state_size, action_size, action_range).to(
            device)
        self.actor_control.dropout.p = dropout_p
        self.actor_target = Actor(state_size, action_size, action_range).to(
            device)
        self.actor_target.eval()
        self.actor_optimizer = torch.optim.Adam(
            self.actor_control.parameters(),
            weight_decay=weight_decay,
            lr=actor_learning_rate)

        self.batch_size = batch_size
        self.min_buffer_size = batch_size
        self.replay_buffer = ReplayBuffer(device, state_size, action_size,
                                          buffer_size)

        self.discount_rate = discount_rate

        self.tau = tau

        self.step_count = 0
        self.steps_per_update = steps_per_update

        self.noise_max = noise_max
        self.noise = OUNoise([n_agents, action_size], 15071988, sigma=self.noise_max)
        self.noise_decay = noise_decay
        self.last_score = float('-inf')
Code Example #16
    def __init__(self, env):
        """

        :param task: (class instance) Instructions about the goal and reward
        """

        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.shape[0]
        self.action_low = env.action_space.low
        self.action_high = env.action_space.high
        self.score = 0.0
        self.best = 0.0

        # Instances of the policy function or actor and the value function or critic
        # Actor critic with Advantage

        # Actor local and target
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)

        # Save actor model for future use
        actor_local_model_yaml = self.actor_local.model.to_yaml()
        with open("actor_local_model.yaml", "w") as yaml_file:
            yaml_file.write(actor_local_model_yaml)

        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic local and target
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model with local model
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Initialize the Ornstein-Uhlenbeck noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Initialize the Replay Memory
        self.buffer_size = 100000
        self.batch_size = 64  # original 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Parameters for the Algorithm
        self.gamma = 0.99  # Discount factor
        self.tau = 0.01  # Soft update for target parameters Actor Critic with Advantage
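The OUNoise class these agents construct (with exploration_mu, exploration_theta and exploration_sigma) is not shown on this page. The following is a typical Ornstein-Uhlenbeck process implementation, given only as an assumed sketch of the interface the snippets rely on:

import numpy as np

class OUNoise:
    """Illustrative Ornstein-Uhlenbeck noise process (assumed interface)."""
    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # Reset the internal state back to the mean.
        self.state = np.copy(self.mu)

    def sample(self):
        # dx = theta * (mu - x) + sigma * N(0, 1): mean-reverting, temporally correlated noise.
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.standard_normal(self.state.shape)
        self.state = self.state + dx
        return self.state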
Code Example #17
    def restart_game(self, btn):

        self.isGameOver = False
        self.player = Actor("Player", False, "X")
        self.enemy = Actor("Enemy", True, "O")
        self.lstAvailableChoice = list(self.dictIndexToButtonName.keys())

        self.player.start_first()

        self.set_all_button_text("")
        self.set_all_button_disable(False)
Code Example #18
    def __init__(self, state_dim, action_dim, max_action):
        self.actor = Actor(state_dim, action_dim, max_action).to(device)
        self.actor_target = Actor(state_dim, action_dim, max_action).to(device)
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters())

        self.critic = Critic(state_dim, action_dim).to(device)
        self.critic_target = Critic(state_dim, action_dim).to(device)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters())

        self.max_action = max_action
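A hedged usage sketch for an agent built this way, assuming the same global device as in the snippet and an actor that maps a state to an action vector (an illustration, not code from this project):

import torch

def select_action(agent, state):
    # Run the deterministic policy on a single state and return a NumPy action.
    state_tensor = torch.as_tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
    with torch.no_grad():
        action = agent.actor(state_tensor)
    return action.cpu().numpy().flatten()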
Code Example #19
    def __init__(self, task, buffer_size, batch_size, gamma, tau,
                 actor_dropout, critic_dropout, exploration_theta,
                 exploration_sigma, actor_lr, critic_lr):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high
        self.actor_dropout = actor_dropout
        self.critic_dropout = critic_dropout
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high,
                                 self.actor_dropout, self.actor_lr)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high,
                                  self.actor_dropout, self.actor_lr)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size,
                                   self.critic_dropout, self.critic_lr)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    self.critic_dropout, self.critic_lr)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 5
        self.exploration_theta = exploration_theta
        self.exploration_sigma = exploration_sigma
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.memory = PrioritizedReplayBuffer(self.buffer_size,
                                              self.batch_size)

        # Algorithm parameters
        self.gamma = gamma  # discount factor
        self.tau = tau  # for soft update of target parameters

        self.best_score = -np.inf
Code Example #20
    def setUp(self):
        """ Creates a test fixture before each test method is run """
        self.actor1 = Actor("Sub", "Hossan", "A01050900",
                            "2012-12-25 00:00:00", 3)
        self.actor2 = Actor("Ewan", "Watt", "A01020509", "1995-04-08 00:00:00",
                            0)

        self.model1 = Model("Ashvan", "Wal", "A01023474",
                            "2000-01-15 00:00:00", "commercial")
        self.model2 = Model("Phuong", "Ho", "A01023444", "2011-05-23 00:00:00",
                            "vedette")

        self.talent1 = TalentAgency("testresults.json")
        self.logPoint()
Code Example #21
    def __init__(self):
        super(MainLayer, self).__init__()
        self.player = Actor(320, 240, (0, 0, 255))
        self.add(self.player)
        for pos in [(100, 100), (540, 380), \
                (540, 100), (100,380)]:
            self.add(Actor(pos[0], pos[1], (255, 0, 0)))

        cell = self.player.width * 1.25
        self.collman = cm.CollisionManagerGrid(0, 640, 0, 480, cell, cell)

        self.speed = 100.0
        self.pressed = defaultdict(int)
        self.schedule(self.update)
Code Example #22
    def __init__(self, act_dim, env_dim, act_range, buffer_size=20000, gamma=0.99, lr=0.00005, tau=0.001):
        """ Initialization
        """
        # Environment and A2C parameters
        self.act_dim = act_dim
        self.act_range = act_range
        self.env_dim = env_dim
        self.gamma = gamma
        # Create actor and critic networks
        self.actor = Actor(self.env_dim, act_dim, act_range, 0.1 * lr, tau)
        self.demo_actor = Actor(self.env_dim, act_dim, act_range, 0.1 * lr, tau)
        self.critic = Critic(self.env_dim, act_dim, lr, tau)
        self.buffer = Replay()
        self.batch_size = 2000
Code Example #23
    def __init__(self, state_size, batch_size, is_eval=False):
        self.state_size = state_size
        self.action_size = 3
        self.buffer_size = 1000000
        self.batch_size = batch_size
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
        self.inventory = []
        self.is_eval = is_eval
        self.gamma = 0.99
        self.tau = 0.001
        self.actor_local = Actor(self.state_size, self.action_size)
        self.actor_target = Actor(self.state_size, self.action_size)
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())
Code Example #24
File: Coach.py Project: YuechengLiu/PARL
    def _run_remote_tasks(self, signal_queue):
        # The remote actor actually runs on the local machine or on other machines of the xparl cluster
        remote_actor = Actor(self.game, self.args)

        while True:
            # receive running task signal
            # signal: specify task type and task input data (optional)
            signal = signal_queue.get()

            if signal["task"] == "self-play":
                episode_num_each_actor = self.args.numEps // self.args.actors_num
                result = remote_actor.self_play(
                    self.current_agent.get_weights(), episode_num_each_actor)
                self.remote_actors_return_queue.put({"self-play": result})

            elif signal["task"] == "pitting":
                games_num_each_actor = self.args.arenaCompare // self.args.actors_num
                result = remote_actor.pitting(
                    self.previous_agent.get_weights(),
                    self.current_agent.get_weights(), games_num_each_actor)
                self.remote_actors_return_queue.put({"pitting": result})

            elif signal["task"] == "evaluate_test_dataset":
                test_dataset = signal["test_dataset"]
                result = remote_actor.evaluate_test_dataset(
                    self.current_agent.get_weights(), test_dataset)
                self.remote_actors_return_queue.put(
                    {"evaluate_test_dataset": result})
            else:
                raise NotImplementedError
Code Example #25
File: movie_database.py Project: cd155/PennX_SD1x
    def add_movie(self, movie_name, actors):
        # find whether the movie name already exists in the movie list
        target_movie = next(
            (x for x in self.__movie_list if x.get_movie_name() == movie_name),
            None)
        if target_movie is None:
            target_movie = Movie(movie_name)
            self.__movie_list.append(target_movie)

        movie_actors = target_movie.get_actors()
        for actor in actors:
            # find whether the actor name already exists in the actor list
            target_actor = next(
                (x for x in self.__actor_list if x.get_actor_name() == actor),
                None)
            if target_actor is None:
                target_actor = Actor(actor)
                self.__actor_list.append(target_actor)

            # add the new movie to target_actor
            new_movie_list = target_actor.get_movies()
            new_movie_list.append(target_movie)
            target_actor.set_movies(new_movie_list)

            # add the new actor to the target_movie pending list
            movie_actors.append(target_actor)

        target_movie.set_actors(movie_actors)
Code Example #26
File: train.py Project: YuechengLiu/PARL
    def run_remote_sample(self):
        """ Sample data from remote actor and update parameters of remote actor.
        """
        remote_actor = Actor(self.config)

        cnt = 0
        remote_actor.set_weights(self.cache_params)
        while True:
            batch = remote_actor.sample()
            self.sample_data_queue.put(batch)

            cnt += 1
            if cnt % self.config['get_remote_metrics_interval'] == 0:
                metrics = remote_actor.get_metrics()
                if metrics:
                    self.remote_metrics_queue.put(metrics)

            self.params_lock.acquire()

            if self.params_updated and self.cache_params_sent_cnt >= self.config[
                    'params_broadcast_interval']:
                self.params_updated = False
                self.cache_params = self.agent.get_weights()
                self.cache_params_sent_cnt = 0
            self.cache_params_sent_cnt += 1
            self.total_params_sync += 1

            self.params_lock.release()

            remote_actor.set_weights(self.cache_params)
Code Example #27
File: ddpg_rec.py Project: jasonyanglu/DRL_REC
    def __init__(self, state_item_num, action_item_num, emb_dim, batch_size, tau, actor_lr, critic_lr,
                 gamma, buffer_size, item_space, summary_dir):

        self.state_item_num = state_item_num
        self.action_item_num = action_item_num
        self.emb_dim = emb_dim
        self.batch_size = batch_size
        self.tau = tau
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.gamma = gamma
        self.buffer_size = buffer_size
        self.item_space = item_space
        self.summary_dir = summary_dir

        self.sess = tf.Session()

        self.s_dim = emb_dim * state_item_num
        self.a_dim = emb_dim * action_item_num
        self.actor = Actor(self.sess, state_item_num, action_item_num, emb_dim, batch_size, tau, actor_lr)
        self.critic = Critic(self.sess, state_item_num, action_item_num, emb_dim,
                             self.actor.get_num_trainable_vars(), gamma, tau, critic_lr)
        self.exploration_noise = OUNoise(self.a_dim)

        # set up summary operators
        self.summary_ops, self.summary_vars = self.build_summaries()
        self.sess.run(tf.global_variables_initializer())
        self.writer = tf.summary.FileWriter(summary_dir, self.sess.graph)

        # initialize target network weights
        self.actor.hard_update_target_network()
        self.critic.hard_update_target_network()

        # initialize replay memory
        self.replay_buffer = ReplayBuffer(buffer_size)
Code Example #28
    def _create_new_children(self):
        """
        Private function to create the actors in the carla world
        which are children actors of this parent.

        :return:
        """
        for actor in self.carla_world.get_actors():
            if ((actor.parent and actor.parent.id == self.carla_id)
                    or (actor.parent is None and self.carla_id == 0)):
                if actor.id not in self.child_actors:
                    if actor.type_id.startswith('traffic'):
                        self.child_actors[actor.id] = Traffic.create_actor(
                            carla_actor=actor, parent=self)
                    elif actor.type_id.startswith("vehicle"):
                        self.child_actors[actor.id] = Vehicle.create_actor(
                            carla_actor=actor, parent=self)
                    elif actor.type_id.startswith("sensor"):
                        self.child_actors[actor.id] = Sensor.create_actor(
                            carla_actor=actor, parent=self)
                    elif actor.type_id.startswith("spectator"):
                        self.child_actors[actor.id] = Spectator(
                            carla_actor=actor, parent=self)
                    else:
                        self.child_actors[actor.id] = Actor(carla_actor=actor,
                                                            parent=self)
Code Example #29
File: editor.py Project: TheMilkman3/CBS
    def __init__(self, parent=None, mode='add', actor_id=None):
        EditorBaseFrame.__init__(self, parent)
        self.actor = Actor()
        self.mode = mode
        self.actor_id = actor_id
        if self.actor_id is not None:
            self.load_actor()

        # create widgets
        name_frame = ttk.LabelFrame(self, text='Name')
        self.name_entry = ttk.Entry(name_frame, textvariable=self.actor.get_name_var())
        alignment_frame = ttk.LabelFrame(self, text='Alignment')
        self.alignment_combobox = \
            ttk.Combobox(alignment_frame, values=('Hero', 'Anti-Hero', 'Villain', 'Civilian', 'Wild Card'),
                         textvariable=self.actor.get_alignment_var())
        self.alignment_combobox.set(self.actor.alignment)
        self.save_button = ttk.Button(self, text='Save', command=self._b_save)
        self.back_button = ttk.Button(self, text='Back', command=self._b_back)

        # grid widgets
        self.name_entry.grid()
        self.alignment_combobox.grid()
        name_frame.grid(column=0, row=0)
        alignment_frame.grid(column=1, row=0)
        self.save_button.grid(column=10, row=0)
        self.back_button.grid(column=11, row=0)
Code Example #30
    def exec(self):
        input_lines = []
        for line in sys.stdin.readlines():
            input_lines.append(line.rstrip())
        try:
            first_line = input_lines[0]
            first_line = self.split_line(first_line)

            self.define_variables(first_line)
            self.define_group()

            current_line_idx = 1
            for i in range(self._number_actors):
                current_line = input_lines[current_line_idx]
                current_line = self.split_line(current_line)

                actor_cost = int(current_line[0])
                actor_number_groups = int(current_line[1])
                actor_groups = set()

                end_block_idx = current_line_idx+actor_number_groups+1
                for line_idx_group in range(current_line_idx+1, end_block_idx):
                    actor_groups.add(int(input_lines[line_idx_group]))

                self._actors.append(Actor(i+1, actor_cost, actor_groups))
                current_line_idx = end_block_idx

        except (ValueError, TypeError):
            print("Incorrect input format")