Example #1
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        random.seed(random_seed)  # seed the Python RNG (random.seed returns None)
        np.random.seed(random_seed)  # set the numpy seed

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed, device)
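Note: Example #1 assumes several module-level constants and a `device` object that the snippet does not define. A minimal sketch with typical values (assumptions, not taken from the original source):

import torch

BUFFER_SIZE = int(1e6)   # replay buffer size (assumed value)
BATCH_SIZE = 128         # minibatch size (assumed value)
LR_ACTOR = 1e-4          # learning rate of the actor (assumed value)
LR_CRITIC = 1e-3         # learning rate of the critic (assumed value)
WEIGHT_DECAY = 0         # L2 weight decay for the critic optimizer (assumed value)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')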
Example #2
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 1000000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters
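Note: `OUNoise` is not defined in these examples. A minimal sketch consistent with the constructor call above and the `reset()`/`sample()` usage in Example #30, assuming the standard Ornstein-Uhlenbeck update (Example #33 uses a different variant with a `noise()` method and another signature):

import numpy as np

class OUNoise:
    """Temporally correlated exploration noise (Ornstein-Uhlenbeck process)."""
    def __init__(self, size, mu, theta, sigma):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # restart the process from its long-run mean
        self.state = np.copy(self.mu)

    def sample(self):
        # dx = theta * (mu - x) + sigma * N(0, I)
        dx = self.theta * (self.mu - self.state) \
            + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state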
Example #3
 def delete_actor(data):
     """
     Delete actor by id
     """
     #data = get_request_data()
     ### YOUR CODE HERE ###
     Actor.delete(1)
     # use this for 200 response code
     msg = 'Record successfully deleted'
     return make_response(jsonify(message=msg), 200)
Example #4
 def __init__(self, children):
     self.children = set()
     for child in children:
         if isinstance(child, Actor):
             self.children.add(child)
         elif isinstance(child, User):
             self.children.add(Actor.by_user(child))
         elif isinstance(child, Server):
             self.children.add(Actor.by_server(child))
         elif isinstance(child, ActorCollection):
             self.children |= child.children  # set union; += is not defined for sets
         else:
             raise Error('Don\'t know what to do with %s' % child.__class__)
Example #6
def delete_actor():
    """
    Delete actor by id
    """
    data = get_request_data()
    if 'id' in data.keys():
        try:
            row_id = int(data['id'])
            Actor.delete(row_id)
        except ValueError:
            err = 'Id must be integer'
            return make_response(jsonify(error=err), 400)
        msg = 'Record successfully deleted'
        return make_response(jsonify(message=msg), 200)
    err = 'No id specified'
    return make_response(jsonify(error=err), 400)
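Note: `get_request_data` is used throughout these Flask handlers but never shown. A minimal sketch, assuming a standard Flask `request` carrying JSON, form, or query-string data:

from flask import request

def get_request_data():
    """Collect request parameters from the JSON body, form data, or query string."""
    data = request.get_json(silent=True)  # None if the body is not JSON
    if not data:
        data = request.form.to_dict() or request.args.to_dict()
    return data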
Example #7
 def from_user(self, receivers=None, text=None, *_):
     if receivers is None:
         return ERR_NORECIPIENT(self.command, self.actor)
     if text is None:
         return ERR_NOTEXTTOSEND(self.actor)
     resp = []
     # TODO: check for ERR_TOOMANYTARGETS
     for receiver in receivers.split(','):
         if Channel.exists(receiver):
             users = [user
                      for user in Channel.get(receiver).users
                      if user is not self.user]
             resp.append(M(
                 ActorCollection(users),
                 self.command, str(receiver), text,
                 prefix=str(self.user)))
         elif User.exists(receiver):
             resp.append(M(
                 Actor.by_user(User.get(receiver)),
                 self.command, str(receiver), text,
                 prefix=str(self.user)))
         # TODO: Implement wildcards
         # TODO: check for ERR_WILDTOPLEVEL, RPL_AWAY, ERR_NOTOPLEVEL
         else:
             resp.append(ERR_NOSUCHNICK(receiver, self.actor))
     return resp
Example #8
def actor_clear_relations():
    """
    Clear all relations by id
    """
    data = get_request_data()
    if 'id' in data.keys():
        try:
            actor_id = int(data['id'])
        except ValueError:
            err = 'Id must be integer'
            return make_response(jsonify(error=err), 400)
        actor = Actor.clear_relations(actor_id)
        try:
            rel_actor = {
                k: v
                for k, v in actor.__dict__.items() if k in ACTOR_FIELDS
            }
        except AttributeError:
            err = 'Record with such id does not exist'
            return make_response(jsonify(error=err), 400)

        rel_actor['filmography'] = str(actor.filmography)
        return make_response(jsonify(rel_actor), 200)
    else:
        err = 'No id specified'
        return make_response(jsonify(error=err), 400)
Example #9
 def dispatch(self, socket, message):
     actor = Actor.by_socket(socket)
     message.target = config.get("server", "servername")
     if message.command not in self.handlers:
         try:
             self.register(message.command)
         except ImportError as e:
             log.warning("Unknown command %s. Message was: %s. Error: %s" % (message.command, repr(message), e))
             return
Example #10
def actor_add_relation():
    """
    Add a movie to actor's filmography
    """
    data = get_request_data()
    KEY_DICT = ['id', 'relation_id']
    for dat in data:
        if dat not in set(KEY_DICT):
            err = 'Wrong key'
            return make_response(jsonify(error=err), 400)

    ### YOUR CODE HERE ###
    if data.get('id'):
        try:
            row_id = int(data['id'])
        except ValueError:
            err = 'Id must be integer'
            return make_response(jsonify(error=err), 400)
        obj = Actor.query.filter_by(id=data['id']).first()
        try:
            try_actor = {
                k: v
                for k, v in obj.__dict__.items() if k in ACTOR_FIELDS
            }
        except AttributeError:
            err = 'Actor with such id does not exist'
            return make_response(jsonify(error=err), 400)
        if data.get('relation_id'):
            try:
                row_id = int(data['relation_id'])
            except ValueError:
                err = 'Id must be integer'
                return make_response(jsonify(error=err), 400)
            obj = Movie.query.filter_by(id=data['relation_id']).first()
            try:
                try_movie = {
                    k: v
                    for k, v in obj.__dict__.items() if k in MOVIE_FIELDS
                }
            except AttributeError:
                err = 'Movie with such id does not exist'
                return make_response(jsonify(error=err), 400)
        else:
            err = 'No relation_id specified'
            return make_response(jsonify(error=err), 400)
        related_movie = obj  # the related movie was already fetched above
        actor = Actor.add_relation(data['id'],
                                   related_movie)  # add relation here
        rel_actor = {
            k: v
            for k, v in actor.__dict__.items() if k in ACTOR_FIELDS
        }
        rel_actor['filmography'] = str(actor.filmography)
        return make_response(jsonify(rel_actor), 200)
    else:
        err = 'No id specified'
        return make_response(jsonify(error=err), 400)
Example #11
    def __init__(self, state_size, action_size, random_seed, actor_layers,
                 critic_layers):
        """ Initialize an Agent object.

        Params
        ======
            state_size (int): size of the environment state
            action_size (int): size of the environment action
            random_seed (int): seed for the random number generators
            actor_layers (array[int]): array containing the size of each layer of the actor network
            critic_layers (array[int]): array containing the size of each layer of the critic network
        """

        self.state_size = state_size
        self.action_size = action_size
        self.random_seed = random_seed
        random.seed(random_seed)
        np.random.seed(random_seed)

        # Actor
        print(f'Agent running on {DEVICE}')
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.random_seed, *actor_layers).to(DEVICE)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.random_seed, *actor_layers).to(DEVICE)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic
        self.critic_local = Critic(self.state_size, self.action_size,
                                   self.random_seed, *critic_layers).to(DEVICE)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    self.random_seed,
                                    *critic_layers).to(DEVICE)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise
        self.noise = OrsnteinUhlenbeck(self.action_size, self.random_seed)

        # Replay Buffer
        self.memory = ReplayBuffer(self.action_size, BUFFER_SIZE, BATCH_SIZE,
                                   self.random_seed)
Example #12
 def dispatch(self, socket, message):
     actor = Actor.by_socket(socket)
     message.target = config.get('server', 'servername')
     if message.command not in self.handlers:
         try:
             self.register(message.command)
         except ImportError as e:
             log.warning('Unknown command %s. Message was: %s. Error: %s' %
                         (message.command, repr(message), e))
             return
Example #13
def add_actor():
    """
    Add new actor
    """
    data = get_request_data()

    for dat in data:
        if dat not in set(ACTOR_FIELDS):
            err = 'Wrong key'
            return make_response(jsonify(error=err), 400)
    #
    # if 'name' in data.keys():
    #     if data['name'].isdigit():
    #         err = 'Name must be string'
    #         return make_response(jsonify(error=err), 400)
    #
    #     if (len(data['name']) > 50):
    #         err = 'Name must be less than 50 characters'
    #         return make_response(jsonify(error=err), 400)
    #
    #     if not data['name']:
    #         err = 'Name cannot be null'
    #         return make_response(jsonify(error=err), 400)
    #
    # if 'gender' in data.keys():
    #     if (data['gender'] != "male") and (data['gender'] != "female"):  # !!!!!!!!!!!!!!!!!!!!!!!!
    #         err = 'There are only two genders'
    #         return make_response(jsonify(error=err), 400)
    #
    if 'date_of_birth' not in data.keys() or not data['date_of_birth']:
        err = 'enter date'
        return make_response(jsonify(error=err), 400)

    try:
        dt.strptime(data['date_of_birth'], DATE_FORMAT)
    except ValueError:
        err = 'incorrect date format(d.m.y)'
        return make_response(jsonify(error=err), 400)

    # act_dict = get_dict_of_actors()
    # for a in act_dict:
    #     if (a['name'] == data['name']):
    #         err = 'actor with that name is already exist'
    #         return make_response(jsonify(error=err), 400)

    new_record = data
    new_record['date_of_birth'] = dt.strptime(data['date_of_birth'],
                                              DATE_FORMAT)

    new_actor = Actor.create(**new_record)  # !!!!!!!!!!!!!!!!!!!!!!!!
    new_record['id'] = new_actor.id

    return make_response(jsonify(new_record), 200)
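Note: Example #13 relies on a module-level `DATE_FORMAT`. Given the parse above and the "(d.m.y)" error message, a consistent definition would be (an assumption):

DATE_FORMAT = '%d.%m.%Y'  # assumed format, e.g. '16.05.1986'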
Example #14
 def dispatch(self, socket, message):
     actor = Actor.by_socket(socket)
     message.target = config.get('server', 'servername')
     if message.command not in self.handlers:
         try:
             self.register(message.command)
         except ImportError as e:
             log.warning('Unknown command %s. Message was: %s. Error: %s' %
                         (message.command, repr(message), e))
             return
     return self.handlers[message.command].handle(actor, message)
Example #15
def create_actors_acts_in_and_directors():
    movies = Movies.load_all()

    for movie in movies:
        moviee = tmdb.Movies(movie.id)
        moviee.credits()  # populates moviee.cast and moviee.crew
        for person in moviee.crew:
            if person['job'] == 'Director':
                director = Director(movie.id, person['name'],
                                    person['profile_path'])
                if not Director.find_by_movie_id(movie.id):
                    director.save_to_db()

        for person in moviee.cast[:4]:
            if not Actor.load_by_id(person['id']):
                actor = Actor(person['id'], person['name'],
                              person['profile_path'])
                actor.save_to_db()
            relation = Movie_Actors(movie.id, person['id'])
            relation.save_to_db()
Example #16
    def add_actor(data):
        """
        Add new actor
        """
        #data = get_request_data()
        ### YOUR CODE HERE ###

        # use this for 200 response code
        new_record = Actor.create(**data)
        new_actor = {
            k: v
            for k, v in new_record.__dict__.items() if k in ACTOR_FIELDS
        }
        return make_response(jsonify(new_actor), 200)
Example #17
    def update_actor(data):
        """
        Update actor record by id
        """
        #data = get_request_data()
        ### YOUR CODE HERE ###

        # use this for 200 response code
        upd_record = Actor.update(1, **data)
        upd_actor = {
            k: v
            for k, v in upd_record.__dict__.items() if k in ACTOR_FIELDS
        }
        return make_response(jsonify(upd_actor), 200)
Example #18
def del_actor():
    """
    Delete actor by id
    """
    data = get_request_data()

    if 'id' in data.keys():
        try:
            row_id = int(data['id'])
        except ValueError:
            err = 'Id must be integer'
            return make_response(jsonify(error=err), 400)

        obj = Actor.query.filter_by(id=row_id).first()
        try:
            actor = {
                k: v
                for k, v in obj.__dict__.items() if k in ACTOR_FIELDS
            }
        except AttributeError:
            err = 'Record with such id does not exist'
            return make_response(jsonify(error=err), 400)

        Actor.delete(row_id)
        message = 'Record with id {} has been successfully deleted'.format(row_id)
        return make_response(jsonify(message), 200)

    err = 'No id specified'
    return make_response(jsonify(error=err), 400)
Example #19
    def __init__(self, env, hp):

        self.env = env
        self.hp = hp

        # Critic network and its target copy, sized from the env's spaces
        self.critic = Critic(env.observation_space.shape[0],
                             env.action_space.shape[0], hp)

        self.target_critic = Critic(env.observation_space.shape[0],
                                    env.action_space.shape[0], hp)

        # Actor network and its target copy; the action bound comes from the env
        self.actor = Actor(env.observation_space.shape[0],
                           env.action_space.shape[0], env.action_space.high[0],
                           hp)

        self.target_actor = Actor(env.observation_space.shape[0],
                                  env.action_space.shape[0],
                                  env.action_space.high[0], hp)

        # Experience replay buffer
        self.dataset = ReplayBuffer(self.hp['batch_size'],
                                    self.hp['max_buffer_size'])

        # Ornstein-Uhlenbeck exploration noise
        self.noise = OrnsteinUhlenbeckProcess(env.action_space.shape[0],
                                              sigma=self.hp['noise_sigma'])
        self.noise.reset_states()
Example #20
def add_actor():
    """
    Add new actor
    """
    data = get_request_data()
    ### YOUR CODE HERE ###

    # parse the incoming date, then re-serialize it in the HTTP (GMT) date format
    cr_date = data['date_of_birth']
    cr_date = dt.strptime(cr_date, '%d.%m.%Y')
    cr_date = cr_date.strftime('%a, %d %b %Y %H:%M:%S GMT')

    # use this for 200 response code
    # NB: this constructs the record in memory without persisting it
    # (other examples call Actor.create for that)
    new_record = Actor(name=data['name'], gender=data['gender'],
                       date_of_birth=cr_date)
    new_actor = {
        k: v
        for k, v in new_record.__dict__.items() if k in ACTOR_FIELDS
    }

    return make_response(jsonify(new_actor), 200)
Example #21
    def from_user(self, receivers=None, text=None, *_):
        if receivers is None:
            return ERR_NORECIPIENT(self.command, self.actor)
        if text is None:
            return ERR_NOTEXTTOSEND(self.actor)
        resp = []
        # TODO: check for ERR_TOOMANYTARGETS
        for receiver in receivers.split(','):
            if Channel.exists(receiver):
                channel_log = '%s/%s.log' % (config.get(
                    'server', 'channel_log_dir'), receiver.replace('#', ''))
                # if not PrivmsgCommand.channel_log_files.get(channel_log):
                #     PrivmsgCommand.channel_log_files[channel_log] = open(channel_log,'a')
                # PrivmsgCommand.channel_log_files[channel_log].write("%s::%s::%s::%s\n" % (
                #         time.time(), time.strftime('%Y-%m-%d %H:%I:%S'), self.user.nickname, text
                # ))
                # PrivmsgCommand.channel_log_files[channel_log].flush()
                with open(channel_log, 'a') as f:
                    f.write("%s::%s::%s::%s\n" %
                            (time.time(), time.strftime('%Y-%m-%d %H:%M:%S'),
                             self.user.nickname, text))
                    f.flush()

                users = [
                    user for user in Channel.get(receiver).users
                    if user is not self.user
                ]
                resp.append(
                    M(ActorCollection(users),
                      self.command,
                      str(receiver),
                      text,
                      prefix=str(self.user)))
            elif User.exists(receiver):
                resp.append(
                    M(Actor.by_user(User.get(receiver)),
                      self.command,
                      str(receiver),
                      text,
                      prefix=str(self.user)))
            # TODO: Implement wildcards
            # TODO: check for ERR_WILDTOPLEVEL, RPL_AWAY, ERR_NOTOPLEVEL
            else:
                resp.append(ERR_NOSUCHNICK(receiver, self.actor))
        return resp
Example #22
def update_actor():
    """
    Update actor record by id
    """
    data = get_request_data()
    if 'id' in data.keys():
        try:
            row_id = int(data['id'])
        except ValueError:
            err = 'Id must be integer'
            return make_response(jsonify(error=err), 400)
    else:
        err = 'No id specified'
        return make_response(jsonify(error=err), 400)
    keys = list(data.keys())
    keys.remove('id')
    for key in keys:
        if key == 'date_of_birth':
            try:
                data['date_of_birth'] = dt.strptime(data['date_of_birth'],
                                                    '%d.%m.%Y').date()
            except ValueError:
                err = 'Wrong date format'
                return make_response(jsonify(error=err), 400)
        elif key in ('name', 'gender'):
            continue
        else:
            err = 'Wrong keys'
            return make_response(jsonify(error=err), 400)
    try:
        upd_record = Actor.update(row_id, **data)
        upd_actor = {
            k: v
            for k, v in upd_record.__dict__.items() if k in ACTOR_FIELDS
        }
        return make_response(jsonify(upd_actor), 200)
    except Exception:
        err = 'Record with such id does not exist'
        return make_response(jsonify(error=err), 400)
Example #23
def actor_add_relation():
    """
    Add a movie to actor's filmography
    """
    data = get_request_data()
    if 'id' in data.keys():
        try:
            row_id = int(data['id'])
            relation_id = data['relation_id']
            obj_movie = Movie.query.filter_by(id=relation_id).first()
            actor = Actor.add_relation(row_id, obj_movie)
            rel_actor = {
                k: v
                for k, v in actor.__dict__.items() if k in ACTOR_FIELDS
            }
            rel_actor['filmography'] = str(actor.filmography)
            return make_response(jsonify(rel_actor), 200)

        except Exception:
            err = 'Id must be integer'
            return make_response(jsonify(error=err), 400)
    err = 'No id specified'
    return make_response(jsonify(error=err), 400)
Example #24
    def load(self, transformed_response):
        print("loading...\n")

        result = {}
        if transformed_response.get('Person'):
            self.entities += [Person().extract(transformed_response['Person'])]
        if transformed_response.get('Author'):
            self.entities += [Author().extract(transformed_response['Author'])]
        if transformed_response.get('Actor'):
            self.entities += [Actor().extract(transformed_response['Actor'])]
        if transformed_response.get('BusinessPerson'):
            self.entities += [
                BusinessPerson().extract(
                    transformed_response['BusinessPerson'])
            ]
        if transformed_response.get('League'):
            self.entities += [League().extract(transformed_response['League'])]
        if transformed_response.get('SportsTeam'):
            self.entities += [
                SportsTeam().extract(transformed_response['SportsTeam'])
            ]
        if transformed_response.get('Description'):
            self.entities += [
                Description().extract(transformed_response['Description'])
            ]

        header = str(self.query) + "("
        for entity in self.entities:
            if entity.__class__.__name__ != "Description":
                header = header + "  " + str(entity.__class__.__name__)
        header = header + ")"
        print("----------------------------------")
        print(header)
        print("----------------------------------")

        for entity in self.entities:
            entity.print_box()
            result[entity.__class__.__name__] = entity

        return result
Example #25
def add_actor():
    """
    Add new actor
    """
    ### YOUR CODE HERE ###
    data = get_request_data()
    if 'name' in data.keys():
        if 'date_of_birth' in data.keys():
            if 'gender' in data.keys():
                try:
                    data['date_of_birth'] = dt.strptime(
                        data['date_of_birth'], '%d.%m.%Y').date()
                except ValueError:
                    err = 'Wrong data format'
                    return make_response(jsonify(error=err), 400)
                if data['gender'].isalpha():
                    new_record = Actor.create(**data)
                    try:
                        new_actor = {
                            k: v
                            for k, v in new_record.__dict__.items()
                            if k in ACTOR_FIELDS
                        }
                    except AttributeError:
                        err = 'Record with such id does not exist'
                        return make_response(jsonify(error=err), 400)

                    return make_response(jsonify(new_actor), 200)
                else:
                    err = 'Wrong gender format'
                    return make_response(jsonify(error=err), 400)
            else:
                err = 'No gender specified'
                return make_response(jsonify(error=err), 400)
        else:
            err = 'No date_of_birth specified'
            return make_response(jsonify(error=err), 400)
    else:
        err = 'No name specified'
        return make_response(jsonify(error=err), 400)
Example #26
    def from_user(self, receivers=None, text=None, *_):
        if receivers is None:
            return ERR_NORECIPIENT(self.command, self.actor)
        if text is None:
            return ERR_NOTEXTTOSEND(self.actor)
        resp = []
        # TODO: check for ERR_TOOMANYTARGETS
        for receiver in receivers.split(','):
            if Channel.exists(receiver):
                channel_log = '%s/%s.log' % ( config.get('server', 'channel_log_dir'), receiver.replace('#',''))
                # if not PrivmsgCommand.channel_log_files.get(channel_log):
                #     PrivmsgCommand.channel_log_files[channel_log] = open(channel_log,'a')
                # PrivmsgCommand.channel_log_files[channel_log].write("%s::%s::%s::%s\n" % (
                #         time.time(), time.strftime('%Y-%m-%d %H:%I:%S'), self.user.nickname, text
                # ))
                # PrivmsgCommand.channel_log_files[channel_log].flush()
                with open(channel_log,'a') as f:
                    f.write("%s::%s::%s::%s\n" % (
                        time.time(), time.strftime('%Y-%m-%d %H:%M:%S'), self.user.nickname, text
                    ))
                    f.flush()

                users = [user for user in Channel.get(receiver).users if user is not self.user]
                resp.append(M(
                    ActorCollection(users),
                    self.command, str(receiver), text,
                    prefix=str(self.user)
                ))
            elif User.exists(receiver):
                resp.append(M(
                    Actor.by_user(User.get(receiver)),
                    self.command, str(receiver), text,
                    prefix=str(self.user)
                ))
            # TODO: Implement wildcards
            # TODO: check for ERR_WILDTOPLEVEL, RPL_AWAY, ERR_NOTOPLEVEL
            else:
                resp.append(ERR_NOSUCHNICK(receiver, self.actor))
        return resp
Example #27
def actor_add_relation():
    """
    Add a movie to actor's filmography
    """
    data = get_request_data()
    if 'id' in data.keys():
        if 'relation_id' in data.keys():
            try:
                actor_id = int(data['id'])
            except ValueError:
                err = "actor_id must be an integer"
                return make_response(jsonify(error=err), 400)
            try:
                row_m_id = int(data['relation_id'])
            except ValueError:
                err = "movie_id must be an integer"
                return make_response(jsonify(error=err), 400)
            movie = Movie.query.filter_by(id=row_m_id).first()
            actor = Actor.add_relation(actor_id, movie)
            try:
                rel_actor = {
                    k: v
                    for k, v in actor.__dict__.items() if k in ACTOR_FIELDS
                }
            except AttributeError:
                err = 'Record with such id does not exist'
                return make_response(jsonify(error=err), 400)

            rel_actor['filmography'] = str(actor.filmography)
            return make_response(jsonify(rel_actor), 200)
        else:
            err = 'No relation_id specified'
            return make_response(jsonify(error=err), 400)
    else:
        err = 'No id specified'
        return make_response(jsonify(error=err), 400)
Example #28
 def actor_add_relation():
     """
     Add a movie to actor's filmography
     """
     #data = get_request_data()
     # sample payload; the original dict repeated the 'name' key (actor and movie),
     # so the actor's name was silently overwritten - the data is split here,
     # and the 'id' entry is a sample value for an existing actor
     data = {
         'id': 1,
         'name': 'Megan Fox',
         'gender': 'female',
         'date_of_birth': dt.strptime('16.05.1986', '%d.%m.%Y').date(),
     }
     movie_data = {
         'name': 'Transformers',
         'genre': 'action',
         'year': 2007
     }
     ### YOUR CODE HERE ###
     movie = Movie.create(**movie_data)
     # use this for 200 response code
     actor = Actor.add_relation(data["id"], movie)  # add relation here
     rel_actor = {
         k: v
         for k, v in actor.__dict__.items() if k in ACTOR_FIELDS
     }
     rel_actor['filmography'] = str(actor.filmography)
     return make_response(jsonify(rel_actor), 200)
Example #29
hyper_ps = {
    # (earlier entries of this dict are not shown in the source example)
    'critic_threshold': 17.5,
    'critic_suffices_required': 1,
    'critic_steps_start': 200,
    'critic_steps_end': 200,
    'actor_steps_start': 1000,
    'actor_steps_end': 1000,
    'batch_size': 256,
    'seed': 123456,
    'replay_fill_threshold': 1.,
    'random_exploration': True,
    'test_iterations': 30,
    'validation_epoch_mod': 3,
}

# configuring the environment
environment = gym.make('Humanoid-v3')
# environment._max_episode_steps = 600

# setting up the training components
agent = AWRAgent
actor = Actor()
critic = Critic()

# training and testing
Training.train((actor, critic),
               agent,
               environment,
               hyper_ps,
               save=True,
               debug_type=DebugType.NONE)
Example #30
class DDPG():
    """Reinforcement Learning agent that learns using DDPG."""
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 1000000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters
        self.score = 0
        self.best_score = -np.inf

    def reset_episode(self):
        self.noise.reset()
        state = self.task.reset()
        self.last_state = state
        return state

    def step(self, action, reward, next_state, done):

        if done:
            reward = self.eval_episode(reward)

        self.add_score(reward)

        # Save experience / reward
        self.memory.add(self.last_state, action, reward, next_state, done)

        # Learn, if enough samples are available in memory
        if len(self.memory) > self.batch_size:
            experiences = self.memory.sample()
            self.learn(experiences)

        # Roll over last state and action
        self.last_state = next_state

    def add_score(self, reward):
        self.score += reward
        if self.best_score < self.score:
            self.best_score = self.score

    def reset_score(self):
        self.score = 0

    def acceptable_episode(self):
        #print(self.task.sim.pose[:3] - self.task.target_pos)
        print(np.linalg.norm(self.task.sim.pose[:3] - self.task.target_pos))

    def eval_episode(self, episode_reward):
        x = self.task.sim.pose[0]
        y = self.task.sim.pose[1]
        z = self.task.sim.pose[2]
        if z <= 0:
            episode_reward -= 35
        elif z >= 145:
            episode_reward -= 25

        if 90 <= z <= 110 and -20 <= x <= 20 and -20 <= y <= 20:
            episode_reward += 40
        elif 65 <= z < 130 and -50 <= x <= 50 and -50 <= y <= 50:
            episode_reward += 50
        return episode_reward

    def act(self, state):
        """Returns actions for given state(s) as per current policy."""
        state = np.reshape(state, [-1, self.state_size])
        action = self.actor_local.model.predict(state)[0]
        return list(action +
                    self.noise.sample())  # add some noise for exploration

    def learn(self, experiences):
        """Update policy and value parameters using given batch of experience tuples."""
        # Convert experience tuples to separate arrays for each element (states, actions, rewards, etc.)
        states = np.vstack([e.state for e in experiences if e is not None])
        actions = np.array([e.action for e in experiences
                            if e is not None]).astype(np.float32).reshape(
                                -1, self.action_size)
        rewards = np.array([e.reward for e in experiences if e is not None
                            ]).astype(np.float32).reshape(-1, 1)
        dones = np.array([e.done for e in experiences
                          if e is not None]).astype(np.uint8).reshape(-1, 1)
        next_states = np.vstack(
            [e.next_state for e in experiences if e is not None])

        # Get predicted next-state actions and Q values from target models
        #     Q_targets_next = critic_target(next_state, actor_target(next_state))
        actions_next = self.actor_target.model.predict_on_batch(next_states)
        Q_targets_next = self.critic_target.model.predict_on_batch(
            [next_states, actions_next])

        # Compute Q targets for current states and train critic model (local)
        Q_targets = rewards + self.gamma * Q_targets_next * (1 - dones)
        self.critic_local.model.train_on_batch(x=[states, actions],
                                               y=Q_targets)

        # Train actor model (local)
        action_gradients = np.reshape(
            self.critic_local.get_action_gradients([states, actions, 0]),
            (-1, self.action_size))
        self.actor_local.train_fn([states, action_gradients,
                                   1])  # custom training function

        # Soft-update target models
        self.soft_update(self.critic_local.model, self.critic_target.model)
        self.soft_update(self.actor_local.model, self.actor_target.model)

    def soft_update(self, local_model, target_model):
        """Soft update model parameters."""
        local_weights = np.array(local_model.get_weights())
        target_weights = np.array(target_model.get_weights())

        assert len(local_weights) == len(
            target_weights
        ), "Local and target model parameters must have the same size"

        new_weights = self.tau * local_weights + (1 -
                                                  self.tau) * target_weights
        target_model.set_weights(new_weights)
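Note: a minimal training-loop sketch for the DDPG agent above. The `Task` API (`task.step` returning `(next_state, reward, done)`) and the episode count are assumptions, not part of the original example:

agent = DDPG(task)
for episode in range(1, 1001):
    state = agent.reset_episode()
    agent.reset_score()
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done = task.step(action)  # assumed Task API
        agent.step(action, reward, next_state, done)
        state = next_state
    print('Episode {:4d} | score: {:7.3f} | best: {:7.3f}'.format(
        episode, agent.score, agent.best_score))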
Example #31
def main():
    env = DialogEnvironment()
    experiment_name = args.logdir.split('/')[1] #model name

    torch.manual_seed(args.seed)

    #TODO
    actor = Actor(hidden_size=args.hidden_size, num_layers=args.num_layers,
                  device='cuda', input_size=args.input_size,
                  output_size=args.input_size)
    
    actor.to(device)
    
    actor_optim = optim.Adam(actor.parameters(), lr=args.learning_rate)
    # load demonstrations

    writer = SummaryWriter(args.logdir)

    if args.load_model is not None: #TODO
        saved_ckpt_path = os.path.join(os.getcwd(), 'save_model', str(args.load_model))
        ckpt = torch.load(saved_ckpt_path)

        actor.load_state_dict(ckpt['actor'])

    episodes = 0


    for iter in range(args.max_iter_num):
        actor.eval()

        steps = 0
        similarity_scores = []
        states = []
        expert_actions = []
        while steps < args.batch_size:
            state, expert_action, raw_state, raw_expert_action = env.reset()

            similarity_score = 0
            state = state[:args.seq_len,:]
            expert_action = expert_action[:args.seq_len,:]
            state = state.to(device)
            expert_action = expert_action.to(device)
            states.append(state)
            expert_actions.append(expert_action)

            # roll the current policy on this dialog and score it against the expert
            with torch.no_grad():
                action, _ = actor(state.unsqueeze(0))
            similarity_score += get_cosine_sim(expert=expert_action,
                                               action=action.squeeze(),
                                               seq_len=5)
            #print(get_cosine_sim(s1=expert_action,s2=action.squeeze(),seq_len=5),'sim')

            steps += 1
            episodes += 1
            similarity_scores.append(similarity_score)
        states = torch.stack(states)
        actions_pred , _ = actor(states)
        expert_actions = torch.stack(expert_actions)



        similarity_score_avg = np.mean(similarity_scores)
        print('{}:: {} episode similarity score is {:.2f}'.format(iter, episodes, similarity_score_avg))

        actor.train()
        loss = F.mse_loss(actions_pred, expert_actions)
        actor_optim.zero_grad()
        loss.backward()
        actor_optim.step()
        # and this is basically all we need to do

        writer.add_scalar('log/loss', loss.item(), iter)
        writer.add_scalar('log/similarity_score', float(similarity_score_avg), iter)
        writer.add_text('log/raw_state', raw_state[0], iter)
        raw_action = get_raw_action(action) #TODO
        writer.add_text('log/raw_action', raw_action, iter)
        writer.add_text('log/raw_expert_action', raw_expert_action, iter)

        if iter % 100 == 0:  # checkpoint every 100 iterations
            score_avg = int(similarity_score_avg)
            # Open a file with access mode 'a'
            file_object = open(experiment_name+'.txt', 'a')

            result_str = str(iter) + '|' + raw_state[0] + '|' + raw_action + '|' + raw_expert_action + '\n'
            # Append at the end of file
            file_object.write(result_str)
            # Close the file
            file_object.close()

            model_path = os.path.join(os.getcwd(),'save_model')
            if not os.path.isdir(model_path):
                os.makedirs(model_path)

            ckpt_path = os.path.join(model_path, experiment_name + '_ckpt_'+ str(score_avg)+'.pth.tar')

            save_checkpoint({
                'actor': actor.state_dict(),
                'args': args,
                'score': score_avg,
            }, filename=ckpt_path)
Example #32
def main():
    env = DialogEnvironment()
    experiment_name = args.logdir.split('/')[1] #model name

    torch.manual_seed(args.seed)

    #TODO
    actor = Actor(hidden_size=args.hidden_size, num_layers=args.num_layers,
                  device='cuda', input_size=args.input_size,
                  output_size=args.input_size)
    critic = Critic(hidden_size=args.hidden_size, num_layers=args.num_layers,
                    input_size=args.input_size, seq_len=args.seq_len)
    # num_layers was args.hidden_size in the original, which looks like a typo
    discrim = Discriminator(hidden_size=args.hidden_size,
                            num_layers=args.num_layers,
                            input_size=args.input_size,
                            seq_len=args.seq_len)
    
    actor.to(device), critic.to(device), discrim.to(device)
    
    actor_optim = optim.Adam(actor.parameters(), lr=args.learning_rate)
    critic_optim = optim.Adam(critic.parameters(), lr=args.learning_rate, 
                              weight_decay=args.l2_rate) 
    discrim_optim = optim.Adam(discrim.parameters(), lr=args.learning_rate)

    # load demonstrations

    writer = SummaryWriter(args.logdir)

    if args.load_model is not None: #TODO
        saved_ckpt_path = os.path.join(os.getcwd(), 'save_model', str(args.load_model))
        ckpt = torch.load(saved_ckpt_path)

        actor.load_state_dict(ckpt['actor'])
        critic.load_state_dict(ckpt['critic'])
        discrim.load_state_dict(ckpt['discrim'])


    
    episodes = 0
    train_discrim_flag = True

    for iter in range(args.max_iter_num):
        actor.eval(), critic.eval()
        memory = deque()

        steps = 0
        scores = []
        similarity_scores = []
        while steps < args.total_sample_size: 
            state, expert_action, raw_state, raw_expert_action = env.reset()
            score = 0
            similarity_score = 0
            state = state[:args.seq_len,:]
            expert_action = expert_action[:args.seq_len,:]
            state = state.to(device)
            expert_action = expert_action.to(device)
            for _ in range(10000): 

                steps += 1

                mu, std = actor(state.unsqueeze(0))  # batch of 1: [1, seq_len, input_size]
                action = get_action(mu.cpu(), std.cpu())[0]
                for i in range(5):
                    emb_sum = expert_action[i,:].sum().cpu().item()
                    if emb_sum == 0:
                       # print(i)
                        action[i:,:] = 0 # manual padding
                        break

                done = env.step(action)
                irl_reward = get_reward(discrim, state, action, args)
                if done:
                    mask = 0
                else:
                    mask = 1


                memory.append([state, torch.from_numpy(action).to(device), irl_reward, mask,expert_action])
                score += irl_reward
                similarity_score += get_cosine_sim(expert=expert_action,action=action.squeeze(),seq_len=5)
                #print(get_cosine_sim(s1=expert_action,s2=action.squeeze(),seq_len=5),'sim')
                if done:
                    break

            episodes += 1
            scores.append(score)
            similarity_scores.append(similarity_score)

        score_avg = np.mean(scores)
        similarity_score_avg = np.mean(similarity_scores)
        print('{}:: {} episode score is {:.2f}'.format(iter, episodes, score_avg))
        print('{}:: {} episode similarity score is {:.2f}'.format(iter, episodes, similarity_score_avg))

        actor.train(), critic.train(), discrim.train()
        if train_discrim_flag:
            expert_acc, learner_acc = train_discrim(discrim, memory, discrim_optim, args) 
            print("Expert: %.2f%% | Learner: %.2f%%" % (expert_acc * 100, learner_acc * 100))
            writer.add_scalar('log/expert_acc', float(expert_acc), iter) #logg
            writer.add_scalar('log/learner_acc', float(learner_acc), iter) #logg
            writer.add_scalar('log/avg_acc', float(learner_acc + expert_acc)/2, iter) #logg
            if args.suspend_accu_exp is not None: #only if not None do we check.
                if expert_acc > args.suspend_accu_exp and learner_acc > args.suspend_accu_gen:
                    train_discrim_flag = False

        train_actor_critic(actor, critic, memory, actor_optim, critic_optim, args)
        writer.add_scalar('log/score', float(score_avg), iter)
        writer.add_scalar('log/similarity_score', float(similarity_score_avg), iter)
        writer.add_text('log/raw_state', raw_state[0],iter)
        raw_action = get_raw_action(action) #TODO
        writer.add_text('log/raw_action', raw_action,iter)
        writer.add_text('log/raw_expert_action', raw_expert_action,iter)

        if iter % 100 == 0:  # checkpoint every 100 iterations
            score_avg = int(score_avg)
            # Open a file with access mode 'a'
            file_object = open(experiment_name+'.txt', 'a')

            result_str = str(iter) + '|' + raw_state[0] + '|' + raw_action + '|' + raw_expert_action + '\n'
            # Append at the end of file
            file_object.write(result_str)
            # Close the file
            file_object.close()

            model_path = os.path.join(os.getcwd(),'save_model')
            if not os.path.isdir(model_path):
                os.makedirs(model_path)

            ckpt_path = os.path.join(model_path, experiment_name + '_ckpt_'+ str(score_avg)+'.pth.tar')

            save_checkpoint({
                'actor': actor.state_dict(),
                'critic': critic.state_dict(),
                'discrim': discrim.state_dict(),
                'args': args,
                'score': score_avg,
            }, filename=ckpt_path)
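Note: `get_action` in Examples #31 and #32 is not shown. A plausible sketch, assuming the policy's Gaussian head is sampled and returned as a numpy batch (Example #32 indexes the result with `[0]`):

import torch

def get_action(mu, std):
    # sample actions from N(mu, std) and detach to numpy (assumed behavior)
    action = torch.normal(mu, std)
    return action.detach().numpy()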
Example #33
class DDPGAgent:
    """Interacts with and learns from the environment."""
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        random.seed(random_seed)  # seed the Python RNG (random.seed returns None)
        np.random.seed(random_seed)  # set the numpy seed

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed, device)

        # add OU noise for exploration
        self.noise = OUNoise(action_size, scale=1.0, sigma=.1)

    def reset(self):
        self.noise.reset()

    def step(self, states, actions, rewards, next_states, dones, time_step):
        """Save experience in replay memory, and use random sample from buffer to learn."""
        # Save experience / reward (for each agent)
        for state, action, reward, next_state, done in zip(
                states, actions, rewards, next_states, dones):
            self.memory.add(state, action, reward, next_state, done)

        # Learn, if enough samples are available in memory and every 20 steps
        if len(self.memory) > BATCH_SIZE and time_step % LEARN_STEPS == 0:
            for _ in range(
                    N_UPDATES):  # generate n experiences and realize n updates
                experiences = self.memory.sample()
                self.learn(experiences, GAMMA)

    def act(self, states, epsilon=0.0, add_noise=True):
        """Returns actions for given state as per current policy."""
        states = torch.from_numpy(states).float().to(device)
        self.actor_local.eval()
        with torch.no_grad():
            actions = self.actor_local(states).cpu().data.numpy()
        self.actor_local.train()
        if add_noise:  # add Ornstein-Uhlenbeck noise, scaled by epsilon, for exploration
            actions += self.noise.noise() * epsilon

        return np.clip(actions, -1, 1)

    def learn(self, experiences, gamma):
        """Update policy and value parameters using given batch of experience tuples.
        Q_targets = r + γ * critic_target(next_state, actor_target(next_state))
        where:
            actor_target(state) -> action
            critic_target(state, action) -> Q-value
        Params
        ======
            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples
            gamma (float): discount factor
        """
        states, actions, rewards, next_states, dones = experiences

        self.__update_critic_local(actions, dones, gamma, next_states, rewards,
                                   states)
        self.__update_actor_local(states)

        # ----------------------- update target networks ----------------------- #
        self.soft_update(self.critic_local, self.critic_target, TAU)
        self.soft_update(self.actor_local, self.actor_target, TAU)

    def __update_critic_local(self, actions, dones, gamma, next_states,
                              rewards, states):
        # Get predicted next-state actions and Q values from target models
        actions_next = self.actor_target(next_states)
        Q_targets_next = self.critic_target(next_states, actions_next)
        # Compute Q targets for current states (y_i)
        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))
        # Compute critic loss
        Q_expected = self.critic_local(states, actions)
        critic_loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1)
        self.critic_optimizer.step()

    def __update_actor_local(self, states):
        # Compute actor loss
        actions_pred = self.actor_local(states)
        actor_loss = -self.critic_local(states, actions_pred).mean()
        # Minimize the loss
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

    def soft_update(self, local_model, target_model, tau):
        """Soft update model parameters.
        θ_target = τ*θ_local + (1 - τ)*θ_target
        Params
        ======
            local_model: PyTorch model (weights will be copied from)
            target_model: PyTorch model (weights will be copied to)
            tau (float): interpolation parameter
        """
        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1.0 - tau) * target_param.data)

    def network_summary(self):
        print('- Actor Summary (both local and target): ')
        self.actor_local.to(device).summary()

        print('- Critic Summary (both local and target): ')
        self.critic_local.to(device).summary()

    def save(self,
             checkpoint_actor_name='checkpoint_actor',
             checkpoint_critic_name='checkpoint_critic'):
        """Save the actor and critic network weights"""
        torch.save(self.actor_local.state_dict(),
                   path_result_folder(f'{checkpoint_actor_name}.pth'))
        torch.save(self.critic_local.state_dict(),
                   path_result_folder(f'{checkpoint_critic_name}.pth'))

    @staticmethod
    def load(env: UnityEnvironment,
             random_seed=0,
             checkpoint_actor_name='checkpoint_actor',
             checkpoint_critic_name='checkpoint_critic'):
        """Load the actor and critic network weights"""
        # get the default brain
        brain_name = env.brain_names[0]
        brain = env.brains[brain_name]

        env_info = env.reset(train_mode=True)[brain_name]
        state_size = len(env_info.vector_observations[0])
        action_size = brain.vector_action_space_size

        loaded_agent = DDPGAgent(state_size, action_size, random_seed)
        loaded_agent.actor_local.load_state_dict(
            torch.load(path_result_folder(f'{checkpoint_actor_name}.pth')))
        loaded_agent.critic_local.load_state_dict(
            torch.load(path_result_folder(f'{checkpoint_critic_name}.pth')))
        return loaded_agent
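Note: a hedged usage sketch for restoring the agent above; the Unity environment binary path is an assumption:

from unityagents import UnityEnvironment

env = UnityEnvironment(file_name='Reacher.app')  # path is an assumption
agent = DDPGAgent.load(env, random_seed=0)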
Example #34
class Agent():
    """ Interacts with and learns from the environment """
    def __init__(self, state_size, action_size, random_seed, actor_layers,
                 critic_layers):
        """ Initialize an Agent object.

        Params
        ======
            state_size (int): size of the environment state
            action_size (int): size of the environment action
            random_seed (int): seed for the random number generators
            actor_layers (array[int]): array containing the size of each layer of the actor network
            critic_layers (array[int]): array containing the size of each layer of the critic network
        """

        self.state_size = state_size
        self.action_size = action_size
        self.random_seed = random_seed
        random.seed(random_seed)
        np.random.seed(random_seed)

        # Actor
        print(f'Agent running on {DEVICE}')
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.random_seed, *actor_layers).to(DEVICE)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.random_seed, *actor_layers).to(DEVICE)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic
        self.critic_local = Critic(self.state_size, self.action_size,
                                   self.random_seed, *critic_layers).to(DEVICE)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    self.random_seed,
                                    *critic_layers).to(DEVICE)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise
        self.noise = OrsnteinUhlenbeck(self.action_size, self.random_seed)

        # Replay Buffer
        self.memory = ReplayBuffer(self.action_size, BUFFER_SIZE, BATCH_SIZE,
                                   self.random_seed)

    def step(self, states, actions, rewards, next_states, dones, time_step):
        """ Save experience in replay memory, and use random sample from buffer to learn """
        for state, action, reward, next_state, done in zip(
                states, actions, rewards, next_states, dones):
            self.memory.add(state, action, reward, next_state, done)

        # Learn only if there is enough samples on memory
        if len(self.memory) > BATCH_SIZE and time_step % LEARN_STEPS == 0:
            for _ in range(N_UPDATES):
                experiences = self.memory.sample()
                self.learn(experiences, GAMMA)

    def act(self, state, add_noise=True, epsilon=1.0):
        """ Returns actions for given state as per current policy """
        state = torch.from_numpy(state).float().to(DEVICE)
        self.actor_local.eval()

        with torch.no_grad():
            actions = self.actor_local(state).cpu().data.numpy()
        self.actor_local.train()

        if add_noise:
            # actions += self.noise.sample()
            actions += np.random.normal(0, .3) * epsilon

        return np.clip(actions, -1, 1)

    def reset(self):
        self.noise.reset()

    def learn(self, experiences, gamma):
        """ Update policy and value parameters using given batch of experience tuples
        Q_targets = r + γ * critic_target(next_state, actor_target(next_state))
        where:
            actor_target(state) -> action
            critic_target(state, action) -> Q-value

        Params
        ======
            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples
            gamma (float): discount factor
        """

        states, actions, rewards, next_states, dones = experiences

        # Critic update
        actions_next = self.actor_target(next_states)
        q_targets_next = self.critic_target(next_states, actions_next)
        Q_targets = rewards + (gamma * q_targets_next * (1 - dones))

        Q_expected = self.critic_local(states, actions)
        critic_loss = F.mse_loss(Q_expected, Q_targets)

        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1)
        self.critic_optimizer.step()

        # Actor update
        actions_pred = self.actor_local(states)
        actor_loss = -self.critic_local(states, actions_pred).mean()

        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # Update weights
        self.soft_update(self.actor_local, self.actor_target, TAU)
        self.soft_update(self.critic_local, self.critic_target, TAU)

    def soft_update(self, local_model, target_model, tau):
        """ Soft update model parameters
        θ_target = τ*θ_local + (1 - τ)*θ_target

        Params
        ======
            local_model (PyTorch model): weights will be copied from
            target_model (PyTorch model): weights will be copied to
            tau (float): interpolation parameter
        """

        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1 - tau) * target_param.data)