import math

import numpy as np
import torch

# AgentModel and CriticModel are assumed to be the project's LSTM-based model
# classes, defined elsewhere in the repository.


class agent:

    def __init__(self):
        self.critic_loss = None
        self.factors_agent = None
        self.factors_critic = None
        self.history_len = 0
        self.is_train = None
        self.loss_agent = None
        self.loss_critic = None
        self.model_agent = None
        self.model_critic = None
        self.optimizer_agent = None
        self.optimizer_critic = None
        self.pred = None
        self.reward = None

    def add_model(self):
        """This function builds the agent and critic models along with their
        optimizers and loss functions"""
        self.model_agent = AgentModel(12, 20, 6)
        self.model_critic = CriticModel(11, 21, 10, 0)
        self.set_model_weights(self.model_agent)
        self.set_model_weights(self.model_critic)
        self.optimizer_agent = torch.optim.Adam(self.model_agent.parameters(), lr=0.001)
        self.optimizer_critic = torch.optim.Adam(self.model_critic.parameters(), lr=0.001)
        self.loss_agent = torch.nn.MSELoss()
        self.loss_critic = torch.nn.MSELoss()

    def add_prediction(self, prediction):
        """This function appends the prediction to the critic input as its
        final time step"""
        i = 0
        j = self.history_len
        self.factors_critic[i, j, 0] = prediction['score']
        self.factors_critic[i, j, 1] = prediction['r0']
        self.factors_critic[i, j, 2] = prediction['r1']
        self.factors_critic[i, j, 3] = prediction['r2']
        self.factors_critic[i, j, 4] = prediction['r3']
        self.factors_critic[i, j, 5] = prediction['r4']
        self.factors_critic[i, j, 6] = prediction['r5']
        self.factors_critic[i, j, 7] = prediction['sd']
        self.factors_critic[i, j, 8] = prediction['avg']
        self.factors_critic[i, j, 9] = prediction['m']
        self.factors_critic[i, j, 10] = prediction['k']

    def custom_loss_critic(self, target, selection, selection_averages, target_averages):
        """This returns the normalized cross-correlation between target and
        selection"""
        # Numerator: product of the mean-centred selection and target
        top = np.multiply((selection - selection_averages), (target - target_averages))
        top_sum = np.sum(top, axis=0)
        # Denominator: geometric mean of the centred sums of squares
        bottom_selection = np.power((selection - selection_averages), 2)
        bottom_targets = np.power((target - target_averages), 2)
        bottom_selection_sum = np.sum(bottom_selection, axis=0)
        bottom_targets_sum = np.sum(bottom_targets, axis=0)
        bottom = np.sqrt(np.multiply(bottom_selection_sum, bottom_targets_sum))
        divided = np.divide(top_sum, bottom)
        divided = divided[~np.isnan(divided)]
        return np.sum(divided)

    def factorize(self, user_history):
        """This function factorizes a given user history, or batch of user
        histories, into factors for an LSTM model"""
        # Reset the holding arrays
        self.factors_agent = np.zeros((1, 20, 12))
        self.factors_critic = np.zeros((1, 21, 11))
        # The leading index i keeps a batch dimension of 1 to match the
        # models' expected input shape
        i = 0
        j = 0
        for index, row in user_history.iterrows():
            # The last entry in a history is the one we attempt to predict,
            # so stop before it
            if j == (user_history.shape[0] - 1):
                break
            # Truncate the maximum history to ~1 day of continuous listening
            if j == 20:
                break
            # In an act of data reduction and factor selection, drop all
            # Spotify embeddings and deploy my own
            self.factors_agent[i, j, 0] = row['score']
            self.factors_critic[i, j, 0] = row['score']
            self.factors_agent[i, j, 1] = row['r0']
            self.factors_critic[i, j, 1] = row['r0']
            self.factors_agent[i, j, 2] = row['r1']
            self.factors_critic[i, j, 2] = row['r1']
            self.factors_agent[i, j, 3] = row['r2']
            self.factors_critic[i, j, 3] = row['r2']
            self.factors_agent[i, j, 4] = row['r3']
            self.factors_critic[i, j, 4] = row['r3']
            self.factors_agent[i, j, 5] = row['r4']
            self.factors_critic[i, j, 5] = row['r4']
            self.factors_agent[i, j, 6] = row['r5']
            self.factors_critic[i, j, 6] = row['r5']
            self.factors_agent[i, j, 7] = row['m']
            self.factors_critic[i, j, 7] = row['m']
            self.factors_agent[i, j, 8] = row['k']
            self.factors_critic[i, j, 8] = row['k']
            self.factors_agent[i, j, 9] = row['day_w']
            self.factors_critic[i, j, 9] = row['sd']
            self.factors_agent[i, j, 10] = row['day_m']
            self.factors_critic[i, j, 10] = row['avg']
            self.factors_agent[i, j, 11] = row['hour_d']
            j += 1
        i += 1
        self.history_len = j

    def get_agent_reward(self, repeat):
        """This function gets the agent reward"""
        # If the track is something the user has heard before, take the
        # reward to the power of (1/2)
        if repeat > 0:
            reward = math.pow(self.reward, 0.5)
        else:
            reward = self.reward
        # Cap the reward just below 1.0; the 1e-7 margin guards against
        # machine-precision issues introduced by the square root (verified
        # through testing)
        if reward > 0.9999999:
            reward = 0.9999999
        reward = torch.tensor([reward], requires_grad=True)
        self.reward = reward

    def get_critic_loss(self, current_user_history, data):
        """This function gets the critic loss"""
        user = data[data.user_id == current_user_history.user_id.values[0]]
        user = user[['r0', 'r1', 'r2', 'r3', 'r4', 'r5']]
        user_array = user.to_numpy()
        # To use numpy broadcasting we need an overly bulky array of the
        # averages, both for the selection and for the target (as passed to
        # self.custom_loss_critic)
        selection_averages = []
        selection_averages.append(np.average(current_user_history.r0.values))
        selection_averages.append(np.average(current_user_history.r1.values))
        selection_averages.append(np.average(current_user_history.r2.values))
        selection_averages.append(np.average(current_user_history.r3.values))
        selection_averages.append(np.average(current_user_history.r4.values))
        selection_averages.append(np.average(current_user_history.r5.values))
        selection_averages = np.array(selection_averages)
        # Give selection_averages a second dimension to match time; np.repeat
        # copies the average values along the time axis
        selection_averages = np.repeat(selection_averages[None, :],
                                       current_user_history.shape[0], axis=0)
        selection_averages = selection_averages[-10:]
        selection_array = current_user_history[['r0', 'r1', 'r2', 'r3', 'r4', 'r5']]
        selection_array = selection_array[-10:]
        selection_array = selection_array.to_numpy()
        # Repeat the process for the whole user history, as reflected by user
        target_averages = []
        target_averages.append(np.average(user.r0.values))
        target_averages.append(np.average(user.r1.values))
        target_averages.append(np.average(user.r2.values))
        target_averages.append(np.average(user.r3.values))
        target_averages.append(np.average(user.r4.values))
        target_averages.append(np.average(user.r5.values))
        target_averages = np.array(target_averages)
        target_averages = np.repeat(target_averages[None, :],
                                    selection_array.shape[0], axis=0)
        # Slide a window the size of the selection across the full history and
        # average the normalized cross-correlations
        critic_loss = []
        end = selection_array.shape[0]
        start = 0
        while end < user_array.shape[0]:
            critic_loss.append(self.custom_loss_critic(user_array[start:end, :],
                                                       selection_array,
                                                       selection_averages,
                                                       target_averages))
            start += 1
            end += 1
        if len(critic_loss) > 0:
            critic_loss = np.average(critic_loss)
        else:
            critic_loss = 0.0
        critic_loss = torch.tensor([critic_loss], requires_grad=True)
        self.critic_loss = critic_loss

    def predict(self, user_history):
        """This function manages the training of the model based on the
        provided data"""
        self.factorize(user_history)
        self.pred = self.model_agent(torch.Tensor(self.factors_agent))

    def propagate(self, current_user_history, data, prediction, repeat):
        """This function propagates the loss through the actor and critic"""
        self.add_prediction(prediction)
        # Clear out the gradients from the last prediction
        self.model_agent.zero_grad()
        self.model_critic.zero_grad()
        # Get the critic reward
        self.reward = self.model_critic(torch.Tensor(self.factors_critic))
        self.get_agent_reward(repeat)
        # Get the agent loss and apply it
        agent_loss = self.loss_agent(self.reward, torch.tensor([1.0]))
        agent_loss.backward()
        self.optimizer_agent.step()
        # Get the critic loss and apply it
        self.get_critic_loss(current_user_history, data)
        evaluated_critic_loss = self.loss_critic(self.critic_loss, torch.tensor([6.0]))
        evaluated_critic_loss.backward()
        self.optimizer_critic.step()

    def ready_agent(self, agent_model_path, critic_model_path, train):
        """This function sets up a working agent - one complete with a loss
        function and a model - from saved models"""
        self.is_train = train
        self.model_agent = torch.load(agent_model_path)
        if self.model_agent is not None:
            print("Actor Model {} successfully loaded.\n".format(agent_model_path))
        self.model_critic = torch.load(critic_model_path)
        if self.model_critic is not None:
            print("Critic Model {} successfully loaded.\n".format(critic_model_path))

    def set_model_weights(self, model):
        """This function initializes the weights of the Linear layers in a
        pytorch model"""
        for layer in model.modules():
            if isinstance(layer, torch.nn.Linear):
                n = layer.in_features
                y = 1.0 / np.sqrt(n)
                layer.weight.data.uniform_(-y, y)
                layer.bias.data.fill_(0)

    def wake_agent(self, train):
        """This function sets up a working agent - one complete with a loss
        function and a model"""
        self.is_train = train
        self.add_model()
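

# A minimal usage sketch, not part of the class above. It assumes `data` is a
# pandas DataFrame of listening events carrying the columns referenced in
# factorize() and add_prediction() (user_id, score, r0-r5, sd, avg, m, k,
# day_w, day_m, hour_d). The CSV path and the placeholder prediction dict are
# hypothetical; the real pipeline presumably maps the agent's raw output back
# onto the factors of a concrete recommended track before calling propagate().
if __name__ == '__main__':
    import pandas as pd

    data = pd.read_csv('listening_history.csv')  # hypothetical input file

    recommender = agent()
    recommender.wake_agent(train=True)

    for user_id in data.user_id.unique():
        user_history = data[data.user_id == user_id]
        recommender.predict(user_history)

        # Placeholder factors for the recommended track; in practice these
        # would come from the track selected using recommender.pred.
        prediction = {key: 0.0 for key in
                      ['score', 'r0', 'r1', 'r2', 'r3', 'r4', 'r5',
                       'sd', 'avg', 'm', 'k']}
        repeat = 0  # 1 if the recommended track already appears in the history
        recommender.propagate(user_history, data, prediction, repeat)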
import os
import random
import sys
from pprint import pprint

import numpy as np
import torch
from torch import optim

# Project-specific pieces referenced below (hparams, load, read_vocab,
# Tokenizer, BTokenizer, ImageFeatures, Simulator, Batch, Evaluation,
# AgentModel, SimpleAgent, VerbalAskAgent, train) are assumed to be imported
# from the surrounding repository.


def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''

    # Set which GPU to use
    device = torch.device('cuda', hparams.device_id)

    # Load the checkpoint (if one exists)
    if os.path.exists(hparams.load_path):
        print('Load model from %s' % hparams.load_path)
        ckpt = load(hparams.load_path, device)
        start_iter = ckpt['iter']
    else:
        if not hparams.forward_agent and not hparams.random_agent and \
                not hparams.shortest_agent:
            if hasattr(hparams, 'load_path') and hasattr(hparams, 'eval_only') and \
                    hparams.eval_only:
                sys.exit('load_path %s does not exist!' % hparams.load_path)
        ckpt = None
        start_iter = 0
    end_iter = hparams.n_iters

    if not hasattr(hparams, 'ask_baseline'):
        hparams.ask_baseline = None
    if not hasattr(hparams, 'instruction_baseline'):
        hparams.instruction_baseline = None

    # Set random seeds
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)
    np.random.seed(hparams.seed)
    random.seed(hparams.seed)

    # Create or load vocab
    train_vocab_path = os.path.join(hparams.data_path, 'vocab.txt')
    if not os.path.exists(train_vocab_path):
        raise Exception('Vocab file not found at %s' % train_vocab_path)
    vocab = read_vocab([train_vocab_path])
    hparams.instr_padding_idx = vocab.index('<PAD>')

    tokenizer = Tokenizer(vocab=vocab, encoding_length=hparams.max_instr_len)
    if hparams.encoder_type == 'dic':
        tokenizer = BTokenizer(vocab=vocab, encoding_length=hparams.max_instr_len)
    featurizer = ImageFeatures(hparams.img_features, device)
    simulator = Simulator(hparams)

    # Create train environment
    train_env = Batch(hparams, simulator, featurizer, tokenizer, split='train')

    # Create validation environments
    val_splits = ['val_seen', 'val_unseen']
    eval_mode = hasattr(hparams, 'eval_only') and hparams.eval_only
    if eval_mode:
        if 'val_seen' in hparams.load_path:
            val_splits = ['test_seen']
        elif 'val_unseen' in hparams.load_path:
            val_splits = ['test_unseen']
        else:
            val_splits = ['test_seen', 'test_unseen']
        end_iter = start_iter + 1

    if hparams.eval_on_val:
        val_splits = [x.replace('test_', 'val_') for x in val_splits]

    val_envs_tmp = {
        split: (
            Batch(hparams, simulator, featurizer, tokenizer, split=split),
            Evaluation(hparams, [split], hparams.data_path))
        for split in val_splits
    }

    val_envs = {}
    for key, value in val_envs_tmp.items():
        if '_seen' in key:
            val_envs[key + '_env_seen_anna'] = value
            val_envs[key + '_env_unseen_anna'] = value
        else:
            assert '_unseen' in key
            val_envs[key] = value

    # Build model and optimizer
    model = AgentModel(len(vocab), hparams, device).to(device)
    optimizer = optim.Adam(model.parameters(), lr=hparams.lr,
                           weight_decay=hparams.weight_decay)

    best_metrics = {env_name: -1 for env_name in val_envs.keys()}
    best_metrics['combined'] = -1

    # Load model parameters from the checkpoint (if one exists)
    if ckpt is not None:
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        best_metrics = ckpt['best_metrics']
        train_env.ix = ckpt['data_idx']

    if hparams.log_every == -1:
        hparams.log_every = round(len(train_env.data) /
                                  (hparams.batch_size * 100)) * 100

    print('')
    pprint(vars(hparams), width=1)
    print('')
    print(model)
    print('Number of parameters:',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    if hparams.random_agent or hparams.forward_agent or hparams.shortest_agent:
        assert eval_mode
        agent = SimpleAgent(hparams)
    else:
        agent = VerbalAskAgent(model, hparams, device)

    return train(train_env, val_envs, agent, model, optimizer, start_iter,
                 end_iter, best_metrics, eval_mode)
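

# A minimal sketch of the checkpoint layout train_val() expects, inferred from
# the keys it reads out of `ckpt` above ('iter', 'model_state_dict',
# 'optim_state_dict', 'best_metrics', 'data_idx'). save_checkpoint is a
# hypothetical counterpart to the project's `load` helper, shown only to make
# that dictionary explicit.
def save_checkpoint(path, iteration, model, optimizer, best_metrics, data_idx):
    torch.save({
        'iter': iteration,
        'model_state_dict': model.state_dict(),
        'optim_state_dict': optimizer.state_dict(),
        'best_metrics': best_metrics,
        'data_idx': data_idx,
    }, path)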