Example No. 1
0
    def initialize(hparams, gen_data, *args, **kwargs):
        """Build the stack-augmented RNN generator, its optimizer, and the
        keyword arguments later used to create initial hidden states.

        Returns a tuple ``(model, optimizer, gen_data, rnn_args)``.
        """
        gen_data.set_batch_size(hparams['batch_size'])

        d_model = hparams['d_model']
        num_layers = hparams['num_layers']

        # Embedding encoder; return_tuple=True so downstream layers also
        # receive auxiliary outputs (e.g. the padding mask).
        encoder = Encoder(vocab_size=gen_data.n_characters,
                          d_model=d_model,
                          padding_idx=gen_data.char2idx[gen_data.pad_symbol],
                          dropout=hparams['dropout'],
                          return_tuple=True)

        # Stack-RNN layers, interleaved with dropout + layer norm whenever
        # more than one layer is configured.
        has_stack = True
        stack_layers = []
        for layer_idx in range(1, num_layers + 1):
            stack_layers.append(
                StackRNN(layer_index=layer_idx,
                         input_size=d_model,
                         hidden_size=d_model,
                         has_stack=has_stack,
                         unit_type=hparams['unit_type'],
                         stack_width=hparams['stack_width'],
                         stack_depth=hparams['stack_depth'],
                         k_mask_func=encoder.k_padding_mask))
            if num_layers > 1:
                stack_layers.extend([StackedRNNDropout(hparams['dropout']),
                                     StackedRNNLayerNorm(d_model)])

        # Linear readout projecting hidden states back to vocabulary logits.
        readout = RNNLinearOut(
            out_dim=gen_data.n_characters,
            hidden_size=d_model,
            bidirectional=False,
            bias=True)
        model = nn.Sequential(encoder, *stack_layers, readout)
        if use_cuda:
            model = model.cuda()

        optimizer = parse_optimizer(hparams, model)

        # Arguments consumed by the hidden-state initialization helpers.
        rnn_args = {
            'num_layers': num_layers,
            'hidden_size': d_model,
            'num_dir': 1,
            'device': device,
            'has_stack': has_stack,
            'has_cell': hparams['unit_type'] == 'lstm',
            'stack_width': hparams['stack_width'],
            'stack_depth': hparams['stack_depth']
        }
        return model, optimizer, gen_data, rnn_args
Example No. 2
0
    def initialize(hparams, demo_data_gen, unbiased_data_gen, prior_data_gen,
                   *args, **kwargs):
        """Assemble the one-hot RNN generator, its optimizer, and the
        generation arguments, including the expert model selected by
        ``hparams['exp_type']``.

        Returns a tuple ``(model, optimizer, gen_args)``.
        """
        batch_size = hparams['batch_size']
        prior_data_gen.set_batch_size(batch_size)
        demo_data_gen.set_batch_size(batch_size)

        vocab_size = prior_data_gen.n_characters
        hidden_size = hparams['d_model']

        # One-hot input encoding (no learned embedding table).
        encoder = OneHotEncoder(vocab_size=vocab_size,
                                return_tuple=False,
                                device=device)

        generator = RNNGenerator(input_size=vocab_size,
                                 hidden_size=hidden_size,
                                 unit_type=hparams['unit_type'],
                                 num_layers=hparams['num_layers'],
                                 dropout=hparams['dropout'],
                                 device=device)
        readout = RNNLinearOut(out_dim=vocab_size,
                               hidden_size=hidden_size,
                               bidirectional=False,
                               bias=True)
        model = nn.Sequential(encoder, generator, readout)
        if use_cuda:
            model = model.cuda()

        optimizer = parse_optimizer(hparams, model)

        # Expert predictor keyed by experiment type; .get() yields None for
        # unknown types.
        experts = {
            'pretraining': DummyPredictor(),
            'drd2': RNNPredictor(hparams['drd2'], device, True),
            'logp': RNNPredictor(hparams['logp'], device),
            'jak2_max': XGBPredictor(hparams['jak2']),
            'jak2_min': XGBPredictor(hparams['jak2'])
        }
        gen_args = {
            'num_layers': hparams['num_layers'],
            'hidden_size': hidden_size,
            'num_dir': 1,
            'has_stack': False,
            'has_cell': hparams['unit_type'] == 'lstm',
            'device': device,
            'expert_model': experts.get(hparams['exp_type']),
            'demo_data_gen': demo_data_gen,
            'unbiased_data_gen': unbiased_data_gen,
            'prior_data_gen': prior_data_gen,
            'exp_type': hparams['exp_type'],
        }
        print(f'Number of model parameters={count_parameters(model)}')
        return model, optimizer, gen_args
Example No. 3
0
 def test_stack_rnn(self):
     """Smoke-test two chained StackRNN layers followed by a linear readout.

     Relies on module-level fixtures `gen_data` and `bz` (batch size) —
     not visible here; TODO confirm they are defined at module scope.
     """
     x, y = gen_data.random_training_set(batch_size=bz)
     d_model = 12
     hidden_size = 16
     stack_width = 10
     stack_depth = 20
     unit_type = 'lstm'
     num_layers = 2
     # One initial-state bundle per layer; `unit_type` is only used here.
     # NOTE(review): states are built for 'lstm' while both StackRNNs below
     # are constructed with 'gru' — confirm this mismatch is intentional.
     hidden_states = [
         get_initial_states(bz, hidden_size, 1, stack_depth, stack_width,
                            unit_type) for _ in range(num_layers)
     ]
     encoder = Encoder(gen_data.n_characters, d_model,
                       gen_data.char2idx[gen_data.pad_symbol])
     x = encoder(x)
     # Layer 1 consumes the embeddings; layer 2 consumes layer 1's outputs.
     stack_rnn_1 = StackRNN(1,
                            d_model,
                            hidden_size,
                            True,
                            'gru',
                            stack_width,
                            stack_depth,
                            k_mask_func=encoder.k_padding_mask)
     stack_rnn_2 = StackRNN(2,
                            hidden_size,
                            hidden_size,
                            True,
                            'gru',
                            stack_width,
                            stack_depth,
                            k_mask_func=encoder.k_padding_mask)
     # The layer input is a list: [data] + per-layer hidden states.
     outputs = stack_rnn_1([x] + hidden_states)
     outputs = stack_rnn_2(outputs)
     assert len(outputs) > 1
     # Project the final hidden states onto 4 output classes.
     linear = RNNLinearOut(
         4,
         hidden_size,
         bidirectional=False,
     )
     x = linear(outputs)
     print(x[0].shape)
    def initialize(hparams, demo_data_gen, unbiased_data_gen, prior_data_gen,
                   *args, **kwargs):
        """Wire up the full IRL/RL training stack: the stack-RNN policy
        agent, the REINFORCE trainer, the RNN reward network with its
        guided-IRL trainer, and the XGB expert providing the true reward.

        Returns ``init_args``, a dict bundling every constructed entity.
        """
        # Embeddings provider
        encoder = Encoder(
            vocab_size=demo_data_gen.n_characters,
            d_model=hparams['d_model'],
            padding_idx=demo_data_gen.char2idx[demo_data_gen.pad_symbol],
            dropout=hparams['dropout'],
            return_tuple=True)

        # Agent entities
        rnn_layers = []
        has_stack = True
        for i in range(1, hparams['agent_params']['num_layers'] + 1):
            rnn_layers.append(
                StackRNN(layer_index=i,
                         input_size=hparams['d_model'],
                         hidden_size=hparams['d_model'],
                         has_stack=has_stack,
                         unit_type=hparams['agent_params']['unit_type'],
                         stack_width=hparams['agent_params']['stack_width'],
                         stack_depth=hparams['agent_params']['stack_depth'],
                         k_mask_func=encoder.k_padding_mask))
            # Regularization is interleaved only for multi-layer agents.
            if hparams['agent_params']['num_layers'] > 1:
                rnn_layers.append(StackedRNNDropout(hparams['dropout']))
                rnn_layers.append(StackedRNNLayerNorm(hparams['d_model']))
        agent_net = nn.Sequential(
            encoder, *rnn_layers,
            RNNLinearOut(out_dim=demo_data_gen.n_characters,
                         hidden_size=hparams['d_model'],
                         bidirectional=False,
                         bias=True))
        agent_net = agent_net.to(device)
        optimizer_agent_net = parse_optimizer(hparams['agent_params'],
                                              agent_net)
        # Samples actions (characters) from the policy's probabilities.
        selector = MolEnvProbabilityActionSelector(
            actions=demo_data_gen.all_characters)
        probs_reg = StateActionProbRegistry()
        # Arguments for building the agent's initial hidden states.
        init_state_args = {
            'num_layers': hparams['agent_params']['num_layers'],
            'hidden_size': hparams['d_model'],
            'stack_depth': hparams['agent_params']['stack_depth'],
            'stack_width': hparams['agent_params']['stack_width'],
            'unit_type': hparams['agent_params']['unit_type']
        }
        agent = PolicyAgent(model=agent_net,
                            action_selector=selector,
                            states_preprocessor=seq2tensor,
                            initial_state=agent_net_hidden_states_func,
                            initial_state_args=init_state_args,
                            apply_softmax=True,
                            probs_registry=probs_reg,
                            device=device)
        # Reward is delayed (end-of-episode) unless Monte-Carlo simulation
        # is enabled for per-step reward estimates.
        drl_alg = REINFORCE(model=agent_net,
                            optimizer=optimizer_agent_net,
                            initial_states_func=agent_net_hidden_states_func,
                            initial_states_args=init_state_args,
                            prior_data_gen=prior_data_gen,
                            device=device,
                            xent_lambda=hparams['xent_lambda'],
                            gamma=hparams['gamma'],
                            grad_clipping=hparams['reinforce_max_norm'],
                            lr_decay_gamma=hparams['lr_decay_gamma'],
                            lr_decay_step=hparams['lr_decay_step_size'],
                            delayed_reward=not hparams['use_monte_carlo_sim'])

        # Reward function entities
        # NOTE(review): the reward net shares `encoder` with the agent net,
        # so encoder weights receive gradients from both optimizers.
        reward_net = nn.Sequential(
            encoder,
            RewardNetRNN(
                input_size=hparams['d_model'],
                hidden_size=hparams['reward_params']['d_model'],
                num_layers=hparams['reward_params']['num_layers'],
                bidirectional=hparams['reward_params']['bidirectional'],
                use_attention=hparams['reward_params']['use_attention'],
                dropout=hparams['dropout'],
                unit_type=hparams['reward_params']['unit_type'],
                use_smiles_validity_flag=hparams['reward_params']
                ['use_validity_flag']))
        reward_net = reward_net.to(device)

        expert_model = XGBPredictor(hparams['expert_model_dir'])
        # Select the maximization or minimization variant of the JAK2
        # true-reward function based on the bias mode.
        true_reward_func = get_jak2_max_reward if hparams[
            'bias_mode'] == 'max' else get_jak2_min_reward
        reward_function = RewardFunction(
            reward_net,
            mc_policy=agent,
            actions=demo_data_gen.all_characters,
            device=device,
            use_mc=hparams['use_monte_carlo_sim'],
            mc_max_sims=hparams['monte_carlo_N'],
            expert_func=expert_model,
            no_mc_fill_val=hparams['no_mc_fill_val'],
            true_reward_func=true_reward_func,
            use_true_reward=hparams['use_true_reward'])
        optimizer_reward_net = parse_optimizer(hparams['reward_params'],
                                               reward_net)
        # Demonstrations are batched separately for reward-net training.
        demo_data_gen.set_batch_size(
            hparams['reward_params']['demo_batch_size'])
        irl_alg = GuidedRewardLearningIRL(
            reward_net,
            optimizer_reward_net,
            demo_data_gen,
            k=hparams['reward_params']['irl_alg_num_iter'],
            agent_net=agent_net,
            agent_net_init_func=agent_net_hidden_states_func,
            agent_net_init_func_args=init_state_args,
            device=device)

        init_args = {
            'agent': agent,
            'probs_reg': probs_reg,
            'drl_alg': drl_alg,
            'irl_alg': irl_alg,
            'reward_func': reward_function,
            'gamma': hparams['gamma'],
            'episodes_to_train': hparams['episodes_to_train'],
            'expert_model': expert_model,
            'demo_data_gen': demo_data_gen,
            'unbiased_data_gen': unbiased_data_gen,
            'gen_args': {
                'num_layers': hparams['agent_params']['num_layers'],
                'hidden_size': hparams['d_model'],
                'num_dir': 1,
                'stack_depth': hparams['agent_params']['stack_depth'],
                'stack_width': hparams['agent_params']['stack_width'],
                'has_stack': has_stack,
                'has_cell': hparams['agent_params']['unit_type'] == 'lstm',
                'device': device
            }
        }
        return init_args
Example No. 5
0
def initialize(hparams, demo_data_gen, unbiased_data_gen, has_critic):
    """Build the (inference-mode) agent, optional critic, and reward
    networks plus the argument dicts used for hidden-state creation and
    generation.

    All networks are put in ``eval()`` mode, so this setup appears intended
    for evaluation rather than training — TODO confirm with callers.

    Returns ``init_args``, a dict bundling the constructed entities.
    """
    # Embeddings provider
    encoder = Encoder(vocab_size=demo_data_gen.n_characters, d_model=hparams['d_model'],
                      padding_idx=demo_data_gen.char2idx[demo_data_gen.pad_symbol],
                      dropout=hparams['dropout'], return_tuple=True).eval()

    # Agent entities
    rnn_layers = []
    has_stack = True
    for i in range(1, hparams['agent_params']['num_layers'] + 1):
        rnn_layers.append(StackRNN(layer_index=i,
                                   input_size=hparams['d_model'],
                                   hidden_size=hparams['d_model'],
                                   has_stack=has_stack,
                                   unit_type=hparams['agent_params']['unit_type'],
                                   stack_width=hparams['agent_params']['stack_width'],
                                   stack_depth=hparams['agent_params']['stack_depth'],
                                   k_mask_func=encoder.k_padding_mask))
        # Dropout + layer norm only interleaved for multi-layer agents.
        if hparams['agent_params']['num_layers'] > 1:
            rnn_layers.append(StackedRNNDropout(hparams['dropout']))
            rnn_layers.append(StackedRNNLayerNorm(hparams['d_model']))
    agent_net = nn.Sequential(encoder,
                              *rnn_layers,
                              RNNLinearOut(out_dim=demo_data_gen.n_characters,
                                           hidden_size=hparams['d_model'],
                                           bidirectional=False,
                                           bias=True))
    agent_net = agent_net.to(device).eval()
    # Arguments later passed to the hidden-state initialization helper.
    init_state_args = {'num_layers': hparams['agent_params']['num_layers'],
                       'hidden_size': hparams['d_model'],
                       'stack_depth': hparams['agent_params']['stack_depth'],
                       'stack_width': hparams['agent_params']['stack_width'],
                       'unit_type': hparams['agent_params']['unit_type']}
    if has_critic:
        # Critic shares the same encoder instance as the agent.
        critic = nn.Sequential(encoder,
                               CriticRNN(hparams['d_model'], hparams['critic_params']['d_model'],
                                         unit_type=hparams['critic_params']['unit_type'],
                                         dropout=hparams['critic_params']['dropout'],
                                         num_layers=hparams['critic_params']['num_layers']))
        critic = critic.to(device).eval()
    else:
        critic = None

    # Reward function entities
    reward_net_rnn = RewardNetRNN(input_size=hparams['d_model'], hidden_size=hparams['reward_params']['d_model'],
                                  num_layers=hparams['reward_params']['num_layers'],
                                  bidirectional=hparams['reward_params']['bidirectional'],
                                  use_attention=hparams['reward_params']['use_attention'],
                                  dropout=hparams['reward_params']['dropout'],
                                  unit_type=hparams['reward_params']['unit_type'],
                                  use_smiles_validity_flag=hparams['reward_params']['use_validity_flag'])
    reward_net = nn.Sequential(encoder,
                               reward_net_rnn)
    reward_net = reward_net.to(device)
    # expert_model = RNNPredictor(hparams['expert_model_params'], device)
    demo_data_gen.set_batch_size(hparams['reward_params']['demo_batch_size'])

    init_args = {'agent_net': agent_net,
                 'critic_net': critic,
                 'reward_net': reward_net,
                 'reward_net_rnn': reward_net_rnn,
                 'encoder': encoder.eval(),
                 'gamma': hparams['gamma'],
                 # 'expert_model': expert_model,
                 'demo_data_gen': demo_data_gen,
                 'unbiased_data_gen': unbiased_data_gen,
                 'init_hidden_states_args': init_state_args,
                 'gen_args': {'num_layers': hparams['agent_params']['num_layers'],
                              'hidden_size': hparams['d_model'],
                              'num_dir': 1,
                              'stack_depth': hparams['agent_params']['stack_depth'],
                              'stack_width': hparams['agent_params']['stack_width'],
                              'has_stack': has_stack,
                              'has_cell': hparams['agent_params']['unit_type'] == 'lstm',
                              'device': device}}
    return init_args
Example No. 6
0
    def test_policy_net(self):
        """Integration smoke-test: build a small policy agent, reward
        function, and molecule environment, then step the experience
        source for six transitions, printing each one.

        Relies on module-level `gen_data` and helper functions
        (`get_initial_states`, `seq2tensor`) — TODO confirm fixtures.
        """
        d_model = 8
        hidden_size = 16
        num_layers = 1
        stack_width = 10
        stack_depth = 20
        unit_type = 'lstm'

        # Create a function to provide initial hidden states
        def hidden_states_func(batch_size=1):
            return [
                get_initial_states(batch_size, hidden_size, 1, stack_depth,
                                   stack_width, unit_type)
                for _ in range(num_layers)
            ]

        # Encoder to map character indices to embeddings
        encoder = Encoder(gen_data.n_characters,
                          d_model,
                          gen_data.char2idx[gen_data.pad_symbol],
                          return_tuple=True)

        # Create agent network
        stack_rnn = StackRNN(1,
                             d_model,
                             hidden_size,
                             True,
                             'lstm',
                             stack_width,
                             stack_depth,
                             k_mask_func=encoder.k_padding_mask)
        stack_linear = RNNLinearOut(gen_data.n_characters,
                                    hidden_size,
                                    bidirectional=False)
        agent_net = torch.nn.Sequential(encoder, stack_rnn, stack_linear)

        # Create agent
        selector = MolEnvProbabilityActionSelector(
            actions=gen_data.all_characters)
        probs_reg = StateActionProbRegistry()
        agent = PolicyAgent(model=agent_net,
                            action_selector=selector,
                            states_preprocessor=seq2tensor,
                            initial_state=hidden_states_func,
                            apply_softmax=True,
                            probs_registry=probs_reg,
                            device='cpu')

        # Reward function model
        rnn = RewardNetRNN(d_model,
                           hidden_size,
                           num_layers,
                           bidirectional=True,
                           unit_type='gru')
        reward_net = torch.nn.Sequential(encoder, rnn)
        reward_function = RewardFunction(reward_net=reward_net,
                                         mc_policy=agent,
                                         actions=gen_data.all_characters)

        # Create molecule generation environment
        env = MoleculeEnv(gen_data.all_characters, reward_function)

        # Ptan ops for aggregating experiences
        exp_source = ExperienceSourceFirstLast(env, agent, gamma=0.97)

        # NOTE(review): rl_alg and irl_alg are constructed but never
        # stepped in this test — construction-only coverage.
        rl_alg = REINFORCE(agent_net, torch.optim.Adam(agent_net.parameters()),
                           hidden_states_func)
        gen_data.set_batch_size(1)
        irl_alg = GuidedRewardLearningIRL(reward_net,
                                          torch.optim.Adam(
                                              reward_net.parameters()),
                                          demo_gen_data=gen_data)

        # Begin simulation and training
        batch_states, batch_actions, batch_qvals = [], [], []
        traj_prob = 1.
        for step_idx, exp in enumerate(exp_source):
            batch_states.append(exp.state)
            batch_actions.append(exp.action)
            batch_qvals.append(exp.reward)
            # Accumulate the probability of the sampled trajectory.
            traj_prob *= probs_reg.get(list(exp.state), exp.action)

            print(
                f'state = {exp.state}, action = {exp.action}, reward = {exp.reward}, next_state = {exp.last_state}'
            )
            if step_idx == 5:
                break
    def initialize(hparams, data_gens, *args, **kwargs):
        """Build the stack-RNN generator from the 'prior_data' generator and
        return ``(model, optimizer, rnn_args)``; ``rnn_args`` also carries
        the data generators and the expert model for ``hparams['exp_type']``.
        """
        batch_size = hparams['batch_size']
        for name in data_gens:
            data_gens[name].set_batch_size(batch_size)
        gen_data = data_gens['prior_data']

        d_model = hparams['d_model']
        num_layers = hparams['num_layers']

        # Embedding encoder; return_tuple=True exposes auxiliary outputs
        # (e.g. the padding mask) to the stack layers.
        encoder = Encoder(vocab_size=gen_data.n_characters,
                          d_model=d_model,
                          padding_idx=gen_data.char2idx[gen_data.pad_symbol],
                          dropout=hparams['dropout'],
                          return_tuple=True)

        # Stack-RNN layers with dropout + layer norm interleaved when the
        # network has more than one layer.
        has_stack = True
        layers = []
        for layer_idx in range(1, num_layers + 1):
            layers.append(
                StackRNN(layer_index=layer_idx,
                         input_size=d_model,
                         hidden_size=d_model,
                         has_stack=has_stack,
                         unit_type=hparams['unit_type'],
                         stack_width=hparams['stack_width'],
                         stack_depth=hparams['stack_depth'],
                         k_mask_func=encoder.k_padding_mask))
            if num_layers > 1:
                layers.extend([StackedRNNDropout(hparams['dropout']),
                               StackedRNNLayerNorm(d_model)])

        model = nn.Sequential(
            encoder,
            *layers,
            RNNLinearOut(out_dim=gen_data.n_characters,
                         hidden_size=d_model,
                         bidirectional=False,
                         bias=True))
        if use_cuda:
            model = model.cuda()

        optimizer = parse_optimizer(hparams, model)

        # Expert predictor by experiment type; None for unknown types.
        experts = {
            'pretraining': DummyPredictor(),
            'drd2': RNNPredictor(hparams['drd2'], device, True),
            'logp': RNNPredictor(hparams['logp'], device),
            'jak2_max': XGBPredictor(hparams['jak2']),
            'jak2_min': XGBPredictor(hparams['jak2'])
        }
        rnn_args = {
            'num_layers': num_layers,
            'hidden_size': d_model,
            'num_dir': 1,
            'device': device,
            'has_stack': has_stack,
            'has_cell': hparams['unit_type'] == 'lstm',
            'stack_width': hparams['stack_width'],
            'stack_depth': hparams['stack_depth'],
            'demo_data_gen': data_gens['demo_data'],
            'unbiased_data_gen': data_gens['unbiased_data'],
            'prior_data_gen': data_gens['prior_data'],
            'expert_model': experts.get(hparams['exp_type']),
            'exp_type': hparams['exp_type'],
        }
        return model, optimizer, rnn_args