def initialize(hparams, train_data, val_data, test_data):
    # Create pytorch data loaders. DataLoader/SmilesDataset/parse_optimizer and
    # the global `device` are assumed to come from the surrounding module.
    train_loader = DataLoader(SmilesDataset(train_data[0], train_data[1]),
                              batch_size=hparams['batch'],
                              collate_fn=lambda x: x)
    if val_data:
        val_loader = DataLoader(SmilesDataset(val_data[0], val_data[1]),
                                batch_size=hparams['batch'],
                                collate_fn=lambda x: x)
    else:
        val_loader = None
    test_loader = DataLoader(SmilesDataset(test_data[0], test_data[1]),
                             batch_size=hparams['batch'],
                             collate_fn=lambda x: x)

    # Create model and optimizer; regression metrics for property prediction
    model = RNNPredictorModel(d_model=int(hparams['d_model']),
                              tokens=get_default_tokens(),
                              num_layers=int(hparams['rnn_num_layers']),
                              dropout=float(hparams['dropout']),
                              bidirectional=hparams['is_bidirectional'],
                              unit_type=hparams['unit_type'],
                              device=device).to(device)
    optimizer = parse_optimizer(hparams, model)
    metrics = [mean_squared_error, root_mean_squared_error, r2_score]
    return {'data_loaders': {'train': train_loader,
                             'val': val_loader,
                             'test': test_loader},
            'model': model,
            'optimizer': optimizer,
            'metrics': metrics}
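# Usage sketch (illustrative, not from the source): the keys below are exactly the
# ones the regression initializer reads; the concrete values, the (SMILES, label)
# pairs, and any optimizer-specific keys consumed by parse_optimizer are assumptions.
#
#   hparams = {'batch': 128, 'd_model': 256, 'rnn_num_layers': 2, 'dropout': 0.2,
#              'is_bidirectional': True, 'unit_type': 'lstm'}
#   init = initialize(hparams, (smiles_tr, y_tr), (smiles_val, y_val), (smiles_te, y_te))
#   model, optimizer = init['model'], init['optimizer']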
def initialize(hparams, train_data, val_data, test_data):
    # Create pytorch data loaders; only the training set is shuffled
    train_loader = DataLoader(SmilesDataset(train_data[0], train_data[1]),
                              batch_size=hparams['batch'],
                              shuffle=True,
                              collate_fn=lambda x: x)
    if val_data:
        val_loader = DataLoader(SmilesDataset(val_data[0], val_data[1]),
                                batch_size=hparams['batch'],
                                collate_fn=lambda x: x)
    else:
        val_loader = None
    test_loader = DataLoader(SmilesDataset(test_data[0], test_data[1]),
                             batch_size=hparams['batch'],
                             collate_fn=lambda x: x)

    # Create model and optimizer: the RNN predictor is followed by a sigmoid
    # head, so this variant targets binary classification
    model = torch.nn.Sequential(
        RNNPredictorModel(d_model=int(hparams['d_model']),
                          tokens=get_default_tokens(),
                          num_layers=int(hparams['rnn_num_layers']),
                          dropout=float(hparams['dropout']),
                          bidirectional=hparams['is_bidirectional'],
                          unit_type=hparams['unit_type'],
                          device=device),
        torch.nn.Sigmoid()).to(device)
    optimizer = parse_optimizer(hparams, model)
    metrics = [accuracy_score, precision_score, recall_score, f1_score]
    return {'data_loaders': {'train': train_loader,
                             'val': val_loader,
                             'test': test_loader},
            'model': model,
            'optimizer': optimizer,
            'metrics': metrics}
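# Note (assuming these are the sklearn metrics): accuracy/precision/recall/F1 expect
# hard class labels, so the sigmoid outputs would need thresholding before scoring,
# e.g.:
#
#   y_pred = (model(batch_smiles) >= 0.5).long().cpu().numpy()
#   scores = {m.__name__: m(y_true, y_pred) for m in init['metrics']}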
def initialize(hparams, gen_data, *args, **kwargs):
    gen_data.set_batch_size(hparams['batch_size'])

    # Create main model
    encoder = Encoder(vocab_size=gen_data.n_characters,
                      d_model=hparams['d_model'],
                      padding_idx=gen_data.char2idx[gen_data.pad_symbol],
                      dropout=hparams['dropout'],
                      return_tuple=True)

    # Create RNN layers
    rnn_layers = []
    has_stack = True
    for i in range(1, hparams['num_layers'] + 1):
        rnn_layers.append(StackRNN(layer_index=i,
                                   input_size=hparams['d_model'],
                                   hidden_size=hparams['d_model'],
                                   has_stack=has_stack,
                                   unit_type=hparams['unit_type'],
                                   stack_width=hparams['stack_width'],
                                   stack_depth=hparams['stack_depth'],
                                   k_mask_func=encoder.k_padding_mask))
        if hparams['num_layers'] > 1:
            rnn_layers.append(StackedRNNDropout(hparams['dropout']))
            rnn_layers.append(StackedRNNLayerNorm(hparams['d_model']))
    model = nn.Sequential(encoder,
                          *rnn_layers,
                          RNNLinearOut(out_dim=gen_data.n_characters,
                                       hidden_size=hparams['d_model'],
                                       bidirectional=False,
                                       # encoder=encoder,
                                       # dropout=hparams['dropout'],
                                       bias=True))
    if use_cuda:
        model = model.cuda()
    optimizer = parse_optimizer(hparams, model)
    rnn_args = {'num_layers': hparams['num_layers'],
                'hidden_size': hparams['d_model'],
                'num_dir': 1,
                'device': device,
                'has_stack': has_stack,
                'has_cell': hparams['unit_type'] == 'lstm',
                'stack_width': hparams['stack_width'],
                'stack_depth': hparams['stack_depth']}
    return model, optimizer, gen_data, rnn_args
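# Usage sketch (values illustrative): rnn_args describes the recurrent/stack state a
# caller must allocate per batch before stepping the generator; gen_data is assumed
# to be a character-level SMILES data generator, per the encoder setup above.
#
#   hparams = {'batch_size': 64, 'd_model': 512, 'num_layers': 2, 'unit_type': 'lstm',
#              'dropout': 0.1, 'stack_width': 50, 'stack_depth': 10}
#   model, optimizer, gen_data, rnn_args = initialize(hparams, gen_data)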
def initialize(hparams, demo_data_gen, unbiased_data_gen, prior_data_gen, *args, **kwargs):
    prior_data_gen.set_batch_size(hparams['batch_size'])
    demo_data_gen.set_batch_size(hparams['batch_size'])

    # Create main model
    encoder = OneHotEncoder(vocab_size=prior_data_gen.n_characters,
                            return_tuple=False,
                            device=device)

    # Create RNN layers
    model = nn.Sequential(encoder,
                          RNNGenerator(input_size=prior_data_gen.n_characters,
                                       hidden_size=hparams['d_model'],
                                       unit_type=hparams['unit_type'],
                                       num_layers=hparams['num_layers'],
                                       dropout=hparams['dropout'],
                                       device=device),
                          RNNLinearOut(out_dim=prior_data_gen.n_characters,
                                       hidden_size=hparams['d_model'],
                                       bidirectional=False,
                                       bias=True))
    if use_cuda:
        model = model.cuda()
    optimizer = parse_optimizer(hparams, model)
    gen_args = {'num_layers': hparams['num_layers'],
                'hidden_size': hparams['d_model'],
                'num_dir': 1,
                'has_stack': False,
                'has_cell': hparams['unit_type'] == 'lstm',
                'device': device,
                'expert_model': {'pretraining': DummyPredictor(),
                                 'drd2': RNNPredictor(hparams['drd2'], device, True),
                                 'logp': RNNPredictor(hparams['logp'], device),
                                 'jak2_max': XGBPredictor(hparams['jak2']),
                                 'jak2_min': XGBPredictor(hparams['jak2'])
                                 }.get(hparams['exp_type']),
                'demo_data_gen': demo_data_gen,
                'unbiased_data_gen': unbiased_data_gen,
                'prior_data_gen': prior_data_gen,
                'exp_type': hparams['exp_type']}
    print(f'Number of model parameters={count_parameters(model)}')
    return model, optimizer, gen_args
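# Design note: the dict literal feeding .get() instantiates every expert predictor
# (including loading their weights) before all but one are discarded. If that cost
# matters, a lazy sketch would map exp_type to zero-argument factories instead:
#
#   expert_factories = {'pretraining': DummyPredictor,
#                       'drd2': lambda: RNNPredictor(hparams['drd2'], device, True),
#                       'logp': lambda: RNNPredictor(hparams['logp'], device),
#                       'jak2_max': lambda: XGBPredictor(hparams['jak2']),
#                       'jak2_min': lambda: XGBPredictor(hparams['jak2'])}
#   expert_model = expert_factories[hparams['exp_type']]()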
def initialize(hparams, demo_data_gen, unbiased_data_gen, prior_data_gen, *args, **kwargs):
    # Embeddings provider
    encoder = Encoder(vocab_size=demo_data_gen.n_characters,
                      d_model=hparams['d_model'],
                      padding_idx=demo_data_gen.char2idx[demo_data_gen.pad_symbol],
                      dropout=hparams['dropout'],
                      return_tuple=True)

    # Agent entities
    rnn_layers = []
    has_stack = True
    for i in range(1, hparams['agent_params']['num_layers'] + 1):
        rnn_layers.append(StackRNN(layer_index=i,
                                   input_size=hparams['d_model'],
                                   hidden_size=hparams['d_model'],
                                   has_stack=has_stack,
                                   unit_type=hparams['agent_params']['unit_type'],
                                   stack_width=hparams['agent_params']['stack_width'],
                                   stack_depth=hparams['agent_params']['stack_depth'],
                                   k_mask_func=encoder.k_padding_mask))
        if hparams['agent_params']['num_layers'] > 1:
            rnn_layers.append(StackedRNNDropout(hparams['dropout']))
            rnn_layers.append(StackedRNNLayerNorm(hparams['d_model']))
    agent_net = nn.Sequential(encoder,
                              *rnn_layers,
                              RNNLinearOut(out_dim=demo_data_gen.n_characters,
                                           hidden_size=hparams['d_model'],
                                           bidirectional=False,
                                           bias=True))
    agent_net = agent_net.to(device)
    optimizer_agent_net = parse_optimizer(hparams['agent_params'], agent_net)
    selector = MolEnvProbabilityActionSelector(actions=demo_data_gen.all_characters)
    probs_reg = StateActionProbRegistry()
    init_state_args = {'num_layers': hparams['agent_params']['num_layers'],
                       'hidden_size': hparams['d_model'],
                       'stack_depth': hparams['agent_params']['stack_depth'],
                       'stack_width': hparams['agent_params']['stack_width'],
                       'unit_type': hparams['agent_params']['unit_type']}
    agent = PolicyAgent(model=agent_net,
                        action_selector=selector,
                        states_preprocessor=seq2tensor,
                        initial_state=agent_net_hidden_states_func,
                        initial_state_args=init_state_args,
                        apply_softmax=True,
                        probs_registry=probs_reg,
                        device=device)
    drl_alg = REINFORCE(model=agent_net,
                        optimizer=optimizer_agent_net,
                        initial_states_func=agent_net_hidden_states_func,
                        initial_states_args=init_state_args,
                        prior_data_gen=prior_data_gen,
                        device=device,
                        xent_lambda=hparams['xent_lambda'],
                        gamma=hparams['gamma'],
                        grad_clipping=hparams['reinforce_max_norm'],
                        lr_decay_gamma=hparams['lr_decay_gamma'],
                        lr_decay_step=hparams['lr_decay_step_size'],
                        delayed_reward=not hparams['use_monte_carlo_sim'])

    # Reward function entities
    reward_net = nn.Sequential(encoder,
                               RewardNetRNN(input_size=hparams['d_model'],
                                            hidden_size=hparams['reward_params']['d_model'],
                                            num_layers=hparams['reward_params']['num_layers'],
                                            bidirectional=hparams['reward_params']['bidirectional'],
                                            use_attention=hparams['reward_params']['use_attention'],
                                            dropout=hparams['dropout'],
                                            unit_type=hparams['reward_params']['unit_type'],
                                            use_smiles_validity_flag=hparams['reward_params']['use_validity_flag']))
    reward_net = reward_net.to(device)
    expert_model = XGBPredictor(hparams['expert_model_dir'])
    true_reward_func = get_jak2_max_reward if hparams['bias_mode'] == 'max' else get_jak2_min_reward
    reward_function = RewardFunction(reward_net,
                                     mc_policy=agent,
                                     actions=demo_data_gen.all_characters,
                                     device=device,
                                     use_mc=hparams['use_monte_carlo_sim'],
                                     mc_max_sims=hparams['monte_carlo_N'],
                                     expert_func=expert_model,
                                     no_mc_fill_val=hparams['no_mc_fill_val'],
                                     true_reward_func=true_reward_func,
                                     use_true_reward=hparams['use_true_reward'])
    optimizer_reward_net = parse_optimizer(hparams['reward_params'], reward_net)
    demo_data_gen.set_batch_size(hparams['reward_params']['demo_batch_size'])
    irl_alg = GuidedRewardLearningIRL(reward_net,
                                      optimizer_reward_net,
                                      demo_data_gen,
                                      k=hparams['reward_params']['irl_alg_num_iter'],
                                      agent_net=agent_net,
                                      agent_net_init_func=agent_net_hidden_states_func,
                                      agent_net_init_func_args=init_state_args,
                                      device=device)
    init_args = {'agent': agent,
                 'probs_reg': probs_reg,
                 'drl_alg': drl_alg,
                 'irl_alg': irl_alg,
                 'reward_func': reward_function,
                 'gamma': hparams['gamma'],
                 'episodes_to_train': hparams['episodes_to_train'],
                 'expert_model': expert_model,
                 'demo_data_gen': demo_data_gen,
                 'unbiased_data_gen': unbiased_data_gen,
                 'gen_args': {'num_layers': hparams['agent_params']['num_layers'],
                              'hidden_size': hparams['d_model'],
                              'num_dir': 1,
                              'stack_depth': hparams['agent_params']['stack_depth'],
                              'stack_width': hparams['agent_params']['stack_width'],
                              'has_stack': has_stack,
                              'has_cell': hparams['agent_params']['unit_type'] == 'lstm',
                              'device': device}}
    return init_args
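# Design note: agent_net and reward_net share the same `encoder` instance, so IRL
# updates to the reward network also move the embedding weights the agent uses.
# A rough driver (assumed calling convention; the training-loop methods are not
# shown in this snippet) would alternate the two algorithms returned in init_args:
#
#   args = initialize(hparams, demo_gen, unbiased_gen, prior_gen)
#   # ... alternate args['irl_alg'] (reward learning on demonstrations) and
#   # args['drl_alg'] (REINFORCE policy updates under the learned reward).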
def initialize(hparams, gen_data, *args, **kwargs):
    gen_data.set_batch_size(hparams['batch_size'])

    # Create stack-augmented transformer (decoder) layer(s)
    encoder = Encoder(vocab_size=gen_data.n_characters,
                      d_model=hparams['d_model'],
                      padding_idx=gen_data.char2idx[gen_data.pad_symbol],
                      dropout=hparams['dropout'],
                      return_tuple=True)
    attn_layers = []
    for _ in range(hparams['attn_layers']):
        attn_layers.append(StackDecoderLayer(d_model=hparams['d_model'],
                                             num_heads=hparams['attn_heads'],
                                             stack_depth=hparams['stack_depth'],
                                             stack_width=hparams['stack_width'],
                                             d_ff=hparams['d_ff'],
                                             dropout=hparams['dropout'],
                                             k_mask_func=encoder.k_padding_mask,
                                             use_memory=hparams['has_stack']))

    # Create classifier layers (post-attention layers)
    classifier_layers = []
    p = hparams['d_model']
    for dim in hparams['lin_dims']:
        classifier_layers.append(nn.Linear(p, dim))
        classifier_layers.append(nn.LayerNorm(dim))
        classifier_layers.append(nn.ReLU())
        classifier_layers.append(nn.Dropout(hparams['dropout']))
        p = dim
    classifier_layers.append(nn.Linear(p, gen_data.n_characters))
    # classifier_layers.append(LinearOut(encoder.embeddings_weight, p,
    #                                    hparams['d_model'], hparams['dropout']))

    # Create main model
    model = nn.Sequential(encoder,
                          PositionalEncoding(d_model=hparams['d_model'],
                                             dropout=hparams['dropout']),
                          # AttentionInitialize(d_hidden=hparams['d_model'],
                          #                     s_width=hparams['stack_width'],
                          #                     s_depth=hparams['stack_depth'],
                          #                     dvc=f'{device}:{dvc_id}'),
                          *attn_layers,
                          AttentionTerminal(),
                          *classifier_layers)
    if use_cuda:
        model = model.cuda()
    optimizer = parse_optimizer(hparams, model)
    # optimizer = get_std_opt(model, hparams['d_model'])
    # optimizer = AttentionOptimizer(model_size=hparams['d_model'],
    #                                factor=2,
    #                                warmup=4000,
    #                                optimizer=parse_optimizer(hparams, model))
    init_args = {'stack_width': hparams['stack_width'],
                 'stack_depth': hparams['stack_depth'],
                 'device': f'{device}:{dvc_id}',
                 'has_stack': hparams['has_stack']}
    return model, optimizer, gen_data, init_args
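# Usage sketch (values illustrative): the keys mirror those read above; `has_stack`
# toggles the stack memory inside each StackDecoderLayer.
#
#   hparams = {'batch_size': 64, 'd_model': 256, 'attn_layers': 4, 'attn_heads': 8,
#              'stack_depth': 10, 'stack_width': 50, 'd_ff': 1024, 'dropout': 0.1,
#              'lin_dims': [512, 256], 'has_stack': True}
#   model, optimizer, gen_data, init_args = initialize(hparams, gen_data)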
def initialize(hparams, data_gens, *args, **kwargs):
    for k in data_gens:
        data_gens[k].set_batch_size(hparams['batch_size'])
    gen_data = data_gens['prior_data']

    # Create main model
    encoder = Encoder(vocab_size=gen_data.n_characters,
                      d_model=hparams['d_model'],
                      padding_idx=gen_data.char2idx[gen_data.pad_symbol],
                      dropout=hparams['dropout'],
                      return_tuple=True)

    # Create RNN layers
    rnn_layers = []
    has_stack = True
    for i in range(1, hparams['num_layers'] + 1):
        rnn_layers.append(StackRNN(layer_index=i,
                                   input_size=hparams['d_model'],
                                   hidden_size=hparams['d_model'],
                                   has_stack=has_stack,
                                   unit_type=hparams['unit_type'],
                                   stack_width=hparams['stack_width'],
                                   stack_depth=hparams['stack_depth'],
                                   k_mask_func=encoder.k_padding_mask))
        if hparams['num_layers'] > 1:
            rnn_layers.append(StackedRNNDropout(hparams['dropout']))
            rnn_layers.append(StackedRNNLayerNorm(hparams['d_model']))
    model = nn.Sequential(encoder,
                          *rnn_layers,
                          RNNLinearOut(out_dim=gen_data.n_characters,
                                       hidden_size=hparams['d_model'],
                                       bidirectional=False,
                                       # encoder=encoder,
                                       # dropout=hparams['dropout'],
                                       bias=True))
    if use_cuda:
        model = model.cuda()
    optimizer = parse_optimizer(hparams, model)
    rnn_args = {'num_layers': hparams['num_layers'],
                'hidden_size': hparams['d_model'],
                'num_dir': 1,
                'device': device,
                'has_stack': has_stack,
                'has_cell': hparams['unit_type'] == 'lstm',
                'stack_width': hparams['stack_width'],
                'stack_depth': hparams['stack_depth'],
                'demo_data_gen': data_gens['demo_data'],
                'unbiased_data_gen': data_gens['unbiased_data'],
                'prior_data_gen': data_gens['prior_data'],
                'expert_model': {'pretraining': DummyPredictor(),
                                 'drd2': RNNPredictor(hparams['drd2'], device, True),
                                 'logp': RNNPredictor(hparams['logp'], device),
                                 'jak2_max': XGBPredictor(hparams['jak2']),
                                 'jak2_min': XGBPredictor(hparams['jak2'])
                                 }.get(hparams['exp_type']),
                'exp_type': hparams['exp_type']}
    return model, optimizer, rnn_args
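# Usage sketch (assumption): data_gens bundles the three generators under the keys
# used above; the same eager-instantiation caveat noted for the expert-model dict in
# the OneHotEncoder variant applies here as well.
#
#   model, optimizer, rnn_args = initialize(hparams, {'prior_data': prior_gen,
#                                                     'demo_data': demo_gen,
#                                                     'unbiased_data': unbiased_gen})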