def build(self, _):
    # These layers are always used; initialize with any given model's hidden_dim
    random_init = tf.random_normal_initializer(stddev=self.config['base']['hidden_dim'] ** -0.5)
    self.embed = tf.Variable(random_init([self.vocab_dim, self.config['base']['hidden_dim']]), dtype=tf.float32)
    self.prediction = tf.keras.layers.Dense(2)  # Two pointers: bug location and repair

    # Store for convenience
    self.pos_enc = tf.constant(util.positional_encoding(self.config['base']['hidden_dim'], 5000))

    # Next, parse the main 'model' from the config
    join_dicts = lambda d1, d2: {**d1, **d2}  # Small util function to combine configs
    base_config = self.config['base']
    desc = self.config['configuration'].split(' ')
    self.stack = []
    for kind in desc:
        if kind == 'rnn':
            self.stack.append(rnn.RNN(join_dicts(self.config['rnn'], base_config), shared_embedding=self.embed))
        elif kind == 'ggnn':
            self.stack.append(ggnn.GGNN(join_dicts(self.config['ggnn'], base_config), shared_embedding=self.embed))
        elif kind == 'great':
            self.stack.append(great_transformer.Transformer(join_dicts(self.config['transformer'], base_config), shared_embedding=self.embed))
        elif kind == 'transformer':
            # Same as above, but explicitly without bias_dim set -- defaults to a regular Transformer.
            joint_config = join_dicts(self.config['transformer'], base_config)
            joint_config['num_edge_types'] = None
            self.stack.append(great_transformer.Transformer(joint_config, shared_embedding=self.embed))
        else:
            raise ValueError('Unknown model component provided:', kind)
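# Illustrative config sketch (not taken from the original source): only the keys
# 'base', 'hidden_dim', 'configuration', 'rnn', 'ggnn', 'transformer' and
# 'num_edge_types' appear in the build() method above; the concrete values and
# any other hyperparameters each sub-config might hold are assumptions.
example_config = {
    'base': {'hidden_dim': 512},
    'configuration': 'ggnn great',  # space-separated stack: a GGNN followed by a GREAT transformer
    'rnn': {},                      # RNN-specific hyperparameters would go here
    'ggnn': {},                     # GGNN-specific hyperparameters
    'transformer': {'num_edge_types': 3},  # overridden to None for the plain 'transformer' component
}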
def __init__(self, image_width: int = 64, image_height: int = 64, latent_dim: int = 32, time_steps: int = 10):
    self.image_height = image_height
    self.image_width = image_width
    self.latent_dim = latent_dim
    self.time_steps = time_steps
    self.image_input_shape = (image_height, image_width, 3)

    self.model_vae = vae.VAE(self.image_input_shape, self.latent_dim)
    self.vae_train_config_dict = None

    self.model_rnn = rnn.RNN(self.latent_dim, self.latent_dim, self.time_steps, contain_mdn_layer=True)
    self.rnn_train_config_dict = None
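# Hypothetical usage sketch: the enclosing class name `WorldModel` is an assumption;
# only the constructor above is taken from the source. The VAE compresses each
# 64x64x3 frame into a 32-dimensional latent, and the MDN-RNN models sequences of
# those latents over 10 time steps.
world_model = WorldModel(image_width=64, image_height=64, latent_dim=32, time_steps=10)
vision, memory = world_model.model_vae, world_model.model_rnn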
def generate_model_and_transformers(params, class_dict):
    """
    Pick and construct the model along with the init and drop transformers, given the params.
    The init transformer converts the data in the PytorchDataset into tensors of the shape and size each model needs;
    the drop transformer randomly drops tokens at run time, when a sample is returned from the dataset, to simulate
    unknown words. Also selects the right device and moves the model onto it; GPU is preferred if available.
    :return: model, data transformer at dataset initialization, data transformer at run time
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    w2v_vocab, w2v_weights = w2v_matrix_vocab_generator(params["w2v"])
    c2v_vocab = None
    c2v_weights = None
    if params["c2v"] is not None:
        c2v_vocab, c2v_weights = w2v_matrix_vocab_generator(params["c2v"])
    init_data_transform = data_manager.InitTransform(w2v_vocab, class_dict, c2v_vocab)
    drop_data_transform = data_manager.DropTransform(0.001, w2v_vocab["<UNK>"], w2v_vocab["<padding>"])

    # needed for some models, given their architecture, i.e. CONV
    padded_sentence_length = 50
    # needed by models when using c2v embeddings
    padded_word_length = 30
    if params["model"] == "lstm":
        model = lstm.LSTM(device, w2v_weights, params["hidden_size"], len(class_dict), params["drop"],
                          params["bidirectional"], not params["unfreeze"], params["embedding_norm"], c2v_weights,
                          padded_word_length)
    elif params["model"] == "gru":
        model = gru.GRU(device, w2v_weights, params["hidden_size"], len(class_dict), params["drop"],
                        params["bidirectional"], not params["unfreeze"], params["embedding_norm"], c2v_weights,
                        padded_word_length)
    elif params["model"] == "rnn":
        model = rnn.RNN(device, w2v_weights, params["hidden_size"], len(class_dict), params["drop"],
                        params["bidirectional"], not params["unfreeze"], params["embedding_norm"], c2v_weights,
                        padded_word_length)
    elif params["model"] == "lstm2ch":
        model = lstm2ch.LSTM2CH(device, w2v_weights, params["hidden_size"], len(class_dict), params["drop"],
                                params["bidirectional"], params["embedding_norm"])
    elif params["model"] == "encoder":
        tag_embedding_size = 20
        model = encoder.EncoderDecoderRNN(device, w2v_weights, tag_embedding_size, params["hidden_size"],
                                          len(class_dict), params["drop"], params["bidirectional"],
                                          not params["unfreeze"], params["embedding_norm"], params["embedding_norm"])
    elif params["model"] == "attention":
        tag_embedding_size = 20
        model = attention.Attention(device, w2v_weights, tag_embedding_size, params["hidden_size"], len(class_dict),
                                    params["drop"], params["bidirectional"], not params["unfreeze"],
                                    params["embedding_norm"], params["embedding_norm"],
                                    padded_sentence_length=padded_sentence_length)
    elif params["model"] == "conv":
        model = conv.CONV(device, w2v_weights, params["hidden_size"], len(class_dict), padded_sentence_length,
                          params["drop"], params["bidirectional"], not params["unfreeze"], params["embedding_norm"])
    elif params["model"] == "fcinit":
        model = fcinit.FCINIT(device, w2v_weights, params["hidden_size"], len(class_dict), padded_sentence_length,
                              params["drop"], params["bidirectional"], not params["unfreeze"], params["embedding_norm"])
    elif params["model"] == "lstmcrf":
        model = lstmcrf.LstmCrf(device, w2v_weights, class_dict, params["hidden_size"], params["drop"],
                                params["bidirectional"], not params["unfreeze"], params["embedding_norm"],
                                c2v_weights, padded_word_length)
    else:
        raise ValueError("Unknown model type: %s" % params["model"])

    model = model.to(device)
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    num_params = sum([np.prod(p.size()) for p in model_parameters])
    print("total trainable parameters %i" % num_params)
    return model, init_data_transform, drop_data_transform
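# Illustrative call (the concrete values are assumptions; only the key names are
# read by the function above, and `class_dict` is assumed to map tag labels to indices):
example_params = {
    "model": "lstm",
    "w2v": "path/to/w2v_embeddings",  # hypothetical embedding file path
    "c2v": None,                      # no character-level embeddings
    "hidden_size": 200,
    "drop": 0.5,
    "bidirectional": True,
    "unfreeze": False,
    "embedding_norm": 10,
}
model, init_transform, drop_transform = generate_model_and_transformers(example_params, class_dict)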
# -*- coding: utf-8 -*-
from experiment_framework import Experiment
from models import rnn
from utils import default_config as dc
from datasets import fb_dataset as fbd

dataset = fbd.FullBeerDataset()
model1 = rnn.RNN("m-id-1", dc.MODEL_MAPPING, dc.MODEL_ARGS)

experiment1 = Experiment("e1").with_model(model1).with_config(dc.CONFIG).with_data(dataset).run()