Esempio n. 1
0
	def build(self, _):
		"""Create the shared embedding, the prediction head and the stack of model components.

		The positional argument is ignored. The stack is described by
		``self.config['configuration']``, a whitespace-separated sequence of
		component kinds: ``rnn``, ``ggnn``, ``great`` or ``transformer``.
		Each component receives its own config section merged with the
		``base`` section, plus the shared embedding variable.

		Raises:
			ValueError: if the configuration is empty or names an unknown component kind.
		"""
		base_config = self.config['base']
		hidden_dim = base_config['hidden_dim']

		# These layers are always used; initialize with any given model's hidden_dim
		random_init = tf.random_normal_initializer(stddev=hidden_dim ** -0.5)
		self.embed = tf.Variable(random_init([self.vocab_dim, hidden_dim]), dtype=tf.float32)
		self.prediction = tf.keras.layers.Dense(2) # Two pointers: bug location and repair

		# Store for convenience (5000 is presumably the maximum encoded position -- confirm against util.positional_encoding)
		self.pos_enc = tf.constant(util.positional_encoding(hidden_dim, 5000))

		def component_config(section):
			# Fresh dict per call; base settings are unpacked last, so they win on key collisions.
			return {**self.config[section], **base_config}

		# Next, parse the main 'model' from the config.
		# split() without an argument tolerates repeated/leading/trailing whitespace.
		desc = self.config['configuration'].split()
		if not desc:
			raise ValueError('No model component provided in configuration')

		self.stack = []
		for kind in desc:
			if kind == 'rnn':
				component = rnn.RNN(component_config('rnn'), shared_embedding=self.embed)
			elif kind == 'ggnn':
				component = ggnn.GGNN(component_config('ggnn'), shared_embedding=self.embed)
			elif kind == 'great':
				component = great_transformer.Transformer(component_config('transformer'), shared_embedding=self.embed)
			elif kind == 'transformer':
				# Same as 'great', but with num_edge_types explicitly cleared;
				# per the original author's note this defaults to a regular Transformer.
				joint_config = component_config('transformer')
				joint_config['num_edge_types'] = None
				component = great_transformer.Transformer(joint_config, shared_embedding=self.embed)
			else:
				# Single formatted message instead of a tuple of exception args.
				raise ValueError('Unknown model component provided: {!r}'.format(kind))
			self.stack.append(component)
    def __init__(self, image_width: int = 64, image_height: int = 64, latent_dim: int = 32, time_steps: int = 10):
        """Record the size settings and instantiate the VAE and RNN sub-models.

        Args:
            image_width: Stored as ``self.image_width``; second entry of the input shape.
            image_height: Stored as ``self.image_height``; first entry of the input shape.
            latent_dim: Passed to the VAE, and used for the first two RNN arguments.
            time_steps: Third positional argument of the RNN.
        """
        self.image_height, self.image_width = image_height, image_width
        self.latent_dim, self.time_steps = latent_dim, time_steps

        # Height precedes width; the trailing 3 is presumably the channel count -- TODO confirm.
        self.image_input_shape = (image_height, image_width, 3)

        # Each sub-model is paired with a training config that starts out unset.
        self.model_vae = vae.VAE(self.image_input_shape, self.latent_dim)
        self.vae_train_config_dict = None

        self.model_rnn = rnn.RNN(
            self.latent_dim,
            self.latent_dim,
            self.time_steps,
            contain_mdn_layer=True,
        )
        self.rnn_train_config_dict = None
Esempio n. 3
0
def generate_model_and_transformers(params, class_dict):
    """
    Pick and construct the model and the init and drop transformers given the params.

    The init transformer makes it so that the data in the PytorchDataset is in the tensors of
    shape and sizes needed; the drop transformer randomly drops tokens at run time when a sample
    is returned from the dataset, to simulate unknown words.
    Also selects the device and puts the model on it: "cuda:0" when CUDA is available,
    otherwise the CPU. The number of trainable parameters is printed before returning.

    :param params: run settings; the keys read here are "w2v", "c2v", "model", "hidden_size",
        "drop", "bidirectional", "embedding_norm" and (for most models) "unfreeze"
    :param class_dict: class dictionary; handed to the init transformer, and its length
        (or, for "lstmcrf", the dictionary itself) is handed to the model
    :return: model, data transformer at dataset initialization, data transformer at run time
    :raises ValueError: if params["model"] is not one of the supported model names
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    w2v_vocab, w2v_weights = w2v_matrix_vocab_generator(params["w2v"])

    # Character-level embeddings are optional; both stay None when not configured.
    c2v_vocab = None
    c2v_weights = None
    if params["c2v"] is not None:
        c2v_vocab, c2v_weights = w2v_matrix_vocab_generator(params["c2v"])

    init_data_transform = data_manager.InitTransform(w2v_vocab, class_dict,
                                                     c2v_vocab)
    # 0.001 is presumably the per-token drop probability -- confirm in DropTransform.
    drop_data_transform = data_manager.DropTransform(0.001, w2v_vocab["<UNK>"],
                                                     w2v_vocab["<padding>"])

    # needed for some models, given their architecture, i.e. CONV
    padded_sentence_length = 50
    # needed by models when using c2v embeddings
    padded_word_length = 30
    # used by the models that embed tags (encoder, attention)
    tag_embedding_size = 20

    model_name = params["model"]
    num_classes = len(class_dict)

    if model_name == "lstm":
        model = lstm.LSTM(device, w2v_weights, params["hidden_size"],
                          num_classes, params["drop"],
                          params["bidirectional"], not params["unfreeze"],
                          params["embedding_norm"], c2v_weights,
                          padded_word_length)
    elif model_name == "gru":
        model = gru.GRU(device, w2v_weights, params["hidden_size"],
                        num_classes, params["drop"],
                        params["bidirectional"], not params["unfreeze"],
                        params["embedding_norm"], c2v_weights,
                        padded_word_length)
    elif model_name == "rnn":
        model = rnn.RNN(device, w2v_weights, params["hidden_size"],
                        num_classes, params["drop"],
                        params["bidirectional"], not params["unfreeze"],
                        params["embedding_norm"], c2v_weights,
                        padded_word_length)
    elif model_name == "lstm2ch":
        # NOTE(review): unlike the other models, no freeze flag is passed here -- confirm
        # against the LSTM2CH signature.
        model = lstm2ch.LSTM2CH(device, w2v_weights, params["hidden_size"],
                                num_classes, params["drop"],
                                params["bidirectional"],
                                params["embedding_norm"])
    elif model_name == "encoder":
        # NOTE(review): "embedding_norm" is passed twice in a row -- verify that both
        # positional slots of EncoderDecoderRNN are meant to share this value.
        model = encoder.EncoderDecoderRNN(
            device, w2v_weights, tag_embedding_size, params["hidden_size"],
            num_classes, params["drop"], params["bidirectional"],
            not params["unfreeze"], params["embedding_norm"],
            params["embedding_norm"])
    elif model_name == "attention":
        # NOTE(review): "embedding_norm" is passed twice in a row here as well -- verify.
        model = attention.Attention(
            device,
            w2v_weights,
            tag_embedding_size,
            params["hidden_size"],
            num_classes,
            params["drop"],
            params["bidirectional"],
            not params["unfreeze"],
            params["embedding_norm"],
            params["embedding_norm"],
            padded_sentence_length=padded_sentence_length)
    elif model_name == "conv":
        model = conv.CONV(device, w2v_weights, params["hidden_size"],
                          num_classes, padded_sentence_length,
                          params["drop"], params["bidirectional"],
                          not params["unfreeze"], params["embedding_norm"])
    elif model_name == "fcinit":
        model = fcinit.FCINIT(device, w2v_weights, params["hidden_size"],
                              num_classes, padded_sentence_length,
                              params["drop"], params["bidirectional"],
                              not params["unfreeze"], params["embedding_norm"])
    elif model_name == "lstmcrf":
        model = lstmcrf.LstmCrf(device, w2v_weights, class_dict,
                                params["hidden_size"], params["drop"],
                                params["bidirectional"],
                                not params["unfreeze"],
                                params["embedding_norm"], c2v_weights,
                                padded_word_length)
    else:
        # Without this branch an unknown name crashed later with UnboundLocalError on `model`.
        raise ValueError("unknown model type: %r" % (model_name,))

    model = model.to(device)

    # Count only parameters that will receive gradients; kept in its own name so the
    # `params` argument is not overwritten.
    trainable_count = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("total trainable parameters %i" % trainable_count)
    return model, init_data_transform, drop_data_transform
Esempio n. 4
0
# -*- coding: utf-8 -*-
from experiment_framework import Experiment
from models import rnn
from utils import default_config as dc
from datasets import fb_dataset as fbd

# Inputs for the experiment: the dataset and the RNN model built from the default config.
dataset = fbd.FullBeerDataset()
model1 = rnn.RNN("m-id-1", dc.MODEL_MAPPING, dc.MODEL_ARGS)

# Configure the experiment step by step and run it immediately.
# NOTE: experiment1 is bound to whatever run() returns.
experiment1 = (
    Experiment("e1")
    .with_model(model1)
    .with_config(dc.CONFIG)
    .with_data(dataset)
    .run()
)