def __init__(self, vocab, d_model=512, d_k=64, d_v=64, d_ff=2048, dropout_rate=0.1):
    """Init TransformerModel.

    @param vocab (Vocab): Vocabulary object containing src and tgt languages
                          (see vocab.py for documentation).
    @param d_model (int): Embedding size (dimensionality).
    @param d_k (int): Query & Key size (dimensionality).
    @param d_v (int): Value size (dimensionality).
    @param d_ff (int): Feed-Forward layer size (dimensionality).
    @param dropout_rate (float): Dropout probability, for attention.
    """
    super(TransformerModel, self).__init__()
    # Source/target embedding tables built from the shared vocabulary.
    self.model_embeddings = ModelEmbeddings(d_model, vocab)
    self.vocab = vocab
    self.dropout = nn.Dropout(dropout_rate)
    # Encoder/decoder stacks and the output projection over the target vocab.
    self.encoder = Encoder()
    self.decoder = Decoder()
    self.outputlayer = Generator(d_model, len(vocab.tgt))
    # Label-smoothed loss over the target vocabulary.
    self.crit = LabelSmoothing(size=len(vocab.tgt), smoothing=0.1)
    # Record hyper-parameters for later inspection / checkpointing.
    self.d_model = d_model
    self.d_k = d_k
    self.d_v = d_v
    self.d_ff = d_ff
    self.dropout_rate = dropout_rate
def unit_encoder():
    """Smoke-test the Encoder on one random batch of embeddings.

    NOTE(review): relies on ``batch_size``, ``seq`` and ``embedding_size``
    being defined in an enclosing/module scope — confirm against caller.
    """
    from EncoderDecoder import Encoder

    encoder = Encoder()
    embeddings = torch.randn(batch_size, seq, embedding_size)
    out = encoder(embeddings)
def __init__(self):
    """Assemble the model: two encoders, a decoder, attention and output heads."""
    super(Model, self).__init__()
    # Sub-modules are created in a fixed order so parameter initialisation
    # (which draws from the global RNG) stays reproducible.
    self.encoder = Encoder()
    self.encoder2 = Encoder2()
    self.decoder = Decoder()
    self.attn = Attn(HIDDEN_SIZE)
    # Fuses decoder output with the attention context vector.
    self.concat = nn.Linear(HIDDEN_SIZE * 2, HIDDEN_SIZE)
    # Final scalar score per position.
    self.out = nn.Linear(HIDDEN_SIZE, 1)
def __init__(self, args):
    """Bahdanau-attention seq2seq: an encoder plus an attentional decoder.

    @param args: configuration namespace carrying vocabulary mappings,
                 vocabulary sizes, the decoding length limit and the device.
    """
    super(BahdanauAttSeq2Seq, self).__init__()
    self.args = args
    # Mirror the frequently-used config fields as plain attributes.
    for field in ("src_word2id", "tgt_id2word", "src_vocab_size",
                  "tgt_vocab_size", "max_decode_len", "device"):
        setattr(self, field, getattr(args, field))
    self.encoder = Encoder(args)
    self.decoder = BahdanauAttDecoder(args)
def __init__(self, args):
    """Luong-attention seq2seq: an encoder plus an attentional decoder.

    @param args: configuration namespace carrying vocabulary mappings,
                 vocabulary sizes, LSTM width, decode limit and the device.
    """
    super(LuongAttSeq2Seq, self).__init__()
    self.args = args
    # Mirror the frequently-used config fields as plain attributes.
    for field in ("src_word2id", "tgt_id2word", "src_vocab_size",
                  "tgt_vocab_size", "max_decode_len", "lstm_hidden_dim",
                  "device"):
        setattr(self, field, getattr(args, field))
    self.encoder = Encoder(args)
    self.decoder = LuongAttDecoder(args)
def __init__(self, args):
    """Plain seq2seq (no attention): encoder plus decoder.

    @param args: configuration namespace carrying vocabulary mappings,
                 vocabulary sizes, decode limit, device and the
                 teacher-forcing ratio used during training.
    """
    super(SimpleSeq2Seq, self).__init__()
    self.args = args
    # Mirror the frequently-used config fields as plain attributes.
    for field in ("src_word2id", "tgt_id2word", "src_vocab_size",
                  "tgt_vocab_size", "max_decode_len", "device",
                  "teacher_forcing_ratio"):
        setattr(self, field, getattr(args, field))
    self.encoder = Encoder(args)
    self.decoder = Decoder(args)
def __init__(self, source_vocab_len, target_vocab_len, str_to_index, index_to_str, embedding_size: int = 512, num_heads: int = 6, depth_qk: int = 64, depth_v: int = 64, device=torch.device('cuda')): super().__init__() # TODO see what values a needed where / adjust or remove initialize defautl values self.d_model = embedding_size self.num_heads = num_heads self.depth_qk = depth_qk self.depth_v = depth_v self.device = device # Initiliaze embeddings and their look up table self.src_embeddings_lookup = nn.Embedding(source_vocab_len, self.d_model).to(self.device) self.target_embeddings_lookup = nn.Embedding( target_vocab_len, self.d_model).to(self.device) self.str_to_index = str_to_index self.index_to_str = index_to_str # self.start = {'src': str_to_index.src['<sos>'], 'target': str_to_index.target['<sos>'] } self.end = { 'src': str_to_index.src['<eos>'], 'target': str_to_index.target['<eos>'] } self.mask = Mask(device=self.device).to(self.device) self.pad_mask = Pad_Mask(self.str_to_index, self.d_model, device=self.device).to(self.device) # test self.encoder = Encoder(N_layers=5).to(self.device) self.decoder = Decoder(N_layers=5).to(self.device) # in:( batch_size x sequence_size x embed_size ) ->out:( batch_size x sequence_size x target_vocab_len ) self.linear_to_vocab_dim = nn.Linear(embedding_size, target_vocab_len).to(self.device) self.positional_encodings = Positional_Encodings( self.d_model, self.device).to(self.device)
def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    """Assemble a full encoder-decoder Transformer.

    @param src_vocab (int): source vocabulary size
    @param tgt_vocab (int): target vocabulary size
    @param N (int): number of encoder and of decoder layers
    @param d_model (int): model/embedding dimensionality
    @param d_ff (int): feed-forward inner dimensionality
    @param h (int): number of attention heads
    @param dropout (float): dropout probability
    @return: an initialised EncoderDecoder module
    """
    # Prototype sub-layers; deep copies give every layer its own weights.
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    clone = copy.deepcopy

    encoder = Encoder(EncoderLayer(d_model, clone(attn), clone(ff), dropout), N)
    decoder = Decoder(
        DecoderLayer(d_model, clone(attn), clone(attn), clone(ff), dropout), N)
    src_embed = nn.Sequential(Embeddings(d_model, src_vocab), clone(position))
    tgt_embed = nn.Sequential(Embeddings(d_model, tgt_vocab), clone(position))
    model = EncoderDecoder(encoder, decoder, src_embed, tgt_embed,
                           Generator(d_model, tgt_vocab))

    # This was important from their code:
    # initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
def __init__(self, X_dim, Z_dim, IOh_dims_Enc, IOh_dims_Dec,
             NL_types_Enc, NL_types_Dec, mb_size=64, beta=1, Nwu=1,
             lr=1e-3, bernoulli=True, gaussian=False,
             noiseIn=False, noiseGain=0.):
    """Create a VAE structure by setting all attributes and calling Encoder/Decoder constructors.

    On any invalid configuration, prints an ERROR_VAE message and returns
    early, leaving ``self.created`` False.

    Args:
        X_dim (int): input (data) and output dimension (e.g. 1024)
        Z_dim (int): latent space dimension (e.g. 10)
        IOh_dims_Enc (list): IO dimensions of encoder's layers (e.g. [1024, 600, 10])
        IOh_dims_Dec (list): IO dimensions of decoder's layers (e.g. [10, 600, 1024])
        NL_types_Enc (list): layers' non linear functions of encoder (e.g. ['relu'])
        NL_types_Dec (list): layers' non linear functions of decoder (e.g. ['relu', 'sigmoid'])
        mb_size (int): minibatch size (default 64)
        beta (float): coefficient for regularization term (e.g. kll) in total loss (default 1)
        Nwu (int): warm-up time in epochs number (default 1)
        lr (float): learning rate (default 0.001)
        bernoulli (bool): flag for bernoulli VAE type (default True)
        gaussian (bool): flag for gaussian VAE type (default False)
        noiseIn (bool): noise input decoder data when training (default False)
        noiseGain (float): noise gain if noiseIn is True (default 0.)
    """
    # superclass init
    super(VAE, self).__init__()
    self.created = False

    self.IOh_dims_Enc = IOh_dims_Enc
    self.IOh_dims_Dec = IOh_dims_Dec
    self.encoder = Encoder(X_dim, self.IOh_dims_Enc, Z_dim)
    self.decoder = Decoder(Z_dim, self.IOh_dims_Dec, X_dim, bernoulli, gaussian)
    if not self.encoder.created or not self.decoder.created:
        print("ERROR_VAE: Wrong encoder/decoder structure")
        return

    # check that NL_types length & layers number are the same
    self.NL_funcE = NL_types_Enc
    self.NL_funcD = NL_types_Dec
    if len(self.NL_funcE) != self.encoder.nb_h:
        print("ERROR_VAE: not enough or too many NL functions in encoder")
        return
    if len(self.NL_funcD) != self.decoder.nb_h:
        print("ERROR_VAE: not enough or too many NL functions in decoder")
        return

    # check that each element of NL_types exists in 'torch.nn.functional'
    for index_h in range(self.encoder.nb_h):
        try:
            getattr(F, self.NL_funcE[index_h])
        except AttributeError:
            print("ERROR_VAE: Wrong encoder NL function name")
            return
    for index_h in range(self.decoder.nb_h):
        try:
            getattr(F, self.NL_funcD[index_h])
        except AttributeError:
            # BUGFIX: this message previously said "encoder" (copy-paste).
            print("ERROR_VAE: Wrong decoder NL function name")
            return

    # store encoder and decoder parameters
    # NOTE(review): `self.parameters` shadows any inherited `parameters`
    # attribute/method; kept as-is because callers rely on this list.
    self.parameters = []
    for nb_h in range(self.encoder.nb_h):
        self.parameters.append(self.encoder.weights_h[nb_h])
        self.parameters.append(self.encoder.bias_h[nb_h])
    self.parameters.append(self.encoder.weight_mu)
    self.parameters.append(self.encoder.bias_mu)
    self.parameters.append(self.encoder.weight_logSigma)
    self.parameters.append(self.encoder.bias_logSigma)
    for nb_h in range(self.decoder.nb_h):
        self.parameters.append(self.decoder.weights_h[nb_h])
        self.parameters.append(self.decoder.bias_h[nb_h])
    # gaussian decoders carry an extra mu/logSigma output layer
    if self.decoder.gaussian and not self.decoder.bernoulli:
        self.parameters.append(self.decoder.weight_mu)
        self.parameters.append(self.decoder.bias_mu)
        self.parameters.append(self.decoder.weight_logSigma)
        self.parameters.append(self.decoder.bias_logSigma)

    # variables to infer
    self.z_mu = None
    self.z_logSigma = None
    self.X_sample = None
    self.X_mu = None
    self.X_logSigma = None

    # minibatch size and learning rate
    self.mb_size = mb_size
    self.lr = lr

    # regularization & warm-up (avoid zero division in the increment)
    self.beta = beta
    if Nwu <= 0:
        Nwu = 1
    self.N_wu = Nwu
    self.beta_inc = float(beta) / float(Nwu)
    self.beta_wu = 0

    # VAE training state
    self.epoch_nb = 0
    self.recon_loss = []
    self.regul_loss = []
    self.noise_in = noiseIn
    self.noise_gain = noiseGain

    # flags on vae creation and persistence state
    self.created = True
    self.trained = False
    self.saved = False
    self.loaded = False
def test_wrong_NN(self):
    """An Encoder given a dims list with no hidden layer must fail to build."""
    in_dim, out_dim = 513, 6
    dims = [in_dim, out_dim]  # input and output only: no hidden layer
    enc = Encoder(in_dim, dims, out_dim)
    self.assertFalse(enc.created)
def test_multiLayerNN(self):
    """An Encoder built from dims [513, 128, 256, 64, 6] has 3 hidden layers."""
    inputDim = 513
    outputDim = 6
    dimValues = [513, 128, 256, 64, 6]
    e = Encoder(inputDim, dimValues, outputDim)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(e.nb_h, 3)
def test_good_IODim(self):
    """An Encoder whose dims list matches the declared IO sizes builds fine."""
    in_dim, out_dim = 513, 6
    dims = [in_dim, 128, out_dim]  # one hidden layer, consistent ends
    enc = Encoder(in_dim, dims, out_dim)
    self.assertTrue(enc.created)