def __init__(self, layer_e, hidden1, Zdim, layer_l, hidden3, layer_d,
             hidden4, logchange=True, Type='ZINB', n_centroids=4,
             penality="GMM"):
    """Concatenation-based multimodal VAE with a GMM prior over the latent space.

    Builds a data encoder, a library-size encoder, and a likelihood-specific
    decoder, then registers the learnable Gaussian-mixture prior parameters.
    """
    super(scMVAE_Concat, self).__init__()

    # Sub-networks: data encoder, scalar library-size encoder, decoder.
    self.encoder_x = Encoder(layer_e, hidden1, Zdim)
    self.encoder_l = Encoder(layer_l, hidden3, 1)
    if Type == 'ZINB':
        self.decoder_x = Decoder_ZINB(layer_d, hidden4, layer_e[0])
    else:
        # Generic decoder parameterized by the requested likelihood type.
        self.decoder_x = Decoder(layer_d, hidden4, layer_e[0], Type)

    # Configuration.
    self.logchange = logchange
    self.Type = Type
    self.penality = penality
    self.n_centroids = n_centroids

    # GMM prior parameters: mixture weights p(c), component means, variances.
    self.pi = nn.Parameter(torch.ones(n_centroids) / n_centroids)
    self.mu_c = nn.Parameter(torch.zeros(Zdim, n_centroids))
    self.var_c = nn.Parameter(torch.ones(Zdim, n_centroids))
def load_encoder(cfg, enc_type):
    """Build an Encoder and optionally restore its weights from a checkpoint.

    When ``cfg.disent.load`` is set, weights are read from
    ``<model_path>/enc_<enc_type>/checkpoint_<epoch_num>.tar``.
    The model is always moved to ``cfg.disent.device`` before returning.
    """
    model = Encoder()
    if cfg.disent.load:
        checkpoint = (Path(cfg.disent.model_path)
                      / f"enc_{enc_type}"
                      / f"checkpoint_{cfg.disent.epoch_num}.tar")
        state = torch.load(checkpoint, map_location=cfg.disent.device.type)
        model.load_state_dict(state)
    model = model.to(cfg.disent.device)
    return model
def __init__(self, params):
    """Seq2seq model: encoder, attention, and decoder share one embedding matrix."""
    super(Seq2Seq, self).__init__()
    self.params = params
    self.embedding_matrix = load_embedding_matrix()
    vocab_size = params["vocab_size"]
    vector_dim = params["vector_dim"]
    self.encoder = Encoder(vocab_size, vector_dim, params["encoder_units"],
                           self.embedding_matrix)
    self.attention = Attention(params["attn_units"])
    self.decoder = Decoder(vocab_size, vector_dim, params["decoder_units"],
                           self.embedding_matrix)
def __init__(self, params):
    """Pointer-generator network with Luong attention."""
    super(PGN, self).__init__()
    self.params = params
    vocab_size = params["vocab_size"]
    embed_size = params["embed_size"]
    batch_size = params["batch_size"]
    self.encoder = Encoder(vocab_size, embed_size, params["enc_units"],
                           batch_size)
    self.attention = LuongAttention(params["attn_units"])
    self.decoder = Decoder(vocab_size, embed_size, params["dec_units"],
                           batch_size)
    # Computes p_gen, the generate-vs-copy switch.
    self.pointer = Pointer()
def __init__(self, params, embeddings_matrix):
    """Pointer-generator network with Bahdanau attention and shared pretrained embeddings."""
    super(PGN, self).__init__()
    self.params = params
    vocab_size = params["vocab_size"]
    embed_size = params["embed_size"]
    batch_size = params["batch_size"]
    self.encoder = Encoder(vocab_size, embed_size, params["enc_units"],
                           batch_size, embeddings_matrix)
    self.attention = BahdanauAttention(params["attn_units"])
    self.decoder = Decoder(vocab_size, embed_size, params["dec_units"],
                           batch_size, embeddings_matrix)
    # Computes p_gen, the generate-vs-copy switch.
    self.pointer = Pointer()
def __init__(self, params):
    """Pointer-generator network with optional coverage mechanism."""
    super(PGN, self).__init__()
    self.params = params
    vocab_size = params["vocab_size"]
    embed_size = params["embed_size"]
    batch_size = params["batch_size"]
    self.encoder = Encoder(vocab_size, embed_size, params["enc_units"],
                           batch_size)
    self.attention = BahdanauAttention(params["attn_units"])
    # self.coverage only exists when enabled — callers must guard on
    # params["coverage"] before touching it.
    if params["coverage"]:
        self.coverage = Coverage(params["attn_units"])
    self.decoder = Decoder(vocab_size, embed_size, params["dec_units"],
                           batch_size, params["use_stats"])
    self.pointer = Pointer()
def __init__(
    self,
    src_lang,
    trg_lang,
    max_len=32,
    hid_dim=256,
    enc_layers=3,
    dec_layers=3,
    enc_heads=8,
    dec_heads=8,
    enc_pf_dim=512,
    dec_pf_dim=512,
    enc_dropout=0.1,
    dec_dropout=0.1,
    lr=0.0005,
    **kwargs,  # absorbs and discards any extra arguments
):
    """Translation transformer built from source/target Lang vocabularies."""
    super().__init__()
    # Record hyperparameters, but drop the language objects from hparams
    # (they are kept as plain attributes instead).
    self.save_hyperparameters()
    for key in ("src_lang", "trg_lang"):
        del self.hparams[key]
    self.src_lang = src_lang
    self.trg_lang = trg_lang

    # NOTE(review): `device` is resolved from an enclosing scope — confirm
    # it is defined at module level.
    self.encoder = Encoder(
        src_lang.n_words, hid_dim, enc_layers, enc_heads,
        enc_pf_dim, enc_dropout, device,
    )
    self.decoder = Decoder(
        trg_lang.n_words, hid_dim, dec_layers, dec_heads,
        dec_pf_dim, dec_dropout, device,
    )
    # Padding positions do not contribute to the loss.
    self.criterion = nn.CrossEntropyLoss(ignore_index=self.trg_lang.PAD_idx)
    self.initialize_weights()
    self.to(device)
def __init__(self, features, adj_lists, ft_size, n_h, activation,
             num_sample=(10, 10), skip_connection=False, gcn=True):
    """Two-layer GraphSAGE-style DGI model with readout and discriminator.

    Fixes vs. original: the mutable default argument ``num_sample=[10, 10]``
    is replaced by an immutable tuple (indexing behavior is unchanged), and
    the repeated ``torch.cuda.is_available()`` calls are hoisted into one.

    Args:
        features: node feature lookup (tensor or callable).
        adj_lists: adjacency lists for neighbor sampling.
        ft_size: input feature dimensionality.
        n_h: hidden/embedding size.
        activation: nonlinearity passed to both encoders.
        num_sample: neighbors sampled per layer (layer1, layer2).
        skip_connection: if True, embeddings are concatenated (2 * n_h wide).
        gcn: use GCN-style aggregation instead of concat aggregation.
    """
    super(DGI_ind, self).__init__()
    self.features = features
    self.skip_connection = skip_connection
    use_cuda = torch.cuda.is_available()

    # Layer 1: aggregate raw features, then encode.
    self.agg1 = MeanAggregator(features, cuda=use_cuda, gcn=gcn, name='l1')
    self.enc1 = Encoder(features, ft_size, n_h, adj_lists, self.agg1,
                        num_sample=num_sample[0], gcn=gcn, cuda=use_cuda,
                        activation=activation,
                        skip_connection=skip_connection, name='l2')

    # Layer 2: aggregate layer-1 embeddings, then encode on top of enc1.
    self.agg2 = MeanAggregator(lambda nodes: self.enc1(nodes),
                               cuda=use_cuda, gcn=gcn, name='l3')
    self.enc2 = Encoder(lambda nodes: self.enc1(nodes), self.enc1.embed_dim,
                        n_h, adj_lists, self.agg2, num_sample=num_sample[1],
                        base_model=self.enc1, gcn=gcn, cuda=use_cuda,
                        activation=activation,
                        skip_connection=skip_connection, name='l4')

    # Graph-level summary and DGI discriminator.
    self.read = AvgReadout()
    self.sigm = nn.Sigmoid()
    # Skip connections double the embedding width seen by the discriminator.
    self.disc = Discriminator(2 * n_h if skip_connection else n_h)
def init_model(self):
    """Instantiate encoder/decoder from self.config and initialize weights.

    Attention and feed-forward parameters get Xavier init (normal or uniform
    per config); bias/vector parameters are zeroed; layer-norm parameters
    keep their defaults.
    """
    cfg = self.config
    embed_dim = cfg['embed_dim']
    ff_dim = cfg['ff_dim']
    dropout = cfg['dropout']

    self.encoder = Encoder(cfg['num_enc_layers'], cfg['num_enc_heads'],
                           embed_dim, ff_dim, dropout=dropout)
    self.decoder = Decoder(cfg['num_dec_layers'], cfg['num_dec_heads'],
                           embed_dim, ff_dim, dropout=dropout)

    # Pick the Xavier variant; leave layer norm alone.
    if cfg['init_type'] == ac.XAVIER_NORMAL:
        init_func = nn.init.xavier_normal_
    else:
        init_func = nn.init.xavier_uniform_

    init_targets = [
        self.encoder.self_atts, self.encoder.pos_ffs,
        self.decoder.self_atts, self.decoder.pos_ffs,
        self.decoder.enc_dec_atts,
    ]
    for module in init_targets:
        for p in module.parameters():
            if p.dim() > 1:
                init_func(p)
            else:
                nn.init.constant_(p, 0.)
class Seq2Seq(tf.keras.Model):
    """Attention-based seq2seq model sharing one pretrained embedding matrix."""

    def __init__(self, params):
        super(Seq2Seq, self).__init__()
        self.params = params
        self.embedding_matrix = load_embedding_matrix()
        self.encoder = Encoder(params["vocab_size"], params["vector_dim"],
                               params["encoder_units"], self.embedding_matrix)
        self.attention = Attention(params["attn_units"])
        self.decoder = Decoder(params["vocab_size"], params["vector_dim"],
                               params["decoder_units"], self.embedding_matrix)

    def call_encoder(self, enc_inp):
        """Run the encoder over a batch; returns (enc_output, final hidden state)."""
        enc_hidden = self.encoder.init_hidden(self.params['batch_size'])
        enc_output, enc_hidden = self.encoder(enc_inp, enc_hidden)
        return enc_output, enc_hidden

    def call_decoder(self, dec_input, dec_hidden, enc_output, target):
        """Decode the whole target sequence with teacher forcing.

        Returns (stacked predictions of shape (batch, steps, vocab), last hidden).
        """
        predictions = []
        # Steps run from 1 to max_y_length + 1 inclusive; target is indexed at
        # t each step, so target must be at least max_y_length + 2 long.
        for t in range(1, self.params['max_y_length'] + 2):
            context_vector, _ = self.attention(dec_hidden, enc_output)
            predict, dec_hidden = self.decoder(dec_input, dec_hidden,
                                               enc_output, context_vector)
            dec_input = tf.expand_dims(target[:, t], 1)  # teacher forcing
            predictions.append(predict)
        return tf.stack(predictions, 1), dec_hidden

    def call_one_step_decoder(self, dec_input, dec_hidden, enc_output):
        """Single inference step; returns prediction, hidden, context, attention weights.

        NOTE(review): the decoder is called with None for its hidden/enc_output
        slots here — presumably it relies only on the context vector; confirm
        against the Decoder implementation.
        """
        context_vector, attention_weights = self.attention(dec_hidden,
                                                           enc_output)
        prediction, dec_hidden = self.decoder(dec_input, None, None,
                                              context_vector)
        return prediction, dec_hidden, context_vector, attention_weights
def __init__(self, embedding_dim=256, vocab_size=388 + 2, num_layer=6,
             max_seq=2048, dropout=0.2, debug=False, loader_path=None,
             dist=False, writer=None):
    """Decoder-only music transformer with a final vocab projection.

    When ``loader_path`` is given, hyperparameters come from the saved
    config file instead of the constructor arguments.
    """
    super().__init__()
    self.infer = False

    if loader_path is not None:
        # Restore hyperparameters from a saved configuration.
        self.load_config_file(loader_path)
    else:
        self._debug = debug
        self.max_seq = max_seq
        self.num_layer = num_layer
        self.embedding_dim = embedding_dim
        self.vocab_size = vocab_size

    self.dist = dist
    self.writer = writer

    # NOTE: the attribute is named `Decoder` but holds an Encoder stack —
    # a decoder-only architecture built from encoder layers.
    self.Decoder = Encoder(num_layers=self.num_layer,
                           d_model=self.embedding_dim,
                           input_vocab_size=self.vocab_size,
                           rate=dropout,
                           max_len=max_seq)
    self.fc = torch.nn.Linear(self.embedding_dim, self.vocab_size)
def __init__(self, params):
    """Pointer-generator network using word2vec embeddings loaded from disk."""
    super(PGN, self).__init__()
    # Both resources live under ../data relative to the working directory.
    data_dir = os.path.join(os.path.abspath('../'), 'data')
    word_model_path = os.path.join(data_dir, 'w2v.model')
    vocab_path = os.path.join(data_dir, 'words_frequences.txt')

    self.params = params
    self.matrix = get_embedding(vocab_path, word_model_path, params)

    vocab_size = params["vocab_size"]
    embed_size = params["embed_size"]
    batch_size = params["batch_size"]
    self.encoder = Encoder(vocab_size, embed_size, self.matrix,
                           params["enc_units"], batch_size)
    self.attention = BahdanauAttention(params["attn_units"])
    self.decoder = Decoder(vocab_size, embed_size, self.matrix,
                           params["dec_units"], batch_size)
    self.pointer = Pointer()
def __init__(self, hp):
    """U-shaped transformer: encoder and decoder share all structural settings."""
    super(UTransformer, self).__init__()
    self.hp = hp

    # Settings common to both halves, built once to avoid duplication.
    shared = dict(
        num_layers=hp.num_blocks,
        num_predictor=hp.num_predictor,
        att_unit=(hp.vunits, hp.MTunits, hp.Tunits, hp.Munits),
        value_attr=(hp.V_kernel, hp.V_stride),
        num_heads=hp.num_heads,
        model_structure=hp.model_structure,
        d_ff=hp.d_ff,
        d_model=hp.d_model,
        drop_rate=hp.dropout_rate,
    )
    self.encoder = Encoder(in_seqlen=hp.in_seqlen, **shared)
    self.decoder = Decoder(out_seqlen=hp.out_seqlen, **shared)
def __init__(self,):
    """Transformer encoder plus the W_h projection used by pointer attention."""
    super(PointerEncoder, self).__init__()
    # NOTE(review): d_q, d_k, d_v are resolved from the enclosing module
    # scope, not from config — confirm they are defined there.
    self.encoder = Encoder(
        config.num_layers, config.n_head, config.d_model,
        config.vocab_size, config.max_enc_len,
        d_q, d_k, d_v,
        config.d_affine, config.embedding_dropout, config.att_dropout,
        config.fc_dorpout,  # sic: attribute name is misspelled in config
    )
    # Linear map over encoder states (no bias), used when scoring attention.
    self.W_h = nn.Linear(config.d_model, config.d_model, bias=False)
def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
             target_vocab_size, pe_input, pe_target, rate=0.1):
    """Standard transformer: encoder, decoder, and a final vocab projection.

    pe_input / pe_target are the maximum positional-encoding lengths for the
    source and target sides; rate is the dropout rate.
    """
    super(Transformer, self).__init__()
    self.Encoder = Encoder(num_layers, d_model, num_heads, dff,
                           input_vocab_size, pe_input, rate)
    self.Decoder = Decoder(num_layers, d_model, num_heads, dff,
                           target_vocab_size, pe_target, rate)
    # Projects decoder output (d_model) to logits over the target vocabulary.
    self.dense = tf.keras.layers.Dense(target_vocab_size)
def __init__(self, args):
    """Multilingual transformer: shared word/language embeddings, two encoders,
    one parameter-efficient decoder, with optional logit masking and fix-norm
    embedding initialization.
    """
    super(Transformer, self).__init__()
    self.args = args
    embed_dim = args.embed_dim
    joint_vocab_size = args.joint_vocab_size
    self.scale = embed_dim ** 0.5

    if args.mask_logit:
        # Logits will be masked separately per language at run time.
        self.logit_mask = None
    else:
        # One shared mask for all languages; it only removes BOS and PAD.
        mask = [1.] * joint_vocab_size
        mask[ac.BOS_ID] = 0.
        mask[ac.PAD_ID] = 0.
        self.logit_mask = torch.tensor(mask).type(torch.uint8)

    self.word_embedding = Parameter(torch.Tensor(joint_vocab_size, embed_dim))
    self.lang_embedding = Parameter(
        torch.Tensor(args.lang_vocab_size, embed_dim))
    self.out_bias = (Parameter(torch.Tensor(joint_vocab_size))
                     if args.use_bias else None)

    self.encoder1 = Encoder(args)
    self.encoder2 = Encoder(args)
    self.decoder = PEDecoder(args)

    # Parameter initialization.
    nn.init.normal_(self.lang_embedding, mean=0, std=embed_dim ** -0.5)
    if args.fix_norm:
        # fix_norm keeps word embeddings in a tight uniform range.
        bound = 0.01
        nn.init.uniform_(self.word_embedding, a=-bound, b=bound)
    else:
        nn.init.normal_(self.word_embedding, mean=0, std=embed_dim ** -0.5)
    if args.use_bias:
        nn.init.constant_(self.out_bias, 0.)
class PGN(tf.keras.Model):
    """Pointer-generator network with Bahdanau attention and w2v embeddings."""

    def __init__(self, params):
        super(PGN, self).__init__()
        # Embedding resources are loaded from ../data relative to the CWD.
        word_model_path = os.path.join(os.path.abspath('../'), 'data',
                                       'w2v.model')
        vocab_path = os.path.join(os.path.abspath('../'), 'data',
                                  'words_frequences.txt')
        self.params = params
        self.matrix = get_embedding(vocab_path, word_model_path, params)
        self.encoder = Encoder(params["vocab_size"], params["embed_size"],
                               self.matrix, params["enc_units"],
                               params["batch_size"])
        self.attention = BahdanauAttention(params["attn_units"])
        self.decoder = Decoder(params["vocab_size"], params["embed_size"],
                               self.matrix, params["dec_units"],
                               params["batch_size"])
        self.pointer = Pointer()

    def call_encoder(self, enc_inp):
        """Encode a batch; returns (final hidden state, per-step outputs)."""
        enc_hidden = self.encoder.initialize_hidden_state()
        enc_output, enc_hidden = self.encoder(enc_inp, enc_hidden)
        return enc_hidden, enc_output

    def call(self, enc_output, dec_hidden, enc_inp, enc_extended_inp, dec_inp,
             batch_oov_len):
        """Teacher-forced decode producing final (copy + generate) distributions.

        Each step: decode one target token, recompute attention from the new
        hidden state, and compute p_gen from context/hidden/input.
        """
        predictions = []
        attentions = []
        p_gens = []
        # Initial context from the pre-decode hidden state.
        context_vector, _ = self.attention(dec_hidden, enc_output)
        for t in range(dec_inp.shape[1]):
            dec_x, pred, dec_hidden = self.decoder(
                tf.expand_dims(dec_inp[:, t], 1), dec_hidden, enc_output,
                context_vector)
            # Refresh context/attention for the next step and for p_gen.
            context_vector, attn = self.attention(dec_hidden, enc_output)
            p_gen = self.pointer(context_vector, dec_hidden,
                                 tf.squeeze(dec_x, axis=1))
            predictions.append(pred)
            attentions.append(attn)
            p_gens.append(p_gen)
        # Mix vocabulary and copy distributions, extending over in-batch OOVs.
        final_dists = _calc_final_dist(enc_extended_inp, predictions,
                                       attentions, p_gens, batch_oov_len,
                                       self.params["vocab_size"],
                                       self.params["batch_size"])
        if self.params["mode"] == "train":
            return tf.stack(final_dists, 1), dec_hidden
            # predictions_shape = (batch_size, dec_len, vocab_size)
            # with dec_len = 1 in pred mode
        else:
            return tf.stack(final_dists, 1), dec_hidden, context_vector, tf.stack(
                attentions, 1), tf.stack(p_gens, 1)
class PGN(tf.keras.Model):
    """Pointer-generator network (Luong attention variant) whose decoder also
    carries an explicit encoder cell state alongside the hidden state."""

    def __init__(self, params):
        super(PGN, self).__init__()
        self.params = params
        self.encoder = Encoder(params["vocab_size"], params["embed_size"],
                               params["enc_units"], params["batch_size"])
        self.attention = LuongAttention(params["attn_units"])
        self.decoder = Decoder(params["vocab_size"], params["embed_size"],
                               params["dec_units"], params["batch_size"])
        self.pointer = Pointer()

    def call_encoder(self, enc_inp):
        """Encode a batch; returns (hidden state, cell state, per-step outputs)."""
        enc_hidden = self.encoder.initialize_hidden_state()
        # This Encoder variant returns a separate cell state as well.
        enc_output, enc_hidden, enc_state = self.encoder(enc_inp, enc_hidden)
        return enc_hidden, enc_state, enc_output

    def call(self, enc_output, dec_hidden, enc_state, enc_inp,
             enc_extended_inp, dec_inp, batch_oov_len):
        """Teacher-forced decode; the decoder itself returns the updated
        context vector and attention weights used downstream."""
        predictions = []
        attentions = []
        p_gens = []
        context_vector, _ = self.attention(dec_hidden, enc_output)
        for t in range(dec_inp.shape[1]):
            # Decoder consumes [hidden, cell] and returns the refreshed
            # context vector and attention directly.
            dec_x, pred, dec_hidden, context_vector, attn = self.decoder(
                tf.expand_dims(dec_inp[:, t], 1), [dec_hidden, enc_state],
                enc_output, context_vector)
            # NOTE(review): these recomputed values are never used — the
            # decoder-returned context_vector/attn above take precedence.
            context_vector1, attn1 = self.attention(dec_hidden, enc_output)
            p_gen = self.pointer(context_vector, dec_hidden,
                                 tf.squeeze(dec_x, axis=1))
            predictions.append(pred)
            attentions.append(attn)
            p_gens.append(p_gen)
        # Mix vocabulary and copy distributions, extending over in-batch OOVs.
        final_dists = _calc_final_dist(enc_extended_inp, predictions,
                                       attentions, p_gens, batch_oov_len,
                                       self.params["vocab_size"],
                                       self.params["batch_size"])
        if self.params["mode"] == "train":
            return tf.stack(
                final_dists, 1
            ), dec_hidden  # predictions_shape = (batch_size, dec_len, vocab_size) with dec_len = 1 in pred mode
        else:
            return tf.stack(final_dists, 1), dec_hidden, context_vector, tf.stack(
                attentions, 1), tf.stack(p_gens, 1)
def __init__(self, input_vocab_size, output_vocab_size, d_model, n_layers,
             n_heads, d_ff, dropout_rate=0.1):
    """Encoder-decoder transformer with a final projection to the output vocab."""
    super().__init__()
    self.encoder = Encoder(input_vocab_size, d_model, n_layers, n_heads,
                           d_ff, dropout_rate)
    self.decoder = Decoder(output_vocab_size, d_model, n_layers, n_heads,
                           d_ff, dropout_rate)
    # Maps decoder output from d_model to output_vocab_size logits.
    self.final_output_dense = tf.keras.layers.Dense(output_vocab_size)
def __init__(self, hps, device):
    """Language model: word embedding -> encoder -> vocabulary projection."""
    super(LM, self).__init__()
    self.hps = hps
    self.device = device
    self.emb_size = hps.emb_size
    self.hidden_size = hps.hidden_size
    self.vocab_size = hps.vocab_size
    self.pad_idx = hps.pad_idx

    # Components live in a ModuleDict so each registers as a submodule.
    self.layers = nn.ModuleDict({
        'word_embed': nn.Embedding(self.vocab_size, self.emb_size,
                                   padding_idx=self.pad_idx),
        'encoder': Encoder(self.emb_size, self.hidden_size,
                           drop_ratio=hps.drop_ratio),
        'out_proj': nn.Linear(hps.hidden_size, hps.vocab_size),
    })
def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
             output_dim, maximum_position_encoding, rate=0.1):
    """Encoder-only transformer classifier with a sigmoid output head."""
    super(Transformer, self).__init__()
    # Transformer encoder stack.
    self.encoder = Encoder(num_layers, d_model, num_heads, dff,
                           input_vocab_size, maximum_position_encoding, rate)
    # Flatten encoder output before the dense head.
    self.x_flatten = tf.keras.layers.Flatten()
    # Fully connected output layer with sigmoid activation.
    self.final_layer = tf.keras.layers.Dense(output_dim, activation='sigmoid')
class PGN(tf.keras.Model):
    """Pointer-generator network with Bahdanau attention."""

    def __init__(self, params):
        super(PGN, self).__init__()
        self.params = params
        self.encoder = Encoder(params["vocab_size"], params["embed_size"],
                               params["enc_units"], params["batch_size"])
        self.attention = BahdanauAttention(params["attn_units"])
        self.decoder = Decoder(params["vocab_size"], params["embed_size"],
                               params["dec_units"], params["batch_size"])
        self.pointer = Pointer()

    def call_encoder(self, enc_inp):
        """Encode a batch; returns (final hidden state, per-step outputs)."""
        enc_hidden = self.encoder.initialize_hidden_state()
        enc_output, enc_hidden = self.encoder(enc_inp, enc_hidden)
        return enc_hidden, enc_output

    def call(self, enc_output, dec_hidden, enc_inp, enc_extended_inp, dec_inp,
             batch_oov_len):
        """Teacher-forced decode producing final (copy + generate) distributions.

        Each step: decode one target token, recompute attention from the new
        hidden state, and compute p_gen from context/hidden/input.
        """
        predictions = []
        attentions = []
        p_gens = []
        # Initial context from the pre-decode hidden state.
        context_vector, _ = self.attention(dec_hidden, enc_output)
        for t in range(dec_inp.shape[1]):
            dec_x, pred, dec_hidden = self.decoder(
                tf.expand_dims(dec_inp[:, t], 1), dec_hidden, enc_output,
                context_vector)
            # Refresh context/attention for the next step and for p_gen.
            context_vector, attn = self.attention(dec_hidden, enc_output)
            p_gen = self.pointer(context_vector, dec_hidden,
                                 tf.squeeze(dec_x, axis=1))
            predictions.append(pred)
            attentions.append(attn)
            p_gens.append(p_gen)
        # Mix vocabulary and copy distributions, extending over in-batch OOVs.
        final_dists = _calc_final_dist(enc_extended_inp, predictions,
                                       attentions, p_gens, batch_oov_len,
                                       self.params["vocab_size"],
                                       self.params["batch_size"])
        return tf.stack(
            final_dists, 1
        ), dec_hidden  # predictions_shape = (batch_size, dec_len, vocab_size) with dec_len = 1 in pred mode
def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size,
             pe_input, pe_target, rate=0.1, training=True):
    """Transformer whose encoder takes pre-embedded input (no source vocab)."""
    super(Transformer, self).__init__()
    self.encoder = Encoder(num_layers, d_model, num_heads, dff, pe_input,
                           rate)
    self.decoder = Decoder(num_layers, d_model, num_heads, dff,
                           target_vocab_size, pe_target, rate)
    # Projects decoder output to logits over the target vocabulary.
    self.final_layer = tf.keras.layers.Dense(target_vocab_size)
    # Stored flag toggling train/inference behavior downstream.
    self.training = training
def __init__(self, vocab_size, embed_size, output_size, filters=128,
             num_heads=1, ques_limit=50, dropout=0.1, num_blocks=1,
             num_convs=2, embeddings=None,
             initializer=tf.variance_scaling_initializer(
                 1, 'fan_in', distribution='normal'),
             regularizer=l2(3e-7)):
    """Question encoder: frozen embedding -> highway -> 1x1 projection ->
    conv/attention encoder -> 1x1 output layer."""
    self.ques_limit = ques_limit
    self.num_blocks = num_blocks
    self.num_convs = num_convs
    self.dropout = dropout

    # Keras Embedding expects a list of weight arrays (or None).
    weights = [embeddings] if embeddings is not None else None
    self.embed_layer = Embedding(vocab_size, embed_size, weights=weights,
                                 trainable=False)
    self.highway = Highway(embed_size, 2, initializer, regularizer, dropout)
    # 1x1 convolution projecting embeddings down to `filters` channels.
    self.projection = Conv1D(filters, 1, activation='linear',
                             kernel_initializer=initializer,
                             kernel_regularizer=regularizer,
                             bias_regularizer=regularizer)
    self.encoder = Encoder(filters, 7, num_blocks, num_convs, num_heads,
                           initializer, regularizer, dropout)
    self.output_layer = Conv1D(output_size, 1, activation='linear',
                               kernel_regularizer=regularizer)
def __init__(self, hps, device):
    """Seq2seq model with a style latent variable refined by an IAF flow."""
    super(Seq2Seq, self).__init__()
    self.hps = hps
    self.device = device

    # ---- sizes and special indices -----------------------------------
    self.emb_size = hps.emb_size
    self.hidden_size = hps.hidden_size
    self.flow_h_size = hps.flow_h_size
    self.flow_depth = hps.flow_depth
    self.vocab_size = hps.vocab_size
    self.max_len = hps.max_len
    self._infor_nats = hps.infor_nats
    self._infor_groups = hps.infor_groups
    self.pad_idx = hps.pad_idx
    self.bos_idx = hps.bos_idx
    self.bos_tensor = torch.tensor(self.bos_idx, dtype=torch.long,
                                   device=device).view(1, 1)
    # The latent size equals the (bidirectional) sentence representation v_k.
    self.latent_size = self.hidden_size * 2

    # ---- components --------------------------------------------------
    self.layers = nn.ModuleDict()
    self.layers['word_embed'] = nn.Embedding(self.vocab_size, self.emb_size,
                                             padding_idx=self.pad_idx)
    self.layers['source_encoder'] = Encoder(self.emb_size, self.hidden_size,
                                            drop_ratio=hps.drop_ratio)
    self.layers['style_encoder'] = Encoder(self.emb_size, self.hidden_size,
                                           drop_ratio=hps.drop_ratio)
    # Decoder input: word embedding + encoder states + style representation z.
    self.layers['decoder'] = Decoder(
        self.emb_size + self.hidden_size * 2 + self.latent_size,
        self.hidden_size, drop_ratio=hps.drop_ratio,
        attn_drop_ratio=hps.attn_drop_ratio)
    self.layers['out_proj'] = nn.Linear(hps.hidden_size, hps.vocab_size)

    # MLPs producing the decoder's initial hidden/cell states and the flow
    # conditioning vector h.
    self.layers['dec_init_h'] = nn.Sequential(
        nn.Linear(self.hidden_size * 2, self.hidden_size), nn.Tanh())
    self.layers['dec_init_c'] = nn.Sequential(
        nn.Linear(self.hidden_size * 2, self.hidden_size), nn.Tanh())
    self.layers['flow_h_proj'] = nn.Sequential(
        nn.Linear(self.hidden_size * 2, self.flow_h_size), nn.Tanh())

    # Inverse Autoregressive Flow (IAF) posterior refinement module.
    self.layers['iaf'] = IAF(n_z=self.latent_size, n_h=self.flow_h_size,
                             n_made=hps.made_size,
                             flow_depth=self.flow_depth)

    # Cached constant log(2*pi) for Gaussian log-density terms.
    self._log2pi = torch.log(torch.tensor(2 * np.pi, dtype=torch.float,
                                          device=device))
def get_full_model(vocab_size=len(hparams.VOCAB),
                   char_embed_size=hparams.CHAR_EMBED_SIZE,
                   sliding_window_size=hparams.SLIDING_WINDOW_SIZE,
                   spk_embed_lstm_units=hparams.SPK_EMBED_LSTM_UNITS,
                   spk_embed_size=hparams.SPK_EMBED_SIZE,
                   spk_embed_num_layers=hparams.SPK_EMBED_NUM_LAYERS,
                   enc_conv1_bank_depth=hparams.ENC_CONV1_BANK_DEPTH,
                   enc_convprojec_filters1=hparams.ENC_CONVPROJEC_FILTERS1,
                   enc_convprojec_filters2=hparams.ENC_CONVPROJEC_FILTERS2,
                   enc_highway_depth=hparams.ENC_HIGHWAY_DEPTH,
                   hidden_size=hparams.HIDDEN_SIZE,
                   post_conv1_bank_depth=hparams.POST_CONV1_BANK_DEPTH,
                   post_convprojec_filters1=hparams.POST_CONVPROJEC_FILTERS1,
                   post_convprojec_filters2=hparams.POST_CONVPROJEC_FILTERS2,
                   post_highway_depth=hparams.POST_HIGHWAY_DEPTH,
                   attention_dim=hparams.ATTENTION_DIM,
                   target_size=hparams.TARGET_MAG_FRAME_SIZE,
                   n_mels=hparams.SYNTHESIZER_N_MELS,
                   output_per_step=hparams.OUTPUT_PER_STEP,
                   embed_mels=hparams.SPK_EMBED_N_MELS,
                   enc_seq_len=None,
                   dec_seq_len=None):
    """Assemble the full TTS graph: character encoder + speaker-embedding
    network conditioning a decoder, followed by a post-processing net.

    Returns an uncompiled Keras Model mapping
    (char_inputs, spk_inputs, decoder_inputs) to
    (decoder_pred, postnet_out, alignments, spk_embed).
    """
    # --- inputs --------------------------------------------------------
    char_inputs = Input(shape=(enc_seq_len, ), name='char_inputs')
    decoder_inputs = Input(shape=(dec_seq_len, n_mels),
                           name='decoder_inputs')
    spk_inputs = Input(shape=(None, sliding_window_size, embed_mels),
                       name='spk_embed_inputs')

    # --- sub-networks --------------------------------------------------
    char_encoder = Encoder(hidden_size=hidden_size // 2,
                           vocab_size=vocab_size,
                           embedding_size=char_embed_size,
                           conv1d_bank_depth=enc_conv1_bank_depth,
                           convprojec_filters1=enc_convprojec_filters1,
                           convprojec_filters2=enc_convprojec_filters2,
                           highway_depth=enc_highway_depth,
                           name='char_encoder')
    # Speaker embedding runs with frozen weights at synthesis time.
    speaker_encoder = InferenceSpeakerEmbedding(
        lstm_units=spk_embed_lstm_units,
        proj_size=spk_embed_size,
        num_layers=spk_embed_num_layers,
        trainable=False,
        name='embeddings')
    condition = Conditioning()
    decoder = Decoder(hidden_size=hidden_size,
                      attention_dim=attention_dim,
                      n_mels=n_mels,
                      output_per_step=output_per_step,
                      name='decoder')
    post_processing = PostProcessing(
        hidden_size=hidden_size // 2,
        conv1d_bank_depth=post_conv1_bank_depth,
        convprojec_filters1=post_convprojec_filters1,
        convprojec_filters2=post_convprojec_filters2,
        highway_depth=post_highway_depth,
        n_fft=target_size,
        name='postprocessing')

    # --- wire the graph ------------------------------------------------
    char_enc = char_encoder(char_inputs)
    spk_embed = speaker_encoder(spk_inputs)
    conditioned_char_enc = condition([char_enc, spk_embed])
    decoder_pred, alignments = decoder(
        [conditioned_char_enc, decoder_inputs], initial_state=None)
    postnet_out = post_processing(decoder_pred)

    full_model = Model(
        inputs=[char_inputs, spk_inputs, decoder_inputs],
        outputs=[decoder_pred, postnet_out, alignments, spk_embed])
    return full_model
def __init__(self, vocab_size, embed_size, filters=128, num_heads=8,
             encoder_num_blocks=1, encoder_num_convs=4, output_num_blocks=7,
             output_num_convs=2, cont_limit=400, ques_limit=50, dropout=0.1,
             embeddings=None,
             initializer=tf.variance_scaling_initializer(
                 1, 'fan_in', distribution='normal'),
             regularizer=l2(3e-7)):
    """QANet-style reader: embedding -> highway -> encoder ->
    context-query attention -> output encoder -> start/end pointers."""
    self.cont_limit = cont_limit
    self.ques_limit = ques_limit
    self.dropout = dropout
    self.encoder_num_blocks = encoder_num_blocks
    self.encoder_num_convs = encoder_num_convs

    # Keras Embedding expects a list of weight arrays (or None).
    weights = [embeddings] if embeddings is not None else None
    self.embed_layer = Embedding(vocab_size, embed_size, weights=weights,
                                 trainable=False)
    self.highway = Highway(embed_size, 2, initializer, regularizer, dropout)
    # 1x1 convolution down to `filters` channels before the input encoder.
    self.projection1 = Conv1D(filters, 1, activation='linear',
                              kernel_initializer=initializer,
                              kernel_regularizer=regularizer,
                              bias_regularizer=regularizer)
    self.encoder = Encoder(filters, 7, encoder_num_blocks,
                           encoder_num_convs, num_heads, initializer,
                           regularizer, dropout)
    self.coattention = ContextQueryAttention(cont_limit, ques_limit,
                                             initializer, regularizer,
                                             dropout)
    # Projection after co-attention, before the output encoder stack.
    self.projection2 = Conv1D(filters, 1, activation='linear',
                              kernel_initializer=initializer,
                              kernel_regularizer=regularizer,
                              bias_regularizer=regularizer)
    self.output_layer = Encoder(filters, 5, output_num_blocks,
                                output_num_convs, num_heads, initializer,
                                regularizer, dropout)
    # Span boundary scorers (one logit per position each).
    self.start_layer = Conv1D(1, 1, activation='linear',
                              kernel_initializer=initializer,
                              kernel_regularizer=regularizer,
                              bias_regularizer=regularizer)
    self.end_layer = Conv1D(1, 1, activation='linear',
                            kernel_initializer=initializer,
                            kernel_regularizer=regularizer,
                            bias_regularizer=regularizer)
class PGN(tf.keras.Model):
    """Pointer-generator network with optional coverage and decoder stats."""

    def __init__(self, params):
        super(PGN, self).__init__()
        self.params = params
        self.encoder = Encoder(params["vocab_size"], params["embed_size"],
                               params["enc_units"], params["batch_size"])
        self.attention = BahdanauAttention(params["attn_units"])
        # Coverage sub-layer exists only when enabled; all uses below are
        # guarded on params["coverage"].
        if params["coverage"]:
            self.coverage = Coverage(params["attn_units"])
        self.decoder = Decoder(params["vocab_size"], params["embed_size"],
                               params["dec_units"], params["batch_size"],
                               params["use_stats"])
        self.pointer = Pointer()

    def call_encoder(self, enc_inp):
        """Encode a batch; returns (final hidden state, per-step outputs)."""
        enc_hidden = self.encoder.initialize_hidden_state()
        enc_output, enc_hidden = self.encoder(enc_inp, enc_hidden)
        return enc_hidden, enc_output

    def call(self, enc_output, dec_hidden, enc_inp, enc_extended_inp, dec_inp,
             batch_oov_len, cov_vec, stats=None):
        """Teacher-forced decode with coverage-aware attention.

        Returns a dict with final distributions, the last decoder hidden
        state, and (when coverage is on or in inference mode) the coverage
        vector, attention weights, context vector, and p_gens.
        """
        predictions = []
        attentions = []
        p_gens = []
        # Initial coverage features (None when coverage is disabled).
        if self.params["coverage"]:
            cov_features = self.coverage(cov_vec)
        else:
            cov_features = None
        context_vector, _ = self.attention(dec_hidden, enc_output,
                                           cov_features)
        for t in range(dec_inp.shape[1]):
            dec_x, pred, dec_hidden = self.decoder(
                tf.expand_dims(dec_inp[:, t], 1), dec_hidden, enc_output,
                context_vector, stats)
            # Recompute coverage features from the (possibly updated) cov_vec.
            if self.params["coverage"]:
                cov_features = self.coverage(cov_vec)
            else:
                cov_features = None
            context_vector, attn = self.attention(dec_hidden, enc_output,
                                                  cov_features)
            p_gen = self.pointer(context_vector, dec_hidden,
                                 tf.squeeze(dec_x, axis=1))
            if self.params["coverage"]:
                # Accumulate attention into the coverage vector; with
                # coverage on, attn carries a trailing singleton dim that is
                # squeezed before stacking.
                cov_vec += attn
                attn = tf.squeeze(attn, axis=-1)
            predictions.append(pred)
            attentions.append(attn)
            p_gens.append(p_gen)
        # Mix vocabulary and copy distributions, extending over in-batch OOVs.
        final_dists = _calc_final_dist(enc_extended_inp, predictions,
                                       attentions, p_gens, batch_oov_len,
                                       self.params["vocab_size"],
                                       self.params["batch_size"])
        res = {}
        res["final_dists"] = tf.stack(final_dists, 1)
        res["dec_hidden"] = dec_hidden
        if self.params["coverage"] or self.params["mode"] != "train":
            res["cov_vec"] = cov_vec
            res["attn_weights"] = tf.stack(attentions, 1)
        if self.params["mode"] != "train":
            res["context"] = context_vector
            res["p_gens"] = tf.stack(p_gens, 1)
        return res  # predictions_shape = (batch_size, dec_len, vocab_size) with dec_len = 1 in pred mode
def get_synthesizer_model(
        vocab_size=len(hparams.VOCAB),
        char_embed_size=hparams.CHAR_EMBED_SIZE,
        spk_embed_size=hparams.SPK_EMBED_SIZE,
        enc_conv1_bank_depth=hparams.ENC_CONV1_BANK_DEPTH,
        enc_convprojec_filters1=hparams.ENC_CONVPROJEC_FILTERS1,
        enc_convprojec_filters2=hparams.ENC_CONVPROJEC_FILTERS2,
        enc_highway_depth=hparams.ENC_HIGHWAY_DEPTH,
        hidden_size=hparams.HIDDEN_SIZE,
        post_conv1_bank_depth=hparams.POST_CONV1_BANK_DEPTH,
        post_convprojec_filters1=hparams.POST_CONVPROJEC_FILTERS1,
        post_convprojec_filters2=hparams.POST_CONVPROJEC_FILTERS2,
        post_highway_depth=hparams.POST_HIGHWAY_DEPTH,
        attention_dim=hparams.ATTENTION_DIM,
        target_size=hparams.TARGET_MAG_FRAME_SIZE,
        n_mels=hparams.SYNTHESIZER_N_MELS,
        output_per_step=hparams.OUTPUT_PER_STEP,
        learning_rate=hparams.LEARNING_RATE,
        clipnorm=hparams.CLIPNORM,
        enc_seq_len=None,
        dec_seq_len=None):
    """Build and compile the synthesizer: character encoder conditioned on a
    precomputed speaker embedding, feeding a decoder and post-processing net.

    Returns a compiled Keras Model mapping
    (char_inputs, spk_embed_inputs, decoder_inputs) to
    (decoder_pred, postnet_out, alignments); MAE loss on the first two
    outputs, none on alignments.
    """
    # --- inputs --------------------------------------------------------
    char_inputs = Input(shape=(enc_seq_len, ), name='char_inputs')
    decoder_inputs = Input(shape=(dec_seq_len, n_mels),
                           name='decoder_inputs')
    # Speaker embedding is supplied directly (already computed).
    spk_embed_inputs = Input(shape=(spk_embed_size, ),
                             name='spk_embed_inputs')

    # --- sub-networks --------------------------------------------------
    char_encoder = Encoder(hidden_size=hidden_size // 2,
                           vocab_size=vocab_size,
                           embedding_size=char_embed_size,
                           conv1d_bank_depth=enc_conv1_bank_depth,
                           convprojec_filters1=enc_convprojec_filters1,
                           convprojec_filters2=enc_convprojec_filters2,
                           highway_depth=enc_highway_depth,
                           name='char_encoder')
    condition = Conditioning()
    decoder = Decoder(hidden_size=hidden_size,
                      attention_dim=attention_dim,
                      n_mels=n_mels,
                      output_per_step=output_per_step,
                      name='decoder')
    post_processing = PostProcessing(
        hidden_size=hidden_size // 2,
        conv1d_bank_depth=post_conv1_bank_depth,
        convprojec_filters1=post_convprojec_filters1,
        convprojec_filters2=post_convprojec_filters2,
        highway_depth=post_highway_depth,
        n_fft=target_size,
        name='postprocessing')

    # --- wire the graph ------------------------------------------------
    char_enc = char_encoder(char_inputs)
    conditioned_char_enc = condition([char_enc, spk_embed_inputs])
    decoder_pred, alignments = decoder(
        [conditioned_char_enc, decoder_inputs], initial_state=None)
    postnet_out = post_processing(decoder_pred)

    synthesizer_model = Model(
        inputs=[char_inputs, spk_embed_inputs, decoder_inputs],
        outputs=[decoder_pred, postnet_out, alignments])
    # NOTE(review): `lr=` is the legacy Keras optimizer argument — newer TF
    # versions use `learning_rate=`; confirm the pinned TF version.
    optimizer = Adam(lr=learning_rate, clipnorm=clipnorm)
    synthesizer_model.compile(optimizer=optimizer,
                              loss=['mae', 'mae', None],
                              loss_weights=[1., 1., None])
    return synthesizer_model
def __init__(self, encoder_1, hidden_1, Z_DIMS, decoder_share, share_hidden,
             decoder_1, hidden_2, encoder_l, hidden3, encoder_2, hidden_4,
             encoder_l1, hidden3_1, decoder_2, hidden_5, drop_rate,
             log_variational=True, Type='Bernoulli', device='cpu',
             n_centroids=19, penality="GMM", model=2):
    """Product-of-Experts multimodal VAE with a GMM prior.

    Builds per-modality encoders/decoders, a shared decoder trunk, the PoE
    combiner, and the learnable GMM prior parameters.

    Fix vs. original: the 'Bernoulli', 'Possion', and fallback branches
    constructed the exact same Decoder — the duplicated branches are
    collapsed into a single else (behavior unchanged).
    """
    super(scMVAE_POE, self).__init__()

    # Modality 1: data encoder, library-size encoder, ZINB decoder.
    self.X1_encoder = Encoder(encoder_1, hidden_1, Z_DIMS,
                              dropout_rate=drop_rate)
    self.X1_encoder_l = Encoder(encoder_l, hidden3, 1,
                                dropout_rate=drop_rate)
    self.X1_decoder = Decoder_ZINB(decoder_1, hidden_2, encoder_1[0],
                                   dropout_rate=drop_rate)

    # Modality 2: encoder plus a shared decoder trunk.
    self.X2_encoder = Encoder(encoder_2, hidden_4, Z_DIMS,
                              dropout_rate=drop_rate)
    self.decode_share = build_multi_layers(decoder_share,
                                           dropout_rate=drop_rate)

    if Type == 'ZINB':
        # ZINB additionally needs its own library-size encoder.
        self.X2_encoder_l = Encoder(encoder_l1, hidden3_1, 1,
                                    dropout_rate=drop_rate)
        self.decoder_x2 = Decoder_ZINB(decoder_2, hidden_5, encoder_2[0],
                                       dropout_rate=drop_rate)
    else:
        # 'Bernoulli', 'Possion' (sic), and any other likelihood all use the
        # generic Decoder parameterized by Type.
        self.decoder_x2 = Decoder(decoder_2, hidden_5, encoder_2[0], Type,
                                  dropout_rate=drop_rate)

    # Combines the per-modality posteriors.
    self.experts = ProductOfExperts()

    # Configuration.
    self.Z_DIMS = Z_DIMS
    self.share_hidden = share_hidden
    self.log_variational = log_variational
    self.Type = Type
    self.decoder_share = decoder_share
    self.decoder_1 = decoder_1
    self.n_centroids = n_centroids
    self.penality = penality
    self.device = device
    self.model = model

    # GMM prior parameters: mixture weights p(c), component means, variances.
    self.pi = nn.Parameter(torch.ones(n_centroids) / n_centroids)
    self.mu_c = nn.Parameter(torch.zeros(Z_DIMS, n_centroids))
    self.var_c = nn.Parameter(torch.ones(Z_DIMS, n_centroids))