def __init__(self, config):
        """Build every sub-module of the encoder-decoder transformer.

        :param config: mapping read with the keys ``'head'``, ``'emb_dim'``,
            ``'d_ff'``, ``'drop_out'``, ``'N_layers'`` and ``'vocab_size'``.
        """
        super(Transformer_EncoderDecoder, self).__init__()
        clone = copy.deepcopy

        n_heads = config['head']
        emb_dim = config['emb_dim']
        d_ff = config['d_ff']
        dropout = config['drop_out']
        n_layers = config['N_layers']
        vocab_size = config['vocab_size']

        # Prototype layers: the stacks and embeddings below always receive
        # deep copies of these, never the prototypes themselves.
        self.attn = MultiHeadedAttention(n_heads, emb_dim)
        self.ff = PositionwiseFeedForward(emb_dim, d_ff, dropout)
        self.position = PositionalEncoding(emb_dim, dropout)

        encoder_layer = EncoderLayer(emb_dim, clone(self.attn),
                                     clone(self.ff), dropout)
        self.encoder = Encoder(encoder_layer, n_layers)

        decoder_layer = DecoderLayer(emb_dim, clone(self.attn),
                                     clone(self.attn), clone(self.ff),
                                     dropout)
        self.decoder = Decoder(decoder_layer, n_layers)

        # Source and target sides get separate embedding tables, each
        # followed by its own copy of the positional encoding.
        self.src_embed = nn.Sequential(Embeddings(emb_dim, vocab_size),
                                       clone(self.position))
        self.tgt_embed = nn.Sequential(Embeddings(emb_dim, vocab_size),
                                       clone(self.position))

        self.generator = Generator(emb_dim, vocab_size)
        self.fc_out = nn.Linear(emb_dim, vocab_size)

        self.model = EncoderDecoder(self.encoder, self.decoder,
                                    self.src_embed, self.tgt_embed,
                                    self.generator)
Example #2
0
    def __init__(self, args):
        """Create the four networks of the model and a single Adam optimizer.

        :param args: object providing ``s_dim``, ``a_dim``, ``o_dim``,
            ``h_dim`` and ``device``.  ``device`` is indexed (``device[0]``)
            and also handed to ``DataParallel`` as its device list.
        """
        super(SSM, self).__init__()

        s_dim, a_dim, o_dim, h_dim = (args.s_dim, args.a_dim, args.o_dim,
                                      args.h_dim)
        self.s_dim = s_dim
        self.a_dim = a_dim
        self.o_dim = o_dim
        self.h_dim = h_dim
        self.device = args.device
        self.args = args

        devices = self.device

        def _replicate(module):
            # Move the module to the first device, then wrap it so that
            # DataParallel can spread it over every listed device.
            return torch.nn.DataParallel(module.to(devices[0]), devices)

        self.encoder = _replicate(Encoder(o_dim, h_dim))
        self.decoder = _replicate(Decoder(s_dim, o_dim))
        self.prior = _replicate(Prior(s_dim, a_dim))
        # The posterior is given the (already wrapped) prior network.
        self.posterior = _replicate(
            Posterior(self.prior, s_dim, a_dim, h_dim))

        self.distributions = nn.ModuleList(
            [self.prior, self.posterior, self.encoder, self.decoder])
        init_weights(self.distributions)

        # Standard normal used for s_aux_loss.
        self.prior01 = Normal(torch.tensor(0.), scale=torch.tensor(1.))

        # One optimizer over the parameters of all four networks.
        self.g_optimizer = optim.Adam(self.distributions.parameters())
Example #3
0
    def __init__(self, obs, nums, glimpse_size=(20, 20),
                 inpt_encoder_hidden=None,
                 glimpse_encoder_hidden=None,
                 glimpse_decoder_hidden=None,
                 transform_estimator_hidden=None,
                 steps_pred_hidden=None,
                 baseline_hidden=None,
                 transform_var_bias=-2.,
                 step_bias=0.,
                 *args, **kwargs):
        """Configure the parent model with MNIST-specific component factories.

        Every ``*_hidden`` argument is a list of hidden-layer sizes.  Passing
        ``None`` (the default) selects the historical default for that
        argument; a fresh list is created on every call so that instances
        never share one mutable default object.

        :param obs: forwarded to the parent constructor as ``obs``.
        :param nums: forwarded to the parent constructor as ``nums``.
        :param glimpse_size: forwarded to the parent constructor.
        :param inpt_encoder_hidden: sizes for the input ``Encoder``
            (default ``[256, 256]``).
        :param glimpse_encoder_hidden: sizes for the glimpse ``Encoder``
            (default ``[256, 256]``).
        :param glimpse_decoder_hidden: sizes for the glimpse ``Decoder``
            (default ``[252, 252]``).
        :param transform_estimator_hidden: sizes for
            ``StochasticTransformParam`` (default ``[256, 256]``).
        :param steps_pred_hidden: sizes for ``StepsPredictor``
            (default ``[50]``).
        :param baseline_hidden: sizes for ``BaselineMLP``
            (default ``[256, 128]``).
        :param transform_var_bias: passed as ``scale_bias`` to
            ``StochasticTransformParam``.
        :param step_bias: second argument of ``StepsPredictor``.
        :param args: extra positional arguments for the parent constructor.
        :param kwargs: extra keyword arguments for the parent constructor.
        """
        # Materialise defaults per call instead of sharing module-level lists.
        if inpt_encoder_hidden is None:
            inpt_encoder_hidden = [256] * 2
        if glimpse_encoder_hidden is None:
            glimpse_encoder_hidden = [256] * 2
        if glimpse_decoder_hidden is None:
            # NOTE(review): 252 differs from the 256 used by the other
            # defaults -- kept as-is; confirm it is intentional.
            glimpse_decoder_hidden = [252] * 2
        if transform_estimator_hidden is None:
            transform_estimator_hidden = [256] * 2
        if steps_pred_hidden is None:
            steps_pred_hidden = [50]
        if baseline_hidden is None:
            baseline_hidden = [256, 128]

        # Set before the parent constructor runs, as in the original code.
        self.baseline = BaselineMLP(baseline_hidden)

        def _make_transform_estimator(x):
            est = StochasticTransformParam(transform_estimator_hidden, x,
                                           scale_bias=transform_var_bias)
            return est

        # Components are handed over as factories (callables); the parent
        # decides when to invoke them.
        super(AIRonMNIST, self).__init__(
            *args,
            obs=obs,
            nums=nums,
            glimpse_size=glimpse_size,
            n_appearance=50,
            transition=snt.LSTM(256),
            input_encoder=(lambda: Encoder(inpt_encoder_hidden)),
            glimpse_encoder=(lambda: Encoder(glimpse_encoder_hidden)),
            glimpse_decoder=(lambda x: Decoder(glimpse_decoder_hidden, x)),
            transform_estimator=_make_transform_estimator,
            steps_predictor=(lambda: StepsPredictor(steps_pred_hidden, step_bias)),
            output_std=.3,
            **kwargs
        )
Example #4
0
    def __init__(self, config, vocab_size):
        """Create embedders, encoder, decoder, optimizer and loss.

        :param config: mapping read with the keys ``'maxlen'``, ``'clip'``,
            ``'temp'``, ``'emb_size'``, ``'n_hidden'``, ``'n_layers'``,
            ``'noise_radius'``, ``'use_attention'``, ``'dropout'`` and
            ``'lr_ae'``.
        :param vocab_size: size used for both embedding tables and passed to
            the decoder.
        """
        super(DeepAPI, self).__init__()
        self.vocab_size = vocab_size
        self.maxlen = config['maxlen']
        self.clip = config['clip']
        self.temp = config['temp']

        emb_size = config['emb_size']
        # Separate embedding tables for the description and the API side.
        self.desc_embedder = nn.Embedding(vocab_size, emb_size,
                                          padding_idx=PAD_ID)
        self.api_embedder = nn.Embedding(vocab_size, emb_size,
                                         padding_idx=PAD_ID)

        n_hidden = config['n_hidden']
        # utter encoder: encode response to vector
        self.encoder = Encoder(self.desc_embedder, emb_size, n_hidden, True,
                               config['n_layers'], config['noise_radius'])
        # utter decoder: P(x|c,z)
        self.decoder = Decoder(self.api_embedder, emb_size, n_hidden * 2,
                               vocab_size, config['use_attention'], 1,
                               config['dropout'])

        # A single Adadelta instance updates encoder and decoder together.
        trainable = [*self.encoder.parameters(), *self.decoder.parameters()]
        self.optimizer = optim.Adadelta(trainable,
                                        lr=config['lr_ae'],
                                        rho=0.95)
        self.criterion_ce = nn.CrossEntropyLoss()
Example #5
0
def da_rnn(train_data: TrainData, n_targs: int, encoder_hidden_size=64, decoder_hidden_size=64,
           T=10, learning_rate=0.01, batch_size=128):
    """Build the training configuration and the encoder/decoder network.

    The keyword arguments used to construct the encoder and decoder are also
    written to ``data/enc_kwargs.json`` and ``data/dec_kwargs.json``.

    :param train_data: object whose ``feats`` attribute exposes ``shape``.
    :param n_targs: passed to the decoder as ``out_feats``.
    :param encoder_hidden_size: hidden size for the encoder.
    :param decoder_hidden_size: hidden size for the decoder.
    :param T: passed to ``TrainConfig``, the encoder and the decoder.
    :param learning_rate: learning rate of both Adam optimizers.
    :param batch_size: passed to ``TrainConfig``.
    :return: tuple ``(train_cfg, da_rnn_net)``.
    """

    def _save_kwargs(kwargs, file_name):
        # Persist constructor arguments as indented JSON under "data".
        with open(os.path.join("data", file_name), "w") as fi:
            json.dump(kwargs, fi, indent=4)

    def _adam(module):
        # Optimise only the parameters that require gradients.
        trainable = [p for p in module.parameters() if p.requires_grad]
        return optim.Adam(params=trainable, lr=learning_rate)

    # 70% of the rows are used as the training size.
    train_size = int(train_data.feats.shape[0] * 0.7)
    train_cfg = TrainConfig(T, train_size, batch_size, nn.MSELoss())
    logger.info(f"Training size: {train_cfg.train_size:d}.")

    enc_kwargs = {"input_size": train_data.feats.shape[1], "hidden_size": encoder_hidden_size, "T": T}
    encoder = Encoder(**enc_kwargs).to(device)
    _save_kwargs(enc_kwargs, "enc_kwargs.json")

    dec_kwargs = {"encoder_hidden_size": encoder_hidden_size,
                  "decoder_hidden_size": decoder_hidden_size, "T": T, "out_feats": n_targs}
    decoder = Decoder(**dec_kwargs).to(device)
    _save_kwargs(dec_kwargs, "dec_kwargs.json")

    encoder_optimizer = _adam(encoder)
    decoder_optimizer = _adam(decoder)
    da_rnn_net = DaRnnNet(encoder, decoder, encoder_optimizer, decoder_optimizer)

    return train_cfg, da_rnn_net
Example #6
0
 def __init__(self, vocab, feats_size, kernel_size, rec_field, attn_size,
              hidden_size, mid_layer, dropout, which):
     """Store the vocabulary and build encoder, decoder and initial weights.

     ``len(vocab)`` is the first argument of both ``Encoder`` and
     ``Decoder``; the remaining arguments are forwarded unchanged.
     """
     super(TextNormalizer, self).__init__()
     self.vocab = vocab
     vocab_len = len(vocab)
     self.encoder = Encoder(vocab_len, feats_size, kernel_size, rec_field,
                            dropout, which)
     self.decoder = Decoder(vocab_len, feats_size, attn_size, hidden_size,
                            mid_layer, dropout)
     self.init_hidden = InitialWeights(hidden_size, mid_layer, 4)
Example #7
0
def da_rnn(train_data,
           n_targs: int,
           encoder_hidden_size=64,
           decoder_hidden_size=64,
           T=10,
           learning_rate=0.01,
           batch_size=128):
    """Build the training configuration and the encoder/decoder network.

    Encoder and decoder hyper-parameters are stored in one-row DataFrames,
    written as CSV under ``results/<save_name>/`` and then read back from the
    frames to construct the (CUDA) modules.  Relies on the module-level names
    ``save_name`` and ``args`` (``args.wdecay``, ``args.min_lr``).

    :param train_data: object whose ``feats`` attribute exposes ``shape``.
    :param n_targs: stored as ``out_feats`` for the decoder.
    :param encoder_hidden_size: hidden size for the encoder.
    :param decoder_hidden_size: hidden size for the decoder.
    :param T: passed to ``TrainConfig``, the encoder and the decoder.
    :param learning_rate: learning rate of both Adam optimizers.
    :param batch_size: passed to ``TrainConfig``.
    :return: tuple ``(train_cfg, model)``.
    """

    def _first(frame, column):
        # Value of ``column`` in row 0, converted to a plain Python scalar.
        return frame[column][0].item()

    def _adam(module):
        # Optimise only the parameters that require gradients.
        trainable = [p for p in module.parameters() if p.requires_grad]
        return optim.Adam(params=trainable,
                          lr=learning_rate,
                          weight_decay=args.wdecay)

    def _cosine(optimizer):
        # The annealing period is the number of rows in the feature matrix.
        return optim.lr_scheduler.CosineAnnealingLR(
            optimizer, train_data.feats.shape[0], eta_min=args.min_lr)

    train_size = int(train_data.feats.shape[0] * 0.7)
    train_cfg = TrainConfig(T, train_size, batch_size, nn.MSELoss())
    logging.info(f"Training size: {train_cfg.train_size:d}.")

    enc_params = pd.DataFrame([{
        'input_size': train_data.feats.shape[1],
        'hidden_size': encoder_hidden_size,
        'T': T
    }])
    enc_params.to_csv(os.path.join('results', save_name, 'enc_params.csv'))

    enc_columns = ('input_size', 'hidden_size', 'T')
    encoder = Encoder(
        **{name: _first(enc_params, name) for name in enc_columns}).cuda()

    dec_params = pd.DataFrame([{
        'encoder_hidden_size': encoder_hidden_size,
        'decoder_hidden_size': decoder_hidden_size,
        'T': T,
        'out_feats': n_targs
    }])
    dec_params.to_csv(os.path.join('results', save_name, 'dec_params.csv'))

    dec_columns = ('encoder_hidden_size', 'decoder_hidden_size', 'T',
                   'out_feats')
    decoder = Decoder(
        **{name: _first(dec_params, name) for name in dec_columns}).cuda()

    encoder_optimizer = _adam(encoder)
    decoder_optimizer = _adam(decoder)

    encoder_scheduler = _cosine(encoder_optimizer)
    decoder_scheduler = _cosine(decoder_optimizer)

    model = DaRnnNet(encoder, decoder, encoder_optimizer, decoder_optimizer,
                     encoder_scheduler, decoder_scheduler)

    return train_cfg, model
Example #8
0
    def __init__(self, input_size, output_size, resume=False):
        """Create encoder, decoder, loss and one Adam optimizer per network.

        :param input_size: argument for the ``Encoder`` constructor.
        :param output_size: argument for the ``Decoder`` constructor.
        :param resume: when true, restore both networks from
            ``models/encoder.ckpt`` and ``models/decoder.ckpt``.
        """
        super(RNN, self).__init__()

        self.encoder = Encoder(input_size)
        self.decoder = Decoder(output_size)

        self.loss = nn.CrossEntropyLoss()
        self.encoder_optimizer = optim.Adam(self.encoder.parameters())
        self.decoder_optimizer = optim.Adam(self.decoder.parameters())

        if resume:
            # Deserialize onto the CPU first: without map_location a
            # checkpoint saved from a CUDA device cannot be loaded on a
            # machine without that device.  load_state_dict then copies the
            # values into the existing parameters wherever they live.
            # NOTE: torch.load unpickles the file -- only resume from
            # checkpoints you trust.
            self.encoder.load_state_dict(
                torch.load("models/encoder.ckpt", map_location="cpu"))
            self.decoder.load_state_dict(
                torch.load("models/decoder.ckpt", map_location="cpu"))
Example #9
0
def da_rnn(train_data,
           n_targs: int,
           encoder_hidden_size=64,
           decoder_hidden_size=64,
           T=10,
           learning_rate=0.01,
           batch_size=128):
    """Build the training configuration and the encoder/decoder network.

    The constructor arguments of the encoder and decoder are written to
    ``data/enc_kwargs.json`` and ``data/dec_kwargs.json``.  Relies on the
    module-level ``args`` (``args.wdecay``, ``args.epochs``, ``args.min_lr``).

    :param train_data: object whose ``feats`` attribute exposes ``shape``.
    :param n_targs: passed to the decoder as ``out_feats``.
    :param encoder_hidden_size: hidden size for the encoder.
    :param decoder_hidden_size: hidden size for the decoder.
    :param T: passed to ``TrainConfig``, the encoder and the decoder.
    :param learning_rate: learning rate of both Adam optimizers.
    :param batch_size: passed to ``TrainConfig``.
    :return: tuple ``(train_cfg, da_rnn_net)``.
    """

    def _save_kwargs(kwargs, file_name):
        # Persist constructor arguments as indented JSON under "data".
        with open(os.path.join("data", file_name), "w") as fi:
            json.dump(kwargs, fi, indent=4)

    def _adam(module):
        # Optimise only the parameters that require gradients.
        trainable = [p for p in module.parameters() if p.requires_grad]
        return optim.Adam(params=trainable,
                          lr=learning_rate,
                          weight_decay=args.wdecay)

    def _cosine(optimizer):
        # Cosine annealing over ``args.epochs`` down to ``args.min_lr``.
        return optim.lr_scheduler.CosineAnnealingLR(
            optimizer, args.epochs, eta_min=args.min_lr)

    train_size = int(train_data.feats.shape[0] * 0.7)
    train_cfg = TrainConfig(T, train_size, batch_size, nn.MSELoss())
    logging.info(f"Training size: {train_cfg.train_size:d}.")

    enc_kwargs = {
        "input_size": train_data.feats.shape[1],
        "hidden_size": encoder_hidden_size,
        "T": T
    }
    encoder = Encoder(**enc_kwargs).cuda()
    _save_kwargs(enc_kwargs, "enc_kwargs.json")

    dec_kwargs = {
        "encoder_hidden_size": encoder_hidden_size,
        "decoder_hidden_size": decoder_hidden_size,
        "T": T,
        "out_feats": n_targs
    }
    decoder = Decoder(**dec_kwargs).cuda()
    _save_kwargs(dec_kwargs, "dec_kwargs.json")

    encoder_optimizer = _adam(encoder)
    decoder_optimizer = _adam(decoder)

    encoder_scheduler = _cosine(encoder_optimizer)
    decoder_scheduler = _cosine(decoder_optimizer)

    da_rnn_net = DaRnnNet(encoder, decoder, encoder_optimizer,
                          decoder_optimizer, encoder_scheduler,
                          decoder_scheduler)

    return train_cfg, da_rnn_net
Example #10
0
    def __init__(self, X_dim, Y_dim, encoder_hidden_size=64, decoder_hidden_size=64,
                 linear_dropout=0, T=10, learning_rate=1e-5, batch_size=128, decay_rate=0.95):
        """Store hyper-parameters and build encoder, decoder, optimizers, loss.

        Both networks are moved to the module-level ``device``.  Each network
        gets its own Adam optimizer with the same ``learning_rate``.
        """
        self.T = T
        self.decay_rate = decay_rate
        self.batch_size = batch_size
        self.X_dim = X_dim
        self.Y_dim = Y_dim

        encoder = Encoder(X_dim, encoder_hidden_size, T, linear_dropout)
        self.encoder = encoder.to(device)
        decoder = Decoder(encoder_hidden_size, decoder_hidden_size, T,
                          linear_dropout, Y_dim)
        self.decoder = decoder.to(device)

        adam = torch.optim.Adam
        self.encoder_optim = adam(params=self.encoder.parameters(), lr=learning_rate)
        self.decoder_optim = adam(params=self.decoder.parameters(), lr=learning_rate)
        self.loss_func = torch.nn.MSELoss()
Example #11
0
    def __init__(self, input_size, output_size):
        """Create encoder, decoder, loss, optimizers and the sos/eos tensors.

        ``self.sos`` and ``self.eos`` are 1x1 ``LongTensor`` objects holding
        the values 0 and 1 respectively.
        """
        super(RNN, self).__init__()

        self.encoder = Encoder(input_size)
        self.decoder = Decoder(output_size)

        self.loss = nn.CrossEntropyLoss()
        self.encoder_optimizer = optim.Adam(self.encoder.parameters())
        self.decoder_optimizer = optim.Adam(self.decoder.parameters())

        # Built directly from nested lists instead of zero-fill + assignment.
        self.sos = torch.LongTensor([[0]])
        self.eos = torch.LongTensor([[1]])
 def __init__(self):
     """Set up the model, CTC loss, decoder, optimizer and LR scheduler.

     Hyper-parameters come from the module-level ``configs`` object
     (``lr``, ``l2_weight_decay``, ``lr_scheduler_patience``,
     ``lr_scheduler_factor``).
     """
     self.model = get_model().cuda()
     self.ctc_loss = CTCLoss(size_average=True)
     self.decoder = Decoder()

     # self.optimizer = optim.Adam(self.model.parameters(), lr=configs.lr, weight_decay=configs.l2_weight_decay)
     model_params = self.model.parameters()
     asgd_options = {
         'lr': configs.lr,
         'weight_decay': configs.l2_weight_decay,
     }
     self.optimizer = optim.ASGD(model_params, **asgd_options)

     # Scheduler runs in 'min' mode with the configured patience and factor.
     plateau_options = {
         'patience': configs.lr_scheduler_patience,
         'factor': configs.lr_scheduler_factor,
         'verbose': True,
     }
     self.lr_scheduler = lr_scheduler.ReduceLROnPlateau(
         self.optimizer, 'min', **plateau_options)

     # Training-progress bookkeeping.
     self.epoch_idx = 0
     self.min_avg_dist = 1000.
Example #13
0
    def __init__(self, temp, latent_num, latent_dim):
        """Build encoder, decoder and the relaxed categorical prior.

        :param temp: temperature; anything whose type is not exactly
            ``torch.Tensor`` is converted with ``torch.tensor``.
        :param latent_num: forwarded to ``Encoder`` and ``Decoder``.
        :param latent_dim: forwarded to ``Encoder`` and ``Decoder``; also the
            length of the all-ones ``probs`` vector of the prior.
        """
        super(Model, self).__init__()
        # NOTE(review): this is an exact type comparison, so Tensor
        # subclasses are also passed through torch.tensor -- confirm whether
        # isinstance() was intended.
        temp = temp if type(temp) == torch.Tensor else torch.tensor(temp)
        self.__temp = temp
        self.latent_num = latent_num
        self.latent_dim = latent_dim

        shared_kwargs = {'latent_num': latent_num, 'latent_dim': latent_dim}
        self.encoder = Encoder(**shared_kwargs)
        self.decoder = Decoder(**shared_kwargs)

        # The prior class is chosen from the concrete class's string form.
        uses_exp_prior = 'ExpTDModel' in str(self.__class__)
        prior_cls = (ExpRelaxedCategorical if uses_exp_prior
                     else dist.RelaxedOneHotCategorical)
        self.prior = prior_cls(temp, probs=torch.ones(latent_dim).cuda())
        self.initialize()

        self.softmax = nn.Softmax(dim=-1)
Example #14
0
    def __init__(self,
                 word2idx,
                 emb_size,
                 hidden_sizes,
                 dropout,
                 rnn_type="LSTM",
                 pretrained_embs=None,
                 fixed_embs=False,
                 tied=None):
        """Assemble encoder, decoder, stacked RNN and dropout.

        The decoder is built from ``len(word2idx)``, the last entry of
        ``hidden_sizes``, ``tied`` and the freshly created encoder.
        """
        super(RNNLanguageModel, self).__init__()

        vocab_len = len(word2idx)
        last_hidden = hidden_sizes[-1]

        encoder = Encoder(word2idx, emb_size, pretrained_embs, fixed_embs)
        self.encoder = encoder
        self.decoder = Decoder(vocab_len, last_hidden, tied, self.encoder)

        self.rnn = StackedRNN(rnn_type, emb_size, hidden_sizes, dropout)
        self.drop = nn.Dropout(dropout)
Example #15
0
def darnn(train_data: TrainingData,
          n_targets: int,
          encoder_hidden_size: int,
          decoder_hidden_size: int,
          T: int,
          learning_rate=0.002,
          batch_size=32):
    """Build the training configuration and the encoder/decoder network.

    :param train_data: object whose ``features`` attribute exposes ``shape``.
    :param n_targets: passed to the decoder as ``out_features``.
    :param encoder_hidden_size: hidden size for the encoder.
    :param decoder_hidden_size: hidden size for the decoder.
    :param T: passed to ``TrainingConfig``, the encoder and the decoder.
    :param learning_rate: learning rate of both Adam optimizers.
    :param batch_size: passed to ``TrainingConfig``.
    :return: tuple ``(train_cfg, da_rnn_net)``.
    """

    def _adam(module):
        # Optimise only the parameters that require gradients.
        trainable = [p for p in module.parameters() if p.requires_grad]
        return optim.Adam(params=trainable, lr=learning_rate)

    # 70% of the rows are used as the training size.
    train_size = int(train_data.features.shape[0] * 0.7)
    train_cfg = TrainingConfig(T, train_size, batch_size, nn.MSELoss())
    print(f"Training size: {train_cfg.train_size:d}.")

    enc_kwargs = {
        "input_size": train_data.features.shape[1],
        "hidden_size": encoder_hidden_size,
        "T": T,
    }
    encoder = Encoder(**enc_kwargs).to(device)

    dec_kwargs = {
        "encoder_hidden_size": encoder_hidden_size,
        "decoder_hidden_size": decoder_hidden_size,
        "T": T,
        "out_features": n_targets,
    }
    decoder = Decoder(**dec_kwargs).to(device)

    encoder_optimizer = _adam(encoder)
    decoder_optimizer = _adam(decoder)
    da_rnn_net = Darnn_Net(encoder, decoder, encoder_optimizer, decoder_optimizer)
    return train_cfg, da_rnn_net
    def __init__(self,
                 input_dim_encoder: int,
                 hidden_dim_encoder: int,
                 output_dim_encoder: int,
                 dropout_p_encoder: float,
                 output_dim_h_decoder: int,
                 nb_classes: int,
                 dropout_p_decoder: float,
                 max_out_t_steps: int) \
            -> None:
        """Baseline audio-captioning model (Clotho dataset): encoder + decoder.

        :param input_dim_encoder: Encoder input dimensionality.
        :type input_dim_encoder: int
        :param hidden_dim_encoder: Encoder hidden dimensionality.
        :type hidden_dim_encoder: int
        :param output_dim_encoder: Encoder output dimensionality; the\
                                   decoder input size is twice this value.
        :type output_dim_encoder: int
        :param dropout_p_encoder: Dropout for the encoder RNN.
        :type dropout_p_encoder: float
        :param output_dim_h_decoder: Hidden output dimensionality of the\
                                     decoder.
        :type output_dim_h_decoder: int
        :param nb_classes: Number of output classes.
        :type nb_classes: int
        :param dropout_p_decoder: Dropout for the decoder RNN.
        :type dropout_p_decoder: float
        :param max_out_t_steps: Maximum number of decoder output time-steps.
        :type max_out_t_steps: int
        """
        super().__init__()

        self.max_out_t_steps: int = max_out_t_steps

        encoder_kwargs = {
            'input_dim': input_dim_encoder,
            'hidden_dim': hidden_dim_encoder,
            'output_dim': output_dim_encoder,
            'dropout_p': dropout_p_encoder,
        }
        self.encoder: Module = Encoder(**encoder_kwargs)

        decoder_kwargs = {
            'input_dim': output_dim_encoder * 2,
            'output_dim': output_dim_h_decoder,
            'nb_classes': nb_classes,
            'dropout_p': dropout_p_decoder,
        }
        self.decoder: Module = Decoder(**decoder_kwargs)
Example #17
0
def TCHA(train_data: TrainData, n_targs: int, bidirec=False, num_layer=1, encoder_hidden_size=64, decoder_hidden_size=64,
         T=10, learning_rate=0.01, batch_size=128, interval=1, split=0.7, isMean=False):
    """Build the training configuration and the TCHA encoder/decoder network.

    :param train_data: object whose ``feats`` attribute exposes ``shape``.
    :param n_targs: passed to the decoder as ``out_feats``.
    :param bidirec: forwarded to both encoder and decoder.
    :param num_layer: forwarded to both encoder and decoder.
    :param encoder_hidden_size: hidden size for the encoder.
    :param decoder_hidden_size: hidden size for the decoder.
    :param T: passed to ``TrainConfig`` (twice), the encoder and the decoder.
    :param learning_rate: learning rate of both Adam optimizers.
    :param batch_size: passed to ``TrainConfig``.
    :param interval: passed to ``TrainConfig``.
    :param split: fraction of rows used as the training size.
    :param isMean: passed to ``TrainConfig``.
    :return: tuple ``(train_cfg, tcha)``.
    """

    def _adam(module):
        # Optimise only the parameters that require gradients.
        trainable = [p for p in module.parameters() if p.requires_grad]
        return optim.Adam(params=trainable, lr=learning_rate)

    train_size = int(train_data.feats.shape[0] * split)
    train_cfg = TrainConfig(T, train_size, batch_size, nn.MSELoss(), interval, T, isMean)
    logger.info(f"Training size: {train_cfg.train_size:d}.")

    # Options that encoder and decoder have in common.
    rnn_options = {"bidirec": bidirec, "num_layer": num_layer}

    enc_args = {"input_size": train_data.feats.shape[1], "hidden_size": encoder_hidden_size, "T": T,
                **rnn_options}
    encoder = Encoder(**enc_args).to(device)

    dec_args = {"encoder_hidden_size": encoder_hidden_size, "decoder_hidden_size": decoder_hidden_size, "T": T,
                "out_feats": n_targs, **rnn_options}
    decoder = Decoder(**dec_args).to(device)

    encoder_optimizer = _adam(encoder)
    decoder_optimizer = _adam(decoder)
    tcha = TCHA_Net(encoder, decoder, encoder_optimizer, decoder_optimizer)

    return train_cfg, tcha
Example #18
0
    def __init__(self,
                 word2idx,
                 emb_size,
                 hidden_sizes,
                 dropout,
                 rnn_type="LSTM",
                 pretrained_embs=None,
                 fixed_embs=False,
                 tied=None):
        """Assemble dropout, encoder, decoder and the two stacked RNNs.

        ``forward_lstm`` and ``backward_lstm`` are two separate ``StackedRNN``
        instances created with identical arguments.
        """
        super(BidirectionalLanguageModel, self).__init__()
        self.drop = nn.Dropout(dropout)

        vocab_len = len(word2idx)
        last_hidden = hidden_sizes[-1]

        encoder = Encoder(word2idx, emb_size, pretrained_embs, fixed_embs)
        self.encoder = encoder
        self.decoder = Decoder(vocab_len, last_hidden, tied, self.encoder)

        def _new_stack():
            # One independent stacked RNN per direction.
            return StackedRNN(rnn_type, emb_size, hidden_sizes, dropout)

        self.forward_lstm = _new_stack()
        self.backward_lstm = _new_stack()

        self.rnn_type = rnn_type
        self.hidden_sizes = hidden_sizes
        self.nlayers = len(hidden_sizes)
Example #19
0
    def __init__(self, embed_dim=300, hidden_dim=256, inner_dim=2048,
                 n_head=2, N_en=6, N_de=6, dropout=0.1,
                 vocab_size=5000, sos_idx=2, eos_idx=3, pad_idx=0, unk_idx=1,
                 max_src_len=100, max_tgt_len=20, args=False):
        """Build the transformer: attention layers, stacks, embedding, loss.

        ``args`` is accepted but not used inside this constructor.
        """
        super(Transformer, self).__init__()

        # Record the preferred device: first CUDA device if available.
        if torch.cuda.is_available():
            self.device = torch.device("cuda:0")
        else:
            self.device = torch.device("cpu")

        # Special-token indexes and sequence-length limits.
        self.sos = sos_idx
        self.eos = eos_idx
        self.pad = pad_idx
        self.unk = unk_idx
        self.max_src_len = max_src_len
        self.max_tgt_len = max_tgt_len
        self.scale = embed_dim ** 0.5

        # Base attention layers; both use the longer of the two length
        # limits and differ only in the final boolean argument.
        max_len = max(max_src_len, max_tgt_len)

        def _attn_layer(flag):
            return ATTNLayer(embed_dim, n_head, hidden_dim, inner_dim,
                             dropout, max_len, flag)

        attn_enc_layer = _attn_layer(False)
        attn_dec_layer = _attn_layer(True)

        # Main architecture (encoder and decoder stacks).
        self.encoder = Encoder(attn_enc_layer, N_en, True)
        self.decoder = Decoder(attn_dec_layer, N_de, True)

        # A single embedding table.
        self.embed = nn.Embedding(vocab_size, embed_dim)

        # Final fully connected layer mapping to vocabulary size.
        self.final = nn.Linear(embed_dim, vocab_size)

        # Loss: summed negative log-likelihood.
        self.NLL = nn.NLLLoss(reduction='sum')
Example #20
0
        def set_params(train_data, device, **da_rnn_kwargs):
            """Build the training configuration and the DA-RNN network.

            Reads ``time_step``, ``batch_size``, ``en_hidden_size``,
            ``de_hidden_size``, ``target_cols`` and ``learning_rate`` from
            ``da_rnn_kwargs`` and ``predict_size`` from the enclosing
            ``self``.  Returns ``(train_configs, da_rnn_net)``.
            """

            def _adam(module):
                # Optimise only the parameters that require gradients.
                trainable = [p for p in module.parameters()
                             if p.requires_grad]
                return optim.Adam(params=trainable,
                                  lr=da_rnn_kwargs["learning_rate"],
                                  betas=(0.9, 0.999),
                                  eps=1e-08)

            # 95% of the rows are used as the training size.
            train_size = int(train_data.shape[0] * 0.95)
            train_configs = TrainConfig(da_rnn_kwargs["time_step"],
                                        train_size,
                                        da_rnn_kwargs["batch_size"],
                                        nn.MSELoss())

            enc_kwargs = {
                "input_size": train_data.shape[1],
                "hidden_size": da_rnn_kwargs["en_hidden_size"],
                # The networks see time_step divided by the predict size.
                "time_step":
                int(da_rnn_kwargs["time_step"] / self.predict_size)
            }
            dec_kwargs = {
                "encoder_hidden_size": da_rnn_kwargs["en_hidden_size"],
                "decoder_hidden_size": da_rnn_kwargs["de_hidden_size"],
                # Same value as computed for the encoder.
                "time_step": enc_kwargs["time_step"],
                "out_feats": da_rnn_kwargs["target_cols"]
            }
            encoder = Encoder(**enc_kwargs).to(device)
            decoder = Decoder(**dec_kwargs).to(device)

            encoder_optimizer = _adam(encoder)
            decoder_optimizer = _adam(decoder)
            da_rnn_net = DaRnnNet(encoder, decoder, encoder_optimizer,
                                  decoder_optimizer)

            return train_configs, da_rnn_net
    def __init__(self, config, vocab_size, PAD_token=0):
        """Build encoders, prior/posterior nets, flows, decoder, optimizers.

        :param config: mapping read with the keys ``'maxlen'``, ``'clip'``,
            ``'lambda_gp'``, ``'temp'``, ``'emb_size'``, ``'n_hidden'``,
            ``'n_layers'``, ``'noise_radius'``, ``'z_size'``, ``'lr_ae'`` and
            ``'lr_gan_g'``.
        :param vocab_size: embedding table size; also passed to the decoder.
        :param PAD_token: ``padding_idx`` of the embedding table.
        """
        super(DFVAE, self).__init__()
        self.vocab_size = vocab_size
        self.maxlen = config['maxlen']
        self.clip = config['clip']
        self.lambda_gp = config['lambda_gp']
        self.temp = config['temp']

        emb_size = config['emb_size']
        # One embedding table, handed to both the utterance encoder and the
        # decoder.
        self.embedder = nn.Embedding(vocab_size, emb_size,
                                     padding_idx=PAD_token)

        n_hidden = config['n_hidden']
        self.utt_encoder = Encoder(self.embedder, emb_size, n_hidden, True,
                                   config['n_layers'], config['noise_radius'])
        self.context_encoder = ContextEncoder(self.utt_encoder,
                                              n_hidden * 2 + 2, n_hidden, 1,
                                              config['noise_radius'])

        z_size = config['z_size']
        self.prior_net = Variation(n_hidden, z_size)  # p(e|c)
        self.post_net = Variation(n_hidden * 3, z_size)  # q(e|c,x)

        # The prior/post "highway" linear layers stay disabled:
        #self.prior_highway = nn.Linear(config['n_hidden'], config['n_hidden'])
        #self.post_highway = nn.Linear(config['n_hidden'] * 3, config['n_hidden'])

        def _post_flow():
            return flow.myIAF(z_size, z_size * 2, n_hidden, 3)

        def _prior_flow():
            return flow.IAF(z_size, z_size * 2, n_hidden, 3)

        # Three flow steps per side, each registered under its own name.
        self.postflow1 = _post_flow()
        self.postflow2 = _post_flow()
        self.postflow3 = _post_flow()
        self.priorflow1 = _prior_flow()
        self.priorflow2 = _prior_flow()
        self.priorflow3 = _prior_flow()

        self.post_generator = nn_.SequentialFlow(self.postflow1,
                                                 self.postflow2,
                                                 self.postflow3)
        self.prior_generator = nn_.SequentialFlow(self.priorflow1,
                                                  self.priorflow2,
                                                  self.priorflow3)

        self.decoder = Decoder(self.embedder, emb_size, n_hidden + z_size,
                               vocab_size, n_layers=1)

        def _collect(modules):
            # Flatten the parameters of several modules into one list,
            # keeping the module order.
            return [param for module in modules
                    for param in module.parameters()]

        # The auto-encoder optimizer covers every module listed here
        # (highway layers excluded, see above).
        ae_modules = (self.context_encoder, self.post_net,
                      self.post_generator, self.decoder, self.prior_net,
                      self.prior_generator)
        self.optimizer_AE = optim.SGD(_collect(ae_modules),
                                      lr=config['lr_ae'])

        # The generator optimizer covers only the latent-variable networks.
        g_modules = (self.post_net, self.post_generator, self.prior_net,
                     self.prior_generator)
        self.optimizer_G = optim.RMSprop(_collect(g_modules),
                                         lr=config['lr_gan_g'])

        #self.optimizer_D = optim.RMSprop(self.discriminator.parameters(), lr=config['lr_gan_d'])

        self.lr_scheduler_AE = optim.lr_scheduler.StepLR(self.optimizer_AE,
                                                         step_size=10,
                                                         gamma=0.6)

        self.criterion_ce = nn.CrossEntropyLoss()
Example #22
0
    def __init__(self, config, vocab_size, PAD_token=0):
        """Build encoders, latent generators, decoder, critic and optimizers.

        :param config: mapping read with the keys ``'maxlen'``, ``'clip'``,
            ``'lambda_gp'``, ``'temp'``, ``'emb_size'``, ``'n_hidden'``,
            ``'n_layers'``, ``'noise_radius'``, ``'z_size'``, ``'lr_ae'``,
            ``'lr_gan_g'`` and ``'lr_gan_d'``.
        :param vocab_size: embedding table size; also passed to the decoder.
        :param PAD_token: ``padding_idx`` of the embedding table.
        """
        super(DialogWAE, self).__init__()
        self.vocab_size = vocab_size
        self.maxlen = config['maxlen']
        self.clip = config['clip']
        self.lambda_gp = config['lambda_gp']
        self.temp = config['temp']

        emb_size = config['emb_size']
        # One embedding table, handed to both the utterance encoder and the
        # decoder.
        self.embedder = nn.Embedding(vocab_size, emb_size,
                                     padding_idx=PAD_token)

        n_hidden = config['n_hidden']
        self.utt_encoder = Encoder(self.embedder, emb_size, n_hidden, True,
                                   config['n_layers'], config['noise_radius'])
        self.context_encoder = ContextEncoder(self.utt_encoder,
                                              n_hidden * 2 + 2, n_hidden, 1,
                                              config['noise_radius'])

        z_size = config['z_size']
        self.prior_net = Variation(n_hidden, z_size)  # p(e|c)
        self.post_net = Variation(n_hidden * 3, z_size)  # q(e|c,x)

        def _latent_mlp():
            # Linear -> BatchNorm -> ReLU, twice, then a final Linear; every
            # layer is z_size wide.
            return nn.Sequential(
                nn.Linear(z_size, z_size),
                nn.BatchNorm1d(z_size, eps=1e-05, momentum=0.1),
                nn.ReLU(),
                nn.Linear(z_size, z_size),
                nn.BatchNorm1d(z_size, eps=1e-05, momentum=0.1),
                nn.ReLU(),
                nn.Linear(z_size, z_size))

        self.post_generator = _latent_mlp()
        self.post_generator.apply(self.init_weights)

        self.prior_generator = _latent_mlp()
        self.prior_generator.apply(self.init_weights)

        self.decoder = Decoder(self.embedder, emb_size, n_hidden + z_size,
                               vocab_size, n_layers=1)

        disc_width = n_hidden * 2
        self.discriminator = nn.Sequential(
            nn.Linear(n_hidden + z_size, disc_width),
            nn.BatchNorm1d(disc_width, eps=1e-05, momentum=0.1),
            nn.LeakyReLU(0.2),
            nn.Linear(disc_width, disc_width),
            nn.BatchNorm1d(disc_width, eps=1e-05, momentum=0.1),
            nn.LeakyReLU(0.2),
            nn.Linear(disc_width, 1),
        )
        self.discriminator.apply(self.init_weights)

        def _collect(modules):
            # Flatten the parameters of several modules into one list,
            # keeping the module order.
            return [param for module in modules
                    for param in module.parameters()]

        ae_modules = (self.context_encoder, self.post_net,
                      self.post_generator, self.decoder)
        self.optimizer_AE = optim.SGD(_collect(ae_modules),
                                      lr=config['lr_ae'])

        g_modules = (self.post_net, self.post_generator, self.prior_net,
                     self.prior_generator)
        self.optimizer_G = optim.RMSprop(_collect(g_modules),
                                         lr=config['lr_gan_g'])

        self.optimizer_D = optim.RMSprop(self.discriminator.parameters(),
                                         lr=config['lr_gan_d'])

        self.lr_scheduler_AE = optim.lr_scheduler.StepLR(self.optimizer_AE,
                                                         step_size=10,
                                                         gamma=0.6)

        self.criterion_ce = nn.CrossEntropyLoss()
Example #23
0
    def __init__(self, config, api, PAD_token=0):
        """Assemble the CVAE sub-modules, its optimizer and its loss functions.

        Args:
            config: Hyper-parameter object. The attributes read here are
                ``emb_size``, ``sent_class``, ``sent_emb_size``, ``n_hidden``,
                ``bow_size``, ``dropout``, ``maxlen``, ``clip``, ``temp``,
                ``full_kl_step``, ``z_size``, ``init_weight``,
                ``bidirectional``, ``lr_ae``, ``n_layers`` and
                ``noise_radius``.
            api: Corpus object exposing ``vocab`` (sized) and ``rev_vocab``
                (indexed here with ``'<pad>'``, ``'<s>'`` and ``'</s>'``).
            PAD_token: Padding index used for the word embedding; it must
                equal ``api.rev_vocab['<pad>']``.

        Raises:
            AssertionError: If ``PAD_token`` does not match the vocabulary.
        """
        super(CVAE, self).__init__()
        assert api.rev_vocab['<pad>'] == PAD_token
        self.vocab = api.vocab
        self.vocab_size = len(self.vocab)
        self.embed_size = config.emb_size
        self.sent_class_size = config.sent_class
        self.sent_emb_size = config.sent_emb_size
        self.hidden_size = config.n_hidden
        self.bow_size = config.bow_size
        self.rev_vocab = api.rev_vocab
        self.dropout = config.dropout
        self.go_id = self.rev_vocab["<s>"]
        self.eos_id = self.rev_vocab["</s>"]
        self.maxlen = config.maxlen
        self.clip = config.clip
        self.temp = config.temp
        self.full_kl_step = config.full_kl_step
        self.z_size = config.z_size
        self.init_w = config.init_weight
        # Registered once here; the original re-created the same stateless
        # module a second time at the end of the constructor.
        self.softmax = nn.Softmax(dim=1)
        self.bidirectional = config.bidirectional
        self.lr_ae = config.lr_ae

        # A bidirectional encoder concatenates both directions, doubling the
        # width of its output.
        self.encoder_output_size = self.hidden_size * (1 +
                                                       int(self.bidirectional))
        # Title and first-sentence encodings are concatenated to form the
        # CVAE condition.
        self.prior_input_dim = self.encoder_output_size * 2
        # The posterior additionally sees the encoding of the target.
        self.post_input_dim = self.prior_input_dim + self.encoder_output_size
        self.decoder_input_size = self.prior_input_dim + self.z_size + self.sent_emb_size

        self.embedder = nn.Embedding(self.vocab_size,
                                     self.embed_size,
                                     padding_idx=PAD_token)
        self.sent_embedder = nn.Embedding(self.sent_class_size,
                                          self.sent_emb_size)
        # Shared encoder for the title and every poem line.
        self.seq_encoder = Encoder(embedder=self.embedder,
                                   input_size=config.emb_size,
                                   hidden_size=config.n_hidden,
                                   bidirectional=self.bidirectional,
                                   n_layers=config.n_layers,
                                   noise_radius=config.noise_radius)
        # Prior network.
        self.prior_net = Variation(self.sent_class_size,
                                   self.sent_emb_size,
                                   self.prior_input_dim,
                                   self.z_size,
                                   dropout_rate=self.dropout,
                                   init_weight=self.init_w)
        # Posterior network.
        self.post_net = Variation(self.sent_class_size,
                                  self.sent_emb_size,
                                  self.post_input_dim,
                                  self.z_size,
                                  dropout_rate=self.dropout,
                                  init_weight=self.init_w)
        # MLP used for the bag-of-words loss.
        self.bow_project = nn.Sequential(
            nn.Linear(self.decoder_input_size, self.bow_size), nn.LeakyReLU(),
            nn.Dropout(self.dropout), nn.Linear(self.bow_size,
                                                self.vocab_size))
        self.decoder = Decoder(embedder=self.embedder,
                               input_size=self.embed_size,
                               hidden_size=self.hidden_size,
                               vocab_size=self.vocab_size,
                               n_layers=1)
        # Maps [condition, z, sentiment embedding] to the decoder's initial
        # hidden state.
        self.init_decoder_hidden = nn.Sequential(
            nn.Linear(self.decoder_input_size, self.hidden_size),
            nn.BatchNorm1d(self.hidden_size, eps=1e-05, momentum=0.1),
            nn.LeakyReLU())

        # Custom initialisation is applied to these three sub-modules only.
        self.init_decoder_hidden.apply(self.init_weights)
        self.bow_project.apply(self.init_weights)
        self.post_net.apply(self.init_weights)

        # One optimizer over every parameter registered so far.
        self.optimizer_AE = optim.AdamW(self.parameters(), lr=self.lr_ae)

        self.criterion_ce = nn.CrossEntropyLoss()
        self.criterion_sent_lead = nn.CrossEntropyLoss()
Example #24
0
    def __init__(self,
                 embed_dim=300, hidden_dim=256, latent_dim=16,
                 teacher_forcing=0, dropout=0, n_direction=1, n_parallel=1,
                 max_src_len=100, max_tgt_len=20,
                 vocab_size=5000, sos_idx=2, eos_idx=3, pad_idx=0, unk_idx=1,
                 k=0.0025, x0=2500, af='logistic', attn=False,
                 args=False):
        """Create the embedding, encoder/decoder, latent projections and loss.

        Every hyper-parameter can be given as a keyword argument. When
        ``args`` is truthy, the values are instead read from its attributes
        (``vocab_size``, ``embedding_dimension``, ``hidden_dimension``,
        ``latent_dimension``, ``teacher_forcing``, ``dropout``,
        ``n_direction``, ``n_parallel``, ``max_src_length``,
        ``max_tgt_length``, ``sos_idx``, ``eos_idx``, ``pad_idx``,
        ``unk_idx``, ``k``, ``x0``, ``af``, ``attention``) and the keyword
        arguments are ignored.

        ``k``, ``x0`` and ``af`` are only stored here.
        NOTE(review): they look like KL-annealing settings -- confirm
        against the training code.
        """
        super().__init__()

        # An argument-parser namespace, when supplied, overrides the keywords.
        if args:
            vocab_size = args.vocab_size
            embed_dim = args.embedding_dimension
            hidden_dim = args.hidden_dimension
            latent_dim = args.latent_dimension
            teacher_forcing = args.teacher_forcing
            dropout = args.dropout
            n_direction = args.n_direction
            n_parallel = args.n_parallel
            max_src_len = args.max_src_length
            max_tgt_len = args.max_tgt_length
            sos_idx = args.sos_idx
            eos_idx = args.eos_idx
            pad_idx = args.pad_idx
            unk_idx = args.unk_idx
            k = args.k
            x0 = args.x0
            af = args.af
            attn = args.attention

        # Pick the device and matching tensor constructor.
        has_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda:0" if has_cuda else "cpu")
        self.tensor = torch.cuda.FloatTensor if has_cuda else torch.Tensor

        # Plain hyper-parameters.
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        # Number of hidden-state slots: directions times parallel layers.
        self.hidden_n = n_direction * n_parallel
        self.max_src_len = max_src_len
        self.max_tgt_len = max_tgt_len
        self.dropout = nn.Dropout(p=dropout)
        self.teacher_forcing = teacher_forcing

        # Variational settings.
        self.k = k
        self.x0 = x0
        self.af = af

        # Attention flag.
        self.attn = attn

        # Special token indices.
        self.sos = sos_idx
        self.eos = eos_idx
        self.pad = pad_idx
        self.unk = unk_idx

        # Token embedding, moved to the selected device right away.
        self.embed = nn.Embedding(vocab_size, embed_dim).to(self.device)

        # Recurrent building blocks, then the encoder/decoder wrapping them.
        enc_rnn = RNNLayer(embed_dim, hidden_dim, n_parallel)
        dec_rnn = RNNLayer(embed_dim, hidden_dim, n_parallel)
        self.encoder = Encoder(enc_rnn, 1)
        self.decoder = Decoder(dec_rnn, 1)

        # Hidden state <-> latent space projections.
        flat_hidden = hidden_dim * self.hidden_n
        self.hidden2mean = nn.Linear(flat_hidden, latent_dim)
        self.hidden2logv = nn.Linear(flat_hidden, latent_dim)
        self.latent2hidden = nn.Linear(latent_dim, flat_hidden)

        # Projection from decoder outputs to vocabulary scores.
        self.outputs2vocab = nn.Linear(hidden_dim * n_direction, vocab_size)

        # Summed negative log-likelihood that skips padding positions.
        self.NLL = nn.NLLLoss(reduction='sum', ignore_index=self.pad)
Example #25
0
    def __init__(self, config, api, PAD_token=0):
        """Assemble the CVAE sub-modules, its optimizer and its loss functions.

        Args:
            config: Hyper-parameter object. The attributes read here are
                ``maxlen``, ``clip``, ``temp``, ``full_kl_step``, ``z_size``,
                ``init_weight``, ``emb_size``, ``n_hidden``, ``n_layers``,
                ``noise_radius``, ``dropout`` and ``lr_ae``.
            api: Corpus object exposing ``vocab`` (sized) and ``rev_vocab``
                (indexed here with ``'<s>'`` and ``'</s>'``).
            PAD_token: Padding index passed to the word embedding.
        """
        super(CVAE, self).__init__()
        self.vocab = api.vocab
        self.vocab_size = len(self.vocab)
        self.rev_vocab = api.rev_vocab
        self.go_id = self.rev_vocab["<s>"]
        self.eos_id = self.rev_vocab["</s>"]
        self.maxlen = config.maxlen
        self.clip = config.clip
        self.temp = config.temp
        self.full_kl_step = config.full_kl_step
        self.z_size = config.z_size
        self.init_w = config.init_weight
        # Registered once here; the original re-created the same stateless
        # module a second time at the end of the constructor.
        self.softmax = nn.Softmax(dim=1)

        self.embedder = nn.Embedding(self.vocab_size,
                                     config.emb_size,
                                     padding_idx=PAD_token)
        # Shared bidirectional encoder for the title and every poem line.
        self.seq_encoder = Encoder(embedder=self.embedder,
                                   input_size=config.emb_size,
                                   hidden_size=config.n_hidden,
                                   bidirectional=True,
                                   n_layers=config.n_layers,
                                   noise_radius=config.noise_radius)

        # Prior network. Per the original author's note its input is the title
        # encoding plus the encoding of the previous line (4 * n_hidden).
        self.prior_net = Variation(config.n_hidden * 4,
                                   config.z_size,
                                   dropout_rate=config.dropout,
                                   init_weight=self.init_w)
        # Posterior network: the prior input plus another 2 * n_hidden for
        # the target sentence.
        self.post_net = Variation(config.n_hidden * 6,
                                  config.z_size,
                                  dropout_rate=config.dropout,
                                  init_weight=self.init_w)
        # MLP used for the bag-of-words loss.
        self.bow_project = nn.Sequential(
            nn.Linear(config.n_hidden * 4 + config.z_size, 400),
            nn.LeakyReLU(), nn.Dropout(config.dropout),
            nn.Linear(400, self.vocab_size))
        # Maps [condition, z] to the decoder's initial hidden state.
        self.init_decoder_hidden = nn.Sequential(
            nn.Linear(config.n_hidden * 4 + config.z_size, config.n_hidden),
            nn.BatchNorm1d(config.n_hidden, eps=1e-05, momentum=0.1),
            nn.LeakyReLU())

        # Custom initialisation is applied to these three sub-modules only.
        self.init_decoder_hidden.apply(self.init_weights)
        self.bow_project.apply(self.init_weights)
        self.post_net.apply(self.init_weights)

        self.decoder = Decoder(embedder=self.embedder,
                               input_size=config.emb_size,
                               hidden_size=config.n_hidden,
                               vocab_size=self.vocab_size,
                               n_layers=1)

        # Parameters trained by the auto-encoding objective, in the same
        # order as the original concatenation.
        ae_parameters = (list(self.seq_encoder.parameters()) +
                         list(self.prior_net.parameters()) +
                         list(self.post_net.parameters()) +
                         list(self.bow_project.parameters()) +
                         list(self.init_decoder_hidden.parameters()) +
                         list(self.decoder.parameters()))
        self.optimizer_AE = optim.Adam(ae_parameters, lr=config.lr_ae)

        self.criterion_ce = nn.CrossEntropyLoss()
        self.criterion_sent_lead = nn.CrossEntropyLoss()
Example #26
0
    return y_pred


if __name__ == "__main__":
    # Script entry point: rebuild the saved encoder/decoder from the files in
    # ./data, run `predict` over the NASDAQ-100 CSV and plot the prediction.
    # `debug` limits the CSV read below to the first 100 rows.
    debug = False
    # Not referenced in the visible part of this script.
    save_plots = False

    # Re-create the encoder from its saved constructor kwargs, then load its
    # weights. NOTE: torch.load may unpickle arbitrary objects -- only load
    # trusted checkpoint files.
    with open(os.path.join("data", "enc_kwargs.json"), "r") as fi:
        enc_kwargs = json.load(fi)
    enc = Encoder(**enc_kwargs)
    enc.load_state_dict(
        torch.load(os.path.join("data", "encoder.torch"), map_location=device))

    # Same procedure for the decoder.
    with open(os.path.join("data", "dec_kwargs.json"), "r") as fi:
        dec_kwargs = json.load(fi)
    dec = Decoder(**dec_kwargs)
    dec.load_state_dict(
        torch.load(os.path.join("data", "decoder.torch"), map_location=device))

    # Load the saved scaler (joblib also unpickles -- trusted files only),
    # read the raw data and preprocess it with "NDX" as the only target column.
    scaler = joblib.load(os.path.join("data", "scaler.pkl"))
    raw_data = pd.read_csv(os.path.join("data", "nasdaq100_padding.csv"),
                           nrows=100 if debug else None)
    targ_cols = ("NDX", )
    data = preprocess_data(raw_data, targ_cols, scaler)

    # Extra keyword arguments for `predict` are read from JSON.
    with open(os.path.join("data", "da_rnn_kwargs.json"), "r") as fi:
        da_rnn_kwargs = json.load(fi)
    final_y_pred = predict(enc, dec, data, **da_rnn_kwargs)

    # Plot the predicted series.
    plt.figure()
    plt.plot(final_y_pred, label='Predicted')
Example #27
0
def init_transformer_model(args,
                           vocab,
                           train=True,
                           is_factorized=False,
                           r=100):
    """
    Initiate a new transformer object.

    For the CNN feature extractors (``'emb_cnn'``, ``'vgg_cnn'``,
    ``'large_cnn'``) the encoder input width is derived from
    ``args.sample_rate`` and ``args.window_size`` and written back to
    ``args.dim_input`` (the caller's ``args`` object is mutated). For any
    other value of ``args.feat_extractor`` the existing ``args.dim_input``
    is used unchanged.

    Args:
        args: Namespace holding the model hyper-parameters
            (``feat_extractor``, ``num_enc_layers``, ``num_dec_layers``,
            ``num_heads``, ``dim_model``, ``dim_key``, ``dim_value``,
            ``dim_input``, ``dim_inner``, ``dim_emb``, ``src_max_len``,
            ``tgt_max_len``, ``dropout``, ``emb_trg_sharing``; plus
            ``sample_rate``/``window_size`` for the CNN extractors and
            ``feat`` for ``'vgg_cnn'``).
        vocab: Vocabulary object forwarded to ``Decoder`` and ``Transformer``.
        train: Forwarded to ``Transformer`` as its ``train`` argument.
        is_factorized: Forwarded to both ``Encoder`` and ``Decoder``.
        r: Forwarded to both ``Encoder`` and ``Decoder`` together with
            ``is_factorized``.

    Returns:
        The assembled ``Transformer`` instance.
    """
    feat_extractor = args.feat_extractor

    if feat_extractor == 'emb_cnn':
        hidden_size = int(
            math.floor((args.sample_rate * args.window_size) / 2) + 1)
        # Two successive reductions (offsets 41 and 21, each halved), then 32
        # output channels.
        hidden_size = int(math.floor(hidden_size - 41) / 2 + 1)
        hidden_size = int(math.floor(hidden_size - 21) / 2 + 1)
        hidden_size *= 32
        args.dim_input = hidden_size
    elif feat_extractor == 'vgg_cnn':
        hidden_size = int(
            math.floor((args.sample_rate * args.window_size) / 2) + 1)  # 161
        hidden_size = int(math.floor(int(math.floor(hidden_size) / 2) /
                                     2)) * 128  # divide by 2 for maxpooling
        args.dim_input = hidden_size
        # Log filter-bank features use a fixed input width instead.
        if args.feat == "logfbank":
            args.dim_input = 2560
    elif feat_extractor == 'large_cnn':
        hidden_size = int(
            math.floor((args.sample_rate * args.window_size) / 2) + 1)  # 161
        hidden_size = int(math.floor(int(math.floor(hidden_size) / 2) /
                                     2)) * 64  # divide by 2 for maxpooling
        args.dim_input = hidden_size
    else:
        print("the model is initialized without feature extractor")

    # Read after the branch above so a freshly computed dim_input is used.
    num_enc_layers = args.num_enc_layers
    num_dec_layers = args.num_dec_layers
    num_heads = args.num_heads
    dim_model = args.dim_model
    dim_key = args.dim_key
    dim_value = args.dim_value
    dim_input = args.dim_input
    dim_inner = args.dim_inner
    dim_emb = args.dim_emb
    src_max_len = args.src_max_len
    tgt_max_len = args.tgt_max_len
    dropout = args.dropout
    emb_trg_sharing = args.emb_trg_sharing

    encoder = Encoder(num_enc_layers,
                      num_heads=num_heads,
                      dim_model=dim_model,
                      dim_key=dim_key,
                      dim_value=dim_value,
                      dim_input=dim_input,
                      dim_inner=dim_inner,
                      src_max_length=src_max_len,
                      dropout=dropout,
                      is_factorized=is_factorized,
                      r=r)
    decoder = Decoder(vocab,
                      num_layers=num_dec_layers,
                      num_heads=num_heads,
                      dim_emb=dim_emb,
                      dim_model=dim_model,
                      dim_inner=dim_inner,
                      dim_key=dim_key,
                      dim_value=dim_value,
                      trg_max_length=tgt_max_len,
                      dropout=dropout,
                      emb_trg_sharing=emb_trg_sharing,
                      is_factorized=is_factorized,
                      r=r)
    # The same decoder is used for training and inference; the original's
    # `decoder = decoder if train else decoder` was a no-op and is dropped.
    model = Transformer(encoder,
                        decoder,
                        vocab,
                        feat_extractor=feat_extractor,
                        train=train)

    return model
Example #28
0
    def __init__(self,
                 num_steps,
                 x_size,
                 window_size,
                 z_what_size,
                 rnn_hidden_size,
                 encoder_net=None,
                 decoder_net=None,
                 predict_net=None,
                 embed_net=None,
                 bl_predict_net=None,
                 non_linearity='ReLU',
                 decoder_output_bias=None,
                 decoder_output_use_sigmoid=False,
                 use_masking=True,
                 use_baselines=True,
                 baseline_scalar=None,
                 scale_prior_mean=3.0,
                 scale_prior_sd=0.1,
                 pos_prior_mean=0.0,
                 pos_prior_sd=1.0,
                 likelihood_sd=0.3,
                 use_cuda=False):
        """Build the AIR model's networks, priors and initial-state parameters.

        Args:
            num_steps: Stored as ``self.num_steps``.
            x_size: Input size; the networks here consume ``x_size**2``
                features.
            window_size: Window size; the encoder/decoder operate on
                ``window_size**2`` features.
            z_what_size: Width of the ``z_what`` latent.
            rnn_hidden_size: Hidden size of both LSTM cells.
            encoder_net: Layer spec forwarded to ``Encoder``.
                ``None`` (the default) means an empty list.
            decoder_net: Layer spec forwarded to ``Decoder``.
                ``None`` means an empty list.
            predict_net: Layer spec forwarded to ``Predict``.
                ``None`` means an empty list.
            embed_net: Layer spec for the input-embedding MLPs. When ``None``
                an ``Identity`` is used and the RNN consumes the raw
                ``x_size**2`` input; otherwise ``embed_net[-1]`` is the
                embedded width.
            bl_predict_net: Layer spec for the baseline predictor; a final
                layer of size 1 is appended. ``None`` means an empty list.
            non_linearity: Name of the ``torch.nn`` class to use as the
                non-linearity (looked up with ``getattr``).
            decoder_output_bias: Forwarded to ``Decoder``.
            decoder_output_use_sigmoid: Forwarded to ``Decoder``.
            use_masking: Stored as ``self.use_masking``.
            use_baselines: Stored as ``self.use_baselines``.
            baseline_scalar: Stored as ``self.baseline_scalar``.
            scale_prior_mean: First component of the ``z_where`` prior
                location.
            scale_prior_sd: First component of the ``z_where`` prior scale.
            pos_prior_mean: Second and third components of the ``z_where``
                prior location.
            pos_prior_sd: Second and third components of the ``z_where``
                prior scale.
            likelihood_sd: Stored as ``self.likelihood_sd``.
            use_cuda: When true, the whole module is moved to the GPU.
        """
        super(AIR, self).__init__()

        # Give every instance its own fresh list instead of sharing one
        # mutable default object between all calls.
        encoder_net = [] if encoder_net is None else encoder_net
        decoder_net = [] if decoder_net is None else decoder_net
        predict_net = [] if predict_net is None else predict_net
        bl_predict_net = [] if bl_predict_net is None else bl_predict_net

        self.num_steps = num_steps
        self.x_size = x_size
        self.window_size = window_size
        self.z_what_size = z_what_size
        self.rnn_hidden_size = rnn_hidden_size
        self.use_masking = use_masking
        self.use_baselines = use_baselines
        self.baseline_scalar = baseline_scalar
        self.likelihood_sd = likelihood_sd
        self.use_cuda = use_cuda
        # A throw-away tensor whose dtype/device serve as keyword options
        # for creating new tensors later.
        prototype = torch.tensor(0.).cuda() if use_cuda else torch.tensor(0.)
        self.options = dict(dtype=prototype.dtype, device=prototype.device)

        self.z_pres_size = 1
        self.z_where_size = 3
        # By making these parameters they will be moved to the gpu
        # when necessary. (They are not registered with pyro for
        # optimization.)
        self.z_where_loc_prior = nn.Parameter(torch.FloatTensor(
            [scale_prior_mean, pos_prior_mean, pos_prior_mean]),
                                              requires_grad=False)
        self.z_where_scale_prior = nn.Parameter(torch.FloatTensor(
            [scale_prior_sd, pos_prior_sd, pos_prior_sd]),
                                                requires_grad=False)

        # Create nn modules.
        # The RNN sees the (optionally embedded) input plus the previous
        # z_where, z_what and z_pres.
        rnn_input_size = x_size**2 if embed_net is None else embed_net[-1]
        rnn_input_size += self.z_where_size + z_what_size + self.z_pres_size
        nl = getattr(nn, non_linearity)

        self.rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
        self.encode = Encoder(window_size**2, encoder_net, z_what_size, nl)
        self.decode = Decoder(window_size**2, decoder_net, z_what_size,
                              decoder_output_bias, decoder_output_use_sigmoid,
                              nl)
        self.predict = Predict(rnn_hidden_size, predict_net, self.z_pres_size,
                               self.z_where_size, nl)
        self.embed = Identity() if embed_net is None else MLP(
            x_size**2, embed_net, nl, True)

        # Baseline networks mirror the main RNN/embedding with their own
        # weights.
        self.bl_rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
        self.bl_predict = MLP(rnn_hidden_size, bl_predict_net + [1], nl)
        self.bl_embed = Identity() if embed_net is None else MLP(
            x_size**2, embed_net, nl, True)

        # Create parameters.
        self.h_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
        self.c_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
        self.bl_h_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
        self.bl_c_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
        self.z_where_init = nn.Parameter(torch.zeros(1, self.z_where_size))
        self.z_what_init = nn.Parameter(torch.zeros(1, self.z_what_size))

        if use_cuda:
            self.cuda()
Example #29
0
    def __init__(self, config, api, PAD_token=0, pretrain_weight=None):
        """Construct the PoemWAE networks, their three optimizers and the loss.

        Args:
            config: Hyper-parameter object (``maxlen``, ``clip``,
                ``lambda_gp``, ``lr_gan_g``, ``lr_gan_d``, ``n_d_loss``,
                ``temp``, ``init_weight``, ``emb_size``, ``n_hidden``,
                ``n_layers``, ``noise_radius``, ``z_size``, ``dropout`` and
                ``lr_ae`` are read here).
            api: Corpus object exposing ``vocab`` and ``rev_vocab``.
            PAD_token: Padding index for the word embedding.
            pretrain_weight: Optional NumPy array copied into the embedding
                weights.
        """
        super(PoemWAE, self).__init__()
        self.vocab = api.vocab
        self.vocab_size = len(self.vocab)
        self.rev_vocab = api.rev_vocab
        self.go_id = self.rev_vocab["<s>"]
        self.eos_id = self.rev_vocab["</s>"]
        self.maxlen = config.maxlen
        self.clip = config.clip
        self.lambda_gp = config.lambda_gp
        self.lr_gan_g = config.lr_gan_g
        self.lr_gan_d = config.lr_gan_d
        self.n_d_loss = config.n_d_loss
        self.temp = config.temp
        self.init_w = config.init_weight

        # Frequently used widths.
        z_dim = config.z_size
        # Context = title and previous sentence, each encoded by the
        # bidirectional encoder and concatenated: 4 * n_hidden.
        ctx_dim = config.n_hidden * 4
        # The posterior additionally sees the encoded target: 6 * n_hidden.
        post_dim = config.n_hidden * 6
        disc_dim = config.n_hidden * 2

        def make_latent_generator():
            # Linear -> BatchNorm -> ReLU twice, then a final Linear, all of
            # width z_dim.
            return nn.Sequential(
                nn.Linear(z_dim, z_dim),
                nn.BatchNorm1d(z_dim, eps=1e-05, momentum=0.1),
                nn.ReLU(),
                nn.Linear(z_dim, z_dim),
                nn.BatchNorm1d(z_dim, eps=1e-05, momentum=0.1),
                nn.ReLU(),
                nn.Linear(z_dim, z_dim))

        self.embedder = nn.Embedding(self.vocab_size,
                                     config.emb_size,
                                     padding_idx=PAD_token)
        if pretrain_weight is not None:
            pretrained = torch.from_numpy(pretrain_weight)
            self.embedder.weight.data.copy_(pretrained)

        # A single sequence encoder is shared by the title and the
        # neighbouring sentences.
        self.seq_encoder = Encoder(self.embedder, config.emb_size,
                                   config.n_hidden, True, config.n_layers,
                                   config.noise_radius)

        # p(e|c). Original author's note: the Gaussian-mixture variant
        # supplies its own prior_net in a subclass.
        self.prior_net = Variation(ctx_dim,
                                   z_dim,
                                   dropout_rate=config.dropout,
                                   init_weight=self.init_w)
        self.post_net = Variation(post_dim,
                                  z_dim,
                                  dropout_rate=config.dropout,
                                  init_weight=self.init_w)

        self.post_generator = make_latent_generator()
        self.post_generator.apply(self.init_weights)

        self.prior_generator = make_latent_generator()
        self.prior_generator.apply(self.init_weights)

        # Projects [context, z] to the decoder's initial hidden state.
        self.init_decoder_hidden = nn.Sequential(
            nn.Linear(ctx_dim + z_dim, ctx_dim),
            nn.BatchNorm1d(ctx_dim, eps=1e-05, momentum=0.1),
            nn.ReLU())

        # The decoder's hidden size matches the projection above.
        self.decoder = Decoder(self.embedder,
                               config.emb_size,
                               ctx_dim,
                               self.vocab_size,
                               n_layers=1)

        # Discriminator over [context, z].
        self.discriminator = nn.Sequential(
            nn.Linear(ctx_dim + z_dim, disc_dim),
            nn.BatchNorm1d(disc_dim, eps=1e-05, momentum=0.1),
            nn.LeakyReLU(0.2),
            nn.Linear(disc_dim, disc_dim),
            nn.BatchNorm1d(disc_dim, eps=1e-05, momentum=0.1),
            nn.LeakyReLU(0.2),
            nn.Linear(disc_dim, 1),
        )
        self.discriminator.apply(self.init_weights)

        # Three optimizers, one per training phase; note that they use
        # different algorithms and parameter sets.
        autoencoder_params = [
            *self.seq_encoder.parameters(),
            *self.post_net.parameters(),
            *self.post_generator.parameters(),
            *self.init_decoder_hidden.parameters(),
            *self.decoder.parameters(),
        ]
        self.optimizer_AE = optim.SGD(autoencoder_params, lr=config.lr_ae)

        generator_params = [
            *self.post_net.parameters(),
            *self.post_generator.parameters(),
            *self.prior_net.parameters(),
            *self.prior_generator.parameters(),
        ]
        self.optimizer_G = optim.RMSprop(generator_params, lr=self.lr_gan_g)

        self.optimizer_D = optim.RMSprop(self.discriminator.parameters(),
                                         lr=self.lr_gan_d)

        # Decay the auto-encoder learning rate by 0.8 every 10 scheduler steps.
        self.lr_scheduler_AE = optim.lr_scheduler.StepLR(self.optimizer_AE,
                                                         step_size=10,
                                                         gamma=0.8)

        self.criterion_ce = nn.CrossEntropyLoss()
def da_rnn(train_data: TrainData,
           n_targs: int,
           learning_rate=0.01,
           encoder_hidden_size=64,
           decoder_hidden_size=64,
           T=10,
           batch_size=128):
    """Create the training configuration and the DA-RNN network.

    The first 70% of the rows in ``train_data.feats`` form the training set.
    The constructor kwargs of the encoder and decoder are also written to
    ``data/enc_kwargs.json`` and ``data/dec_kwargs.json`` so the models can
    be rebuilt later.

    Args:
        train_data: Training data; ``feats.shape`` gives (rows, features).
        n_targs: Number of target columns (the decoder's ``out_feats``).
        learning_rate: Learning rate for both Adam optimizers.
        encoder_hidden_size: Hidden size of the encoder.
        decoder_hidden_size: Hidden size of the decoder.
        T: Passed to ``TrainConfig`` and to both networks.
        batch_size: Passed to ``TrainConfig``.

    Returns:
        A ``(TrainConfig, DaRnnNet)`` tuple.
    """

    def save_kwargs(file_name, kwargs):
        # Persist constructor kwargs as indented JSON under ./data.
        with open(os.path.join("data", file_name), "w") as fi:
            json.dump(kwargs, fi, indent=4)

    def trainable(module):
        # Only parameters that require gradients are optimised.
        return [p for p in module.parameters() if p.requires_grad]

    # TrainConfig, as quoted by the original author:
    #     class TrainConfig(typing.NamedTuple):
    #         T: int
    #         train_size: int
    #         batch_size: int
    #         loss_func: typing.Callable
    n_train_rows = int(train_data.feats.shape[0] * 0.7)
    training_configuration = TrainConfig(T, n_train_rows, batch_size,
                                         nn.MSELoss())

    logger.info(f"Training size: {training_configuration.train_size:d}.")

    # Encoder: build, move to the device, then record its kwargs.
    encoder_kwargs = {
        "input_size": train_data.feats.shape[1],
        "hidden_size": encoder_hidden_size,
        "T": T
    }
    encoder = Encoder(**encoder_kwargs).to(device)
    save_kwargs("enc_kwargs.json", encoder_kwargs)

    # Decoder: same procedure.
    decoder_kwargs = {
        "encoder_hidden_size": encoder_hidden_size,
        "decoder_hidden_size": decoder_hidden_size,
        "T": T,
        "out_feats": n_targs
    }
    decoder = Decoder(**decoder_kwargs).to(device)
    save_kwargs("dec_kwargs.json", decoder_kwargs)

    # One Adam optimizer per network.
    encoder_optimizer = optim.Adam(params=trainable(encoder),
                                   lr=learning_rate)
    decoder_optimizer = optim.Adam(params=trainable(decoder),
                                   lr=learning_rate)

    da_rnn_net = DaRnnNet(encoder, decoder, encoder_optimizer,
                          decoder_optimizer)

    return training_configuration, da_rnn_net