Beispiel #1
0
def build_encoder(opt, embeddings, embeddings_latt=False, feat_vec_size=512):
    """
    Various encoder dispatcher function.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.
        embeddings_latt: embeddings of senses if lattice is used. The value
            (including the default ``False``) is forwarded unchanged to
            ``TransformerEncoder``; the other encoder types ignore it.
        feat_vec_size: for adaptable feat_vec_size. Forwarded unchanged to
            ``TransformerEncoder``; the other encoder types ignore it.

    Returns:
        The encoder instance selected by ``opt.encoder_type``.
    """
    if opt.encoder_type == "transformer":
        # The former `embeddings_latt != False` test guarded two arms that
        # made this exact same call, so the comparison was dead code (and
        # could raise for array-like values). One unconditional call keeps
        # the behaviour for every input that previously worked.
        return TransformerEncoder(opt.enc_layers, opt.rnn_size,
                                  opt.heads, opt.transformer_ff,
                                  opt.dropout, embeddings,
                                  embeddings_latt, feat_vec_size)
    elif opt.encoder_type == "cnn":
        return CNNEncoder(opt.enc_layers, opt.rnn_size,
                          opt.cnn_kernel_width,
                          opt.dropout, embeddings)
    elif opt.encoder_type == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)
    else:
        # "rnn" or "brnn"
        return RNNEncoder(opt.rnn_type, opt.brnn, opt.enc_layers,
                          opt.rnn_size, opt.dropout, embeddings,
                          opt.bridge)
Beispiel #2
0
 def from_onmt(cls, onmt_encoder: OnmtTransformerEncoder,
              device: Optional[torch.device] = None):
     """Build an instance of ``cls`` from an OpenNMT transformer encoder.

     Args:
         onmt_encoder: source encoder; its ``embeddings``, ``transformer``
             (iterated layer by layer) and ``layer_norm`` attributes are read.
         device: optional target device. The encoder is moved there only
             when the device type contains ``'cuda'`` and CUDA is available.

     Returns:
         ``cls(embeddings, converted_layers, layer_norm)``.
     """
     wants_cuda = device is not None and 'cuda' in device.type
     if wants_cuda and torch.cuda.is_available():
         onmt_encoder.to(device)

     # Convert every OpenNMT layer with the layer class' own converter.
     converted = []
     for onmt_layer in onmt_encoder.transformer:
         converted.append(TransformerEncoderLayer.from_onmt(onmt_layer))

     return cls(onmt_encoder.embeddings, converted, onmt_encoder.layer_norm)
def build_encoder(opt, embeddings):
    """
    Construct the encoder named by ``opt.encoder_type``.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.

    Returns:
        A ``TransformerEncoder``, ``CNNEncoder`` or ``MeanEncoder`` for the
        matching type string, otherwise an ``RNNEncoder``.
    """
    kind = opt.encoder_type

    if kind == "transformer":
        return TransformerEncoder(
            opt.enc_layers, opt.enc_rnn_size, opt.heads,
            opt.transformer_ff, opt.dropout, embeddings,
            ablation=opt.ablation)

    if kind == "cnn":
        return CNNEncoder(
            opt.enc_layers, opt.enc_rnn_size, opt.cnn_kernel_width,
            opt.dropout, embeddings)

    if kind == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)

    # Every remaining type string ("rnn" or "brnn") maps to the RNN encoder.
    return RNNEncoder(
        opt.rnn_type, opt.brnn, opt.enc_layers, opt.enc_rnn_size,
        opt.dropout, embeddings, opt.bridge)
Beispiel #4
0
def build_encoder(opt, embeddings):
    """
    Construct the encoder named by ``opt.encoder_type``.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.

    Returns:
        A ``TransformerEncoder``, ``CNNEncoder`` or ``MeanEncoder`` for the
        matching type string, otherwise an ``RNNEncoder`` (whose settings
        are logged before it is built).
    """
    kind = opt.encoder_type

    if kind == "transformer":
        return TransformerEncoder(
            opt.enc_layers, opt.enc_rnn_size, opt.heads,
            opt.transformer_ff, opt.dropout, embeddings)

    if kind == "cnn":
        return CNNEncoder(
            opt.enc_layers, opt.enc_rnn_size, opt.cnn_kernel_width,
            opt.dropout, embeddings)

    if kind == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)

    # "rnn" or "brnn": gather the settings once so the log line and the
    # constructor call are guaranteed to describe the same values.
    rnn_type, bidirectional, num_layers = opt.rnn_type, opt.brnn, opt.enc_layers
    hidden_size, dropout = opt.enc_rnn_size, opt.dropout
    summary = ('RNNEncoder: type %s, bidir %d, layers %d, '
               'hidden size %d, dropout %.2f' %
               (rnn_type, bidirectional, num_layers, hidden_size, dropout))
    logger.info(summary)
    return RNNEncoder(rnn_type, bidirectional, num_layers, hidden_size,
                      dropout, embeddings, opt.bridge)
Beispiel #5
0
def build_encoder(opt, embeddings):
    """
    Construct the encoder pair selected by ``opt.encoder_type``.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.

    Returns:
        A 2-tuple. For "transformer", "cnn" and "mean" it is
        ``(encoder, None)``; for every other type it is
        ``(word_encoder, sen_encoder)``, two ``RNNEncoder`` instances.
    """
    kind = opt.encoder_type

    if kind == "transformer":
        transformer = TransformerEncoder(
            opt.enc_layers, opt.rnn_size, opt.heads, opt.transformer_ff,
            opt.dropout, embeddings)
        return transformer, None

    if kind == "cnn":
        cnn = CNNEncoder(
            opt.enc_layers, opt.rnn_size, opt.cnn_kernel_width,
            opt.dropout, embeddings)
        return cnn, None

    if kind == "mean":
        return MeanEncoder(opt.enc_layers, embeddings), None

    # "rnn" or "brnn": a word-level encoder over the vocabulary embeddings...
    word_encoder = RNNEncoder(
        opt.rnn_type, opt.brnn, opt.enc_layers, opt.rnn_size,
        opt.dropout, embeddings, None, opt.bridge)

    # ...and a sentence-level encoder that receives an explicit input size
    # instead of embeddings. The size is doubled for LSTM
    # (presumably hidden + cell state - TODO confirm against RNNEncoder).
    state_factor = 2 if opt.rnn_type == "LSTM" else 1
    emb_size = opt.enc_layers * opt.rnn_size * state_factor
    sen_encoder = RNNEncoder(
        opt.rnn_type, opt.brnn, opt.sen_enc_layers, opt.sen_rnn_size,
        opt.dropout, None, emb_size, opt.bridge)

    return word_encoder, sen_encoder
Beispiel #6
0
def build_encoder(opt, embeddings, main_encoder=None):
    """
    Construct the encoder named by ``opt.encoder_type``.

    Args:
        opt: the option in current environment. It is also handed to the
            transformer and RNN encoders as ``mtl_opt``.
        embeddings (Embeddings): vocab embeddings for this encoder.
        main_encoder: forwarded as ``main_encoder`` to the transformer and
            RNN encoders; unused by the CNN and mean encoders.

    Returns:
        The constructed encoder.
    """
    kind = opt.encoder_type

    if kind == "transformer":
        return TransformerEncoder(
            opt.enc_layers, opt.enc_rnn_size, opt.heads,
            opt.transformer_ff, opt.dropout, embeddings,
            main_encoder=main_encoder, mtl_opt=opt)

    if kind == "cnn":
        return CNNEncoder(
            opt.enc_layers, opt.enc_rnn_size, opt.cnn_kernel_width,
            opt.dropout, embeddings)

    if kind == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)

    # Fallback for every other type string.
    return RNNEncoder(
        opt.rnn_type, opt.brnn, opt.enc_layers, opt.enc_rnn_size,
        opt.dropout, embeddings, opt.bridge,
        main_encoder=main_encoder, mtl_opt=opt)
Beispiel #7
0
def get_onmt_transformer():
    """Assemble an ``NMTModel`` (transformer encoder + decoder) from FLAGS.

    All hyper-parameters are read from the module-level ``FLAGS`` object.

    Returns:
        ``NMTModel(encoder, decoder)``.
    """

    def _positional_embeddings(vocab_size, padding_idx):
        # Source and target embeddings differ only in vocabulary size and
        # padding index; everything else comes from the shared FLAGS.
        return Embeddings(
            word_vec_size=FLAGS.d_model,
            word_vocab_size=vocab_size,
            word_padding_idx=padding_idx,
            position_encoding=True,
            dropout=FLAGS.dropout
        )

    # Construction order (source embeddings, encoder, target embeddings,
    # decoder) is kept so parameter initialisation happens in the same order.
    src_embeddings = _positional_embeddings(FLAGS.input_dim,
                                            FLAGS.src_pad_idx)
    encoder = TransformerEncoder(
        num_layers=FLAGS.num_layers,
        d_model=FLAGS.d_model,
        heads=FLAGS.n_heads,
        d_ff=FLAGS.d_ff,
        dropout=FLAGS.dropout,
        embeddings=src_embeddings,
        attention_dropout=FLAGS.dropout,
        max_relative_positions=0,
    )

    tgt_embeddings = _positional_embeddings(FLAGS.output_dim,
                                            FLAGS.trg_pad_idx)
    decoder = TransformerDecoder(
        num_layers=FLAGS.num_layers,
        d_model=FLAGS.d_model,
        heads=FLAGS.n_heads,
        d_ff=FLAGS.d_ff,
        copy_attn=False,
        self_attn_type='scaled-dot',
        dropout=FLAGS.dropout,
        embeddings=tgt_embeddings,
        aan_useffn=False,
        alignment_heads=0,
        alignment_layer=0,
        full_context_alignment=False,
        attention_dropout=FLAGS.dropout,
        max_relative_positions=0,
    )

    return NMTModel(encoder, decoder)
Beispiel #8
0
def build_encoder(opt, embeddings):
    """
    Construct the encoder named by ``opt.encoder_type``.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.

    Returns:
        The constructed encoder. "hr_brnn", "seq_hr_brnn" and "tg_brnn"
        are always built bidirectional; any unrecognised type falls back to
        ``RNNEncoder``, bidirectional iff the type string contains 'brnn'.
    """
    kind = opt.encoder_type

    if kind == "transformer":
        return TransformerEncoder(
            opt.enc_layers, opt.enc_rnn_size, opt.heads,
            opt.transformer_ff, opt.dropout, embeddings)

    if kind == "cnn":
        return CNNEncoder(
            opt.enc_layers, opt.enc_rnn_size, opt.cnn_kernel_width,
            opt.dropout, embeddings)

    if kind == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)

    if kind == "hr_brnn":
        return HREncoder(
            opt.rnn_type, True, opt.enc_layers, opt.enc_rnn_size,
            opt.dropout, embeddings, opt.bridge)

    if kind == "seq_hr_brnn":
        return SeqHREncoder(
            opt.rnn_type, True, opt.enc_layers, opt.enc_rnn_size,
            opt.dropout, embeddings, opt.bridge)

    if kind == "tg_brnn":
        # Unlike the other recurrent variants, no bridge option is passed.
        return TGEncoder(
            opt.rnn_type, True, opt.enc_layers, opt.enc_rnn_size,
            opt.dropout, embeddings)

    # Plain recurrent encoder: direction is derived from the type string.
    is_bidirectional = 'brnn' in opt.encoder_type
    return RNNEncoder(
        opt.rnn_type, is_bidirectional, opt.enc_layers, opt.enc_rnn_size,
        opt.dropout, embeddings, opt.bridge,
        use_catSeq_dp=opt.use_catSeq_dp)
def build_encoder(opt, embeddings):
    """
    Construct the encoder named by ``opt.encoder_type``.

    Args:
        opt: the option in current environment. The tree-LSTM and GCN
            branches overwrite ``opt.brnn`` in place: it becomes True for
            the "rnn"-prefixed type names and False for the bare ones.
        embeddings (Embeddings): vocab embeddings for this encoder.

    Returns:
        The constructed encoder; unrecognised types fall back to
        ``RNNEncoder``.
    """
    kind = opt.encoder_type

    if kind == "transformer":
        return TransformerEncoder(
            opt.enc_layers, opt.enc_rnn_size, opt.heads,
            opt.transformer_ff, opt.dropout, embeddings)

    if kind == "cnn":
        return CNNEncoder(
            opt.enc_layers, opt.enc_rnn_size, opt.cnn_kernel_width,
            opt.dropout, embeddings)

    if kind == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)

    # NOTE(review): the tree-LSTM and GCN branches read opt.rnn_size while
    # every other branch reads opt.enc_rnn_size - confirm this is intended.
    if kind in ("rnntreelstm", "treelstm"):
        opt.brnn = (kind == "rnntreelstm")
        return TreeLSTMEncoder(
            opt.rnn_type, opt.brnn, opt.enc_layers, opt.rnn_size,
            opt.dropout, embeddings, opt.bridge, False)

    if kind in ("rnnbitreelstm", "bitreelstm"):
        opt.brnn = (kind == "rnnbitreelstm")
        return TreeLSTMEncoder(
            opt.rnn_type, opt.brnn, opt.enc_layers, opt.rnn_size,
            opt.dropout, embeddings, opt.bridge, True)

    if kind in ("rnngcn", "gcn"):
        opt.brnn = (kind == "rnngcn")
        return GCNEncoder(
            opt.rnn_type, opt.brnn, opt.enc_layers, opt.rnn_size,
            opt.dropout, embeddings, opt.bridge, opt.gcn_dropout,
            opt.gcn_edge_dropout, opt.n_gcn_layers, opt.activation,
            opt.highway)

    # "rnn" or "brnn"
    return RNNEncoder(
        opt.rnn_type, opt.brnn, opt.enc_layers, opt.enc_rnn_size,
        opt.dropout, embeddings, opt.bridge)
Beispiel #10
0
def build_encoder(opt, embeddings, fields=None):
    """
    Construct the encoder named by ``opt.encoder_type``.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.
        fields: accepted but not used by this function.

    Returns:
        The constructed encoder; unrecognised types fall back to
        ``RNNEncoder``.
    """
    kind = opt.encoder_type

    # 'simple_context_0' / 'simple_context_1' differ only in which context
    # is selected; the tuple position doubles as that index.
    for ctx_index, ctx_name in enumerate(('simple_context_0',
                                          'simple_context_1')):
        if kind == ctx_name:
            # bottom n-1 layers are shared
            return SimpleContextTransformerEncoder(
                opt.enc_layers - 1, opt.rnn_size, opt.heads,
                opt.transformer_ff, opt.dropout, embeddings,
                selected_ctx=ctx_index)

    if kind == "transformer":
        return TransformerEncoder(
            opt.enc_layers, opt.rnn_size, opt.heads, opt.transformer_ff,
            opt.dropout, embeddings)

    if kind == "cnn":
        return CNNEncoder(
            opt.enc_layers, opt.rnn_size, opt.cnn_kernel_width,
            opt.dropout, embeddings)

    if kind == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)

    # "rnn" or "brnn"
    return RNNEncoder(
        opt.rnn_type, opt.brnn, opt.enc_layers, opt.rnn_size,
        opt.dropout, embeddings, opt.bridge)
Beispiel #11
0
def build_base_model(model_opt,
                     fields,
                     gpu,
                     args,
                     checkpoint=None,
                     gpu_id=None):
    """Build a model from opts.

    Args:
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated. See
            :class:`onmt.utils.parse.ArgumentParser`.
        fields (dict[str, torchtext.data.Field]):
            `Field` objects for the model.
        gpu (bool): whether to use gpu.
        args: runtime options. It is forwarded to the decoder constructors;
            when a checkpoint is given, ``args.model_type`` selects which
            custom decoder replaces the stock one and ``args.data_type``
            selects fp16 conversion of the extracted weights.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        gpu_id (int or NoneType): Which GPU to use.

    Returns:
        the NMTModel.
    """

    # for back compat when attention_dropout was not defined
    try:
        model_opt.attention_dropout
    except AttributeError:
        model_opt.attention_dropout = model_opt.dropout

    # Build embeddings.
    if model_opt.model_type == "text" or model_opt.model_type == "vec":
        src_field = fields["src"]
        src_emb = build_embeddings(model_opt, src_field)
    else:
        # NOTE(review): `src_field` is never bound on this path, so the
        # share_embeddings assertion further down would raise NameError.
        src_emb = None

    # Build encoder.
    # Always a TransformerEncoder: there is no dispatch on an encoder type.
    encoder = TransformerEncoder.from_opt(model_opt, src_emb)

    # Build decoder.
    tgt_field = fields["tgt"]
    tgt_emb = build_embeddings(model_opt, tgt_field, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
            "preprocess with -share_vocab if you use share_embeddings"

        # Tie the target lookup table to the source one (same object).
        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    decoder = TransformerDecoder.from_opt(model_opt, tgt_emb, args)

    # Build NMTModel(= encoder + decoder).
    # Device choice: explicit GPU index, default CUDA device, or CPU.
    # (`not gpu_id` is also true for gpu_id == 0, but that case is already
    # handled by the first branch.)
    if gpu and gpu_id is not None:
        device = torch.device("cuda", gpu_id)
    elif gpu and not gpu_id:
        device = torch.device("cuda")
    elif not gpu:
        device = torch.device("cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # Build Generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        # Projection to vocabulary size, then Cast(torch.float32)
        # (presumably a dtype cast of the logits - Cast is defined
        # elsewhere), then the log-probability function chosen above.
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size,
                      len(fields["tgt"].base_field.vocab)),
            Cast(torch.float32), gen_func)
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding table.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        tgt_base_field = fields["tgt"].base_field
        vocab_size = len(tgt_base_field.vocab)
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = CopyGenerator(model_opt.dec_rnn_size, vocab_size, pad_idx)
        if model_opt.share_decoder_embeddings:
            generator.linear.weight = decoder.embeddings.word_lut.weight

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using customed layernorm
        # Renames `...layer_norm[_N].b_2` -> `.bias` and `.a_2` -> `.weight`.
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = {
            fix_key(k): v
            for k, v in checkpoint['model'].items()
        }
        # end of patch for backward compatibility

        # strict=False: missing or unexpected keys do not raise.
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)

        if args.model_type == 'decoder_ext':
            # Collect, per decoder layer, a flat list of 26 tensors:
            # layer_norm_1, self-attention (q/k/v weights, q/k/v biases,
            # output projection), layer_norm_2, context attention (same
            # layout), then the feed-forward layer norm, w_1 and w_2.
            # Linear weights are transposed and made contiguous; biases and
            # layer-norm parameters are taken as-is.
            # NOTE(review): FTDecoderLayer presumably consumes this list
            # positionally, so the entry order must not change - confirm.
            w = []
            for i in range(model_opt.dec_layers):
                w.append([
                    decoder.transformer_layers[i].layer_norm_1.weight.data,
                    decoder.transformer_layers[i].layer_norm_1.bias.data,
                    decoder.transformer_layers[i].self_attn.linear_query.
                    weight.data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].self_attn.linear_keys.weight.
                    data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].self_attn.linear_values.
                    weight.data.transpose(-1, -2).contiguous(), decoder.
                    transformer_layers[i].self_attn.linear_query.bias.data,
                    decoder.transformer_layers[i].self_attn.linear_keys.bias.
                    data, decoder.transformer_layers[i].self_attn.
                    linear_values.bias.data, decoder.transformer_layers[i].
                    self_attn.final_linear.weight.data.transpose(
                        -1, -2).contiguous(), decoder.transformer_layers[i].
                    self_attn.final_linear.bias.data,
                    decoder.transformer_layers[i].layer_norm_2.weight.data,
                    decoder.transformer_layers[i].layer_norm_2.bias.data,
                    decoder.transformer_layers[i].context_attn.linear_query.
                    weight.data.transpose(
                        -1, -2).contiguous(), decoder.transformer_layers[i].
                    context_attn.linear_keys.weight.data.transpose(
                        -1, -2).contiguous(), decoder.transformer_layers[i].
                    context_attn.linear_values.weight.data.transpose(
                        -1, -2).contiguous(), decoder.transformer_layers[i].
                    context_attn.linear_query.bias.data, decoder.
                    transformer_layers[i].context_attn.linear_keys.bias.data,
                    decoder.transformer_layers[i].context_attn.linear_values.
                    bias.data, decoder.transformer_layers[i].context_attn.
                    final_linear.weight.data.transpose(
                        -1, -2).contiguous(), decoder.transformer_layers[i].
                    context_attn.final_linear.bias.data, decoder.
                    transformer_layers[i].feed_forward.layer_norm.weight.data,
                    decoder.transformer_layers[i].feed_forward.layer_norm.bias.
                    data, decoder.transformer_layers[i].feed_forward.w_1.
                    weight.data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].feed_forward.w_1.bias.data,
                    decoder.transformer_layers[i].feed_forward.w_2.weight.data.
                    transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].feed_forward.w_2.bias.data
                ])
                # The inner loops below reuse the name `i`. The outer
                # iteration is unaffected (range supplies the next value),
                # but `i` no longer holds the layer index after this point.
                # Tensors are moved with .cuda() unconditionally, i.e.
                # regardless of the `gpu` / `gpu_id` arguments.
                for i in range(len(w[-1])):
                    w[-1][i] = w[-1][i].cuda()
                if args.data_type == 'fp16':
                    for i in range(len(w[-1])):
                        w[-1][i] = w[-1][i].half()
            # Swap the stock decoder layers for FTDecoderLayer instances
            # built from the extracted weights.
            decoder_layers = nn.ModuleList([
                FTDecoderLayer(model_opt.heads,
                               model_opt.dec_rnn_size // model_opt.heads, w[i],
                               args) for i in range(model_opt.dec_layers)
            ])
            model.decoder.transformer_layers = decoder_layers
        elif args.model_type == 'decoding_ext':
            # Replace the whole decoder with CustomDecoding, fed by weights
            # that DecodingWeights extracts from the checkpoint.
            vocab_size = len(fields["tgt"].base_field.vocab)
            bos_idx = fields["tgt"].base_field.vocab.stoi[
                fields["tgt"].base_field.init_token]
            eos_idx = fields["tgt"].base_field.vocab.stoi[
                fields["tgt"].base_field.eos_token]
            decoding_weights = DecodingWeights(model_opt.dec_layers,
                                               model_opt.dec_rnn_size,
                                               vocab_size, checkpoint)
            decoding_weights.to_cuda()
            if args.data_type == 'fp16':
                decoding_weights.to_half()
            model.decoder = CustomDecoding(model_opt.dec_layers,
                                           model_opt.heads,
                                           model_opt.dec_rnn_size //
                                           model_opt.heads,
                                           vocab_size,
                                           bos_idx,
                                           eos_idx,
                                           decoding_weights,
                                           args=args)
        elif args.model_type == 'torch_decoding' or args.model_type == 'torch_decoding_with_decoder_ext':
            # Same preparation as the 'decoding_ext' branch, but the decoder
            # is replaced with TorchDecoding instead.
            vocab_size = len(fields["tgt"].base_field.vocab)
            bos_idx = fields["tgt"].base_field.vocab.stoi[
                fields["tgt"].base_field.init_token]
            eos_idx = fields["tgt"].base_field.vocab.stoi[
                fields["tgt"].base_field.eos_token]
            decoding_weights = DecodingWeights(model_opt.dec_layers,
                                               model_opt.dec_rnn_size,
                                               vocab_size, checkpoint)
            decoding_weights.to_cuda()
            if args.data_type == 'fp16':
                decoding_weights.to_half()
            model.decoder = TorchDecoding(model_opt.dec_layers,
                                          model_opt.heads,
                                          model_opt.dec_rnn_size //
                                          model_opt.heads,
                                          vocab_size,
                                          bos_idx,
                                          eos_idx,
                                          decoding_weights,
                                          args=args)

    else:
        # No checkpoint: initialise parameters from scratch.
        # Uniform init in [-param_init, param_init] when param_init != 0.
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        # Glorot init runs after the uniform init and only touches
        # parameters with more than one dimension.
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        # Load pretrained word vectors where the module has embeddings.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec)

    # Attach the generator, then move the whole model to the chosen device.
    model.generator = generator
    model.to(device)
    # Half precision only for the fp16 + fusedadam combination.
    if model_opt.model_dtype == 'fp16' and model_opt.optim == 'fusedadam':
        model.half()
    return model
Beispiel #12
0
    '''out = q_layer(input_)
    loss = sum(target- out)
    print("out: ", out)
    print("loss: ", loss)#'''
    print("quantization test: ")
    print(quantize(nn.Sequential(linear), 2, error_checking=True))
    print("=" * 100)
    bigger_model = nn.Sequential(
        nn.Linear(3, 3), nn.Linear(2, 2),
        nn.Sequential(nn.Linear(4, 3),
                      nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))))

    print("bigger quantization test: ")
    print(quantize(bigger_model, 2, error_checking=True))
    print("=" * 100)
    transf = TransformerEncoder(2, 5, 5, 5, 0, PositionalEncoding(0, 10), 0)
    print("transformer test: ")
    print(transf)

    q_transf = quantize(transf, 2, error_checking=False)
    print(q_transf)
    print("=" * 100)
    print("Pruning test: ")
    linear = nn.Linear(4, 4, bias=False)
    print("Weights of linear layer: ", linear.weight)
    prune = PrunedLayer(linear, 0.25)
    print("Weights of pruned layer: ", prune.weight * prune.mask)
    input_p = torch.tensor([3, 6, 2, 1], dtype=torch.float32)
    target_p = torch.tensor([-3, -6, -2, -1], dtype=torch.float32)
    out_p = prune(input_p)
    loss_p = sum(target_p - out_p)