def build_encoder(opt, embeddings, embeddings_latt=False, feat_vec_size=512):
    """Various encoder dispatcher function.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.
        embeddings_latt: embeddings of senses if lattice is used, or False
            when no lattice is present.  # latt
        feat_vec_size: adaptable feature-vector size for the lattice path.  # latt
    """
    if opt.encoder_type == "transformer":
        # FIX: the original guarded on `embeddings_latt != False` but both the
        # if- and else-branches built the exact same TransformerEncoder call,
        # so the conditional was dead code; collapsed to a single call.
        return TransformerEncoder(opt.enc_layers, opt.rnn_size, opt.heads,
                                  opt.transformer_ff, opt.dropout,
                                  embeddings, embeddings_latt,
                                  feat_vec_size)  # latt
    elif opt.encoder_type == "cnn":
        return CNNEncoder(opt.enc_layers, opt.rnn_size,
                          opt.cnn_kernel_width, opt.dropout, embeddings)
    elif opt.encoder_type == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)
    else:
        # "rnn" or "brnn"
        return RNNEncoder(opt.rnn_type, opt.brnn, opt.enc_layers,
                          opt.rnn_size, opt.dropout, embeddings, opt.bridge)
def from_onmt(cls, onmt_encoder: OnmtTransformerEncoder, device: Optional[torch.device] = None):
    """Build an instance from an ONMT transformer encoder.

    If a CUDA device is requested and available, the source encoder is moved
    there before its layers are converted one by one.
    """
    wants_cuda = device is not None and 'cuda' in device.type
    if wants_cuda and torch.cuda.is_available():
        onmt_encoder.to(device)
    converted_layers = []
    for onmt_layer in onmt_encoder.transformer:
        converted_layers.append(TransformerEncoderLayer.from_onmt(onmt_layer))
    return cls(onmt_encoder.embeddings, converted_layers, onmt_encoder.layer_norm)
def build_encoder(opt, embeddings):
    """Various encoder dispatcher function.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.
    """
    enc_type = opt.encoder_type
    if enc_type == "transformer":
        # Only the transformer path consumes the ablation option.
        return TransformerEncoder(opt.enc_layers, opt.enc_rnn_size, opt.heads,
                                  opt.transformer_ff, opt.dropout, embeddings,
                                  ablation=opt.ablation)
    if enc_type == "cnn":
        return CNNEncoder(opt.enc_layers, opt.enc_rnn_size,
                          opt.cnn_kernel_width, opt.dropout, embeddings)
    if enc_type == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)
    # Fallback covers "rnn" and "brnn".
    return RNNEncoder(opt.rnn_type, opt.brnn, opt.enc_layers, opt.enc_rnn_size,
                      opt.dropout, embeddings, opt.bridge)
def build_encoder(opt, embeddings):
    """Various encoder dispatcher function.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.
    """
    enc_type = opt.encoder_type
    if enc_type == "transformer":
        return TransformerEncoder(opt.enc_layers, opt.enc_rnn_size, opt.heads,
                                  opt.transformer_ff, opt.dropout, embeddings)
    if enc_type == "cnn":
        return CNNEncoder(opt.enc_layers, opt.enc_rnn_size,
                          opt.cnn_kernel_width, opt.dropout, embeddings)
    if enc_type == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)
    # Default path ("rnn" or "brnn"): log the configuration before building.
    logger.info('RNNEncoder: type %s, bidir %d, layers %d, '
                'hidden size %d, dropout %.2f' %
                (opt.rnn_type, opt.brnn, opt.enc_layers,
                 opt.enc_rnn_size, opt.dropout))
    return RNNEncoder(opt.rnn_type, opt.brnn, opt.enc_layers, opt.enc_rnn_size,
                      opt.dropout, embeddings, opt.bridge)
def build_encoder(opt, embeddings):
    """Various encoder dispatcher function.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.

    Returns:
        A ``(word_encoder, sen_encoder)`` pair; ``sen_encoder`` is ``None``
        for every encoder type except the RNN fallback.
    """
    enc_type = opt.encoder_type
    if enc_type == "transformer":
        return TransformerEncoder(opt.enc_layers, opt.rnn_size, opt.heads,
                                  opt.transformer_ff, opt.dropout,
                                  embeddings), None
    if enc_type == "cnn":
        return CNNEncoder(opt.enc_layers, opt.rnn_size, opt.cnn_kernel_width,
                          opt.dropout, embeddings), None
    if enc_type == "mean":
        return MeanEncoder(opt.enc_layers, embeddings), None
    # "rnn" or "brnn": a word-level encoder feeding a sentence-level encoder.
    word_encoder = RNNEncoder(opt.rnn_type, opt.brnn, opt.enc_layers,
                              opt.rnn_size, opt.dropout, embeddings,
                              None, opt.bridge)
    # LSTM doubles the flattened state size — presumably because its state
    # carries both hidden and cell tensors; TODO confirm against RNNEncoder.
    state_mult = 2 if opt.rnn_type == "LSTM" else 1
    emb_size = opt.enc_layers * opt.rnn_size * state_mult
    sen_encoder = RNNEncoder(opt.rnn_type, opt.brnn, opt.sen_enc_layers,
                             opt.sen_rnn_size, opt.dropout, None,
                             emb_size, opt.bridge)
    return word_encoder, sen_encoder
def build_encoder(opt, embeddings, main_encoder=None):
    """Various encoder dispatcher function.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.
        main_encoder: optional encoder shared across tasks; forwarded to the
            encoder types that accept multi-task options.
    """
    enc_type = opt.encoder_type
    if enc_type == "transformer":
        built = TransformerEncoder(opt.enc_layers, opt.enc_rnn_size, opt.heads,
                                   opt.transformer_ff, opt.dropout, embeddings,
                                   main_encoder=main_encoder, mtl_opt=opt)
    elif enc_type == "cnn":
        built = CNNEncoder(opt.enc_layers, opt.enc_rnn_size,
                           opt.cnn_kernel_width, opt.dropout, embeddings)
    elif enc_type == "mean":
        built = MeanEncoder(opt.enc_layers, embeddings)
    else:
        built = RNNEncoder(opt.rnn_type, opt.brnn, opt.enc_layers,
                           opt.enc_rnn_size, opt.dropout, embeddings,
                           opt.bridge, main_encoder=main_encoder, mtl_opt=opt)
    return built
def get_onmt_transformer():
    """Assemble an ONMT ``NMTModel`` (transformer encoder + decoder) from FLAGS."""
    # Source-side embeddings with sinusoidal position encoding.
    source_embeddings = Embeddings(
        word_vec_size=FLAGS.d_model,
        word_vocab_size=FLAGS.input_dim,
        word_padding_idx=FLAGS.src_pad_idx,
        position_encoding=True,
        dropout=FLAGS.dropout,
    )
    encoder = TransformerEncoder(
        num_layers=FLAGS.num_layers,
        d_model=FLAGS.d_model,
        heads=FLAGS.n_heads,
        d_ff=FLAGS.d_ff,
        dropout=FLAGS.dropout,
        embeddings=source_embeddings,
        attention_dropout=FLAGS.dropout,
        max_relative_positions=0,
    )
    # Target-side embeddings mirror the source setup but use the output vocab.
    target_embeddings = Embeddings(
        word_vec_size=FLAGS.d_model,
        word_vocab_size=FLAGS.output_dim,
        word_padding_idx=FLAGS.trg_pad_idx,
        position_encoding=True,
        dropout=FLAGS.dropout,
    )
    decoder = TransformerDecoder(
        num_layers=FLAGS.num_layers,
        d_model=FLAGS.d_model,
        heads=FLAGS.n_heads,
        d_ff=FLAGS.d_ff,
        copy_attn=False,
        self_attn_type='scaled-dot',
        dropout=FLAGS.dropout,
        embeddings=target_embeddings,
        aan_useffn=False,
        alignment_heads=0,
        alignment_layer=0,
        full_context_alignment=False,
        attention_dropout=FLAGS.dropout,
        max_relative_positions=0,
    )
    return NMTModel(encoder, decoder)
def build_encoder(opt, embeddings):
    """Various encoder dispatcher function.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.
    """
    enc_type = opt.encoder_type
    if enc_type == "transformer":
        return TransformerEncoder(opt.enc_layers, opt.enc_rnn_size, opt.heads,
                                  opt.transformer_ff, opt.dropout, embeddings)
    if enc_type == "cnn":
        return CNNEncoder(opt.enc_layers, opt.enc_rnn_size,
                          opt.cnn_kernel_width, opt.dropout, embeddings)
    if enc_type == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)
    # The hierarchical / title-guided variants are always bidirectional.
    if enc_type == "hr_brnn":
        return HREncoder(opt.rnn_type, True, opt.enc_layers, opt.enc_rnn_size,
                         opt.dropout, embeddings, opt.bridge)
    if enc_type == "seq_hr_brnn":
        return SeqHREncoder(opt.rnn_type, True, opt.enc_layers,
                            opt.enc_rnn_size, opt.dropout, embeddings,
                            opt.bridge)
    if enc_type == "tg_brnn":
        return TGEncoder(opt.rnn_type, True, opt.enc_layers, opt.enc_rnn_size,
                         opt.dropout, embeddings)
    # Anything else becomes a plain RNN; bidirectional iff the type name
    # contains "brnn".
    return RNNEncoder(opt.rnn_type, 'brnn' in enc_type, opt.enc_layers,
                      opt.enc_rnn_size, opt.dropout, embeddings, opt.bridge,
                      use_catSeq_dp=opt.use_catSeq_dp)
def build_encoder(opt, embeddings):
    """Various encoder dispatcher function.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.

    Note:
        The tree-LSTM / GCN branches overwrite ``opt.brnn`` as a side effect,
        matching the original behavior.
    """
    enc_type = opt.encoder_type
    if enc_type == "transformer":
        return TransformerEncoder(opt.enc_layers, opt.enc_rnn_size, opt.heads,
                                  opt.transformer_ff, opt.dropout, embeddings)
    if enc_type == "cnn":
        return CNNEncoder(opt.enc_layers, opt.enc_rnn_size,
                          opt.cnn_kernel_width, opt.dropout, embeddings)
    if enc_type == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)
    if enc_type in ("rnntreelstm", "treelstm"):
        # "rnn"-prefixed variant additionally runs a bidirectional RNN.
        opt.brnn = enc_type == "rnntreelstm"
        return TreeLSTMEncoder(opt.rnn_type, opt.brnn, opt.enc_layers,
                               opt.rnn_size, opt.dropout, embeddings,
                               opt.bridge, False)
    if enc_type in ("rnnbitreelstm", "bitreelstm"):
        opt.brnn = enc_type == "rnnbitreelstm"
        return TreeLSTMEncoder(opt.rnn_type, opt.brnn, opt.enc_layers,
                               opt.rnn_size, opt.dropout, embeddings,
                               opt.bridge, True)
    if enc_type in ("rnngcn", "gcn"):
        opt.brnn = enc_type == "rnngcn"
        return GCNEncoder(opt.rnn_type, opt.brnn, opt.enc_layers,
                          opt.rnn_size, opt.dropout, embeddings, opt.bridge,
                          opt.gcn_dropout, opt.gcn_edge_dropout,
                          opt.n_gcn_layers, opt.activation, opt.highway)
    # "rnn" or "brnn"
    return RNNEncoder(opt.rnn_type, opt.brnn, opt.enc_layers,
                      opt.enc_rnn_size, opt.dropout, embeddings, opt.bridge)
def build_encoder(opt, embeddings, fields=None):
    """Various encoder dispatcher function.

    Args:
        opt: the option in current environment.
        embeddings (Embeddings): vocab embeddings for this encoder.
        fields: accepted for interface compatibility; unused here.
    """
    enc_type = opt.encoder_type
    if enc_type in ('simple_context_0', 'simple_context_1'):
        # Bottom n-1 layers are shared; the type suffix picks the context.
        which_ctx = 0 if enc_type == 'simple_context_0' else 1
        return SimpleContextTransformerEncoder(
            opt.enc_layers - 1, opt.rnn_size, opt.heads, opt.transformer_ff,
            opt.dropout, embeddings, selected_ctx=which_ctx)
    if enc_type == "transformer":
        return TransformerEncoder(opt.enc_layers, opt.rnn_size, opt.heads,
                                  opt.transformer_ff, opt.dropout, embeddings)
    if enc_type == "cnn":
        return CNNEncoder(opt.enc_layers, opt.rnn_size, opt.cnn_kernel_width,
                          opt.dropout, embeddings)
    if enc_type == "mean":
        return MeanEncoder(opt.enc_layers, embeddings)
    # "rnn" or "brnn"
    return RNNEncoder(opt.rnn_type, opt.brnn, opt.enc_layers, opt.rnn_size,
                      opt.dropout, embeddings, opt.bridge)
def build_base_model(model_opt, fields, gpu, args, checkpoint=None, gpu_id=None):
    """Build a model from opts.

    Args:
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated. See
            :class:`onmt.utils.parse.ArgumentParser`.
        fields (dict[str, torchtext.data.Field]):
            `Field` objects for the model.
        gpu (bool): whether to use gpu.
        args: runtime arguments; ``args.model_type`` selects the
            FasterTransformer decoder/decoding replacement path and
            ``args.data_type`` selects fp16 weight conversion.
        checkpoint: the model generated by train phase, or a resumed snapshot
            model from a stopped training.
        gpu_id (int or NoneType): Which GPU to use.

    Returns:
        the NMTModel.
    """
    # for back compat when attention_dropout was not defined
    try:
        model_opt.attention_dropout
    except AttributeError:
        model_opt.attention_dropout = model_opt.dropout

    # Build embeddings. Source embeddings only exist for text/vec inputs.
    if model_opt.model_type == "text" or model_opt.model_type == "vec":
        src_field = fields["src"]
        src_emb = build_embeddings(model_opt, src_field)
    else:
        src_emb = None

    # Build encoder.
    encoder = TransformerEncoder.from_opt(model_opt, src_emb)

    # Build decoder.
    tgt_field = fields["tgt"]
    tgt_emb = build_embeddings(model_opt, tgt_field, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
            "preprocess with -share_vocab if you use share_embeddings"
        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    decoder = TransformerDecoder.from_opt(model_opt, tgt_emb, args)

    # Build NMTModel(= encoder + decoder).
    if gpu and gpu_id is not None:
        device = torch.device("cuda", gpu_id)
    elif gpu and not gpu_id:
        device = torch.device("cuda")
    elif not gpu:
        device = torch.device("cpu")
    model = onmt.models.NMTModel(encoder, decoder)

    # Build Generator: plain log-softmax/sparsemax head, or a copy generator.
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_rnn_size,
                      len(fields["tgt"].base_field.vocab)),
            Cast(torch.float32),
            gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        tgt_base_field = fields["tgt"].base_field
        vocab_size = len(tgt_base_field.vocab)
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = CopyGenerator(model_opt.dec_rnn_size, vocab_size, pad_idx)
        if model_opt.share_decoder_embeddings:
            generator.linear.weight = decoder.embeddings.word_lut.weight

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward-compat for models using customed layernorm
        def fix_key(s):
            # Rename legacy layer-norm parameter keys (b_2/a_2 -> bias/weight).
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = {
            fix_key(k): v
            for k, v in checkpoint['model'].items()
        }
        # end of patch for backward compatibility
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)

        if args.model_type == 'decoder_ext':
            # Gather each decoder layer's tensors in the fixed order the
            # replacement FTDecoderLayer expects; linear weights are
            # transposed and made contiguous.
            w = []
            for i in range(model_opt.dec_layers):
                w.append([
                    decoder.transformer_layers[i].layer_norm_1.weight.data,
                    decoder.transformer_layers[i].layer_norm_1.bias.data,
                    decoder.transformer_layers[i].self_attn.linear_query.weight.data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].self_attn.linear_keys.weight.data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].self_attn.linear_values.weight.data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].self_attn.linear_query.bias.data,
                    decoder.transformer_layers[i].self_attn.linear_keys.bias.data,
                    decoder.transformer_layers[i].self_attn.linear_values.bias.data,
                    decoder.transformer_layers[i].self_attn.final_linear.weight.data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].self_attn.final_linear.bias.data,
                    decoder.transformer_layers[i].layer_norm_2.weight.data,
                    decoder.transformer_layers[i].layer_norm_2.bias.data,
                    decoder.transformer_layers[i].context_attn.linear_query.weight.data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].context_attn.linear_keys.weight.data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].context_attn.linear_values.weight.data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].context_attn.linear_query.bias.data,
                    decoder.transformer_layers[i].context_attn.linear_keys.bias.data,
                    decoder.transformer_layers[i].context_attn.linear_values.bias.data,
                    decoder.transformer_layers[i].context_attn.final_linear.weight.data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].context_attn.final_linear.bias.data,
                    decoder.transformer_layers[i].feed_forward.layer_norm.weight.data,
                    decoder.transformer_layers[i].feed_forward.layer_norm.bias.data,
                    decoder.transformer_layers[i].feed_forward.w_1.weight.data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].feed_forward.w_1.bias.data,
                    decoder.transformer_layers[i].feed_forward.w_2.weight.data.transpose(-1, -2).contiguous(),
                    decoder.transformer_layers[i].feed_forward.w_2.bias.data
                ])
                # NOTE(review): source indentation was lost; the `w[-1]`
                # subscripts strongly suggest these conversion loops sit
                # inside the per-layer loop (otherwise only the last layer
                # would be converted) — confirm against upstream.
                for i in range(len(w[-1])):
                    w[-1][i] = w[-1][i].cuda()
                if args.data_type == 'fp16':
                    for i in range(len(w[-1])):
                        w[-1][i] = w[-1][i].half()
            # Swap the PyTorch decoder layers for the fused implementation.
            decoder_layers = nn.ModuleList([
                FTDecoderLayer(model_opt.heads,
                               model_opt.dec_rnn_size // model_opt.heads,
                               w[i], args)
                for i in range(model_opt.dec_layers)
            ])
            model.decoder.transformer_layers = decoder_layers
        elif args.model_type == 'decoding_ext':
            # Replace the whole decoder with the custom decoding extension.
            vocab_size = len(fields["tgt"].base_field.vocab)
            bos_idx = fields["tgt"].base_field.vocab.stoi[
                fields["tgt"].base_field.init_token]
            eos_idx = fields["tgt"].base_field.vocab.stoi[
                fields["tgt"].base_field.eos_token]
            decoding_weights = DecodingWeights(model_opt.dec_layers,
                                               model_opt.dec_rnn_size,
                                               vocab_size, checkpoint)
            decoding_weights.to_cuda()
            if args.data_type == 'fp16':
                decoding_weights.to_half()
            model.decoder = CustomDecoding(model_opt.dec_layers,
                                           model_opt.heads,
                                           model_opt.dec_rnn_size //
                                           model_opt.heads,
                                           vocab_size, bos_idx, eos_idx,
                                           decoding_weights, args=args)
        elif args.model_type == 'torch_decoding' or args.model_type == 'torch_decoding_with_decoder_ext':
            # Same weight packaging, but decoding runs through TorchDecoding.
            vocab_size = len(fields["tgt"].base_field.vocab)
            bos_idx = fields["tgt"].base_field.vocab.stoi[
                fields["tgt"].base_field.init_token]
            eos_idx = fields["tgt"].base_field.vocab.stoi[
                fields["tgt"].base_field.eos_token]
            decoding_weights = DecodingWeights(model_opt.dec_layers,
                                               model_opt.dec_rnn_size,
                                               vocab_size, checkpoint)
            decoding_weights.to_cuda()
            if args.data_type == 'fp16':
                decoding_weights.to_half()
            model.decoder = TorchDecoding(model_opt.dec_layers,
                                          model_opt.heads,
                                          model_opt.dec_rnn_size //
                                          model_opt.heads,
                                          vocab_size, bos_idx, eos_idx,
                                          decoding_weights, args=args)
    else:
        # No checkpoint: random init (uniform and/or Glorot), then optional
        # pretrained embedding vectors.
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec)

    model.generator = generator
    model.to(device)
    # fusedadam requires the whole model in half precision.
    if model_opt.model_dtype == 'fp16' and model_opt.optim == 'fusedadam':
        model.half()
    return model
'''out = q_layer(input_)
loss = sum(target- out)
print("out: ", out)
print("loss: ", loss)#'''
# NOTE(review): the triple-quoted block above is disabled scratch code the
# author left in place; it references `q_layer`, `input_` and `target`, which
# must be defined earlier in the file — confirm before removing.

# Quantization smoke test on a single linear layer (2-bit, error checking on).
# `linear` and `quantize` come from earlier in the file.
print("quantization test: ")
print(quantize(nn.Sequential(linear), 2, error_checking=True))
print("=" * 100)

# Nested Sequential exercises recursive traversal inside `quantize`.
bigger_model = nn.Sequential(
    nn.Linear(3, 3),
    nn.Linear(2, 2),
    nn.Sequential(nn.Linear(4, 3),
                  nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))))
print("bigger quantization test: ")
print(quantize(bigger_model, 2, error_checking=True))
print("=" * 100)

# Quantize a small TransformerEncoder; printed before and after quantization.
transf = TransformerEncoder(2, 5, 5, 5, 0, PositionalEncoding(0, 10), 0)
print("transformer test: ")
print(transf)
q_transf = quantize(transf, 2, error_checking=False)
print(q_transf)
print("=" * 100)

# Pruning test: PrunedLayer with ratio 0.25 on a bias-free 4x4 linear layer;
# effective weights are the raw weights masked elementwise.
print("Pruning test: ")
linear = nn.Linear(4, 4, bias=False)
print("Weights of linear layer: ", linear.weight)
prune = PrunedLayer(linear, 0.25)
print("Weights of pruned layer: ", prune.weight * prune.mask)
input_p = torch.tensor([3, 6, 2, 1], dtype=torch.float32)
target_p = torch.tensor([-3, -6, -2, -1], dtype=torch.float32)
out_p = prune(input_p)
loss_p = sum(target_p - out_p)