def __init__(self, input_size, output_size, z_size, depth, params, embedding=None, highway=False,
             sbn=None, dropout=0., batchnorm=False, residual=None, bidirectional=False,
             n_mems=20, memory=None, targets=None, nheads=2):
    """Conditional co-attentive transformer link: encodes memory slots, decodes latent params."""
    super(ConditionalCoattentiveTransformerLink, self).__init__(
        input_size, output_size, z_size, depth, params, embedding, highway,
        dropout=dropout, batchnorm=batchnorm, residual=residual)
    # Per-slot width: the requested output width is split evenly across n_mems memory slots.
    output_size = int(output_size / n_mems)
    self.input_to_hidden = nn.Linear(input_size, output_size)
    self.transformer_enc = TransformerEncoder(
        SpecialTransformerEncoder(output_size, nheads, dim_feedforward=output_size * n_mems,
                                  dropout=dropout, activation='gelu', n_mems=n_mems),
        depth)
    self.transformer_dec = TransformerDecoder(
        TransformerDecoderLayer(output_size, nheads, dim_feedforward=output_size,
                                dropout=dropout, activation='gelu'),
        depth)
    self.memory, self.targets = memory, targets
    self.pe = PositionalEncoding(output_size)
    self.bn = nn.BatchNorm1d(z_size)
    self.n_mems, self.output_size = n_mems, output_size
    self.bidirectional = bidirectional
    if embedding is not None:
        self.sbn = sbn
        if sbn is not None:
            # Each expert of the structured Bayesian network owns an equal embedding slice.
            z_params_size = int(embedding.weight.shape[1] / sbn.n_experts)
        else:
            z_params_size = embedding.weight.shape[1]
        self.hidden_to_z_params = nn.ModuleDict(
            {param: nn.Linear(output_size, z_params_size) for param in params})
    else:
        self.hidden_to_z_params = nn.ModuleDict(
            {param: nn.Linear(output_size, z_size) for param in params})
    assert self.residual is None, "Named links still can't have residuals"
def __init__(self, input_size, output_size, z_size, depth, params, embedding=None, highway=False,
             sbn=None, dropout=0., batchnorm=False, residual=None, bidirectional=False,
             n_targets=20, nheads=2, sequence=None, memory=None, n_mems=None):
    """Co-attentive transformer link: learned target queries attend over encoded memory."""
    super(CoattentiveTransformerLink, self).__init__(
        input_size, output_size, z_size, depth, params, embedding, highway,
        dropout=dropout, batchnorm=batchnorm, residual=residual)
    # Output and latent widths must split evenly across the target slots.
    assert output_size % n_targets == 0
    assert z_size % n_targets == 0
    output_size = int(output_size / n_targets)
    # One learned query vector per target slot.
    self.target = nn.Embedding(n_targets, output_size).weight
    self.n_mems = n_mems
    self.memory = memory
    self.sequence = sequence
    self.input_to_hidden = nn.Linear(input_size, output_size)
    self.transformer_dec = TransformerDecoder(
        TransformerDecoderLayer(output_size, nheads, dim_feedforward=output_size * n_targets,
                                dropout=dropout, activation='gelu'),
        depth)
    self.transformer_enc = TransformerEncoder(
        TransformerEncoderLayer(output_size, nheads, dim_feedforward=output_size,
                                dropout=dropout, activation='gelu'),
        depth)
    self.pe = PositionalEncoding(output_size)
    self.bn = nn.BatchNorm1d(z_size)
    if embedding is not None:
        self.sbn = sbn
        if sbn is not None:
            # Split the embedding width across the softmax-of-experts members.
            z_params_size = int(embedding.weight.shape[1] / sbn.n_experts)
        else:
            z_params_size = embedding.weight.shape[1]
        self.hidden_to_z_params = nn.ModuleDict(
            {param: nn.Linear(output_size, z_params_size) for param in params})
    else:
        self.hidden_to_z_params = nn.ModuleDict(
            {param: nn.Linear(output_size, int(z_size / n_targets)) for param in params})
def __init__(self, args, word_emb, spo_conf):
    """Entity-relation extraction net using character embeddings only (word path disabled)."""
    print('drug extract using only char2v')
    super(ERENet, self).__init__()
    self.char_emb = nn.Embedding(num_embeddings=args.char_vocab_size,
                                 embedding_dim=args.char_emb_size,
                                 padding_idx=0)
    self.classes_num = len(spo_conf)
    # Two stacked sentence encoders: raw chars first, then their contextualized output.
    self.first_sentence_encoder = SentenceEncoder(args, args.char_emb_size)
    self.second_sentence_encoder = SentenceEncoder(args, args.hidden_size * 2)
    # Marks whether a token belongs to an entity (1) or not (0).
    self.token_entity_emb = nn.Embedding(num_embeddings=2,
                                         embedding_dim=args.hidden_size * 2,
                                         padding_idx=0)
    self.encoder_layer = TransformerEncoderLayer(args.hidden_size * 2, nhead=3)
    self.transformer_encoder = TransformerEncoder(self.encoder_layer, num_layers=1)
    self.LayerNorm = ConditionalLayerNorm(args.hidden_size * 2, eps=1e-12)
    # Pointer-network heads: predicate-object spans and subject spans.
    self.po_dense = nn.Linear(args.hidden_size * 2, self.classes_num * 2)
    self.subject_dense = nn.Linear(args.hidden_size * 2, 2)
    self.loss_fct = nn.BCEWithLogitsLoss(reduction='none')
def __init__(self, vocab_obj, args, device):
    """Attribute network with separate user/item attribute tables and a transformer attention stack."""
    super(_ATTR_NETWORK, self).__init__()
    self.m_device = device
    # Cardinalities from the vocabulary object.
    self.m_vocab_size = vocab_obj.vocab_size
    self.m_user_num = vocab_obj.user_num
    self.m_item_num = vocab_obj.item_num
    # Embedding / attention hyper-parameters.
    self.m_attr_embed_size = args.attr_emb_size
    self.m_user_embed_size = args.user_emb_size
    self.m_item_embed_size = args.item_emb_size
    self.m_attn_head_num = args.attn_head_num
    self.m_attn_layer_num = args.attn_layer_num
    self.m_attn_linear_size = args.attn_linear_size
    # Separate attribute tables for the user and item views.
    self.m_attr_user_embedding = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_attr_item_embedding = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_user_embedding = nn.Embedding(self.m_user_num, self.m_user_embed_size)
    self.m_item_embedding = nn.Embedding(self.m_item_num, self.m_item_embed_size)
    attn_layer = TransformerEncoderLayer(self.m_attr_embed_size, self.m_attn_head_num,
                                         self.m_attn_linear_size)
    self.m_attn = TransformerEncoder(attn_layer, self.m_attn_layer_num)
    self.m_attr_embedding_x = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.f_init_weight()
    # .to() moves parameters in place for nn.Module; the rebinding itself is a no-op.
    self = self.to(self.m_device)
def __init__(self, n_head, dim_feedforward, enc_layers, dropout_prob, fc_dim,
             video_embedding_dim, audio_embedding_dim=256):
    """Fuse transformer-encoded video features with audio features for 2-class output."""
    super().__init__()
    enc_layer = TransformerEncoderLayer(video_embedding_dim, n_head,
                                        dim_feedforward, dropout_prob)
    self.trans = TransformerEncoder(enc_layer, enc_layers)
    # Lets the network decide how much the audio branch contributes to the result.
    self.fc = nn.Linear(video_embedding_dim + audio_embedding_dim, fc_dim)
    self.dropout = nn.Dropout(dropout_prob)
    # Small joint layer over both branches to force a compact fused representation.
    self.concat_branches = nn.Linear(fc_dim * 2, 64)
    self.dropout_last = nn.Dropout(0.3)
    self.out = nn.Linear(64, 2)
def __init__(self, embeddings, feat_dim=512, max_word=32, multi_image=1, image_pe=True,
             layer_norm=False, num_enc_layers=6, num_dec_layers=6, teacher_forcing=False,
             image_model=None, image_pretrained=None, finetune_image=False,
             image_finetune_epoch=None, rl_opts=None, word_idxs=None, device='gpu',
             verbose=False):
    """Captioner that adds a transformer encoder on top of the base image-to-text model."""
    super(TransformerCaptioner, self).__init__(
        embeddings, feat_dim, max_word, multi_image, image_pe, layer_norm, num_dec_layers,
        teacher_forcing, image_model, image_pretrained, finetune_image, image_finetune_epoch,
        rl_opts, word_idxs, device, verbose)
    # Transformer encoder over image features (8 heads fixed).
    enc_layer = TransformerEncoderLayer(feat_dim, nhead=8)
    self.encoder = TransformerEncoder(enc_layer, num_layers=num_enc_layers)
def __init__(self, vocab_size: int, n_classes: int, model_dim=256, n_heads=4, n_layers=4,
             ff_dim=1024, dropout=0.1, activation='relu', padding_idx=0):
    """Transformer classifier: token embedding + positional encoding -> encoder -> linear head."""
    super().__init__()  # remember to call super
    self.padding_idx = padding_idx
    token_emb = nn.Embedding(num_embeddings=vocab_size, embedding_dim=model_dim,
                             padding_idx=padding_idx)
    # Embedding and positional encoding fused into one sequential module.
    self.embeddings = nn.Sequential(token_emb, PositionalEncoding(d_model=model_dim))
    layer = TransformerEncoderLayer(d_model=model_dim, nhead=n_heads,
                                    dim_feedforward=ff_dim, dropout=dropout,
                                    activation=activation)
    self.encoder = TransformerEncoder(layer, num_layers=n_layers)
    self.cls_proj = nn.Linear(model_dim, n_classes)
def __init__(self, config):
    """Sequence tagger: embedding -> transformer encoder / BiLSTM -> linear -> CRF."""
    super(TransformerEncoderModel, self).__init__(config)
    self.src_mask = None
    # Hoist hyper-parameters out of the nested config once.
    self._dim_embedding = self._config.model.dim_embedding
    self._dim_hidden = self._config.model.dim_hidden
    self._num_layer = self._config.model.nlayer
    self._num_vocab = self._config.data.num_vocab
    self._num_tag = self._config.data.num_tag
    self._dropout = self._config.learn.dropout_rate
    self.pos_encoder = PositionalEncoding(self._dim_embedding, self._dropout)
    self._embedding = nn.Embedding(self._num_vocab, self._dim_embedding)
    # NOTE(review): encoder runs at hidden width while the embedding emits embedding
    # width — presumably forward() bridges the two; confirm against forward().
    enc_layer = TransformerEncoderLayer(
        self._dim_hidden, self._config.model.nhead,
        self._config.model.nhid, self._config.learn.dropout_rate)
    self.transformer_encoder = TransformerEncoder(enc_layer, self._config.model.nlayer)
    # Halved hidden size so the two LSTM directions concatenate back to _dim_hidden.
    self.lstm = nn.LSTM(input_size=self._dim_embedding,
                        hidden_size=self._dim_hidden // 2,
                        bidirectional=True,
                        num_layers=self._num_layer,
                        dropout=self._dropout)
    self._hidden2label = nn.Linear(self._dim_hidden, self._num_tag)
    self._crf = CRF(self._num_tag)
def __init__(self, config, word_vocab, vocab_size, tag_num, vector_path):
    """Tagger with optional pretrained vectors, transformer encoder, BiLSTM, CRF and aux losses."""
    super(TransformerEncoderModel, self).__init__()
    # Auxiliary-loss switches and weights (denoising autoencoder, dice loss).
    self.use_dae = config.use_dae
    self.dae_lambda = config.dae_lambda
    self.use_dice = config.use_dice
    self.dice_lambda = config.dice_lambda
    self.vocab_size = vocab_size
    self.word_vocab = word_vocab
    self.tag_num = tag_num
    self.embedding_dim = config.embedding_dim
    self.hidden_dim = config.hidden_dim
    self.bidirectional = True
    self.num_layers = config.num_layers
    self.dropout = config.dropout
    self.drop = nn.Dropout(self.dropout)
    self.vector_path = vector_path
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(self.embedding_dim, self.dropout)
    enc_layer = TransformerEncoderLayer(self.embedding_dim, config.n_head,
                                        config.n_hid, self.dropout)
    self.transformer_encoder = TransformerEncoder(enc_layer, config.n_layers)
    self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
    if config.use_vectors:
        # Replace the randomly initialised table with pretrained word2vec weights.
        logger.info('Loading word vectors from {}...'.format(self.vector_path))
        embed_weights = load_word2vec(self.vector_path, self.word_vocab,
                                      embedding_dim=self.embedding_dim)
        logger.info('Finished load word vectors')
        self.embedding = nn.Embedding.from_pretrained(embed_weights, freeze=False).to(DEVICE)
    # Halved hidden size so the two directions concatenate back to hidden_dim.
    self.lstm = nn.LSTM(input_size=self.embedding_dim,
                        hidden_size=self.hidden_dim // 2,
                        bidirectional=self.bidirectional,
                        num_layers=1).to(DEVICE)
    self.linear = nn.Linear(self.hidden_dim, self.tag_num)
    # Language-model head used by the DAE auxiliary objective.
    self.lm_decoder = nn.Linear(self.hidden_dim, self.vocab_size)
    self.init_weights()
    self.crf_layer = CRF(self.tag_num)
    self.dice_loss = DiceLoss1()
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, d_model=64, nhead=2, num_encoder_layers=4, dim_feedforward=32,
             dropout=0.1, activation='relu'):
    """Transformer module for a protein sequence.

    :param d_model: dimension of the attention layers
    :param nhead: number of heads of the attention layer
    :param num_encoder_layers: number of transformer layers in the encoder
    :param dim_feedforward: dimension of the feedforward layers
    :param dropout: dropout rate
    :param activation: relu or gelu
    """
    super(ProteinTransformer, self).__init__()
    self.embedder = ProteinEmbedding(d_model)
    layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, activation)
    # Final LayerNorm applied after the encoder stack.
    self.encoder = TransformerEncoder(layer, num_encoder_layers, nn.LayerNorm(d_model))
    # Per-position projection back onto the amino-acid vocabulary.
    self.decoder = nn.Linear(d_model, len(seq_voc))
    self.src_mask = None
def __init__(self, args, word_emb, ent_conf, spo_conf):
    """Multi-head-selection ERE net mixing frozen word vectors with char embeddings."""
    print('mhs using only char2v+w2v mixed and word_emb is freeze ')
    super(ERENet, self).__init__()
    self.max_len = args.max_len
    # Frozen pretrained word embeddings, projected down to char width below.
    self.word_emb = nn.Embedding.from_pretrained(
        torch.tensor(word_emb, dtype=torch.float32), freeze=True, padding_idx=0)
    self.char_emb = nn.Embedding(num_embeddings=args.char_vocab_size,
                                 embedding_dim=args.char_emb_size,
                                 padding_idx=0)
    self.word_convert_char = nn.Linear(args.word_emb_size, args.char_emb_size, bias=False)
    self.classes_num = len(spo_conf)
    self.first_sentence_encoder = SentenceEncoder(args, args.char_emb_size)
    self.encoder_layer = TransformerEncoderLayer(args.hidden_size * 2, nhead=3)
    self.transformer_encoder = TransformerEncoder(self.encoder_layer, num_layers=1)
    self.LayerNorm = ConditionalLayerNorm(args.hidden_size * 2, eps=1e-12)
    # Entity CRF tagger and relation CRF tagger share the encoder output.
    self.ent_emission = nn.Linear(args.hidden_size * 2, len(ent_conf))
    self.ent_crf = CRF(len(ent_conf), batch_first=True)
    self.emission = nn.Linear(args.hidden_size * 2, len(spo_conf))
    self.crf = CRF(len(spo_conf), batch_first=True)
    self.loss_fct = nn.BCEWithLogitsLoss(reduction='none')
def __init__(self, args, char_emb, attribute_conf):
    """Lightweight baseline attribute extractor: char embedding -> LSTM encoder -> transformer -> pointers."""
    print('basline 模型轻量')
    super(AttributeExtractNet, self).__init__()
    if char_emb is not None:
        self.char_emb = nn.Embedding.from_pretrained(
            torch.tensor(char_emb, dtype=torch.float32), freeze=False, padding_idx=0)
    else:
        self.char_emb = nn.Embedding(num_embeddings=args.vocab_size,
                                     embedding_dim=args.char_emb_size,
                                     padding_idx=0)
    # 1 marks tokens inside an entity, 0 everything else.
    self.token_entity_emb = nn.Embedding(num_embeddings=2,
                                         embedding_dim=args.entity_emb_size,
                                         padding_idx=0)
    # LSTM-based sentence encoder followed by a transformer stack.
    self.sentence_encoder = SentenceEncoder(args, args.char_emb_size)
    self.transformer_encoder_layer = TransformerEncoderLayer(
        args.hidden_size * 2, args.nhead, dim_feedforward=args.dim_feedforward)
    self.transformer_encoder = TransformerEncoder(self.transformer_encoder_layer,
                                                  args.transformer_layers)
    self.classes_num = len(attribute_conf)
    # Pointer-network heads for attribute start/end positions.
    self.attr_start = nn.Linear(args.hidden_size * 2, self.classes_num)
    self.attr_end = nn.Linear(args.hidden_size * 2, self.classes_num)
def __init__(self, n_head, dim_feedforward, enc_layers, dropout_prob, fc_dim,
             video_embedding_dim, audio_embedding_dim=256):
    """Fuse transformer-encoded video features with audio features; single-logit output."""
    super().__init__()
    enc_layer = TransformerEncoderLayer(video_embedding_dim, n_head,
                                        dim_feedforward, dropout_prob)
    self.trans = TransformerEncoder(enc_layer, enc_layers)
    # Lets the network decide how much the audio branch contributes to the result.
    self.fc = nn.Linear(video_embedding_dim + audio_embedding_dim, fc_dim)
    self.dropout = nn.Dropout(dropout_prob)
    self.out = nn.Linear(fc_dim, 1)
def __init__(self, input_dim=13, num_classes=9, d_model=64, n_head=2, n_layers=5, d_inner=128,
             activation="relu", dropout=0.017998950510888446, max_len=200):
    """Positional-encoding transformer classifier for fixed-length feature sequences.

    :param input_dim: per-timestep feature dimension
    :param num_classes: number of output classes
    :param d_model: transformer model width
    :param n_head: attention heads
    :param n_layers: encoder layers
    :param d_inner: feedforward width
    :param activation: 'relu' or 'gelu'
    :param dropout: dropout rate
    :param max_len: maximum sequence length for the positional encoding
    """
    super(PETransformerModel, self).__init__()
    # Self-describing model name used for logging/checkpoints.
    self.modelname = f"PeTransformerEncoder_input-dim={input_dim}_num-classes={num_classes}_" \
                     f"d-model={d_model}_d-inner={d_inner}_n-layers={n_layers}_n-head={n_head}_" \
                     f"dropout={dropout}"
    encoder_layer = TransformerEncoderLayer(d_model, n_head, d_inner, dropout, activation)
    encoder_norm = LayerNorm(d_model)
    self.inlinear = Linear(input_dim, d_model)
    self.relu = ReLU()
    self.transformerencoder = TransformerEncoder(encoder_layer, n_layers, encoder_norm)
    self.flatten = Flatten()
    self.outlinear = Linear(d_model, num_classes)
    self.pe = PositionalEncoding(d_model, max_len=max_len)
    # BUG FIX: removed a stray, unterminated `"""` that followed this method; it
    # opened a string literal that would swallow subsequent code (or raise a
    # SyntaxError at end of file).
def __init__(self, vocab_obj, args, device):
    """Attribute network projecting user/item/attribute views into a shared output space."""
    super(_ATTR_NETWORK, self).__init__()
    self.m_device = device
    # Cardinalities from the vocabulary object.
    self.m_vocab_size = vocab_obj.vocab_size
    self.m_user_num = vocab_obj.user_num
    self.m_item_num = vocab_obj.item_num
    # Embedding / attention hyper-parameters.
    self.m_attr_embed_size = args.attr_emb_size
    self.m_user_embed_size = args.user_emb_size
    self.m_item_embed_size = args.item_emb_size
    self.m_attn_head_num = args.attn_head_num
    self.m_attn_layer_num = args.attn_layer_num
    self.m_output_hidden_size = args.output_hidden_size
    # Input-side attribute tables, one per view.
    self.m_input_attr_embedding_item = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_input_attr_embedding_user = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_attr_embedding = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_user_embedding = nn.Embedding(self.m_user_num, self.m_user_embed_size)
    self.m_item_embedding = nn.Embedding(self.m_item_num, self.m_item_embed_size)
    attn_layer = TransformerEncoderLayer(self.m_attr_embed_size, self.m_attn_head_num)
    self.m_attn = TransformerEncoder(attn_layer, self.m_attn_layer_num)
    # Project every view into the shared output space.
    self.m_user_linear = nn.Linear(self.m_user_embed_size, self.m_output_hidden_size)
    self.m_item_linear = nn.Linear(self.m_item_embed_size, self.m_output_hidden_size)
    self.m_attr_linear = nn.Linear(self.m_attr_embed_size, self.m_output_hidden_size)
    self.m_gamma = args.gamma
    # Scalar 1x1 gates on the per-view attribute scores.
    self.m_attr_item_linear = nn.Linear(1, 1)
    self.m_attr_user_linear = nn.Linear(1, 1)
    self.m_output_linear_user = nn.Linear(self.m_output_hidden_size, self.m_attr_embed_size)
    self.m_output_linear_item = nn.Linear(self.m_output_hidden_size, self.m_attr_embed_size)
    # .to() moves parameters in place for nn.Module; the rebinding itself is a no-op.
    self = self.to(self.m_device)
def __init__(self, opt, dictionary):
    """Context encoder: padded token embeddings fed through a transformer encoder stack."""
    super(TransformerAdapter, self).__init__()
    self.opt = opt
    self.pad_idx = dictionary[PAD_TOKEN]
    self.embeddings = nn.Embedding(len(dictionary), opt.embeddings_size,
                                   padding_idx=self.pad_idx)
    # Small-std normal init for the embedding table.
    nn.init.normal_(self.embeddings.weight, mean=0, std=0.05)
    # Feedforward width follows the usual 4x model-dim convention.
    self.encoder_layer = TransformerEncoderLayer(d_model=opt.transformer_dim,
                                                 nhead=opt.transformer_n_head,
                                                 dim_feedforward=4 * opt.transformer_dim)
    self.ctx_encoder = TransformerEncoder(self.encoder_layer, opt.n_layers)
def __init__(self, vocab_obj, args, device):
    """Attribute network with output-side attribute tables and per-view scalar gates."""
    super(_ATTR_NETWORK, self).__init__()
    self.m_device = device
    # Cardinalities from the vocabulary object.
    self.m_vocab_size = vocab_obj.vocab_size
    self.m_user_num = vocab_obj.user_num
    self.m_item_num = vocab_obj.item_num
    # Embedding / attention hyper-parameters.
    self.m_attr_embed_size = args.attr_emb_size
    self.m_user_embed_size = args.user_emb_size
    self.m_item_embed_size = args.item_emb_size
    self.m_attn_head_num = args.attn_head_num
    self.m_attn_layer_num = args.attn_layer_num
    self.m_attn_linear_size = args.attn_linear_size
    self.m_attr_embedding = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_user_embedding = nn.Embedding(self.m_user_num, self.m_user_embed_size)
    self.m_item_embedding = nn.Embedding(self.m_item_num, self.m_item_embed_size)
    attn_layer = TransformerEncoderLayer(self.m_attr_embed_size, self.m_attn_head_num,
                                         self.m_attn_linear_size)
    self.m_attn = TransformerEncoder(attn_layer, self.m_attn_layer_num)
    self.m_gamma = args.gamma
    # Output-side attribute tables (plus "_x" variants) for user and item views.
    self.m_output_attr_embedding_user = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_output_attr_embedding_item = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_output_attr_embedding_user_x = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_output_attr_embedding_item_x = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    # Scalar 1x1 gates combining exp/log/linear terms per view.
    self.m_exp_user = nn.Linear(1, 1)
    self.m_log_user = nn.Linear(1, 1)
    self.m_linear_user = nn.Linear(1, 1)
    self.m_exp_item = nn.Linear(1, 1)
    self.m_log_item = nn.Linear(1, 1)
    self.m_linear_item = nn.Linear(1, 1)
    self.m_attr_user = nn.Linear(self.m_attr_embed_size, 4)
    self.m_attr_item = nn.Linear(self.m_attr_embed_size, 4)
    self.m_bias_tf = nn.Linear(1, 1, bias=False)
    self.f_init_weight()
    # .to() moves parameters in place for nn.Module; the rebinding itself is a no-op.
    self = self.to(self.m_device)
def __init__(self, vocab, label_map):
    """Single-layer transformer classifier over a 200-dim embedding."""
    super().__init__()
    self.embedding = nn.Embedding(len(vocab), 200)
    self.pos_encode = PositionalEncoding(200)
    enc_layer = TransformerEncoderLayer(200, 8, dim_feedforward=200)
    # NOTE(review): attribute keeps the original "tansformer" spelling —
    # forward() elsewhere references it, so renaming would break callers.
    self.tansformer = TransformerEncoder(enc_layer, num_layers=1)
    # Two-layer classification head.
    self.line1 = nn.Linear(200, 100)
    self.line2 = nn.Linear(100, len(label_map))
    self.vocab = vocab
def __init__(self, embedding, input_dim, head_size, feed_forward_dim, dropout, num_layers):
    """Wrap a shared embedding with a stacked transformer encoder."""
    super().__init__()
    self.embedding = embedding
    self.dropout = dropout
    enc_layer = TransformerEncoderLayer(d_model=input_dim,
                                        nhead=head_size,
                                        dim_feedforward=feed_forward_dim,
                                        dropout=dropout)
    self.encoder = TransformerEncoder(encoder_layer=enc_layer, num_layers=num_layers)
def __init__(self, vocab_size, nb_labels, in_dim=512, num_head=8, num_layers=4, dropout=0.25):
    """Transformer tagger: embedding -> positional encoding -> encoder -> tag projection."""
    super(Transformer, self).__init__()
    # Feedforward width is 8x the base embedding dimension.
    h_dim = 8 * in_dim
    self.emb = nn.Embedding(vocab_size, in_dim)
    # NOTE(review): the encoder runs at 2*in_dim while the embedding emits in_dim —
    # presumably forward() concatenates/projects up to 2*in_dim; confirm there.
    enc_layer = TransformerEncoderLayer(2 * in_dim, num_head, h_dim, dropout)
    self.transformer_encoder = TransformerEncoder(enc_layer, num_layers)
    self.positional_encoder = PositionalEncoding(2 * in_dim)
    self.in_dim = in_dim
    self.h_dim = h_dim
    self.hidden2tag = nn.Linear(2 * in_dim, nb_labels)
def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
    """Transformer language model: embedding -> positional encoding -> encoder -> vocab decoder."""
    super(TransformerModel, self).__init__()
    self.model_type = 'Transformer'
    self.src_mask = None  # causal mask is built lazily at first use
    self.pos_encoder = PositionalEncoding(ninp, dropout)
    layer = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_encoder = TransformerEncoder(layer, nlayers)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.ninp = ninp
    self.decoder = nn.Linear(ninp, ntoken)
    self.init_weights()
def __init__(self, vocab_obj, args, device):
    """Attribute network with segment and popularity embeddings (BERT-style extras)."""
    super(_ATTR_NETWORK, self).__init__()
    self.m_device = device
    # Cardinalities from the vocabulary object.
    self.m_vocab_size = vocab_obj.vocab_size
    self.m_user_num = vocab_obj.user_num
    self.m_item_num = vocab_obj.item_num
    # Embedding / attention hyper-parameters.
    self.m_attr_embed_size = args.attr_emb_size
    self.m_user_embed_size = args.user_emb_size
    self.m_item_embed_size = args.item_emb_size
    self.m_attn_head_num = args.attn_head_num
    self.m_attn_layer_num = args.attn_layer_num
    self.m_attn_linear_size = args.attn_linear_size
    self.m_user_embedding = nn.Embedding(self.m_user_num, self.m_user_embed_size)
    self.m_item_embedding = nn.Embedding(self.m_item_num, self.m_item_embed_size)
    # Attribute tables sized to the user-embedding width for both views.
    self.m_attr_embedding_user = nn.Embedding(self.m_vocab_size, self.m_user_embed_size)
    self.m_attr_embedding_item = nn.Embedding(self.m_vocab_size, self.m_user_embed_size)
    attn_layer = TransformerEncoderLayer(self.m_attr_embed_size, self.m_attn_head_num,
                                         self.m_attn_linear_size)
    self.m_attn = TransformerEncoder(attn_layer, self.m_attn_layer_num)
    self.m_gamma = args.gamma
    # cls-style table: vocab_size + 1 slots, index 0 reserved as pad id.
    self.m_attr_embedding_x = nn.Embedding(self.m_vocab_size + 1, self.m_attr_embed_size)
    seg_num = 2
    self.m_seg_embedding = nn.Embedding(seg_num, self.m_attr_embed_size)
    max_seq_len = args.max_seq_length
    # +2 leaves room for pad/cls positions in the popularity table.
    self.m_pop_embedding = nn.Embedding(max_seq_len + 2, self.m_attr_embed_size)
    self.f_init_weight()
    # .to() moves parameters in place for nn.Module; the rebinding itself is a no-op.
    self = self.to(self.m_device)
def __init__(self, vocab_size: int, max_seq_len: int, d_model: int, nhead: int,
             num_layers: int, dropout: float, mode: str):
    """Pre-norm transformer encoder over token embeddings with positional encoding."""
    super().__init__()
    self.d_model = d_model
    self.max_seq_len = max_seq_len
    self.input_embedding = nn.Embedding(vocab_size, d_model)
    self.pos_encoder = PositionalEncoding(dropout, d_model)
    # norm_first=True: layer-norm before attention/FFN; 4x feedforward width.
    layer = TransformerEncoderLayer(d_model, nhead, d_model * 4, dropout, norm_first=True)
    self.encoder = TransformerEncoder(layer, num_layers, nn.LayerNorm(d_model))
    self.mode = mode
def __init__(self, embedding, args):
    """Stacked transformer encoder configured from `args`, reusing a shared `embedding`.

    :param embedding: pre-built embedding module shared with the caller
    :param args: namespace with input_dim, head_size, feed_forward_dim, num_layers, dropout
    """
    super().__init__()
    self.embedding = embedding
    self.input_dim = args.input_dim
    # BUG FIX: a trailing comma previously made self.head_size a 1-tuple
    # (`args.head_size,`), which TransformerEncoderLayer rejects as nhead.
    self.head_size = args.head_size
    self.feed_forward_dim = args.feed_forward_dim
    # BUG FIX: the value was stored under the misspelling `num_layzers` while
    # `self.num_layers` was read below, raising AttributeError at construction.
    self.num_layers = args.num_layers
    self.num_layzers = self.num_layers  # backward-compat alias for the old (misspelled) name
    self.dropout = args.dropout
    layer = TransformerEncoderLayer(d_model=self.input_dim,
                                    nhead=self.head_size,
                                    dim_feedforward=self.feed_forward_dim,
                                    dropout=self.dropout)
    self.encoder = TransformerEncoder(encoder_layer=layer, num_layers=self.num_layers)
def __init__(self, args, spo_conf):
    """Relation net over BERT-width features with object start/end pointer heads."""
    super(RelNET, self).__init__()
    # 0/1 flag embedding marking entity membership per token.
    self.token_entity_emb = nn.Embedding(num_embeddings=2,
                                         embedding_dim=args.bert_hidden_size,
                                         padding_idx=0)
    self.encoder_layer = TransformerEncoderLayer(args.bert_hidden_size, args.nhead)
    self.transformer_encoder = TransformerEncoder(self.encoder_layer, args.transformer_layers)
    self.classes_num = len(spo_conf)
    # Object start (ob1) / end (ob2) pointer heads.
    self.ob1 = nn.Linear(args.bert_hidden_size, self.classes_num)
    self.ob2 = nn.Linear(args.bert_hidden_size, self.classes_num)
def __init__(self, config, ntoken, ntag, vectors):
    """CNN + transformer + BiLSTM tagger with three mutually-overwriting embedding sources."""
    super(CNN_TransformerEncoderModel, self).__init__()
    self.config = config
    self.src_mask = None
    self.vectors = vectors
    self.sizes = [3, 5, 7]  # CNN kernel heights
    if config.is_vector:
        # NOTE(review): shadows the `vectors` argument with freshly loaded vectors.
        vectors = Vectors(name='./vector/sgns.wiki.word')
        self.embedding = nn.Embedding.from_pretrained(vectors)
    self.convs = nn.ModuleList([
        nn.Conv2d(config.chanel_num, config.filter_num,
                  (size, config.embedding_size), padding=size // 2)
        for size in self.sizes
    ])
    self.embedding_size = config.embedding_size
    # NOTE(review): unconditionally overwrites any pretrained embedding set above;
    # the branches at the bottom may overwrite it once more.
    self.embedding = nn.Embedding(ntoken, config.embedding_size)
    self.pos_encoder = PositionalEncoding(config.embedding_size, config.dropout)
    enc_layer = TransformerEncoderLayer(config.embedding_size, config.nhead,
                                        config.nhid, config.dropout)
    self.lstm = nn.LSTM(input_size=config.embedding_size,
                        hidden_size=config.bi_lstm_hidden // 2,
                        num_layers=1,
                        bidirectional=True)
    self.att_weight = nn.Parameter(
        torch.randn(config.bi_lstm_hidden, config.batch_size, config.bi_lstm_hidden))
    self.transformer_encoder = TransformerEncoder(enc_layer, config.nlayers)
    if config.is_pretrained_model:
        # Swap the embedding table for a full BERT encoder, fully trainable.
        config_bert = BertConfig.from_pretrained(config.pretrained_config)
        model = BertModel.from_pretrained(config.pretrained_model, config=config_bert)
        self.embedding = model
        for name, param in model.named_parameters():
            param.requires_grad = True
    elif config.is_vector:
        self.embedding = nn.Embedding.from_pretrained(vectors, freeze=False)
        self.embedding.weight.requires_grad = True
    self.emsize = config.embedding_size
    self.linner = nn.Linear(config.bi_lstm_hidden, ntag)
    self.init_weights()
    self.crflayer = CRF(ntag)
def __init__(self, args, spo_conf):
    """Relation net: LSTM sentence encoder then transformer, with po1/po2 pointer heads."""
    super(RelNET, self).__init__()
    # 0/1 flag embedding marking entity membership per token.
    self.token_entity_emb = nn.Embedding(num_embeddings=2,
                                         embedding_dim=args.entity_emb_size,
                                         padding_idx=0)
    self.sentence_encoder = SentenceEncoder(args, args.word_emb_size)
    self.transformer_encoder_layer = TransformerEncoderLayer(args.hidden_size * 2, args.nhead)
    self.transformer_encoder = TransformerEncoder(self.transformer_encoder_layer,
                                                  args.transformer_layers)
    self.classes_num = len(spo_conf)
    # Predicate-object start (po1) / end (po2) pointer heads.
    self.po1 = nn.Linear(args.hidden_size * 2, self.classes_num)
    self.po2 = nn.Linear(args.hidden_size * 2, self.classes_num)
def __init__(self, device, in_dim=512, num_head=8, num_layers=4, dropout=0.25):
    """Plain transformer encoder with positional encoding; no embedding or output head here."""
    super(Transformer, self).__init__()
    # Feedforward width follows the usual 4x model-dim convention.
    h_dim = 4 * in_dim
    enc_layer = TransformerEncoderLayer(in_dim, num_head, h_dim, dropout)
    self.transformer_encoder = TransformerEncoder(enc_layer, num_layers)
    self.positional_encoder = PositionalEncoding(in_dim)
    self.in_dim = in_dim
    self.h_dim = h_dim
    self.device = device
def __init__(self, vocab_obj, args, device):
    """Attribute network projecting all views into a shared output space with a fixed mix weight."""
    super(_ATTR_NETWORK, self).__init__()
    self.m_device = device
    # Cardinalities from the vocabulary object.
    self.m_vocab_size = vocab_obj.vocab_size
    self.m_user_num = vocab_obj.user_num
    self.m_item_num = vocab_obj.item_num
    # Embedding / attention hyper-parameters.
    self.m_attr_embed_size = args.attr_emb_size
    self.m_user_embed_size = args.user_emb_size
    self.m_item_embed_size = args.item_emb_size
    self.m_attn_head_num = args.attn_head_num
    self.m_attn_layer_num = args.attn_layer_num
    self.m_output_hidden_size = args.output_hidden_size
    self.m_attr_embedding = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_user_embedding = nn.Embedding(self.m_user_num, self.m_user_embed_size)
    self.m_item_embedding = nn.Embedding(self.m_item_num, self.m_item_embed_size)
    attn_layer = TransformerEncoderLayer(self.m_attr_embed_size, self.m_attn_head_num)
    self.m_attn = TransformerEncoder(attn_layer, self.m_attn_layer_num)
    # Project every view into the shared output space.
    self.m_user_linear = nn.Linear(self.m_user_embed_size, self.m_output_hidden_size)
    self.m_item_linear = nn.Linear(self.m_item_embed_size, self.m_output_hidden_size)
    self.m_attr_linear = nn.Linear(self.m_attr_embed_size, self.m_output_hidden_size)
    # Fixed mixing weight between score components.
    self.m_lambda = 0.28
    # Scalar 1x1 gates.
    self.m_user_output = nn.Linear(1, 1)
    self.m_attr_item_linear = nn.Linear(1, 1)
    self.m_attr_user_linear = nn.Linear(1, 1)
    # .to() moves parameters in place for nn.Module; the rebinding itself is a no-op.
    self = self.to(self.m_device)
def __init__(self, vocab_obj, args, device):
    """BPR-style model with attribute attention and output-side user/item attribute tables."""
    super(BPR, self).__init__()
    self.m_device = device
    # Cardinalities from the vocabulary object.
    self.m_vocab_size = vocab_obj.vocab_size
    self.m_user_num = vocab_obj.user_num
    self.m_item_num = vocab_obj.item_num
    # Embedding / attention hyper-parameters.
    self.m_attr_embed_size = args.attr_emb_size
    self.m_user_embed_size = args.user_emb_size
    self.m_item_embed_size = args.item_emb_size
    self.m_attn_head_num = args.attn_head_num
    self.m_attn_layer_num = args.attn_layer_num
    self.m_output_hidden_size = args.output_hidden_size
    self.m_attn_linear_size = args.attn_linear_size
    self.m_attr_embedding = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_user_embedding = nn.Embedding(self.m_user_num, self.m_user_embed_size)
    self.m_item_embedding = nn.Embedding(self.m_item_num, self.m_item_embed_size)
    attn_layer = TransformerEncoderLayer(self.m_attr_embed_size, self.m_attn_head_num,
                                         self.m_attn_linear_size)
    self.m_attn = TransformerEncoder(attn_layer, self.m_attn_layer_num)
    self.m_gamma = args.gamma
    # Output-side attribute tables for user and item views.
    self.m_output_attr_embedding_user = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_output_attr_embedding_item = nn.Embedding(self.m_vocab_size, self.m_attr_embed_size)
    self.m_beta = 1.0
    self.m_linear_tf = nn.Linear(1, 1)
    self.f_init_weight()
    # .to() moves parameters in place for nn.Module; the rebinding itself is a no-op.
    self = self.to(self.m_device)