def __init__(self, embed, hidden_size, num_layers, tag_vocab, dropout=0.5, encoding_type='bioes'):
    super().__init__()
    self.embedding = embed
    self.lstm = LSTM(input_size=self.embedding.embedding_dim, hidden_size=hidden_size // 2,
                     num_layers=num_layers, bidirectional=True, batch_first=True)
    self.fc = nn.Linear(hidden_size, len(tag_vocab))

    transitions = allowed_transitions(tag_vocab.idx2word, encoding_type=encoding_type,
                                      include_start_end=True)
    self.crf = ConditionalRandomField(len(tag_vocab), include_start_end_trans=True,
                                      allowed_transitions=transitions)

    self.dropout = nn.Dropout(dropout, inplace=True)

    # Xavier-init the classifier weights, zero its bias, and zero the CRF transition parameters.
    for name, param in self.named_parameters():
        if 'fc' in name:
            if param.data.dim() > 1:
                nn.init.xavier_uniform_(param)
            else:
                nn.init.constant_(param, 0)
        if 'crf' in name:
            nn.init.zeros_(param)
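# Illustrative sketch (not part of the model above): the constructor splits hidden_size across the
# two LSTM directions, so the concatenated BiLSTM output already matches the nn.Linear input that
# produces per-tag emission scores for the CRF. Plain torch.nn stand-ins; all sizes are made up.
import torch
import torch.nn as nn

hidden_size, num_tags = 200, 9
lstm = nn.LSTM(input_size=100, hidden_size=hidden_size // 2, num_layers=1,
               bidirectional=True, batch_first=True)
fc = nn.Linear(hidden_size, num_tags)

x = torch.randn(4, 17, 100)   # (batch, seq_len, embed_dim)
out, _ = lstm(x)              # (4, 17, hidden_size): forward and backward halves concatenated
scores = fc(out)              # (4, 17, num_tags): emission scores fed to the CRF
assert scores.shape == (4, 17, num_tags)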
def __init__(self, args, embedding, hid_dim):
    super(GCN, self).__init__()
    self.args = args
    self.layers = args.num_layers
    self.mem_dim = hid_dim
    self.in_dim = args.tok_dim + args.pos_dim + args.post_dim
    self.tok_emb, self.pos_emb, self.post_emb = embedding

    # dropout
    self.rnn_drop = nn.Dropout(args.rnn_dropout)
    self.in_drop = nn.Dropout(args.input_dropout)
    self.gcn_drop = nn.Dropout(args.gcn_dropout)

    # lstm
    input_size = self.in_dim
    self.rnn = LSTM(input_size, args.rnn_hidden, args.rnn_layers, batch_first=True,
                    dropout=args.rnn_dropout, bidirectional=args.bidirect)
    if args.bidirect:
        self.in_dim = args.rnn_hidden * 2
    else:
        self.in_dim = args.rnn_hidden

    # gcn layer
    self.G = nn.ModuleList()
    for layer in range(self.layers):
        input_dim = self.in_dim if layer == 0 else self.mem_dim
        self.G.append(MLP([input_dim, self.mem_dim]))
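# Illustrative sketch of the layer-sizing idiom above: only the first graph layer consumes the
# (possibly bidirectional) RNN output; every later layer maps mem_dim -> mem_dim. nn.Linear is a
# stand-in for MLP([input_dim, mem_dim]); the dimensions are made up.
import torch.nn as nn

rnn_out_dim, mem_dim, num_layers = 100, 50, 3
G = nn.ModuleList()
for layer in range(num_layers):
    input_dim = rnn_out_dim if layer == 0 else mem_dim
    G.append(nn.Linear(input_dim, mem_dim))
print([(m.in_features, m.out_features) for m in G])   # [(100, 50), (50, 50), (50, 50)]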
def __init__(self, char_embed, hidden_size, num_layers, target_vocab=None, bigram_embed=None,
             trigram_embed=None, dropout=0.5):
    super().__init__()
    embed_size = char_embed.embed_size
    self.char_embed = char_embed
    if bigram_embed:
        embed_size += bigram_embed.embed_size
    self.bigram_embed = bigram_embed
    if trigram_embed:
        embed_size += trigram_embed.embed_size
    self.trigram_embed = trigram_embed

    self.lstm = LSTM(embed_size, hidden_size=hidden_size // 2, bidirectional=True,
                     batch_first=True, num_layers=num_layers)
    self.dropout = nn.Dropout(p=dropout)
    self.fc = nn.Linear(hidden_size, len(target_vocab))

    transitions = None
    if target_vocab:
        transitions = allowed_transitions(target_vocab, include_start_end=True,
                                          encoding_type='bmes')
    self.crf = ConditionalRandomField(num_tags=len(target_vocab), allowed_transitions=transitions)
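# Illustrative sketch of what the 'bmes' transition constraint amounts to: a hand-rolled rule set
# over a toy 4-tag index, instead of calling fastNLP's allowed_transitions, so the tag indices here
# are invented for illustration only.
tags = {0: 'B', 1: 'M', 2: 'E', 3: 'S'}
legal_next = {'B': {'M', 'E'}, 'M': {'M', 'E'}, 'E': {'B', 'S'}, 'S': {'B', 'S'}}
allowed = [(i, j) for i, ti in tags.items() for j, tj in tags.items() if tj in legal_next[ti]]
print(allowed)   # (from_tag_id, to_tag_id) pairs the CRF is permitted to score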
def __init__(self, char_embed, num_classes, bigram_embed=None, trigram_embed=None, num_layers=1,
             hidden_size=100, dropout=0.5, target_vocab=None, encoding_type=None):
    super().__init__()
    self.char_embed = get_embeddings(char_embed)
    embed_size = self.char_embed.embedding_dim
    if bigram_embed:
        self.bigram_embed = get_embeddings(bigram_embed)
        embed_size += self.bigram_embed.embedding_dim
    if trigram_embed:
        self.trigram_embed = get_embeddings(trigram_embed)
        embed_size += self.trigram_embed.embedding_dim

    if num_layers > 1:
        self.lstm = LSTM(embed_size, num_layers=num_layers, hidden_size=hidden_size // 2,
                         bidirectional=True, batch_first=True, dropout=dropout)
    else:
        self.lstm = LSTM(embed_size, num_layers=num_layers, hidden_size=hidden_size // 2,
                         bidirectional=True, batch_first=True)
    self.dropout = nn.Dropout(dropout)
    self.fc = nn.Linear(hidden_size, num_classes)

    trans = None
    if target_vocab is not None and encoding_type is not None:
        trans = allowed_transitions(target_vocab.idx2word, encoding_type=encoding_type,
                                    include_start_end=True)
    self.crf = ConditionalRandomField(num_classes, include_start_end_trans=True,
                                      allowed_transitions=trans)
def __init__(self, bert_embedding, label_size, vocabs, after_bert):
    super().__init__()
    self.after_bert = after_bert
    self.bert_embedding = bert_embedding
    self.label_size = label_size
    self.vocabs = vocabs
    self.hidden_size = bert_embedding._embed_size

    self.output = nn.Linear(self.hidden_size, self.label_size)
    self.crf = get_crf_zero_init(self.label_size)
    if self.after_bert == 'lstm':
        self.lstm = LSTM(bert_embedding._embed_size, bert_embedding._embed_size // 2,
                         bidirectional=True)
    self.dropout = MyDropout(0.5)
def __init__(self, char_embed, bigram_embed, word_embed, hidden_size, label_size, bias=True,
             bidirectional=False, device=None, embed_dropout=0, output_dropout=0, use_bigram=True):
    super().__init__()
    if device is None:
        self.device = torch.device('cpu')
    else:
        self.device = torch.device(device)

    self.char_embed_size = char_embed.embedding.weight.size(1)
    self.bigram_embed_size = bigram_embed.embedding.weight.size(1)
    self.word_embed_size = word_embed.embedding.weight.size(1)
    self.hidden_size = hidden_size
    self.label_size = label_size
    self.bidirectional = bidirectional
    self.use_bigram = use_bigram

    self.char_embed = char_embed
    self.bigram_embed = bigram_embed
    self.word_embed = word_embed

    if self.use_bigram:
        self.input_size = self.char_embed_size + self.bigram_embed_size
    else:
        self.input_size = self.char_embed_size

    self.encoder = LSTM(self.input_size, self.hidden_size, bidirectional=self.bidirectional)
    better_init_rnn(self.encoder.lstm)
    self.output = nn.Linear(self.hidden_size * (2 if self.bidirectional else 1), self.label_size)

    self.debug = False
    self.loss_func = nn.CrossEntropyLoss()
    self.embed_dropout = nn.Dropout(embed_dropout)
    self.output_dropout = nn.Dropout(output_dropout)
    self.crf = ConditionalRandomField(label_size, True)
def __init__(self, config):
    super(Encoder, self).__init__()
    self.config = config
    self.embedding = nn.Embedding(config.vocab_size, config.emb_dim)
    init_wt_normal(config, self.embedding.weight)

    self.join = nn.Linear(4 * config.hidden_dim, 2 * config.hidden_dim)
    init_linear_wt(config, self.join)

    self.lstm = LSTM(config.emb_dim, config.hidden_dim, num_layers=1, batch_first=True,
                     bidirectional=True)
    self.graph_feature_lstm = LSTM(config.emb_dim, config.hidden_dim, num_layers=1,
                                   batch_first=True, bidirectional=True)
    self.mlp = MLP(size_layer=[config.hidden_dim * 4, config.hidden_dim * 2,
                               config.hidden_dim * 2, 1],
                   activation="tanh")
    self.criterion = nn.MSELoss(reduction="sum")
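# Speculative sketch of how self.join could be used: assuming the (unshown) forward pass
# concatenates the outputs of the two bidirectional LSTMs above (2 * hidden_dim each) and fuses
# them back down to 2 * hidden_dim. Shapes only; hidden_dim and batch sizes are invented.
import torch
import torch.nn as nn

hidden_dim = 64
join = nn.Linear(4 * hidden_dim, 2 * hidden_dim)
token_states = torch.randn(2, 10, 2 * hidden_dim)   # stand-in for self.lstm output
graph_states = torch.randn(2, 10, 2 * hidden_dim)   # stand-in for self.graph_feature_lstm output
fused = join(torch.cat([token_states, graph_states], dim=-1))
assert fused.shape == (2, 10, 2 * hidden_dim)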
def __init__(self, char_embed, bigram_embed, hidden_size: int = 400, num_layers: int = 1,
             L: int = 6, drop_p: float = 0.2):
    super().__init__()
    self.char_embedding = char_embed
    self.bigram_embedding = bigram_embed
    self.lstm = LSTM(char_embed.embed_size + bigram_embed.embed_size, hidden_size // 2,
                     num_layers=num_layers, bidirectional=True, batch_first=True)
    self.feature_fn = FeatureFunMax(hidden_size, L)
    self.semi_crf_relay = SemiCRFShiftRelay(L)
    self.feat_drop = nn.Dropout(drop_p)
    self.reset_param()
def __init__(self, char_embed: Embedding, bigram_embed: Embedding, hidden_size: int = 400,
             num_layers: int = 1, L: int = 6, num_bigram_per_char: int = 1, drop_p: float = 0.2):
    super().__init__()
    self.char_embedding = Embedding(char_embed, dropout=drop_p)
    self._pretrained_embed = False
    if isinstance(char_embed, np.ndarray):
        self._pretrained_embed = True
    self.bigram_embedding = Embedding(bigram_embed, dropout=drop_p)
    self.lstm = LSTM(100 * (num_bigram_per_char + 1), hidden_size // 2, num_layers=num_layers,
                     bidirectional=True, batch_first=True)
    self.feature_fn = FeatureFunMax(hidden_size, L)
    self.semi_crf_relay = SemiCRFShiftRelay(L)
    self.feat_drop = nn.Dropout(drop_p)
    self.reset_param()
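# Illustrative sketch: the hard-coded LSTM input size above, 100 * (num_bigram_per_char + 1),
# appears to assume 100-dim char and bigram embeddings concatenated per character position.
# Shapes only; the Embedding/FeatureFunMax/SemiCRFShiftRelay modules are not reproduced.
import torch

num_bigram_per_char = 1
char_vec = torch.randn(2, 30, 100)                           # (batch, seq_len, 100)
bigram_vecs = torch.randn(2, 30, 100 * num_bigram_per_char)  # bigram features per character
lstm_input = torch.cat([char_vec, bigram_vecs], dim=-1)
assert lstm_input.size(-1) == 100 * (num_bigram_per_char + 1)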
def __init__(self, args, dep_tag_num, pos_tag_num):
    super(Aspect_Text_GAT_ours, self).__init__()
    self.args = args

    num_embeddings, embed_dim = args.glove_embedding.shape
    self.embed = nn.Embedding(num_embeddings, embed_dim, padding_idx=0)
    self.embed.weight = nn.Parameter(args.glove_embedding, requires_grad=False)

    self.dropout = nn.Dropout(args.dropout)
    if args.highway:
        self.highway_dep = Highway(args.num_layers, args.embedding_dim)
        self.highway = Highway(args.num_layers, args.embedding_dim)

    if args.num_layers > 1:
        self.bilstm = LSTM(input_size=args.embedding_dim, hidden_size=args.hidden_size,
                           bidirectional=True, batch_first=True, num_layers=args.num_layers,
                           dropout=0.5)
    else:
        self.bilstm = LSTM(input_size=args.embedding_dim, hidden_size=args.hidden_size,
                           bidirectional=True, batch_first=True, num_layers=args.num_layers)
    gcn_input_dim = args.hidden_size * 2

    # if args.gat:
    self.gat_dep = [RelationAttention(in_dim=args.embedding_dim).to(args.device)
                    for i in range(args.num_heads)]
    if args.gat_attention_type == 'linear':
        # we prefer to keep the dimension unchanged
        self.gat = [LinearAttention(in_dim=gcn_input_dim, mem_dim=gcn_input_dim).to(args.device)
                    for i in range(args.num_heads)]
    elif args.gat_attention_type == 'dotprod':
        self.gat = [DotprodAttention() for i in range(args.num_heads)]
    else:
        # reshaped gcn
        self.gat = nn.Linear(gcn_input_dim, gcn_input_dim)

    self.dep_embed = nn.Embedding(dep_tag_num, args.embedding_dim, padding_idx=0)
    torch.nn.init.uniform_(self.dep_embed.weight,
                           a=-1. / math.sqrt(args.embedding_dim),
                           b=1. / math.sqrt(args.embedding_dim))

    last_hidden_size = args.hidden_size * 4
    layers = [nn.Linear(last_hidden_size, args.final_hidden_size), nn.Dropout(0.5), nn.ReLU()]
    for _ in range(args.num_mlps - 1):
        layers += [nn.Linear(args.final_hidden_size, args.final_hidden_size),
                   nn.Dropout(0.5), nn.ReLU()]
    self.fcs = nn.Sequential(*layers)
    self.fc_final = nn.Linear(args.final_hidden_size, args.num_classes)
    self._reset_params()
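# Illustrative sketch of the classifier head built above: num_mlps blocks of
# Linear -> Dropout -> ReLU followed by the final Linear. All sizes are invented.
import torch
import torch.nn as nn

last_hidden_size, final_hidden_size, num_mlps, num_classes = 512, 128, 2, 3
layers = [nn.Linear(last_hidden_size, final_hidden_size), nn.Dropout(0.5), nn.ReLU()]
for _ in range(num_mlps - 1):
    layers += [nn.Linear(final_hidden_size, final_hidden_size), nn.Dropout(0.5), nn.ReLU()]
fcs = nn.Sequential(*layers)
fc_final = nn.Linear(final_hidden_size, num_classes)
logits = fc_final(fcs(torch.randn(4, last_hidden_size)))
assert logits.shape == (4, num_classes)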
def __init__(self, lattice_embed, bigram_embed, hidden_size, label_size, num_heads, num_layers,
             use_abs_pos, use_rel_pos, learnable_position, add_position,
             layer_preprocess_sequence, layer_postprocess_sequence, ff_size=-1, scaled=True,
             dropout=None, use_bigram=True, mode=collections.defaultdict(bool), dvc=None,
             vocabs=None, rel_pos_shared=True, max_seq_len=-1, k_proj=True, q_proj=True,
             v_proj=True, r_proj=True, self_supervised=False, attn_ff=True, pos_norm=False,
             ff_activate='relu', rel_pos_init=0, abs_pos_fusion_func='concat',
             embed_dropout_pos='0', four_pos_shared=True, four_pos_fusion=None,
             four_pos_fusion_shared=True, bert_embedding=None, use_pos_tag=False,
             after_bert='mlp'):
    '''
    :param rel_pos_init: if 0, the relative position embedding matrix covering -max_len..max_len
        is initialized over the index range 0..2*max_len; if 1, it is initialized directly over
        -max_len..max_len.
    :param embed_dropout_pos: '0' applies dropout right after the embedding lookup, '1' applies it
        after the embedding is projected to hidden size, and '2' applies it after the absolute
        position encoding is added.
    '''
    super().__init__()
    self.use_bert = False
    if bert_embedding is not None:
        self.use_bert = True
        self.bert_embedding = bert_embedding
    self.after_bert = after_bert
    self.four_pos_fusion_shared = four_pos_fusion_shared
    self.mode = mode
    self.four_pos_shared = four_pos_shared
    self.abs_pos_fusion_func = abs_pos_fusion_func
    self.lattice_embed = lattice_embed
    self.bigram_embed = bigram_embed
    self.hidden_size = hidden_size
    self.label_size = label_size
    self.num_heads = num_heads
    self.num_layers = num_layers
    # self.relative_position = relative_position
    self.use_abs_pos = use_abs_pos
    self.use_rel_pos = use_rel_pos
    if self.use_rel_pos:
        assert four_pos_fusion is not None
    self.four_pos_fusion = four_pos_fusion
    self.learnable_position = learnable_position
    self.add_position = add_position
    self.rel_pos_shared = rel_pos_shared
    self.self_supervised = self_supervised
    self.vocabs = vocabs
    self.attn_ff = attn_ff
    self.pos_norm = pos_norm
    self.ff_activate = ff_activate
    self.rel_pos_init = rel_pos_init
    self.embed_dropout_pos = embed_dropout_pos
    self.use_pos_tag = use_pos_tag

    if self.use_rel_pos and max_seq_len < 0:
        print_info('max_seq_len should be set when using relative position encoding')
        exit(1208)
    self.max_seq_len = max_seq_len

    self.k_proj = k_proj
    self.q_proj = q_proj
    self.v_proj = v_proj
    self.r_proj = r_proj

    self.pe = None
    if self.use_abs_pos:
        self.abs_pos_encode = Absolute_SE_Position_Embedding(
            self.abs_pos_fusion_func, self.hidden_size, learnable=self.learnable_position,
            mode=self.mode, pos_norm=self.pos_norm)

    if self.use_rel_pos:
        pe = get_embedding(max_seq_len, hidden_size, rel_pos_init=self.rel_pos_init)
        pe_sum = pe.sum(dim=-1, keepdim=True)
        if self.pos_norm:
            with torch.no_grad():
                pe = pe / pe_sum
        self.pe = nn.Parameter(pe, requires_grad=self.learnable_position)
        if self.four_pos_shared:
            # One shared table for all four span-relative distances (ss, se, es, ee).
            self.pe_ss = self.pe
            self.pe_se = self.pe
            self.pe_es = self.pe
            self.pe_ee = self.pe
        else:
            self.pe_ss = nn.Parameter(copy.deepcopy(pe), requires_grad=self.learnable_position)
            self.pe_se = nn.Parameter(copy.deepcopy(pe), requires_grad=self.learnable_position)
            self.pe_es = nn.Parameter(copy.deepcopy(pe), requires_grad=self.learnable_position)
            self.pe_ee = nn.Parameter(copy.deepcopy(pe), requires_grad=self.learnable_position)
    else:
        self.pe = None
        self.pe_ss = None
        self.pe_se = None
        self.pe_es = None
        self.pe_ee = None

    self.layer_preprocess_sequence = layer_preprocess_sequence
    self.layer_postprocess_sequence = layer_postprocess_sequence
    if ff_size == -1:
        ff_size = self.hidden_size
    self.ff_size = ff_size
    self.scaled = scaled
    if dvc is None:
        dvc = 'cpu'
    self.dvc = torch.device(dvc)
    if dropout is None:
        self.dropout = collections.defaultdict(int)
    else:
        self.dropout = dropout

    self.use_bigram = use_bigram
    if self.use_bigram:
        self.bigram_size = self.bigram_embed.embedding.weight.size(1)
        self.char_input_size = (self.lattice_embed.embedding.weight.size(1)
                                + self.bigram_embed.embedding.weight.size(1))
    else:
        self.char_input_size = self.lattice_embed.embedding.weight.size(1)
    if self.use_bert:
        self.char_input_size += self.bert_embedding._embed_size
    self.lex_input_size = self.lattice_embed.embedding.weight.size(1)

    self.embed_dropout = MyDropout(self.dropout['embed'])
    self.gaz_dropout = MyDropout(self.dropout['gaz'])
    self.char_proj = nn.Linear(self.char_input_size, self.hidden_size)
    self.lex_proj = nn.Linear(self.lex_input_size, self.hidden_size)

    self.encoder = Transformer_Encoder(
        self.hidden_size, self.num_heads, self.num_layers,
        relative_position=self.use_rel_pos,
        learnable_position=self.learnable_position,
        add_position=self.add_position,
        layer_preprocess_sequence=self.layer_preprocess_sequence,
        layer_postprocess_sequence=self.layer_postprocess_sequence,
        dropout=self.dropout,
        scaled=self.scaled,
        ff_size=self.ff_size,
        mode=self.mode,
        dvc=self.dvc,
        max_seq_len=self.max_seq_len,
        pe=self.pe,
        pe_ss=self.pe_ss,
        pe_se=self.pe_se,
        pe_es=self.pe_es,
        pe_ee=self.pe_ee,
        k_proj=self.k_proj,
        q_proj=self.q_proj,
        v_proj=self.v_proj,
        r_proj=self.r_proj,
        attn_ff=self.attn_ff,
        ff_activate=self.ff_activate,
        lattice=True,
        four_pos_fusion=self.four_pos_fusion,
        four_pos_fusion_shared=self.four_pos_fusion_shared)

    self.output_dropout = MyDropout(self.dropout['output'])

    print('pos_tag len...', len(list(vocabs['pos_tag'])))
    self.pos_feature_size = 10
    self.pos_embed_size = 28
    if self.use_pos_tag:
        self.pos_embedding = nn.Embedding(len(list(vocabs['pos_tag'])), self.pos_embed_size)
        self.pos_pj = nn.Linear(self.pos_embed_size, self.pos_feature_size)
    else:
        self.pos_feature_size = 0

    self.output = nn.Linear(self.hidden_size + self.pos_feature_size, self.label_size)
    if self.after_bert == 'lstm':
        self.pj_after_bert = LSTM(self.hidden_size + self.pos_feature_size,
                                  (self.hidden_size + self.pos_feature_size) // 2,
                                  bidirectional=True, num_layers=2)
    if self.self_supervised:
        self.output_self_supervised = nn.Linear(self.hidden_size, len(vocabs['char']))
        print('self.output_self_supervised:{}'.format(self.output_self_supervised.weight.size()))

    self.crf = get_crf_zero_init(self.label_size)
    self.loss_func = nn.CrossEntropyLoss(ignore_index=-100)
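# Speculative sketch of the char_proj / lex_proj pair above: assuming the (unshown) forward pass
# projects character positions and matched lexicon words into the same hidden space and
# concatenates them along the sequence axis before the Transformer encoder. Dimensions invented.
import torch
import torch.nn as nn

char_input_size, lex_input_size, hidden_size = 100, 50, 160
char_proj = nn.Linear(char_input_size, hidden_size)
lex_proj = nn.Linear(lex_input_size, hidden_size)

chars = torch.randn(2, 20, char_input_size)   # character positions of the lattice
words = torch.randn(2, 7, lex_input_size)     # lexicon words appended after the characters
encoder_input = torch.cat([char_proj(chars), lex_proj(words)], dim=1)
assert encoder_input.shape == (2, 27, hidden_size)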