def __init__(self, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1, rnn_hidden_dim=100,
             freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='GRU', gpu=-1,
             useElmo=False, weight_file='', options_file=''):
    super(TaggerBiRNNCRF, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size,
                                         useElmo, weight_file, options_file)
    self.tag_seq_indexer = tag_seq_indexer
    self.class_num = class_num
    self.rnn_hidden_dim = rnn_hidden_dim
    self.freeze_embeddings = freeze_word_embeddings
    self.dropout_ratio = dropout_ratio
    self.rnn_type = rnn_type
    self.gpu = gpu
    self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
    self.dropout = torch.nn.Dropout(p=dropout_ratio)
    print('init targer BiRNNCRF')
    if rnn_type == 'GRU':
        self.birnn_layer = LayerBiGRU(input_dim=self.word_embeddings_layer.output_dim,
                                      hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'LSTM':
        self.birnn_layer = LayerBiLSTM(input_dim=self.word_embeddings_layer.output_dim,
                                       hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'Vanilla':
        # This tagger has no char-CNN layer, so the vanilla RNN sees only word embeddings.
        self.birnn_layer = LayerBiVanilla(input_dim=self.word_embeddings_layer.output_dim,
                                          hidden_dim=rnn_hidden_dim, gpu=gpu)
    else:
        raise ValueError('Unknown rnn_type = %s, must be "GRU", "LSTM" or "Vanilla"' % rnn_type)
    # Two extra CRF states: one for padding and one for the start-of-sequence tag.
    self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim, out_features=class_num + 2)
    self.crf_layer = LayerCRF(gpu, states_num=class_num + 2, pad_idx=tag_seq_indexer.pad_idx,
                              sos_idx=class_num + 1, tag_seq_indexer=tag_seq_indexer)
    if gpu >= 0:
        self.cuda(device=self.gpu)
def __init__(self, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1, rnn_hidden_dim=100,
             freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='GRU', gpu=-1):
    super(TaggerBiRNN, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size)
    self.tag_seq_indexer = tag_seq_indexer
    self.class_num = class_num
    self.rnn_hidden_dim = rnn_hidden_dim
    self.freeze_embeddings = freeze_word_embeddings
    self.dropout_ratio = dropout_ratio
    self.rnn_type = rnn_type
    self.gpu = gpu
    # Pick the word-embedding layer according to the indexer: plain lookup embeddings,
    # contextual BERT embeddings, or contextual ELMo embeddings.
    if not word_seq_indexer.bert and not word_seq_indexer.elmo:
        self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
    elif word_seq_indexer.bert:
        print('word_seq_indexer.bert gpu', gpu)
        self.word_embeddings_layer = LayerContextWordEmbeddingsBert(word_seq_indexer, gpu,
                                                                    freeze_word_embeddings)
    else:
        self.word_embeddings_layer = LayerContextWordEmbeddings(word_seq_indexer, gpu,
                                                                freeze_word_embeddings)
    self.dropout = torch.nn.Dropout(p=dropout_ratio)
    if rnn_type == 'GRU':
        self.birnn_layer = LayerBiGRU(input_dim=self.word_embeddings_layer.output_dim,
                                      hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'LSTM':
        self.birnn_layer = LayerBiLSTM(input_dim=self.word_embeddings_layer.output_dim,
                                       hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'Vanilla':
        # This tagger has no char-CNN layer, so the vanilla RNN sees only word embeddings.
        self.birnn_layer = LayerBiVanilla(input_dim=self.word_embeddings_layer.output_dim,
                                          hidden_dim=rnn_hidden_dim, gpu=gpu)
    else:
        raise ValueError('Unknown rnn_type = %s, must be "GRU", "LSTM" or "Vanilla"' % rnn_type)
    # One extra output class for zero-padded positions, which are excluded from the loss.
    self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim, out_features=class_num + 1)
    self.log_softmax_layer = nn.LogSoftmax(dim=1)
    if gpu >= 0:
        self.cuda(device=self.gpu)
    self.nll_loss = nn.NLLLoss(ignore_index=0)  # target value 0 marks zero-padded positions
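# Minimal usage sketch (not part of the original file): `word_indexer`, `tag_indexer` and
# `number_of_tags` are assumed to come from the data pipeline elsewhere in the repo; it
# only illustrates the constructor signature above.
tagger = TaggerBiRNN(word_seq_indexer=word_indexer,
                     tag_seq_indexer=tag_indexer,
                     class_num=number_of_tags,  # real tag classes; padding gets its own extra class
                     batch_size=10,
                     rnn_hidden_dim=100,
                     rnn_type='LSTM',
                     gpu=-1)  # -1 keeps the model on CPU; gpu >= 0 moves it to that CUDA device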
def __init__(self, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1, rnn_hidden_dim=50,
             freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='LSTM', gpu=-1,
             latent_dim=16, pooling_type='attention'):
    super(TaggerBiRNN, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size)
    self.tag_seq_indexer = tag_seq_indexer
    self.class_num = class_num
    self.rnn_hidden_dim = rnn_hidden_dim
    self.freeze_embeddings = freeze_word_embeddings
    self.dropout_ratio = dropout_ratio
    self.rnn_type = rnn_type  # stored for reference; the encoder below is always a BiLSTM
    self.gpu = gpu
    self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
    self.dropout = torch.nn.Dropout(p=dropout_ratio)
    self.latent_dim = latent_dim
    self.birnn_layer = LayerBiLSTM(input_dim=self.word_embeddings_layer.output_dim,
                                   hidden_dim=rnn_hidden_dim, gpu=gpu)
    self.pooler = LayerPooler(input_dim=self.birnn_layer.output_dim, gpu=gpu,
                              pooling_type=pooling_type)
    # Optional random-projection bottleneck before the classification layer.
    if latent_dim is not None:
        self.dim_red = nn.Sequential(
            nn.Linear(in_features=self.pooler.output_dim, out_features=latent_dim),
            nn.Sigmoid())
        self.dim_red.apply(self.inititialize_random_projection)
        lin_layer_in = latent_dim
    else:
        lin_layer_in = self.pooler.output_dim
    self.lin_layer = nn.Linear(in_features=lin_layer_in, out_features=class_num)
    self.log_softmax_layer = nn.LogSoftmax(dim=1)
    if gpu >= 0:
        self.cuda(device=self.gpu)
    self.nll_loss = nn.NLLLoss()
def __init__(self, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1, rnn_hidden_dim=100,
             freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='GRU', gpu=-1,
             freeze_char_embeddings=False, char_embeddings_dim=25, word_len=20,
             char_cnn_filter_num=30, char_window_size=3, emb_type='word'):
    super(TaggerBiRNNCNNCRF, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size)
    self.tag_seq_indexer = tag_seq_indexer
    self.class_num = class_num
    self.rnn_hidden_dim = rnn_hidden_dim
    self.freeze_embeddings = freeze_word_embeddings
    self.dropout_ratio = dropout_ratio
    self.rnn_type = rnn_type
    self.gpu = gpu
    self.freeze_char_embeddings = freeze_char_embeddings
    self.char_embeddings_dim = char_embeddings_dim
    self.word_len = word_len
    self.char_cnn_filter_num = char_cnn_filter_num
    self.char_window_size = char_window_size
    self.word_embeddings_layer = EmbeddingsFactory.create(emb_type, word_seq_indexer, gpu,
                                                          freeze_word_embeddings)
    self.char_embeddings_layer = LayerCharEmbeddings(gpu, char_embeddings_dim, freeze_char_embeddings,
                                                     word_len,
                                                     word_seq_indexer.get_unique_characters_list())
    self.char_cnn_layer = LayerCharCNN(gpu, char_embeddings_dim, char_cnn_filter_num,
                                       char_window_size, word_len)
    self.dropout = torch.nn.Dropout(p=dropout_ratio)
    # The RNN consumes the concatenation of word embeddings and char-CNN features.
    if rnn_type == 'GRU':
        self.birnn_layer = LayerBiGRU(
            input_dim=self.word_embeddings_layer.output_dim + self.char_cnn_layer.output_dim,
            hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'LSTM':
        self.birnn_layer = LayerBiLSTM(
            input_dim=self.word_embeddings_layer.output_dim + self.char_cnn_layer.output_dim,
            hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'Vanilla':
        self.birnn_layer = LayerBiVanilla(
            input_dim=self.word_embeddings_layer.output_dim + self.char_cnn_layer.output_dim,
            hidden_dim=rnn_hidden_dim, gpu=gpu)
    else:
        raise ValueError('Unknown rnn_type = %s, must be "GRU", "LSTM" or "Vanilla"' % rnn_type)
    # Two extra CRF states: one for padding and one for the start-of-sequence tag.
    self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim, out_features=class_num + 2)
    self.crf_layer = LayerCRF(gpu, states_num=class_num + 2, pad_idx=tag_seq_indexer.pad_idx,
                              sos_idx=class_num + 1, tag_seq_indexer=tag_seq_indexer)
    self.softmax = nn.Softmax(dim=2)
    if gpu >= 0:
        self.cuda(device=self.gpu)
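# Illustrative sketch only (`word_indexer`, `tag_indexer`, `number_of_tags` are assumed
# names): the char-CNN + CRF variant appends two extra CRF states to `class_num`
# (padding and start-of-sequence), which is why the linear layer above emits
# class_num + 2 scores per token.
tagger_crf = TaggerBiRNNCNNCRF(word_seq_indexer=word_indexer,
                               tag_seq_indexer=tag_indexer,
                               class_num=number_of_tags,
                               rnn_type='LSTM',
                               char_embeddings_dim=25,
                               char_cnn_filter_num=30,
                               char_window_size=3,
                               emb_type='word',
                               gpu=-1)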
def __init__(self, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1, rnn_hidden_dim=100,
             freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='GRU', gpu=-1,
             freeze_char_embeddings=False, char_embeddings_dim=25, word_len=20,
             char_cnn_filter_num=30, char_window_size=3):
    super(TaggerBiRNNCNN, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size)
    self.tag_seq_indexer = tag_seq_indexer
    self.class_num = class_num
    self.rnn_hidden_dim = rnn_hidden_dim
    self.freeze_embeddings = freeze_word_embeddings
    self.dropout_ratio = dropout_ratio
    self.rnn_type = rnn_type
    self.gpu = gpu
    self.freeze_char_embeddings = freeze_char_embeddings
    self.char_embeddings_dim = char_embeddings_dim
    self.word_len = word_len
    self.char_cnn_filter_num = char_cnn_filter_num
    self.char_window_size = char_window_size
    self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
    self.char_embeddings_layer = LayerCharEmbeddings(gpu, char_embeddings_dim, freeze_char_embeddings,
                                                     word_len,
                                                     word_seq_indexer.get_unique_characters_list())
    self.char_cnn_layer = LayerCharCNN(gpu, char_embeddings_dim, char_cnn_filter_num,
                                       char_window_size, word_len)
    self.dropout = torch.nn.Dropout(p=dropout_ratio)
    # The RNN consumes the concatenation of word embeddings and char-CNN features.
    if rnn_type == 'GRU':
        self.birnn_layer = LayerBiGRU(
            input_dim=self.word_embeddings_layer.output_dim + self.char_cnn_layer.output_dim,
            hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'LSTM':
        self.birnn_layer = LayerBiLSTM(
            input_dim=self.word_embeddings_layer.output_dim + self.char_cnn_layer.output_dim,
            hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'Vanilla':
        self.birnn_layer = LayerBiVanilla(
            input_dim=self.word_embeddings_layer.output_dim + self.char_cnn_layer.output_dim,
            hidden_dim=rnn_hidden_dim, gpu=gpu)
    else:
        raise ValueError('Unknown rnn_type = %s, must be "GRU", "LSTM" or "Vanilla"' % rnn_type)
    # One extra output class for zero-padded positions, which are excluded from the loss.
    self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim, out_features=class_num + 1)
    self.log_softmax_layer = nn.LogSoftmax(dim=1)
    if gpu >= 0:
        self.cuda(device=self.gpu)
    self.nll_loss = nn.NLLLoss(ignore_index=0)  # target value 0 marks zero-padded positions
def __init__(self, args, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1,
             rnn_hidden_dim=100, freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='GRU',
             gpu=-1, freeze_char_embeddings=False, char_embeddings_dim=25, word_len=20,
             char_cnn_filter_num=30, char_window_size=3):
    super(TaggerBiRNNCNN, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size)
    self.tag_seq_indexer = tag_seq_indexer
    self.class_num = class_num
    self.rnn_hidden_dim = rnn_hidden_dim
    self.freeze_embeddings = True  # embeddings are always frozen here, regardless of freeze_word_embeddings
    self.dropout_ratio = dropout_ratio
    self.rnn_type = rnn_type
    self.gpu = gpu
    self.freeze_char_embeddings = freeze_char_embeddings
    self.char_embeddings_dim = char_embeddings_dim
    self.word_len = word_len
    self.char_cnn_filter_num = char_cnn_filter_num
    self.char_window_size = char_window_size
    self.if_elmo = args.if_elmo
    self.if_bert = args.if_bert
    self.if_flair = args.if_flair
    self.if_word = bool(args.if_glove or args.if_wordEmbRand or args.if_twitter_emb)
    self.if_char = bool(args.if_char_cnn or args.if_char_lstm)
    self.elmo_embeddings_dim = args.elmo_embeddings_dim  # used by the if_elmo branch below
    self.bert_embeddings_dim = args.bert_embeddings_dim  # used by the if_bert branch below
    self.bert_mode = 'mean'
    self.options_file = args.options_file
    self.weight_file = args.weight_file
    # Build the selected embedding layers and collect their output dimensions.
    emb_models_dim = []
    if args.if_wordEmbRand:
        self.word_embeddings_layer = LayerWordEmbeddings_Rand(word_seq_indexer, gpu,
                                                              self.freeze_embeddings)
        emb_models_dim.append(self.word_embeddings_layer.output_dim)
    if args.if_glove:
        self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu,
                                                         self.freeze_embeddings)
        emb_models_dim.append(self.word_embeddings_layer.output_dim)
    if args.if_char_lstm:
        self.char_embeddings_layer = LayerCharEmbeddings(gpu, char_embeddings_dim,
                                                         freeze_char_embeddings, word_len,
                                                         word_seq_indexer.get_unique_characters_list())
        # self.char_lstm_hidden_dim is expected to be set (e.g. from args) before this branch runs.
        self.char_layer = LayerCharLSTM(gpu, char_embeddings_dim, self.char_lstm_hidden_dim, word_len)
        emb_models_dim.append(self.char_layer.output_dim)
    if args.if_char_cnn:
        self.char_embeddings_layer = LayerCharEmbeddings(gpu, char_embeddings_dim,
                                                         freeze_char_embeddings, word_len,
                                                         word_seq_indexer.get_unique_characters_list())
        self.char_layer = LayerCharCNN(gpu, char_embeddings_dim, char_cnn_filter_num,
                                       char_window_size, word_len)
        emb_models_dim.append(self.char_layer.output_dim)
    if args.if_elmo:
        self.elmo_embeddings_layer = LayerElmoEmbeddings(args, gpu, self.elmo_embeddings_dim,
                                                         self.options_file, self.weight_file,
                                                         freeze_char_embeddings, word_len)
        emb_models_dim.append(self.elmo_embeddings_layer.output_dim)
    if args.if_bert:
        self.bert_embeddings_layer = LayerBertEmbeddings(gpu, self.bert_embeddings_dim,
                                                         self.bert_mode)
        emb_models_dim.append(self.bert_embeddings_layer.output_dim)
        print('BERT embedding layer initialized')
    if args.if_flair:
        self.flair_embeddings_layer = LayerFlairEmbeddings(gpu)
        emb_models_dim.append(self.flair_embeddings_layer.output_dim)
    # The RNN input is the concatenation of all selected embedding outputs.
    self.input_dim = sum(emb_models_dim)
    # Char embedding/CNN layers constructed unconditionally, in addition to the optional
    # char layers above.
    self.char_embeddings_layer = LayerCharEmbeddings(gpu, char_embeddings_dim,
                                                     freeze_char_embeddings, word_len,
                                                     word_seq_indexer.get_unique_characters_list())
    self.char_cnn_layer = LayerCharCNN(gpu, char_embeddings_dim, char_cnn_filter_num,
                                       char_window_size, word_len)
    self.dropout = torch.nn.Dropout(p=dropout_ratio)
    if rnn_type == 'GRU':
        self.birnn_layer = LayerBiGRU(args=args, input_dim=self.input_dim,
                                      hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'LSTM':
        self.birnn_layer = LayerBiLSTM(args=args, input_dim=self.input_dim,
                                       hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'Vanilla':
        self.birnn_layer = LayerBiVanilla(args=args, input_dim=self.input_dim,
                                          hidden_dim=rnn_hidden_dim, gpu=gpu)
    else:
        raise ValueError('Unknown rnn_type = %s, must be "GRU", "LSTM" or "Vanilla"' % rnn_type)
    # One extra output class for zero-padded positions, which are excluded from the loss.
    self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim, out_features=class_num + 1)
    self.log_softmax_layer = nn.LogSoftmax(dim=1)
    if gpu >= 0:
        self.cuda(device=self.gpu)
    self.nll_loss = nn.NLLLoss(ignore_index=0)  # target value 0 marks zero-padded positions
def __init__(self, args, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1,
             rnn_hidden_dim=100, freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='GRU',
             gpu=-1, freeze_char_embeddings=False, char_embeddings_dim=25, word_len=20,
             char_cnn_filter_num=30, char_window_size=3):
    super(TaggerBiRNNCNN, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size)
    self.tag_seq_indexer = tag_seq_indexer
    self.class_num = class_num
    self.rnn_hidden_dim = rnn_hidden_dim
    self.freeze_embeddings = freeze_word_embeddings
    self.dropout_ratio = dropout_ratio
    self.rnn_type = rnn_type
    self.gpu = gpu
    self.freeze_char_embeddings = freeze_char_embeddings
    self.char_embeddings_dim = char_embeddings_dim
    self.word_len = word_len
    self.char_cnn_filter_num = char_cnn_filter_num
    self.char_window_size = char_window_size
    self.args = args
    self.if_bert = args.if_bert
    self.if_flair = args.if_flair
    self.dropout = torch.nn.Dropout(p=dropout_ratio)
    # Build the selected embedding layers and collect their output dimensions.
    emb_models_dim = []
    print('load embedding...')
    if args.if_bert:
        cache_dir = args.cache_dir if args.cache_dir else os.path.join(
            str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(args.local_rank))
        print('cache_dir', cache_dir)
        self.bert_embeddings_layer = LayerBertEmbeddings.from_pretrained(
            args.bert_model, cache_dir=cache_dir, num_labels=class_num)
        reduce_dim = False
        if reduce_dim:
            self.W_bert = nn.Linear(args.bert_output_dim, 256)
            emb_models_dim.append(256)
        else:
            emb_models_dim.append(args.bert_output_dim)
    if args.if_flair:
        self.flair_embeddings_layer = LayerFlairEmbeddings(gpu)
        reduce_dim = True
        if reduce_dim:
            self.W_flair = nn.Linear(self.flair_embeddings_layer.output_dim, 256).cuda()
            emb_models_dim.append(256)
        else:
            emb_models_dim.append(self.flair_embeddings_layer.output_dim)
    if args.if_elmo:
        self.elmo_embeddings_layer = LayerElmoEmbeddings(args, gpu, args.options_file,
                                                         args.weight_file, freeze_char_embeddings,
                                                         word_len)
        elmo_reduce_dim = False
        if elmo_reduce_dim:
            # Projects ELMo down to the word-embedding size; requires word_embeddings_layer
            # to have been created before this point.
            self.W_elmo = nn.Linear(self.elmo_embeddings_layer.output_dim,
                                    self.word_embeddings_layer.output_dim)
            emb_models_dim.append(self.word_embeddings_layer.output_dim)
        else:
            emb_models_dim.append(self.elmo_embeddings_layer.output_dim)
    # Exactly one word-level embedding is expected: random or GloVe.
    self.if_word = False
    if args.if_wordEmbRand and not args.if_glove:
        self.word_embeddings_layer = LayerWordEmbeddings_Rand(word_seq_indexer, gpu,
                                                              freeze_word_embeddings)
        emb_models_dim.append(self.word_embeddings_layer.output_dim)
        print('load random word emb')
        self.if_word = True
    elif args.if_glove and not args.if_wordEmbRand:
        self.word_embeddings_layer = LayerWordEmbeddings(args, word_seq_indexer, gpu,
                                                         freeze_word_embeddings)
        emb_models_dim.append(self.word_embeddings_layer.output_dim)
        print('load glove word emb')
        self.if_word = True
    else:
        print('can only use one word embedding (random or glove)')
    # Exactly one character-level encoder is expected: char-CNN or char-LSTM.
    self.if_char = False
    if args.if_cnnChar and not args.if_lstmChar:
        self.char_embeddings_layer = LayerCharEmbeddings(gpu, char_embeddings_dim,
                                                         freeze_char_embeddings, word_len,
                                                         word_seq_indexer.get_unique_characters_list())
        self.char_layer = LayerCharCNN(gpu, char_embeddings_dim, char_cnn_filter_num,
                                       char_window_size, word_len)
        emb_models_dim.append(self.char_layer.output_dim)
        self.if_char = True
    elif args.if_lstmChar and not args.if_cnnChar:
        self.char_embeddings_layer = LayerCharEmbeddings(gpu, char_embeddings_dim,
                                                         freeze_char_embeddings, word_len,
                                                         word_seq_indexer.get_unique_characters_list())
        # self.char_lstm_hidden_dim is expected to be set (e.g. from args) before this branch runs.
        self.char_layer = LayerCharLSTM(gpu, char_embeddings_dim, self.char_lstm_hidden_dim,
                                        word_len)
        emb_models_dim.append(self.char_layer.output_dim)
        self.if_char = True
    else:
        print('can only use one char embedding (cnnChar or lstmChar)')
    # The encoder input is the concatenation of all selected embedding outputs.
    self.input_dim = sum(emb_models_dim)
    if self.args.transformer:
        # Round the input dimension down to a multiple of the number of attention heads.
        self.n_head = self.args.trans_head
        self.emb_dim = (self.input_dim // self.n_head) * self.n_head
        print('self.emb_dim', self.emb_dim)
        print('self.input_dim', self.input_dim)
        self.emb_linear = nn.Linear(in_features=self.input_dim, out_features=self.emb_dim)
        self.transEncodeLayer = TransformerEncoderLayer(d_model=self.emb_dim, nhead=self.n_head)
        self.transformer_encoder = TransformerEncoder(encoder_layer=self.transEncodeLayer,
                                                      num_layers=6)
        self.input_dim = self.emb_dim
        self.transClassify_lin = nn.Linear(in_features=self.emb_dim, out_features=class_num + 1)
    if rnn_type == 'GRU':
        self.birnn_layer = LayerBiGRU(args=args, input_dim=self.input_dim,
                                      hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'LSTM':
        self.birnn_layer = LayerBiLSTM(args=args, input_dim=self.input_dim,
                                       hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'Vanilla':
        self.birnn_layer = LayerBiVanilla(args=args, input_dim=self.input_dim,
                                          hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'SATN':
        self.birnn_layer = LayerSelfAttn(args=args, input_dim=self.input_dim,
                                         hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'WCNN':
        self.birnn_layer = LayerWCNN(args=args, input_dim=self.input_dim,
                                     hidden_dim=rnn_hidden_dim, cnn_layer=args.wcnn_layer, gpu=gpu)
    else:
        raise ValueError('Unknown rnn_type = %s, must be one of "GRU", "LSTM", "Vanilla", '
                         '"SATN" or "WCNN"' % rnn_type)
    # One extra output class for zero-padded positions, which are excluded from the loss.
    self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim, out_features=class_num + 1)
    self.log_softmax_layer = nn.LogSoftmax(dim=1)
    if gpu >= 0:
        self.cuda(device=self.gpu)
    self.nll_loss = nn.NLLLoss(ignore_index=0)  # target value 0 marks zero-padded positions
def __init__(self, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1, rnn_hidden_dim=100,
             freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='GRU', gpu=-1,
             num_prototypes_per_class=6, proto_dim=None, pretrained_path=None,
             max_pool_protos=False, pooling_type='attention', similarity_epsilon=1e-4,
             hadamard_importance=False, similarity_function_name='gaussian'):
    super(TaggerProtoBiRNNFixed, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size)
    self.tag_seq_indexer = tag_seq_indexer
    self.class_num = class_num
    self.rnn_hidden_dim = rnn_hidden_dim
    self.freeze_embeddings = freeze_word_embeddings
    self.dropout_ratio = dropout_ratio
    self.rnn_type = rnn_type
    self.gpu = gpu
    self.dropout = torch.nn.Dropout(p=dropout_ratio)
    self.num_prototypes_per_class = num_prototypes_per_class
    self.num_prototypes = class_num * num_prototypes_per_class
    self.proto_dim = proto_dim
    self.max_pool = max_pool_protos
    self.hadamard_importance = hadamard_importance
    # Parameters: the trailing dimension is 1 because the prototype vectors are used
    # as a conv1d filter weight.
    self.prototypes_shape = (self.num_prototypes, self.proto_dim, 1)
    self.prototypes = nn.Parameter(torch.rand(self.prototypes_shape))
    # Layers
    self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
    self.birnn_layer = LayerBiLSTM(input_dim=self.word_embeddings_layer.output_dim,
                                   hidden_dim=rnn_hidden_dim, gpu=gpu)
    self.pooler = LayerPooler(input_dim=self.birnn_layer.output_dim, gpu=gpu,
                              pooling_type=pooling_type)
    self.proto_layer = LayerProto(input_dim=self.proto_dim, prototypes=self.prototypes,
                                  num_classes=class_num,
                                  num_prototypes_per_class=num_prototypes_per_class, gpu=gpu,
                                  max_pool=max_pool_protos, similarity_epsilon=similarity_epsilon,
                                  hadamard_importance=hadamard_importance,
                                  similarity_function_name=similarity_function_name)
    self.lin_layer = nn.Linear(in_features=self.proto_layer.output_dim, out_features=class_num,
                               bias=False)
    self.log_softmax_layer = nn.LogSoftmax(dim=1)
    if gpu >= 0:
        self.cuda(device=self.gpu)
    self.nll_loss = nn.NLLLoss()
    # Init weights and set gradient requirements.
    self._initialize_weights()
    self._set_grad_reqs()
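# Illustrative sketch only (`word_indexer`, `tag_indexer`, `number_of_tags` are assumed
# names): with the defaults above the model allocates class_num * 6 prototype vectors of
# size `proto_dim`; the value below assumes proto_dim should match the pooled BiLSTM
# output (2 * rnn_hidden_dim for the bidirectional encoder).
proto_tagger = TaggerProtoBiRNNFixed(word_seq_indexer=word_indexer,
                                     tag_seq_indexer=tag_indexer,
                                     class_num=number_of_tags,
                                     rnn_hidden_dim=100,
                                     num_prototypes_per_class=6,
                                     proto_dim=200,  # assumed: 2 * rnn_hidden_dim
                                     pooling_type='attention',
                                     similarity_function_name='gaussian',
                                     gpu=-1)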
def __init__(self, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1, rnn_hidden_dim=100,
             freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='LSTM', gpu=-1,
             latent_dim=None):
    super(TaggerAttentive, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size)
    self.tag_seq_indexer = tag_seq_indexer
    self.class_num = class_num
    self.rnn_hidden_dim = rnn_hidden_dim
    self.freeze_embeddings = freeze_word_embeddings
    self.dropout_ratio = dropout_ratio
    self.rnn_type = rnn_type
    self.gpu = gpu
    self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
    self.dropout = torch.nn.Dropout(p=dropout_ratio)
    self.latent_dim = latent_dim
    if rnn_type == 'GRU':
        self.birnn_layer = LayerBiGRU(input_dim=self.word_embeddings_layer.output_dim,
                                      hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'LSTM':
        self.birnn_layer = LayerBiLSTM(input_dim=self.word_embeddings_layer.output_dim,
                                       hidden_dim=rnn_hidden_dim, gpu=gpu)
    elif rnn_type == 'Vanilla':
        # This tagger has no char-CNN layer, so the vanilla RNN sees only word embeddings.
        self.birnn_layer = LayerBiVanilla(input_dim=self.word_embeddings_layer.output_dim,
                                          hidden_dim=rnn_hidden_dim, gpu=gpu)
    else:
        raise ValueError('Unknown rnn_type = %s, must be "GRU", "LSTM" or "Vanilla"' % rnn_type)
    # Equal-weight attention over the RNN states.
    self.attention = LayerAttention(input_dim=self.birnn_layer.output_dim,
                                    embedding_dim=self.word_embeddings_layer.output_dim,
                                    output_dim=self.birnn_layer.output_dim, gpu=gpu)
    # Optional dimensionality reduction of the concatenated attention and embedding features.
    if latent_dim is not None:
        self.dim_red = nn.Sequential(
            nn.Linear(in_features=self.attention.output_dim + self.word_embeddings_layer.output_dim,
                      out_features=latent_dim),
            nn.Sigmoid())
        self.dim_red.apply(self.inititialize_random_projection)
        lin_layer_in = latent_dim
    else:
        lin_layer_in = self.attention.output_dim + self.word_embeddings_layer.output_dim
    # One extra output class for zero-padded positions, which are excluded from the loss.
    self.lin_layer = nn.Linear(in_features=lin_layer_in, out_features=class_num + 1)
    self.log_softmax_layer = nn.LogSoftmax(dim=1)
    if gpu >= 0:
        self.cuda(device=self.gpu)
    self.nll_loss = nn.NLLLoss(ignore_index=0)  # target value 0 marks zero-padded positions