def __init__(self, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1, rnn_hidden_dim=100,
              freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='GRU', gpu=-1, useElmo = False, weight_file = '', options_file = ''):
     super(TaggerBiRNNCRF, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size, useElmo, weight_file, options_file)
     self.tag_seq_indexer = tag_seq_indexer
     self.class_num = class_num
     self.rnn_hidden_dim = rnn_hidden_dim
     self.freeze_embeddings = freeze_word_embeddings
     self.dropout_ratio = dropout_ratio
     self.rnn_type = rnn_type
     self.gpu = gpu
     self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
     self.dropout = torch.nn.Dropout(p=dropout_ratio)
     
     print ("init targer BiRNNcfr")
         
     if rnn_type == 'GRU':
         self.birnn_layer = LayerBiGRU(input_dim=self.word_embeddings_layer.output_dim,
                                       hidden_dim=rnn_hidden_dim,
                                       gpu=gpu)
     elif rnn_type == 'LSTM':
         self.birnn_layer = LayerBiLSTM(input_dim=self.word_embeddings_layer.output_dim,
                                        hidden_dim=rnn_hidden_dim,
                                        gpu=gpu)
     elif rnn_type == 'Vanilla':
         self.birnn_layer = LayerBiVanilla(input_dim=self.word_embeddings_layer.output_dim+self.char_cnn_layer.output_dim,
                                        hidden_dim=rnn_hidden_dim,
                                        gpu=gpu)
     else:
         raise ValueError('Unknown rnn_type = %s, must be either "LSTM" or "GRU"')
     self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim, out_features=class_num + 2)
     self.crf_layer = LayerCRF(gpu, states_num=class_num + 2, pad_idx=tag_seq_indexer.pad_idx, sos_idx=class_num + 1,
                               tag_seq_indexer=tag_seq_indexer)
     if gpu >= 0:
         self.cuda(device=self.gpu)
Esempio n. 2
0
    def __init__(self, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1, rnn_hidden_dim=100,
                 freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='GRU', gpu=-1,
                 freeze_char_embeddings = False, char_embeddings_dim=25, word_len=20, char_cnn_filter_num=30,
                 char_window_size=3, emb_type='word'):
        super(TaggerBiRNNCNNCRF, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size)
        self.tag_seq_indexer = tag_seq_indexer
        self.class_num = class_num
        self.rnn_hidden_dim = rnn_hidden_dim
        self.freeze_embeddings = freeze_word_embeddings
        self.dropout_ratio = dropout_ratio
        self.rnn_type = rnn_type
        self.gpu = gpu
        #self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
        self.freeze_char_embeddings = freeze_char_embeddings
        self.char_embeddings_dim = char_embeddings_dim
        self.word_len = word_len
        self.char_cnn_filter_num = char_cnn_filter_num
        self.char_window_size = char_window_size
        self.word_embeddings_layer = EmbeddingsFactory.create(emb_type, word_seq_indexer, gpu, freeze_word_embeddings)
        self.char_embeddings_layer = LayerCharEmbeddings(gpu, char_embeddings_dim, freeze_char_embeddings,
                                                         word_len, word_seq_indexer.get_unique_characters_list())
        self.char_cnn_layer = LayerCharCNN(gpu, char_embeddings_dim, char_cnn_filter_num, char_window_size,
                                           word_len)
        self.dropout = torch.nn.Dropout(p=dropout_ratio)

        if rnn_type == 'GRU':
            self.birnn_layer = LayerBiGRU(input_dim=self.word_embeddings_layer.output_dim+self.char_cnn_layer.output_dim,
                                          hidden_dim=rnn_hidden_dim,
                                          gpu=gpu)
        elif rnn_type == 'LSTM':
            self.birnn_layer = LayerBiLSTM(input_dim=self.word_embeddings_layer.output_dim+self.char_cnn_layer.output_dim,
                                           hidden_dim=rnn_hidden_dim,
                                           gpu=gpu)
        elif rnn_type == 'Vanilla':
            self.birnn_layer = LayerBiVanilla(input_dim=self.word_embeddings_layer.output_dim+self.char_cnn_layer.output_dim,
                                           hidden_dim=rnn_hidden_dim,
                                           gpu=gpu)
        else:
            raise ValueError('Unknown rnn_type = %s, must be either "LSTM" or "GRU"')
        self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim, out_features=class_num + 2)
        self.crf_layer = LayerCRF(gpu, states_num=class_num + 2, pad_idx=tag_seq_indexer.pad_idx, sos_idx=class_num + 1,
                                  tag_seq_indexer=tag_seq_indexer)
        self.softmax = nn.Softmax(dim=2)
        if gpu >= 0:
            self.cuda(device=self.gpu)
    def __init__(self,
                 args,
                 word_seq_indexer,
                 tag_seq_indexer,
                 class_num,
                 batch_size=1,
                 rnn_hidden_dim=100,
                 freeze_word_embeddings=False,
                 dropout_ratio=0.5,
                 rnn_type='GRU',
                 gpu=-1,
                 freeze_char_embeddings=False,
                 char_embeddings_dim=25,
                 word_len=20,
                 char_cnn_filter_num=30,
                 char_window_size=3):
        super(TaggerBiRNNCNNCRF,
              self).__init__(word_seq_indexer, tag_seq_indexer, gpu,
                             batch_size)
        self.tag_seq_indexer = tag_seq_indexer
        self.class_num = class_num
        self.rnn_hidden_dim = rnn_hidden_dim
        self.freeze_embeddings = freeze_word_embeddings
        self.dropout_ratio = dropout_ratio
        self.rnn_type = rnn_type
        self.gpu = gpu
        self.word_embeddings_layer = LayerWordEmbeddings(
            args, word_seq_indexer, gpu, freeze_word_embeddings)
        self.freeze_char_embeddings = freeze_char_embeddings
        self.char_embeddings_dim = char_embeddings_dim
        self.word_len = word_len
        self.char_cnn_filter_num = char_cnn_filter_num
        self.char_window_size = char_window_size

        self.args = args
        self.if_bert = args.if_bert
        self.if_flair = args.if_flair

        self.dropout = torch.nn.Dropout(p=dropout_ratio)

        # self.bert_embeddings_dim = self.args.bert_embeddings_dim

        emb_models_dim = []
        print('load embedding...')
        if args.if_bert:
            # print('PYTORCH_PRETRAINED_BERT_CACHE',PYTORCH_PRETRAINED_BERT_CACHE)
            cache_dir = args.cache_dir if args.cache_dir else os.path.join(
                str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(
                    args.local_rank))

            # cache_dir =os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE),'distributed_{}'.format(args.local_rank))
            print('cache_dir', cache_dir)
            # cache_dir = '/home/jlfu/.pytorch_pretrained_bert/distributed_-1'
            #cache_dir='emb/bert_model_cache/bert_cache.hdf5',
            self.bert_embeddings_layer = LayerBertEmbeddings.from_pretrained(
                args.bert_model, cache_dir=cache_dir, num_labels=class_num)
            # self.bert_embeddings_layer = LayerBertEmbeddings(gpu, freeze_bert_embeddings=True)

            reduce_dim = False
            if reduce_dim:
                self.W_bert = nn.Linear(args.bert_output_dim, 256)
                emb_models_dim.append(256)
            else:
                emb_models_dim.append(args.bert_output_dim)
        if args.if_flair:
            self.flair_embeddings_layer = LayerFlairEmbeddings(gpu)
            reduce_dim = True
            if reduce_dim == True:
                self.W_flair = nn.Linear(
                    self.flair_embeddings_layer.output_dim, 256).cuda()
                emb_models_dim.append(256)
            else:
                emb_models_dim.append(self.flair_embeddings_layer.output_dim)
        if args.if_elmo:
            self.elmo_embeddings_layer = LayerElmoEmbeddings(
                args, gpu, args.options_file, args.weight_file,
                freeze_char_embeddings, word_len)

            elmo_reduce_dim = False
            if elmo_reduce_dim:
                # self.W_elmo = nn.Linear(self.elmo_embeddings_layer.output_dim,256)
                # emb_models_dim.append(256)

                self.W_elmo = nn.Linear(self.elmo_embeddings_layer.output_dim,
                                        self.word_embeddings_layer.output_dim)
                emb_models_dim.append(self.word_embeddings_layer.output_dim)
            else:
                emb_models_dim.append(self.elmo_embeddings_layer.output_dim)

        # if args.if_glove:
        #     self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
        #     emb_models_dim.append(self.word_embeddings_layer.output_dim)
        self.if_word = False
        if args.if_wordEmbRand == True and args.if_glove == False:
            self.word_embeddings_layer = LayerWordEmbeddings_Rand(
                word_seq_indexer, gpu, freeze_word_embeddings)
            emb_models_dim.append(self.word_embeddings_layer.output_dim)
            print('load random word emb ')
            self.if_word = True
        elif args.if_wordEmbRand == False and args.if_glove == True:
            self.word_embeddings_layer = LayerWordEmbeddings(
                args, word_seq_indexer, gpu, freeze_word_embeddings)
            emb_models_dim.append(self.word_embeddings_layer.output_dim)
            print('load glove word emb ')
            self.if_word = True
        else:
            print('can only use one word embedding (random or glove)')

        self.if_char = False
        if args.if_cnnChar == True and args.if_lstmChar == False:
            self.char_embeddings_layer = LayerCharEmbeddings(
                gpu, char_embeddings_dim, freeze_char_embeddings, word_len,
                word_seq_indexer.get_unique_characters_list())
            self.char_layer = LayerCharCNN(gpu, char_embeddings_dim,
                                           char_cnn_filter_num,
                                           char_window_size, word_len)
            emb_models_dim.append(self.char_layer.output_dim)
            self.if_char = True

        elif args.if_cnnChar == False and args.if_lstmChar == True:
            self.char_embeddings_layer = LayerCharEmbeddings(
                gpu, char_embeddings_dim, freeze_char_embeddings, word_len,
                word_seq_indexer.get_unique_characters_list())
            self.char_layer = LayerCharLSTM(gpu, char_embeddings_dim,
                                            self.char_lstm_hidden_dim,
                                            word_len)
            emb_models_dim.append(self.char_layer.output_dim)
            self.if_char = True
        else:
            print('can only use one char embedding (cnnChar or lstmChar)')

        self.input_dim = sum(emb_models_dim)

        if self.args.transformer:
            self.n_head = self.args.trans_head
            self.emb_dim = int((self.input_dim / self.n_head)) * self.n_head
            print('self.emb_dim', self.emb_dim)
            print('self.input_dim', self.input_dim)
            self.emb_linear = nn.Linear(in_features=self.input_dim,
                                        out_features=self.emb_dim)
            self.transEncodeLayer = TransformerEncoderLayer(
                d_model=self.emb_dim, nhead=self.n_head)
            self.transformer_encoder = TransformerEncoder(
                encoder_layer=self.transEncodeLayer, num_layers=6)
            self.input_dim = self.emb_dim

            self.transClassify_lin = nn.Linear(in_features=self.emb_dim,
                                               out_features=class_num + 1)

        if rnn_type == 'GRU':
            self.birnn_layer = LayerBiGRU(args=args,
                                          input_dim=self.input_dim,
                                          hidden_dim=rnn_hidden_dim,
                                          gpu=gpu)
        elif rnn_type == 'LSTM':
            self.birnn_layer = LayerBiLSTM(args=args,
                                           input_dim=self.input_dim,
                                           hidden_dim=rnn_hidden_dim,
                                           gpu=gpu)
        elif rnn_type == 'Vanilla':
            self.birnn_layer = LayerBiVanilla(args=args,
                                              input_dim=self.input_dim,
                                              hidden_dim=rnn_hidden_dim,
                                              gpu=gpu)
        elif self.rnn_type == 'SATN':
            self.birnn_layer = LayerSelfAttn(args=args,
                                             input_dim=self.input_dim,
                                             hidden_dim=rnn_hidden_dim,
                                             gpu=gpu)
        elif self.rnn_type == 'WCNN':
            self.birnn_layer = LayerWCNN(
                args=args,
                input_dim=self.input_dim,
                hidden_dim=rnn_hidden_dim,
                cnn_layer=args.wcnn_layer,
                # wcnn_hidden_dim =args.wcnn_hidden_dim,
                gpu=gpu)
        else:
            raise ValueError(
                'Unknown rnn_type = %s, must be either "LSTM" or "GRU"')

        self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim,
                                   out_features=class_num + 2)
        self.crf_layer = LayerCRF(gpu,
                                  states_num=class_num + 2,
                                  pad_idx=tag_seq_indexer.pad_idx,
                                  sos_idx=class_num + 1,
                                  tag_seq_indexer=tag_seq_indexer)
        self.softmax = nn.Softmax(dim=2)
        if gpu >= 0:
            self.cuda(device=self.gpu)