Example #1
    def __init__(self, hyper) -> None:
        super(Twotagging, self).__init__()
        self.hyper = hyper
        self.data_root = hyper.data_root
        self.gpu = hyper.gpu

        self.word_vocab = json.load(
            open(os.path.join(self.data_root, "word_vocab.json"), "r"))
        self.relation_vocab = json.load(
            open(os.path.join(self.data_root, "relation_vocab.json"), "r"))
        self.id2word = {v: k for k, v in self.word_vocab.items()}
        self.id2rel = {v: k for k, v in self.relation_vocab.items()}

        self.S = s_model(len(self.word_vocab), self.hyper.emb_size,
                         self.hyper.hidden_size)
        self.PO = po_model(
            len(self.word_vocab),
            self.hyper.emb_size,
            self.hyper.hidden_size,
            len(self.relation_vocab),
        )  # len(self.relation_vocab), i.e. 49 relation types here
        self.CE = torch.nn.CrossEntropyLoss()
        self.BCE = torch.nn.BCEWithLogitsLoss()
        self.metrics = F1_triplet()
        self.get_metric = self.metrics.get_metric
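
Everything this constructor reads from hyper is visible above: data_root, gpu,
emb_size and hidden_size. A minimal stand-in for quick experiments; the
SimpleNamespace and all values below are illustrative, not from the original
project:

    from types import SimpleNamespace

    hyper = SimpleNamespace(
        data_root="data/",  # directory holding word_vocab.json, relation_vocab.json
        gpu=0,              # CUDA device index
        emb_size=300,       # word embedding dimension
        hidden_size=256,    # RNN hidden dimension
    )
    # model = Twotagging(hyper)  # requires the vocab files under data_root
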
Example #2
    def __init__(self, hyper):
        super(WDec, self).__init__()
        self.hyper = hyper
        self.data_root = hyper.data_root
        self.gpu = hyper.gpu

        self.word_vocab = json.load(
            open(os.path.join(self.data_root, "word_vocab.json"),
                 "r",
                 encoding="utf-8"))
        self.relation_vocab = json.load(
            open(
                os.path.join(self.data_root, "relation_vocab.json"),
                "r",
                encoding="utf-8",
            ))
        self.rev_vocab = {v: k for k, v in self.word_vocab.items()}

        self.word_embeddings = nn.Embedding(len(self.word_vocab),
                                            self.hyper.emb_size)

        # self.word_embeddings = WordEmbeddings(len(word_vocab), word_embed_dim, word_embed_matrix, drop_rate)
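        # NOTE: enc_inp_size, enc_hidden_size, dec_inp_size, dec_hidden_size,
        # layers, drop_rate and max_trg_len are not defined in this excerpt;
        # presumably they are module-level constants in the original file.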
        self.encoder = Encoder(enc_inp_size, int(enc_hidden_size / 2), layers,
                               True, drop_rate)
        self.decoder = Decoder(
            dec_inp_size,
            dec_hidden_size,
            layers,
            drop_rate,
            max_trg_len,
            len(self.word_vocab),
        )
        self.criterion = nn.NLLLoss(ignore_index=0)
        self.metrics = F1_triplet()
        self.get_metric = self.metrics.get_metric
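
A detail worth noting in the encoder construction above: passing
int(enc_hidden_size / 2) together with bidirectional=True keeps the
concatenated forward/backward output at enc_hidden_size, so downstream layers
see the full width. A standalone check with a plain PyTorch LSTM (the custom
Encoder class is not shown here; sizes are illustrative):

    import torch
    import torch.nn as nn

    enc_hidden_size = 256
    rnn = nn.LSTM(input_size=300, hidden_size=enc_hidden_size // 2,
                  num_layers=1, bidirectional=True, batch_first=True)
    out, _ = rnn(torch.randn(2, 10, 300))    # (batch, seq, features)
    assert out.shape[-1] == enc_hidden_size  # two directions concatenated
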
Example #3
    def __init__(self, exp_name: str):
        self.exp_name = exp_name
        self.model_dir = 'saved_models'

        self.hyper = Hyper(os.path.join('experiments',
                                        self.exp_name + '.json'))

        self.gpu = self.hyper.gpu

        if self.hyper.is_bert == 'ERNIE':
            self.preprocessor = Chinese_selection_preprocessing(self.hyper)

        elif self.hyper.is_bert == "bert_bilstem_crf":
            self.preprocessor = NYT_selection_preprocessing(self.hyper)

        elif self.hyper.is_bert == "nyt_bert_tokenizer":
            self.preprocessor = NYT_bert_selection_preprocessing(self.hyper)

        elif self.hyper.is_bert == "nyt11_bert_tokenizer":
            self.preprocessor = NYT11_bert_selection_preprocessing(self.hyper)

        elif self.hyper.is_bert == "nyt10_bert_tokenizer":
            self.preprocessor = NYT10_bert_selection_preprocessing(self.hyper)

        self.metrics = F1_triplet()
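
The elif chain above is a string-to-class dispatch on hyper.is_bert. An
equivalent table-driven form, shown purely as a refactoring sketch (the
preprocessing classes are the ones named above and are assumed to be imported
as in the original file):

    PREPROCESSORS = {
        'ERNIE': Chinese_selection_preprocessing,
        'bert_bilstem_crf': NYT_selection_preprocessing,
        'nyt_bert_tokenizer': NYT_bert_selection_preprocessing,
        'nyt11_bert_tokenizer': NYT11_bert_selection_preprocessing,
        'nyt10_bert_tokenizer': NYT10_bert_selection_preprocessing,
    }
    cls = PREPROCESSORS.get(self.hyper.is_bert)
    if cls is not None:
        self.preprocessor = cls(self.hyper)
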
Example #4
    def __init__(self, exp_name: str):
        self.exp_name = exp_name
        self.model_dir = 'saved_models'

        self.hyper = Hyper(os.path.join('experiments',
                                        self.exp_name + '.json'))

        self.gpu = self.hyper.gpu
        self.preprocessor = None
        self.metrics = F1_triplet()
        self.optimizer = None
        self.model = None
Example #5
    def __init__(self, exp_name: str, model_name):
        self.exp_name = exp_name
        self.model_dir = 'saved_models'
        self.model_name = model_name
        self.hyper = Hyper(os.path.join('experiments',
                                        self.exp_name + '.json'))

        self.hyper.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.gpu = self.hyper.gpu
        self.preprocessor = None
        self.triplet_metrics = F1_triplet()
        self.ner_metrics = F1_ner()
        self.save_err = SaveError()
        self.save_rc = SaveRecord(self.exp_name)
        self.p_metrics = F1_P()
        self.optimizer = None
        self.model = None
        self.model_p = None
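
The device is resolved once here with the standard availability check, and
everything built later would typically be moved onto it. A self-contained
illustration (the Linear layer and tensors are placeholders, not names from
this class):

    import torch

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = torch.nn.Linear(4, 2).to(device)  # any nn.Module moves the same way
    batch = torch.randn(8, 4).to(device)      # inputs must be on the same device
    out = model(batch)
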
Example #6
    def __init__(self, hyper):
        super(Seq2umt, self).__init__()
        self.hyper = hyper
        self.order = hyper.order
        self.data_root = hyper.data_root
        self.gpu = hyper.gpu

        self.word_vocab = json.load(
            open(os.path.join(self.data_root, "word_vocab.json"),
                 "r",
                 encoding="utf-8"))

        self.mBCE = MaskedBCE()
        self.BCE = torch.nn.BCEWithLogitsLoss()
        self.metrics = F1_triplet()
        # self.metrics = F1_()

        self.get_metric = self.metrics.get_metric
        self.encoder = Encoder(len(self.word_vocab), self.hyper.emb_size,
                               self.hyper.hidden_size)
        self.decoder = Decoder(hyper, self.word_vocab)
        self.sos = nn.Embedding(num_embeddings=1,
                                embedding_dim=self.hyper.emb_size)
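
The single-row sos embedding is a learned start-of-sequence vector; at the
first decoding step it is presumably looked up with index 0 for every batch
element (the forward pass is not part of this excerpt). A standalone sketch
with illustrative sizes:

    import torch
    import torch.nn as nn

    emb_size, batch = 64, 3
    sos = nn.Embedding(num_embeddings=1, embedding_dim=emb_size)
    step0 = sos(torch.zeros(batch, dtype=torch.long))  # (batch, emb_size)
    assert step0.shape == (batch, emb_size)
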
Example #7
    def __init__(self, hyper) -> None:
        super(CopyMTL, self).__init__()

        self.hyper = hyper
        self.data_root = hyper.data_root
        self.gpu = hyper.gpu

        self.word_vocab = json.load(
            open(os.path.join(self.data_root, "word_vocab.json"), "r"))
        self.relation_vocab = json.load(
            open(os.path.join(self.data_root, "relation_vocab.json"), "r"))
        self.bio_vocab = json.load(
            open(os.path.join(self.data_root, "bio_vocab.json"), "r"))

        self.word_embeddings = nn.Embedding(
            num_embeddings=len(self.word_vocab), embedding_dim=hyper.emb_size)

        self.relation_emb = nn.Embedding(
            num_embeddings=len(self.relation_vocab),
            embedding_dim=hyper.rel_emb_size)
        # bio + pad
        self.bio_emb = nn.Embedding(num_embeddings=len(self.bio_vocab),
                                    embedding_dim=hyper.bio_emb_size)

        self.sos = nn.Embedding(num_embeddings=1, embedding_dim=hyper.emb_size)

        if hyper.cell_name == "gru":
            self.encoder = nn.GRU(hyper.emb_size,
                                  hyper.hidden_size,
                                  bidirectional=True,
                                  batch_first=True)
            self.decoder = nn.GRU(
                hyper.emb_size,
                hyper.hidden_size + hyper.bio_emb_size,
                bidirectional=False,
                batch_first=True,
            )
        elif hyper.cell_name == "lstm":
            self.encoder = nn.LSTM(hyper.emb_size,
                                   hyper.hidden_size,
                                   bidirectional=True,
                                   batch_first=True)
            self.decoder = nn.LSTM(
                hyper.emb_size,
                # hyper.hidden_size + hyper.bio_emb_size,
                hyper.hidden_size,
                bidirectional=False,
                batch_first=True,
            )
        else:
            raise ValueError("cell name should be gru/lstm!")

        if hyper.activation.lower() == "relu":
            self.activation = nn.ReLU()
        elif hyper.activation.lower() == "tanh":
            self.activation = nn.Tanh()
        elif hyper.activation.lower() == "gelu":
            self.activation = F.gelu
        else:
            raise ValueError("unexpected activation!")

        self.tagger = CRF(len(self.bio_vocab) - 1, batch_first=True)
        # here the 'N' relation is used as <eos> in standard seq2seq
        self.rel_linear_1 = nn.Linear(hyper.hidden_size + hyper.bio_emb_size,
                                      len(self.relation_vocab))
        self.rel_linear_a = nn.Linear(hyper.hidden_size + hyper.bio_emb_size,
                                      hyper.hidden_size)
        self.rel_linear_b = nn.Linear(hyper.hidden_size + hyper.bio_emb_size,
                                      len(self.relation_vocab))
        self.relation_embedding = nn.Embedding(
            len(self.relation_vocab) + 1, hyper.hidden_size)

        self.combine_inputs = nn.Linear(hyper.hidden_size + hyper.emb_size,
                                        hyper.emb_size)
        self.attn = nn.Linear(hyper.hidden_size * 2, 1)

        self.entity_linear_1 = nn.Linear(
            hyper.hidden_size * 3 + 3 * hyper.bio_emb_size, hyper.hidden_size)
        self.entity_linear_2 = nn.Linear(hyper.hidden_size, 1)

        self.cat_linear = nn.Linear(hyper.hidden_size + hyper.bio_emb_size,
                                    hyper.hidden_size)
        self.sos_embedding = nn.Embedding(1, hyper.emb_size)

        self.do_eos = nn.Linear(hyper.hidden_size, 1)
        self.do_predict = nn.Linear(hyper.hidden_size,
                                    len(self.relation_vocab))

        self.fuse = nn.Linear(hyper.hidden_size * 2, 100)
        self.do_copy_linear = nn.Linear(100, 1)

        self.emission = nn.Linear(hyper.hidden_size, len(self.bio_vocab) - 1)
        # self.ce = nn.CrossEntropyLoss(reduction="none")
        self.loss = nn.NLLLoss(reduction="none")
        self.metrics = F1_triplet()
        self.get_metric = self.metrics.get_metric
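
The tagger/emission pair above follows the usual neural-CRF recipe: the linear
layer emits one score per token for every BIO tag except <pad> (hence
len(self.bio_vocab) - 1), and the CRF scores whole tag sequences over those
emissions. A standalone sketch, assuming CRF is the pytorch-crf (torchcrf)
implementation; sizes are illustrative:

    import torch
    import torch.nn as nn
    from torchcrf import CRF

    hidden_size, num_tags, batch, seq = 256, 3, 2, 7  # illustrative sizes
    emission = nn.Linear(hidden_size, num_tags)
    tagger = CRF(num_tags, batch_first=True)

    h = torch.randn(batch, seq, hidden_size)          # encoder output
    scores = emission(h)                              # (batch, seq, num_tags)
    gold = torch.zeros(batch, seq, dtype=torch.long)  # gold BIO tag ids
    loss = -tagger(scores, gold, reduction='mean')    # negative log-likelihood
    decoded = tagger.decode(scores)                   # best tag path per sequence
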
Example #8
    def __init__(self, hyper) -> None:
        super(MultiHeadSelection, self).__init__()

        self.hyper = hyper
        self.data_root = hyper.data_root
        self.gpu = hyper.gpu

        self.word_vocab = json.load(
            open(os.path.join(self.data_root, "word_vocab.json"), "r")
        )
        self.relation_vocab = json.load(
            open(os.path.join(self.data_root, "relation_vocab.json"), "r")
        )
        self.bio_vocab = json.load(
            open(os.path.join(self.data_root, "bio_vocab.json"), "r")
        )

        self.word_embeddings = nn.Embedding(
            num_embeddings=len(self.word_vocab), embedding_dim=hyper.emb_size
        )

        self.relation_emb = nn.Embedding(
            num_embeddings=len(self.relation_vocab), embedding_dim=hyper.rel_emb_size
        )
        # bio + pad
        self.bio_emb = nn.Embedding(
            num_embeddings=len(self.bio_vocab), embedding_dim=hyper.rel_emb_size
        )

        if hyper.cell_name == "gru":
            self.encoder = nn.GRU(
                hyper.emb_size, hyper.hidden_size, bidirectional=True, batch_first=True
            )
        elif hyper.cell_name == "lstm":
            self.encoder = nn.LSTM(
                hyper.emb_size, hyper.hidden_size, bidirectional=True, batch_first=True
            )
        else:
            raise ValueError("cell name should be gru/lstm!")

        if hyper.activation.lower() == "relu":
            self.activation = nn.ReLU()
        elif hyper.activation.lower() == "tanh":
            self.activation = nn.Tanh()
        elif hyper.activation.lower() == "gelu":
            self.activation = F.gelu
        else:
            raise ValueError("unexpected activation!")

        self.tagger = CRF(len(self.bio_vocab) - 1, batch_first=True)

        self.selection_u = nn.Linear(
            hyper.hidden_size + hyper.rel_emb_size, hyper.rel_emb_size
        )
        self.selection_v = nn.Linear(
            hyper.hidden_size + hyper.rel_emb_size, hyper.rel_emb_size
        )
        self.selection_uv = nn.Linear(2 * hyper.rel_emb_size, hyper.rel_emb_size)
        # remove <pad>
        self.emission = nn.Linear(hyper.hidden_size, len(self.bio_vocab) - 1)

        self.metrics = F1_triplet()
        self.get_metric = self.metrics.get_metric
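
The three selection layers are the scoring head of the multi-head selection
model: every token pair (i, j) is projected through selection_u and
selection_v, fused by selection_uv, and matched against the relation embedding
table to yield one logit per (subject, relation, object) candidate. The
forward pass is not shown in this excerpt, so the wiring below is a sketch of
the usual formulation, with illustrative sizes:

    import torch
    import torch.nn as nn

    B, L, H, R, NR = 2, 5, 8, 4, 10  # batch, seq len, token dim, rel dim, #relations
    selection_u = nn.Linear(H, R)
    selection_v = nn.Linear(H, R)
    selection_uv = nn.Linear(2 * R, R)
    relation_emb = nn.Embedding(NR, R)

    o = torch.randn(B, L, H)                            # per-token features
    u = selection_u(o).unsqueeze(1).expand(B, L, L, R)  # broadcast over rows
    v = selection_v(o).unsqueeze(2).expand(B, L, L, R)  # broadcast over columns
    uv = torch.tanh(selection_uv(torch.cat((u, v), dim=-1)))
    logits = torch.einsum('bijh,rh->birj', uv, relation_emb.weight)
    assert logits.shape == (B, L, NR, L)  # (batch, token i, relation, token j)
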