    def build(self, source_vectors=None, target_vectors=None):
        hidden_size = self.config.hidden_sizes[0]
        nb_classes = self.config.nb_classes
        dropout = self.config.dropout

        weight = make_loss_weights(nb_classes, const.BAD_ID,
                                   self.config.bad_weight)

        self._loss = nn.CrossEntropyLoss(weight=weight,
                                         ignore_index=const.PAD_TAGS_ID)

        # Embeddings layers:
        self._build_embeddings(source_vectors, target_vectors)

        feature_set_size = (
            self.config.source_embeddings_size +
            self.config.target_embeddings_size) * self.config.window_size

        self.linear = nn.Linear(feature_set_size, hidden_size)
        self.linear_out = nn.Linear(hidden_size, nb_classes)

        self.dropout = nn.Dropout(dropout)

        torch.nn.init.xavier_uniform_(self.linear.weight)
        torch.nn.init.xavier_uniform_(self.linear_out.weight)
        torch.nn.init.constant_(self.linear.bias, 0.0)
        torch.nn.init.constant_(self.linear_out.bias, 0.0)

        self.is_built = True
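# For context: a minimal, hypothetical sketch of how a forward pass could
# consume the layers built above. The embedding attribute names
# (`source_emb`, `target_emb`) and the tanh activation are assumptions,
# not part of the original code.
import torch


def forward_sketch(model, source_windows, target_windows):
    """Hypothetical forward pass over windowed source/target token indices.

    Assumes inputs are LongTensors of shape (batch, seq_len, window_size)
    and that `_build_embeddings` created `model.source_emb` and
    `model.target_emb`.
    """
    src = model.source_emb(source_windows)   # (batch, seq, window, src_dim)
    tgt = model.target_emb(target_windows)   # (batch, seq, window, tgt_dim)
    features = torch.cat([src, tgt], dim=-1)
    # Flatten the window dimension: (batch, seq, feature_set_size)
    features = features.view(features.size(0), features.size(1), -1)

    hidden = torch.tanh(model.linear(model.dropout(features)))
    return model.linear_out(hidden)          # (batch, seq, nb_classes)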
    def __init__(self,
                 vocabs,
                 predictor_tgt=None,
                 predictor_src=None,
                 **kwargs):

        super().__init__(vocabs=vocabs, ConfigCls=EstimatorConfig, **kwargs)

        if predictor_src:
            self.config.update(predictor_src.config)
        elif predictor_tgt:
            self.config.update(predictor_tgt.config)

        # Predictor Settings #
        predict_tgt = (self.config.predict_target or self.config.predict_gaps
                       or self.config.sentence_level)
        if predict_tgt and not predictor_tgt:
            predictor_tgt = Predictor(
                vocabs=vocabs,
                predict_inverse=False,
                hidden_pred=self.config.hidden_pred,
                rnn_layers_pred=self.config.rnn_layers_pred,
                dropout_pred=self.config.dropout_pred,
                target_embeddings_size=self.config.target_embeddings_size,
                source_embeddings_size=self.config.source_embeddings_size,
                out_embeddings_size=self.config.out_embeddings_size,
            )
        if self.config.predict_source and not predictor_src:
            predictor_src = Predictor(
                vocabs=vocabs,
                predict_inverse=True,
                hidden_pred=self.config.hidden_pred,
                rnn_layers_pred=self.config.rnn_layers_pred,
                dropout_pred=self.config.dropout_pred,
                target_embeddings_size=self.config.target_embeddings_size,
                source_embeddings_size=self.config.source_embeddings_size,
                out_embeddings_size=self.config.out_embeddings_size,
            )

        # Update the predictor vocabs if token level == True
        # Required by `get_mask` call in predictor forward with `pe` side
        # to determine padding IDs.
        if self.config.token_level:
            if predictor_src:
                predictor_src.vocabs = vocabs
            if predictor_tgt:
                predictor_tgt.vocabs = vocabs

        self.predictor_tgt = predictor_tgt
        self.predictor_src = predictor_src

        predictor_hidden = self.config.hidden_pred
        embedding_size = self.config.out_embeddings_size
        input_size = 2 * predictor_hidden + embedding_size

        self.nb_classes = len(const.LABELS)
        self.lstm_input_size = input_size

        self.mlp = None
        self.sentence_pred = None
        self.sentence_sigma = None
        self.binary_pred = None
        self.binary_scale = None

        # Build Model #

        if self.config.start_stop:
            self.start_PreQEFV = nn.Parameter(torch.zeros(
                1, 1, embedding_size))
            self.end_PreQEFV = nn.Parameter(torch.zeros(1, 1, embedding_size))

        if self.config.mlp_est:
            self.mlp = nn.Sequential(
                nn.Linear(input_size, self.config.hidden_est), nn.Tanh())
            self.lstm_input_size = self.config.hidden_est

        self.lstm = nn.LSTM(
            input_size=self.lstm_input_size,
            hidden_size=self.config.hidden_est,
            num_layers=self.config.rnn_layers_est,
            batch_first=True,
            dropout=self.config.dropout_est,
            bidirectional=True,
        )
        self.embedding_out = nn.Linear(2 * self.config.hidden_est,
                                       self.nb_classes)
        if self.config.predict_gaps:
            self.embedding_out_gaps = nn.Linear(4 * self.config.hidden_est,
                                                self.nb_classes)
        self.dropout = None
        if self.config.dropout_est:
            self.dropout = nn.Dropout(self.config.dropout_est)

        # Multitask Learning Objectives #
        sentence_input_size = (2 * self.config.rnn_layers_est *
                               self.config.hidden_est)
        if self.config.sentence_level:
            self.sentence_pred = nn.Sequential(
                nn.Linear(sentence_input_size, sentence_input_size // 2),
                nn.Sigmoid(),
                nn.Linear(sentence_input_size // 2, sentence_input_size // 4),
                nn.Sigmoid(),
                nn.Linear(sentence_input_size // 4, 1),
            )
            self.sentence_sigma = None
            if self.config.sentence_ll:
                # Predict truncated Gaussian distribution
                self.sentence_sigma = nn.Sequential(
                    nn.Linear(sentence_input_size, sentence_input_size // 2),
                    nn.Sigmoid(),
                    nn.Linear(sentence_input_size // 2,
                              sentence_input_size // 4),
                    nn.Sigmoid(),
                    nn.Linear(sentence_input_size // 4, 1),
                    nn.Sigmoid(),
                )
        if self.config.binary_level:
            self.binary_pred = nn.Sequential(
                nn.Linear(sentence_input_size, sentence_input_size // 2),
                nn.Tanh(),
                nn.Linear(sentence_input_size // 2, sentence_input_size // 4),
                nn.Tanh(),
                nn.Linear(sentence_input_size // 4, 2),
            )

        # Build Losses #

        # FIXME: Remove dependency on magic numbers
        self.xents = nn.ModuleDict()
        weight = make_loss_weights(self.nb_classes, const.BAD_ID,
                                   self.config.target_bad_weight)

        self.xents[const.TARGET_TAGS] = nn.CrossEntropyLoss(
            reduction='sum', ignore_index=const.PAD_TAGS_ID, weight=weight)
        if self.config.predict_source:
            weight = make_loss_weights(self.nb_classes, const.BAD_ID,
                                       self.config.source_bad_weight)
            self.xents[const.SOURCE_TAGS] = nn.CrossEntropyLoss(
                reduction='sum', ignore_index=const.PAD_TAGS_ID, weight=weight)
        if self.config.predict_gaps:
            weight = make_loss_weights(self.nb_classes, const.BAD_ID,
                                       self.config.gaps_bad_weight)
            self.xents[const.GAP_TAGS] = nn.CrossEntropyLoss(
                reduction='sum', ignore_index=const.PAD_TAGS_ID, weight=weight)
        if self.config.sentence_level and not self.config.sentence_ll:
            self.mse_loss = nn.MSELoss(reduction='sum')
        if self.config.binary_level:
            self.xent_binary = nn.CrossEntropyLoss(reduction='sum')
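# For reference: a minimal sketch of what a helper like `make_loss_weights`
# could look like, inferred from its call sites above (it is assumed to
# return a per-class weight tensor with `bad_weight` at the BAD index and
# 1.0 everywhere else); this is not necessarily the library's actual
# implementation.
import torch


def make_loss_weights_sketch(nb_classes, bad_id, bad_weight):
    # Uniform weight of 1.0 for every class, except the BAD class,
    # which gets `bad_weight` to counter class imbalance.
    weights = torch.ones(nb_classes)
    weights[bad_id] = bad_weight
    return weights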
Example #3
    def build(self, source_vectors=None, target_vectors=None):
        nb_classes = self.config.nb_classes
        # FIXME: Remove dependency on magic number
        weight = make_loss_weights(nb_classes, const.BAD_ID, self.config.bad_weight)

        '''start: use BERT to generate an embedding for each token in the vocabulary, reformat it, and store the results in self.source_bert and self.target_bert respectively'''
        # path = os.getcwd()
        # source_bert, target_bert = [], []
        # src_bert = np.load(path + '/data/exp2/en_emb.npy', allow_pickle=True).item()
        # tgt_bert = np.load(path + '/data/exp2/de_emb.npy', allow_pickle=True).item()
        # for s in src_bert:
        #     source_bert.append(list(src_bert[s].squeeze(0)))
        # for t in tgt_bert:
        #     target_bert.append(list(tgt_bert[t].squeeze(0)))
        # self.source_bert = torch.Tensor(source_bert)
        # self.target_bert = torch.Tensor(target_bert)
        '''end'''

        self._loss = nn.CrossEntropyLoss(weight=weight, ignore_index=self.config.tags_pad_id, reduction='sum')

        # Embeddings layers:
        self._build_embeddings(source_vectors, target_vectors)

        feature_set_size = (self.config.source_embeddings_size + self.config.target_embeddings_size) * self.config.window_size

        l1_dim = self.config.hidden_sizes[0]
        l2_dim = self.config.hidden_sizes[1]
        l3_dim = self.config.hidden_sizes[2]
        l4_dim = self.config.hidden_sizes[3]

        nb_classes = self.config.nb_classes
        dropout = self.config.dropout

        # Linear layers
        # 768 matches the hidden size of the precomputed BERT embeddings
        # loaded in the commented-out block above; feature_set_size is
        # unused in this BERT-based variant.
        self.linear_1 = nn.Linear(768, l1_dim)
        self.linear_2 = nn.Linear(l1_dim, l1_dim)
        self.linear_3 = nn.Linear(2 * l2_dim, l2_dim)
        self.linear_4 = nn.Linear(l2_dim, l2_dim)
        self.linear_5 = nn.Linear(2 * l2_dim, l3_dim)
        self.linear_6 = nn.Linear(l3_dim, l4_dim)

        # Output layer
        self.linear_out = nn.Linear(l4_dim, nb_classes)

        # Recurrent Layers
        self.gru_1 = nn.GRU(l1_dim, l2_dim, bidirectional=True, batch_first=True)
        self.gru_2 = nn.GRU(l2_dim, l2_dim, bidirectional=True, batch_first=True)

        # Dropout after linear layers
        self.dropout_in = nn.Dropout(dropout)
        self.dropout_out = nn.Dropout(dropout)

        # Explicit initializations
        nn.init.xavier_uniform_(self.linear_1.weight)
        nn.init.xavier_uniform_(self.linear_2.weight)
        nn.init.xavier_uniform_(self.linear_3.weight)
        nn.init.xavier_uniform_(self.linear_4.weight)
        nn.init.xavier_uniform_(self.linear_5.weight)
        nn.init.xavier_uniform_(self.linear_6.weight)
        # nn.init.xavier_uniform_(self.linear_out.weight)
        nn.init.constant_(self.linear_1.bias, 0.0)
        nn.init.constant_(self.linear_2.bias, 0.0)
        nn.init.constant_(self.linear_3.bias, 0.0)
        nn.init.constant_(self.linear_4.bias, 0.0)
        nn.init.constant_(self.linear_5.bias, 0.0)
        nn.init.constant_(self.linear_6.bias, 0.0)
        # nn.init.constant_(self.linear_out.bias, 0.0)

        self.is_built = True
    def build(self, source_vectors=None, target_vectors=None):
        nb_classes = self.config.nb_classes
        # FIXME: Remove dependency on magic number
        weight = make_loss_weights(nb_classes, const.BAD_ID,
                                   self.config.bad_weight)

        self._loss = nn.CrossEntropyLoss(weight=weight,
                                         ignore_index=self.config.tags_pad_id,
                                         reduction='sum')

        # Embeddings layers:
        self._build_embeddings(source_vectors, target_vectors)

        feature_set_size = (
            self.config.source_embeddings_size +
            self.config.target_embeddings_size) * self.config.window_size

        l1_dim = self.config.hidden_sizes[0]
        l2_dim = self.config.hidden_sizes[1]
        l3_dim = self.config.hidden_sizes[2]
        l4_dim = self.config.hidden_sizes[3]

        nb_classes = self.config.nb_classes
        dropout = self.config.dropout

        # Linear layers
        self.linear_1 = nn.Linear(feature_set_size, l1_dim)
        self.linear_2 = nn.Linear(l1_dim, l1_dim)
        self.linear_3 = nn.Linear(2 * l2_dim, l2_dim)
        self.linear_4 = nn.Linear(l2_dim, l2_dim)
        self.linear_5 = nn.Linear(2 * l2_dim, l3_dim)
        self.linear_6 = nn.Linear(l3_dim, l4_dim)

        # Output layer
        self.linear_out = nn.Linear(l4_dim, nb_classes)

        # Recurrent Layers
        self.gru_1 = nn.GRU(l1_dim,
                            l2_dim,
                            bidirectional=True,
                            batch_first=True)
        self.gru_2 = nn.GRU(l2_dim,
                            l2_dim,
                            bidirectional=True,
                            batch_first=True)

        # Dropout after linear layers
        self.dropout_in = nn.Dropout(dropout)
        self.dropout_out = nn.Dropout(dropout)

        # Explicit initializations
        nn.init.xavier_uniform_(self.linear_1.weight)
        nn.init.xavier_uniform_(self.linear_2.weight)
        nn.init.xavier_uniform_(self.linear_3.weight)
        nn.init.xavier_uniform_(self.linear_4.weight)
        nn.init.xavier_uniform_(self.linear_5.weight)
        nn.init.xavier_uniform_(self.linear_6.weight)
        # nn.init.xavier_uniform_(self.linear_out.weight)
        nn.init.constant_(self.linear_1.bias, 0.0)
        nn.init.constant_(self.linear_2.bias, 0.0)
        nn.init.constant_(self.linear_3.bias, 0.0)
        nn.init.constant_(self.linear_4.bias, 0.0)
        nn.init.constant_(self.linear_5.bias, 0.0)
        nn.init.constant_(self.linear_6.bias, 0.0)
        # nn.init.constant_(self.linear_out.bias, 0.)

        self.is_built = True
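# A rough, hypothetical sketch of how the layers defined above could be
# wired in a forward pass (linear -> biGRU -> linear -> biGRU -> linear ->
# output). The ReLU activations and the exact wiring are assumptions and
# may differ from the actual model.
import torch


def nuqe_forward_sketch(model, features):
    """Hypothetical wiring of the layers created in build().

    `features` is assumed to have shape (batch, seq_len, feature_set_size),
    i.e. the concatenated windowed source/target embeddings.
    """
    h = model.dropout_in(torch.relu(model.linear_1(features)))
    h = torch.relu(model.linear_2(h))        # (batch, seq, l1_dim)

    h, _ = model.gru_1(h)                    # (batch, seq, 2 * l2_dim)
    h = torch.relu(model.linear_3(h))
    h = torch.relu(model.linear_4(h))        # (batch, seq, l2_dim)

    h, _ = model.gru_2(h)                    # (batch, seq, 2 * l2_dim)
    h = torch.relu(model.linear_5(h))
    h = model.dropout_out(torch.relu(model.linear_6(h)))

    return model.linear_out(h)               # (batch, seq, nb_classes)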