def build(self, source_vectors=None, target_vectors=None):
    hidden_size = self.config.hidden_sizes[0]
    nb_classes = self.config.nb_classes
    dropout = self.config.dropout

    weight = make_loss_weights(nb_classes, const.BAD_ID, self.config.bad_weight)
    self._loss = nn.CrossEntropyLoss(weight=weight, ignore_index=const.PAD_TAGS_ID)

    # Embeddings layers:
    self._build_embeddings(source_vectors, target_vectors)

    feature_set_size = (
        self.config.source_embeddings_size + self.config.target_embeddings_size
    ) * self.config.window_size

    self.linear = nn.Linear(feature_set_size, hidden_size)
    self.linear_out = nn.Linear(hidden_size, nb_classes)
    self.dropout = nn.Dropout(dropout)

    torch.nn.init.xavier_uniform_(self.linear.weight)
    torch.nn.init.xavier_uniform_(self.linear_out.weight)
    torch.nn.init.constant_(self.linear.bias, 0.0)
    torch.nn.init.constant_(self.linear_out.bias, 0.0)

    self.is_built = True
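# --- Hedged sketch (not the repository's actual helper) ----------------------
# `make_loss_weights(nb_classes, const.BAD_ID, bad_weight)` above is assumed to
# return a 1-D tensor of per-class loss weights with `bad_weight` at the BAD
# class index and 1.0 elsewhere, so that CrossEntropyLoss penalizes mistakes on
# BAD tokens more heavily. A minimal stand-in under that assumption:
import torch

def make_loss_weights_sketch(nb_classes, target_idx, weight):
    """Per-class weights: 1.0 everywhere, `weight` at `target_idx` (hypothetical helper)."""
    weight_mask = torch.ones(nb_classes)
    weight_mask[target_idx] = weight
    return weight_mask

# Example: with 2 classes and BAD_ID == 1,
# make_loss_weights_sketch(2, 1, 3.0) -> tensor([1., 3.])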
def __init__(self, vocabs, predictor_tgt=None, predictor_src=None, **kwargs):
    super().__init__(vocabs=vocabs, ConfigCls=EstimatorConfig, **kwargs)

    if predictor_src:
        self.config.update(predictor_src.config)
    elif predictor_tgt:
        self.config.update(predictor_tgt.config)

    # Predictor Settings #
    predict_tgt = (
        self.config.predict_target
        or self.config.predict_gaps
        or self.config.sentence_level
    )
    if predict_tgt and not predictor_tgt:
        predictor_tgt = Predictor(
            vocabs=vocabs,
            predict_inverse=False,
            hidden_pred=self.config.hidden_pred,
            rnn_layers_pred=self.config.rnn_layers_pred,
            dropout_pred=self.config.dropout_pred,
            target_embeddings_size=self.config.target_embeddings_size,
            source_embeddings_size=self.config.source_embeddings_size,
            out_embeddings_size=self.config.out_embeddings_size,
        )
    if self.config.predict_source and not predictor_src:
        predictor_src = Predictor(
            vocabs=vocabs,
            predict_inverse=True,
            hidden_pred=self.config.hidden_pred,
            rnn_layers_pred=self.config.rnn_layers_pred,
            dropout_pred=self.config.dropout_pred,
            target_embeddings_size=self.config.target_embeddings_size,
            source_embeddings_size=self.config.source_embeddings_size,
            out_embeddings_size=self.config.out_embeddings_size,
        )

    # Update the predictor vocabs if token level == True
    # Required by `get_mask` call in predictor forward with `pe` side
    # to determine padding IDs.
    if self.config.token_level:
        if predictor_src:
            predictor_src.vocabs = vocabs
        if predictor_tgt:
            predictor_tgt.vocabs = vocabs

    self.predictor_tgt = predictor_tgt
    self.predictor_src = predictor_src

    predictor_hidden = self.config.hidden_pred
    embedding_size = self.config.out_embeddings_size
    input_size = 2 * predictor_hidden + embedding_size

    self.nb_classes = len(const.LABELS)
    self.lstm_input_size = input_size

    self.mlp = None
    self.sentence_pred = None
    self.sentence_sigma = None
    self.binary_pred = None
    self.binary_scale = None

    # Build Model #
    if self.config.start_stop:
        self.start_PreQEFV = nn.Parameter(torch.zeros(1, 1, embedding_size))
        self.end_PreQEFV = nn.Parameter(torch.zeros(1, 1, embedding_size))

    if self.config.mlp_est:
        self.mlp = nn.Sequential(
            nn.Linear(input_size, self.config.hidden_est), nn.Tanh()
        )
        self.lstm_input_size = self.config.hidden_est

    self.lstm = nn.LSTM(
        input_size=self.lstm_input_size,
        hidden_size=self.config.hidden_est,
        num_layers=self.config.rnn_layers_est,
        batch_first=True,
        dropout=self.config.dropout_est,
        bidirectional=True,
    )
    self.embedding_out = nn.Linear(2 * self.config.hidden_est, self.nb_classes)
    if self.config.predict_gaps:
        self.embedding_out_gaps = nn.Linear(
            4 * self.config.hidden_est, self.nb_classes
        )
    self.dropout = None
    if self.config.dropout_est:
        self.dropout = nn.Dropout(self.config.dropout_est)

    # Multitask Learning Objectives #
    sentence_input_size = 2 * self.config.rnn_layers_est * self.config.hidden_est
    if self.config.sentence_level:
        self.sentence_pred = nn.Sequential(
            nn.Linear(sentence_input_size, sentence_input_size // 2),
            nn.Sigmoid(),
            nn.Linear(sentence_input_size // 2, sentence_input_size // 4),
            nn.Sigmoid(),
            nn.Linear(sentence_input_size // 4, 1),
        )
        self.sentence_sigma = None
        if self.config.sentence_ll:
            # Predict truncated Gaussian distribution
            self.sentence_sigma = nn.Sequential(
                nn.Linear(sentence_input_size, sentence_input_size // 2),
                nn.Sigmoid(),
                nn.Linear(sentence_input_size // 2, sentence_input_size // 4),
                nn.Sigmoid(),
                nn.Linear(sentence_input_size // 4, 1),
                nn.Sigmoid(),
            )

    if self.config.binary_level:
        self.binary_pred = nn.Sequential(
            nn.Linear(sentence_input_size, sentence_input_size // 2),
            nn.Tanh(),
            nn.Linear(sentence_input_size // 2, sentence_input_size // 4),
            nn.Tanh(),
            nn.Linear(sentence_input_size // 4, 2),
        )

    # Build Losses #
    # FIXME: Remove dependency on magic numbers
    self.xents = nn.ModuleDict()
    weight = make_loss_weights(
        self.nb_classes, const.BAD_ID, self.config.target_bad_weight
    )
    self.xents[const.TARGET_TAGS] = nn.CrossEntropyLoss(
        reduction='sum', ignore_index=const.PAD_TAGS_ID, weight=weight
    )
    if self.config.predict_source:
        weight = make_loss_weights(
            self.nb_classes, const.BAD_ID, self.config.source_bad_weight
        )
        self.xents[const.SOURCE_TAGS] = nn.CrossEntropyLoss(
            reduction='sum', ignore_index=const.PAD_TAGS_ID, weight=weight
        )
    if self.config.predict_gaps:
        weight = make_loss_weights(
            self.nb_classes, const.BAD_ID, self.config.gaps_bad_weight
        )
        self.xents[const.GAP_TAGS] = nn.CrossEntropyLoss(
            reduction='sum', ignore_index=const.PAD_TAGS_ID, weight=weight
        )
    if self.config.sentence_level and not self.config.sentence_ll:
        self.mse_loss = nn.MSELoss(reduction='sum')
    if self.config.binary_level:
        self.xent_binary = nn.CrossEntropyLoss(reduction='sum')
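# --- Hedged illustration (standalone, not part of the class above) -----------
# The sentence-level head takes the LSTM's final hidden states concatenated over
# both directions and all `rnn_layers_est` layers, hence the input width of
# 2 * rnn_layers_est * hidden_est. The values below are made-up examples, not
# the repository's defaults: with hidden_est=125 and rnn_layers_est=1 the MLP
# shrinks 250 -> 125 -> 62 -> 1 and emits one scalar quality score per sentence.
import torch
import torch.nn as nn

hidden_est, rnn_layers_est, batch = 125, 1, 4      # assumed example values
sentence_input_size = 2 * rnn_layers_est * hidden_est

sentence_pred_demo = nn.Sequential(
    nn.Linear(sentence_input_size, sentence_input_size // 2),
    nn.Sigmoid(),
    nn.Linear(sentence_input_size // 2, sentence_input_size // 4),
    nn.Sigmoid(),
    nn.Linear(sentence_input_size // 4, 1),
)

summary = torch.randn(batch, sentence_input_size)  # stand-in for LSTM final states
print(sentence_pred_demo(summary).shape)           # torch.Size([4, 1])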
def build(self, source_vectors=None, target_vectors=None):
    nb_classes = self.config.nb_classes
    # FIXME: Remove dependency on magic number
    weight = make_loss_weights(nb_classes, const.BAD_ID, self.config.bad_weight)

    '''start: generate a BERT embedding for each token in the vocabulary,
    reformat it, and store the results in self.source_bert and
    self.target_bert respectively'''
    # path = os.getcwd()
    # source_bert, target_bert = [], []
    # src_bert = np.load(path + '/data/exp2/en_emb.npy', allow_pickle=True).item()
    # tgt_bert = np.load(path + '/data/exp2/de_emb.npy', allow_pickle=True).item()
    # for s in src_bert:
    #     source_bert.append(list(src_bert[s].squeeze(0)))
    # for t in tgt_bert:
    #     target_bert.append(list(tgt_bert[t].squeeze(0)))
    # self.source_bert = torch.Tensor(source_bert)
    # self.target_bert = torch.Tensor(target_bert)
    '''end'''

    self._loss = nn.CrossEntropyLoss(
        weight=weight, ignore_index=self.config.tags_pad_id, reduction='sum'
    )

    # Embeddings layers:
    self._build_embeddings(source_vectors, target_vectors)

    # Note: feature_set_size is computed but not used below; linear_1 takes
    # 768-dimensional BERT vectors as input instead.
    feature_set_size = (
        self.config.source_embeddings_size + self.config.target_embeddings_size
    ) * self.config.window_size

    l1_dim = self.config.hidden_sizes[0]
    l2_dim = self.config.hidden_sizes[1]
    l3_dim = self.config.hidden_sizes[2]
    l4_dim = self.config.hidden_sizes[3]

    nb_classes = self.config.nb_classes
    dropout = self.config.dropout

    # Linear layers
    self.linear_1 = nn.Linear(768, l1_dim)
    self.linear_2 = nn.Linear(l1_dim, l1_dim)
    self.linear_3 = nn.Linear(2 * l2_dim, l2_dim)
    self.linear_4 = nn.Linear(l2_dim, l2_dim)
    self.linear_5 = nn.Linear(2 * l2_dim, l3_dim)
    self.linear_6 = nn.Linear(l3_dim, l4_dim)

    # Output layer
    self.linear_out = nn.Linear(l4_dim, nb_classes)

    # Recurrent Layers
    self.gru_1 = nn.GRU(l1_dim, l2_dim, bidirectional=True, batch_first=True)
    self.gru_2 = nn.GRU(l2_dim, l2_dim, bidirectional=True, batch_first=True)

    # Dropout after linear layers
    self.dropout_in = nn.Dropout(dropout)
    self.dropout_out = nn.Dropout(dropout)

    # Explicit initializations
    nn.init.xavier_uniform_(self.linear_1.weight)
    nn.init.xavier_uniform_(self.linear_2.weight)
    nn.init.xavier_uniform_(self.linear_3.weight)
    nn.init.xavier_uniform_(self.linear_4.weight)
    nn.init.xavier_uniform_(self.linear_5.weight)
    nn.init.xavier_uniform_(self.linear_6.weight)
    # nn.init.xavier_uniform_(self.linear_out.weight)

    nn.init.constant_(self.linear_1.bias, 0.0)
    nn.init.constant_(self.linear_2.bias, 0.0)
    nn.init.constant_(self.linear_3.bias, 0.0)
    nn.init.constant_(self.linear_4.bias, 0.0)
    nn.init.constant_(self.linear_5.bias, 0.0)
    nn.init.constant_(self.linear_6.bias, 0.0)
    # nn.init.constant_(self.linear_out.bias, 0.0)

    self.is_built = True
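# --- Hedged sketch (not the repository's preprocessing script) ---------------
# The commented block above expects .npy files holding a {token: 1x768 tensor}
# dictionary per language. One plausible way to produce such a file with the
# HuggingFace `transformers` package (an assumed dependency; the model name and
# the `embed_vocab` helper are illustrative, not taken from the repository):
import torch
from transformers import BertModel, BertTokenizer

def embed_vocab(tokens, model_name="bert-base-multilingual-cased"):
    """Return {token: (1, 768) tensor}, averaging each token's subword vectors."""
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = BertModel.from_pretrained(model_name).eval()
    embeddings = {}
    with torch.no_grad():
        for token in tokens:
            enc = tokenizer(token, return_tensors="pt")
            hidden = model(**enc).last_hidden_state          # (1, n_subwords + 2, 768)
            embeddings[token] = hidden[:, 1:-1].mean(dim=1)  # drop [CLS]/[SEP]
    return embeddings

# Example (hypothetical token list and output path):
# np.save('data/exp2/en_emb.npy', embed_vocab(['the', 'cat', 'sat']))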
def build(self, source_vectors=None, target_vectors=None):
    nb_classes = self.config.nb_classes
    # FIXME: Remove dependency on magic number
    weight = make_loss_weights(nb_classes, const.BAD_ID, self.config.bad_weight)

    self._loss = nn.CrossEntropyLoss(
        weight=weight, ignore_index=self.config.tags_pad_id, reduction='sum'
    )

    # Embeddings layers:
    self._build_embeddings(source_vectors, target_vectors)

    feature_set_size = (
        self.config.source_embeddings_size + self.config.target_embeddings_size
    ) * self.config.window_size

    l1_dim = self.config.hidden_sizes[0]
    l2_dim = self.config.hidden_sizes[1]
    l3_dim = self.config.hidden_sizes[2]
    l4_dim = self.config.hidden_sizes[3]

    nb_classes = self.config.nb_classes
    dropout = self.config.dropout

    # Linear layers
    self.linear_1 = nn.Linear(feature_set_size, l1_dim)
    self.linear_2 = nn.Linear(l1_dim, l1_dim)
    self.linear_3 = nn.Linear(2 * l2_dim, l2_dim)
    self.linear_4 = nn.Linear(l2_dim, l2_dim)
    self.linear_5 = nn.Linear(2 * l2_dim, l3_dim)
    self.linear_6 = nn.Linear(l3_dim, l4_dim)

    # Output layer
    self.linear_out = nn.Linear(l4_dim, nb_classes)

    # Recurrent Layers
    self.gru_1 = nn.GRU(l1_dim, l2_dim, bidirectional=True, batch_first=True)
    self.gru_2 = nn.GRU(l2_dim, l2_dim, bidirectional=True, batch_first=True)

    # Dropout after linear layers
    self.dropout_in = nn.Dropout(dropout)
    self.dropout_out = nn.Dropout(dropout)

    # Explicit initializations
    nn.init.xavier_uniform_(self.linear_1.weight)
    nn.init.xavier_uniform_(self.linear_2.weight)
    nn.init.xavier_uniform_(self.linear_3.weight)
    nn.init.xavier_uniform_(self.linear_4.weight)
    nn.init.xavier_uniform_(self.linear_5.weight)
    nn.init.xavier_uniform_(self.linear_6.weight)
    # nn.init.xavier_uniform_(self.linear_out)

    nn.init.constant_(self.linear_1.bias, 0.0)
    nn.init.constant_(self.linear_2.bias, 0.0)
    nn.init.constant_(self.linear_3.bias, 0.0)
    nn.init.constant_(self.linear_4.bias, 0.0)
    nn.init.constant_(self.linear_5.bias, 0.0)
    nn.init.constant_(self.linear_6.bias, 0.0)
    # nn.init.constant_(self.linear_out.bias, 0.)

    self.is_built = True
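# --- Hedged dimension check (not the model's actual forward pass) ------------
# The layer widths built above chain together consistently, since each
# bidirectional GRU doubles its hidden size:
#   linear_1/2 -> gru_1 -> linear_3/4 -> gru_2 -> linear_5/6 -> linear_out.
# The sizes below are placeholders, not the repository's defaults.
import torch
import torch.nn as nn

feature_set_size, l1, l2, l3, l4, nb_classes = 600, 400, 200, 100, 50, 2
x = torch.randn(8, 7, feature_set_size)            # (batch, tokens, features)

x = nn.Linear(feature_set_size, l1)(x)
x = nn.Linear(l1, l1)(x)
x, _ = nn.GRU(l1, l2, bidirectional=True, batch_first=True)(x)  # -> 2 * l2
x = nn.Linear(2 * l2, l2)(x)
x = nn.Linear(l2, l2)(x)
x, _ = nn.GRU(l2, l2, bidirectional=True, batch_first=True)(x)  # -> 2 * l2
x = nn.Linear(2 * l2, l3)(x)
x = nn.Linear(l3, l4)(x)
print(nn.Linear(l4, nb_classes)(x).shape)          # torch.Size([8, 7, 2])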