def __init__(self, config, corpus_target, embReader):
    super().__init__(config)

    ####
    # init parameters
    self.corpus_target = config.corpus_target
    self.max_num_sents = config.max_num_sents  # document length, in terms of the number of sentences
    self.max_len_sent = config.max_len_sent  # sentence length, in terms of words
    self.max_len_doc = config.max_len_doc  # document length, in terms of words
    self.avg_num_sents = config.avg_num_sents
    self.batch_size = config.batch_size
    self.avg_len_doc = config.avg_len_doc

    self.vocab = corpus_target.vocab  # word2id
    self.rev_vocab = corpus_target.rev_vocab  # id2word
    self.pad_id = corpus_target.pad_id
    self.num_special_vocab = corpus_target.num_special_vocab

    self.embed_size = config.embed_size
    self.dropout_rate = config.dropout
    self.rnn_cell_size = config.rnn_cell_size
    self.path_pretrained_emb = config.path_pretrained_emb
    self.num_layers = 1
    self.output_size = config.output_size  # the number of final output classes
    self.pad_level = config.pad_level
    self.use_gpu = config.use_gpu
    self.gen_logs = config.gen_logs

    if not hasattr(config, "freeze_step"):
        config.freeze_step = 5000

    ########
    #
    self.base_encoder = Encoder_Main(config, embReader)

    # self.sim_cosine_d0 = torch.nn.CosineSimilarity(dim=0)
    self.sim_cosine_d2 = torch.nn.CosineSimilarity(dim=2)

    #####################
    fc_in_size = self.base_encoder.encoder_out_size
    linear_1_out = fc_in_size // 2
    linear_2_out = linear_1_out // 2

    self.linear_1 = nn.Linear(fc_in_size, linear_1_out)
    nn.init.xavier_uniform_(self.linear_1.weight)

    self.linear_2 = nn.Linear(linear_1_out, linear_2_out)
    nn.init.xavier_uniform_(self.linear_2.weight)

    self.linear_out = nn.Linear(linear_2_out, self.output_size)
    if corpus_target.output_bias is not None:  # if a bias is given
        init_mean_val = np.expand_dims(corpus_target.output_bias, axis=1)
        bias_val = (np.log(init_mean_val) - np.log(1 - init_mean_val))
        self.linear_out.bias.data = torch.from_numpy(bias_val).type(torch.FloatTensor)
    nn.init.xavier_uniform_(self.linear_out.weight)

    # self.selu = nn.SELU()
    self.elu = nn.ELU()
    self.leak_relu = nn.LeakyReLU()
    self.relu = nn.ReLU()
    self.tanh = nn.Tanh()
    self.sigmoid = nn.Sigmoid()

    self.dropout_layer = nn.Dropout(self.dropout_rate)
    self.dropout_01 = nn.Dropout(0.1)
    self.dropout_02 = nn.Dropout(0.2)

    self.softmax = nn.Softmax(dim=1)

    self.layer_norm1 = nn.LayerNorm(linear_1_out, eps=1e-6)
    self.layer_norm2 = nn.LayerNorm(linear_2_out, eps=1e-6)

    # Multi-task: two linear layers for the sentence-ordering objective
    self.linear_order_1 = nn.Linear(fc_in_size * 2, fc_in_size)  # fc_in_size = self.base_encoder.encoder_out_size
    nn.init.xavier_uniform_(self.linear_order_1.weight)
    self.linear_order_2 = nn.Linear(fc_in_size, fc_in_size // 2)
    nn.init.xavier_uniform_(self.linear_order_2.weight)
    self.linear_order_out = nn.Linear(fc_in_size // 2, 1)
    nn.init.xavier_uniform_(self.linear_order_out.weight)

    return
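
# Illustrative sketch only (not part of the model above): the output bias is initialized
# to the logit of the class prior, log(p) - log(1 - p), so that sigmoid(bias) reproduces
# that prior before any training. The prior values below are made up for the demo.
def _demo_output_bias_logit_init():
    import numpy as np
    import torch

    output_bias = np.array([0.2, 0.5, 0.8])                 # hypothetical class priors
    bias_val = np.log(output_bias) - np.log(1 - output_bias)
    recovered = torch.sigmoid(torch.from_numpy(bias_val))   # ~ tensor([0.2, 0.5, 0.8])
    return recovered
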
def __init__(self, config, corpus_target, embReader):
    super().__init__(config)

    ####
    # init parameters
    self.corpus_target = config.corpus_target
    self.max_num_sents = config.max_num_sents  # document length, in terms of the number of sentences
    self.max_len_sent = config.max_len_sent  # sentence length, in terms of words
    self.max_len_doc = config.max_len_doc  # document length, in terms of words
    self.avg_num_sents = config.avg_num_sents
    self.batch_size = config.batch_size
    self.avg_len_doc = config.avg_len_doc

    self.vocab = corpus_target.vocab  # word2id
    self.rev_vocab = corpus_target.rev_vocab  # id2word
    self.pad_id = corpus_target.pad_id
    self.num_special_vocab = corpus_target.num_special_vocab

    self.dropout_rate = config.dropout
    self.output_size = config.output_size  # the number of final output class
    self.use_gpu = config.use_gpu
    self.gen_logs = config.gen_logs

    if not hasattr(config, "freeze_step"):
        config.freeze_step = 5000

    self.output_attentions = config.output_attentions  # flag for huggingface impl

    self.topk_fwr = config.topk_fwr
    self.threshold_sim = config.threshold_sim
    # self.topk_back = config.topk_back
    self.topk_back = 1

    ########
    #
    self.base_encoder = Encoder_Main(config, embReader)

    # self.sim_cosine_d0 = torch.nn.CosineSimilarity(dim=0)
    self.sim_cosine_d1 = torch.nn.CosineSimilarity(dim=1)
    self.sim_cosine_d2 = torch.nn.CosineSimilarity(dim=2)

    ## tree-transformer
    c = copy.deepcopy
    num_heads = 4
    N = 4  # num of layers
    d_model = self.base_encoder.encoder_out_size
    d_ff = self.base_encoder.encoder_out_size
    dropout = self.dropout_rate

    attn = tt_attn.MultiHeadedAttention(num_heads, d_model)
    group_attn = tt_attn.GroupAttention(d_model)
    ff = tt_module.PositionwiseFeedForward(d_model, d_ff, dropout)
    position = tt_module.PositionalEncoding(d_model, dropout)
    # word_embed = nn.Sequential(Embeddings(d_model, vocab_size), c(position))
    self.tt_encoder = tt_model.Encoder(
        tt_model.EncoderLayer(d_model, c(attn), c(ff), group_attn, dropout),
        N, d_model, dropout)  # we do not need an embedding layer here

    for p in self.tt_encoder.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    if self.use_gpu:
        self.tt_encoder.cuda()

    self.context_weight = nn.Parameter(torch.zeros(self.base_encoder.encoder_out_size, 1))
    nn.init.xavier_uniform_(self.context_weight)

    #####################
    fc_in_size = self.base_encoder.encoder_out_size
    linear_1_out = fc_in_size // 2
    linear_2_out = linear_1_out // 2

    self.linear_1 = nn.Linear(fc_in_size, linear_1_out)
    nn.init.xavier_uniform_(self.linear_1.weight)

    self.linear_2 = nn.Linear(linear_1_out, linear_2_out)
    nn.init.xavier_uniform_(self.linear_2.weight)

    self.linear_out = nn.Linear(linear_2_out, self.output_size)
    if corpus_target.output_bias is not None:  # bias
        init_mean_val = np.expand_dims(corpus_target.output_bias, axis=1)
        bias_val = (np.log(init_mean_val) - np.log(1 - init_mean_val))
        self.linear_out.bias.data = torch.from_numpy(bias_val).type(torch.FloatTensor)
    nn.init.xavier_uniform_(self.linear_out.weight)

    # self.selu = nn.SELU()
    self.elu = nn.ELU()
    self.leak_relu = nn.LeakyReLU()
    self.relu = nn.ReLU()
    self.tanh = nn.Tanh()
    self.sigmoid = nn.Sigmoid()

    self.dropout_layer = nn.Dropout(self.dropout_rate)
    self.dropout_01 = nn.Dropout(0.1)
    self.dropout_02 = nn.Dropout(0.2)

    self.softmax = nn.Softmax(dim=1)

    # self.layer_norm1 = nn.LayerNorm(linear_1_out, eps=1e-6)
    # self.layer_norm2 = nn.LayerNorm(linear_2_out, eps=1e-6)

    return
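
# Hedged sketch (an assumption, not the model's actual forward pass): one common way a
# learned context vector such as `context_weight` above is applied, i.e. attention
# pooling of per-sentence representations into a single document vector. Sizes are toys.
def _demo_context_attention_pooling():
    import torch
    import torch.nn.functional as F

    batch, num_sents, d_model = 2, 5, 8
    sent_repr = torch.randn(batch, num_sents, d_model)       # e.g. tree-transformer output
    context_weight = torch.randn(d_model, 1)                  # analogous to self.context_weight

    scores = torch.matmul(sent_repr, context_weight)          # (batch, num_sents, 1)
    attn = F.softmax(scores, dim=1)                           # attention over sentences
    doc_repr = (attn * sent_repr).sum(dim=1)                  # (batch, d_model)
    return doc_repr
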
def __init__(self, config, corpus_target, embReader):
    super().__init__(config)

    ####
    self.corpus_target = config.corpus_target
    self.target_model = config.target_model.lower()

    # init parameters
    self.max_num_sents = config.max_num_sents  # document length, in terms of the number of sentences
    self.max_len_sent = config.max_len_sent  # sentence length, in terms of words
    self.max_len_doc = config.max_len_doc  # document length, in terms of words
    self.batch_size = config.batch_size

    self.vocab = corpus_target.vocab  # word2id
    self.rev_vocab = corpus_target.rev_vocab  # id2word
    self.pad_id = corpus_target.pad_id
    self.num_special_vocab = corpus_target.num_special_vocab

    self.embed_size = config.embed_size
    self.dropout_rate = config.dropout
    self.rnn_cell_size = config.rnn_cell_size
    self.path_pretrained_emb = config.path_pretrained_emb
    self.num_layers = 1
    self.output_size = config.output_size  # the number of final output class
    self.pad_level = config.pad_level
    self.use_gpu = config.use_gpu

    if not hasattr(config, "freeze_step"):
        config.freeze_step = 5000

    ########
    #
    self.base_encoder = Encoder_Main(config, embReader)

    #
    self.conv_size = 5
    self.conv = nn.Conv1d(
        in_channels=1,  # conv size is 5 according to the original impl
        out_channels=100,
        kernel_size=self.conv_size,
        stride=1,
        padding=1,
        dilation=1,
        groups=1,
        bias=True)
    self.conv_output_size = 100  # according to the original paper

    # original impl: extend to 100 dim by kernel, then avg pool to 1 for each kernel dim (following original impl)
    self.size_avg_pool = 1
    self.avg_adapt_pool1 = nn.AdaptiveAvgPool1d(self.size_avg_pool)

    #
    # fc_in_size = self.encoder_coh.encoder_out_size
    fc_in_size = self.conv_output_size
    linear_1_out = fc_in_size // 2
    linear_2_out = linear_1_out // 2

    # implement attention by linear (general version)
    self.attn = nn.Linear(self.max_len_doc * self.conv_output_size, self.max_len_doc, bias=True)
    #nn.init.xavier_uniform_(self.attn.weight)
    nn.init.xavier_normal_(self.attn.weight)

    # implement attention by parameter (bahdanau style)
    self.word_weight = nn.Parameter(torch.Tensor(self.conv_output_size, self.conv_output_size))
    self.word_bias = nn.Parameter(torch.zeros(self.conv_output_size))
    self.context_weight = nn.Parameter(torch.zeros(self.conv_output_size))
    nn.init.xavier_normal_(self.word_weight)

    self.linear_1 = nn.Linear(fc_in_size, linear_1_out)
    self.bn1 = nn.BatchNorm1d(num_features=linear_1_out)
    #nn.init.xavier_uniform_(self.linear_1.weight)
    nn.init.xavier_normal_(self.linear_1.weight)

    self.linear_2 = nn.Linear(linear_1_out, linear_2_out)
    #nn.init.xavier_uniform_(self.linear_2.weight)
    nn.init.xavier_normal_(self.linear_2.weight)
    self.bn2 = nn.BatchNorm1d(num_features=linear_2_out)

    self.linear_out = nn.Linear(linear_2_out, self.output_size)
    if corpus_target.output_bias is not None:  # bias
        init_mean_val = np.expand_dims(corpus_target.output_bias, axis=1)
        bias_val = (np.log(init_mean_val) - np.log(1 - init_mean_val))
        self.linear_out.bias.data = torch.from_numpy(bias_val).type(torch.FloatTensor)
    #nn.init.xavier_uniform_(self.linear_out.weight)
    nn.init.xavier_normal_(self.linear_out.weight)

    # self.selu = nn.SELU()
    self.elu = nn.ELU()
    self.leak_relu = nn.LeakyReLU()
    self.relu = nn.ReLU()
    self.tanh = nn.Tanh()
    self.sigmoid = nn.Sigmoid()

    self.dropout_layer = nn.Dropout(self.dropout_rate)
    self.softmax = nn.Softmax(dim=1)

    return
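
# Shape walk-through (illustration only, toy sizes): the Conv1d above maps a
# single-channel sequence to 100 channels, and AdaptiveAvgPool1d(1) then averages each
# channel down to one value, yielding a 100-dim feature per document.
def _demo_conv_avgpool_shapes():
    import torch
    import torch.nn as nn

    max_len_doc = 50                                          # toy document length
    conv = nn.Conv1d(in_channels=1, out_channels=100, kernel_size=5,
                     stride=1, padding=1, dilation=1, groups=1, bias=True)
    pool = nn.AdaptiveAvgPool1d(1)

    x = torch.randn(2, 1, max_len_doc)                        # (batch, channels=1, length)
    y = conv(x)                                               # (2, 100, max_len_doc - 2)
    z = pool(y).squeeze(2)                                    # (2, 100)
    return z.shape
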
def __init__(self, config, corpus_target, embReader):
    super().__init__(config)

    ####
    # init parameters
    self.max_num_sents = config.max_num_sents  # document length, in terms of the number of sentences
    self.max_len_sent = config.max_len_sent  # sentence length, in terms of words
    self.max_len_doc = config.max_len_doc  # document length, in terms of words
    self.batch_size = config.batch_size
    self.size_avg_pool_sent = config.size_avg_pool_sent

    self.corpus_target = config.corpus_target
    self.vocab = corpus_target.vocab  # word2id
    self.rev_vocab = corpus_target.rev_vocab  # id2word
    self.pad_id = corpus_target.pad_id
    self.num_special_vocab = corpus_target.num_special_vocab

    self.dropout_rate = config.dropout
    self.rnn_cell_size = config.rnn_cell_size
    self.output_size = config.output_size  # the number of final output class
    self.pad_level = config.pad_level
    self.use_gpu = config.use_gpu
    self.gen_logs = config.gen_logs

    if not hasattr(config, "freeze_step"):
        config.freeze_step = 5000

    ########
    #
    # self.base_encoder = Encoder_Coh(config, embReader)
    self.base_encoder = Encoder_Main(config, embReader)

    #
    self.sim_cosine = torch.nn.CosineSimilarity(dim=2)

    self.conv_sent = nn.Conv1d(in_channels=1,
                               out_channels=1,
                               kernel_size=3,
                               stride=2,
                               padding=1,
                               dilation=1,
                               groups=1,
                               bias=True)
                               # bias=False)
    self.max_adapt_pool1_sent = nn.AdaptiveMaxPool1d(self.size_avg_pool_sent)

    #
    fc_in_size = self.base_encoder.encoder_out_size + self.size_avg_pool_sent
    linear_1_out = fc_in_size // 2
    linear_2_out = linear_1_out // 2

    self.linear_1 = nn.Linear(fc_in_size, linear_1_out)
    nn.init.xavier_normal_(self.linear_1.weight)

    self.linear_2 = nn.Linear(linear_1_out, linear_2_out)
    # nn.init.xavier_uniform_(self.linear_2.weight)  # redundant: overridden by the normal init below
    nn.init.xavier_normal_(self.linear_2.weight)

    self.linear_out = nn.Linear(linear_2_out, self.output_size)
    if corpus_target.output_bias is not None:  # bias
        init_mean_val = np.expand_dims(corpus_target.output_bias, axis=1)
        bias_val = (np.log(init_mean_val) - np.log(1 - init_mean_val))
        self.linear_out.bias.data = torch.from_numpy(bias_val).type(torch.FloatTensor)
    # nn.init.xavier_uniform_(self.linear_out.weight)
    nn.init.xavier_normal_(self.linear_out.weight)

    # self.selu = nn.SELU()
    self.elu = nn.ELU()
    self.leak_relu = nn.LeakyReLU()
    self.relu = nn.ReLU()
    self.tanh = nn.Tanh()
    self.sigmoid = nn.Sigmoid()

    self.dropout_layer = nn.Dropout(self.dropout_rate)
    self.dropout_01 = nn.Dropout(0.1)
    self.dropout_02 = nn.Dropout(0.2)

    self.softmax = nn.Softmax(dim=1)

    return
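
# Hedged sketch (an assumption about how these modules combine; the forward pass is not
# shown here): cosine similarities between adjacent sentence vectors form a 1-D sequence,
# conv_sent compresses it with stride 2, and AdaptiveMaxPool1d(size_avg_pool_sent) gives
# a fixed-size feature matching the "+ self.size_avg_pool_sent" term in fc_in_size above.
def _demo_sent_similarity_conv_pool():
    import torch
    import torch.nn as nn

    batch, num_sents, hidden, size_avg_pool_sent = 2, 10, 16, 4   # toy sizes
    sents = torch.randn(batch, num_sents, hidden)
    sim = nn.CosineSimilarity(dim=2)(sents[:, :-1, :], sents[:, 1:, :])   # (2, 9)

    conv_sent = nn.Conv1d(1, 1, kernel_size=3, stride=2, padding=1, bias=True)
    pooled = nn.AdaptiveMaxPool1d(size_avg_pool_sent)(conv_sent(sim.unsqueeze(1)))
    return pooled.squeeze(1)                                  # (2, size_avg_pool_sent)
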
def __init__(self, config, corpus_target, embReader):
    # super(Coh_Model_ILCR_Simple, self).__init__(config)
    super().__init__(config)

    ####
    # init parameters
    self.max_num_sents = config.max_num_sents  # document length, in terms of the number of sentences
    self.max_len_sent = config.max_len_sent  # sentence length, in terms of words
    self.max_len_doc = config.max_len_doc  # document length, in terms of words
    self.batch_size = config.batch_size

    self.corpus_target = config.corpus_target
    self.vocab = corpus_target.vocab  # word2id
    self.rev_vocab = corpus_target.rev_vocab  # id2word
    self.pad_id = corpus_target.pad_id
    self.num_special_vocab = corpus_target.num_special_vocab

    self.embed_size = config.embed_size
    self.dropout_rate = config.dropout
    self.rnn_cell_size = config.rnn_cell_size
    self.path_pretrained_emb = config.path_pretrained_emb
    self.num_layers = 1
    self.output_size = config.output_size  # the number of final output class
    self.pad_level = config.pad_level
    self.use_gpu = config.use_gpu

    if not hasattr(config, "freeze_step"):
        config.freeze_step = 5000

    ########
    #
    self.base_encoder = Encoder_Main(config, embReader)

    #
    fc_in_size = self.base_encoder.encoder_out_size
    linear_1_out = fc_in_size // 2
    linear_2_out = linear_1_out // 2

    self.linear_1 = nn.Linear(fc_in_size, linear_1_out)
    self.bn1 = nn.BatchNorm1d(num_features=linear_1_out)
    #nn.init.xavier_uniform_(self.linear_1.weight)
    nn.init.xavier_normal_(self.linear_1.weight)

    self.linear_2 = nn.Linear(linear_1_out, linear_2_out)
    #nn.init.xavier_uniform_(self.linear_2.weight)
    nn.init.xavier_normal_(self.linear_2.weight)
    self.bn2 = nn.BatchNorm1d(num_features=linear_2_out)

    self.linear_out = nn.Linear(linear_2_out, self.output_size)
    if corpus_target.output_bias is not None:  # bias
        init_mean_val = np.expand_dims(corpus_target.output_bias, axis=1)
        bias_val = (np.log(init_mean_val) - np.log(1 - init_mean_val))
        self.linear_out.bias.data = torch.from_numpy(bias_val).type(torch.FloatTensor)
    #nn.init.xavier_uniform_(self.linear_out.weight)
    nn.init.xavier_normal_(self.linear_out.weight)

    # self.selu = nn.SELU()
    self.elu = nn.ELU()
    self.leak_relu = nn.LeakyReLU()
    self.relu = nn.ReLU()
    self.tanh = nn.Tanh()
    self.sigmoid = nn.Sigmoid()

    self.dropout_layer = nn.Dropout(self.dropout_rate)
    self.softmax = nn.Softmax(dim=1)

    return
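
# Hedged sketch of a typical forward pass through the head defined above
# (linear -> batch norm -> non-linearity -> dropout, twice, then the output layer);
# the real model's forward is not shown here, so treat this as an assumption.
def _demo_mlp_head_forward():
    import torch
    import torch.nn as nn

    fc_in_size, output_size, batch = 32, 1, 4                 # toy sizes
    linear_1 = nn.Linear(fc_in_size, fc_in_size // 2)
    bn1 = nn.BatchNorm1d(fc_in_size // 2)
    linear_2 = nn.Linear(fc_in_size // 2, fc_in_size // 4)
    bn2 = nn.BatchNorm1d(fc_in_size // 4)
    linear_out = nn.Linear(fc_in_size // 4, output_size)
    dropout = nn.Dropout(0.5)

    x = torch.randn(batch, fc_in_size)                        # stand-in for the encoder output
    h = dropout(torch.relu(bn1(linear_1(x))))
    h = dropout(torch.relu(bn2(linear_2(h))))
    return torch.sigmoid(linear_out(h))                       # (batch, output_size)
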
def __init__(self, config, corpus_target, embReader):
    """ class for simple baseline submitted to COLING20
        Title: Context-aware Lexical Coherence Modeling
        Ref:
    """
    super().__init__(config)

    ####
    # init parameters
    self.max_num_sents = config.max_num_sents  # document length, in terms of the number of sentences
    self.max_len_sent = config.max_len_sent  # sentence length, in terms of words
    self.max_len_doc = config.max_len_doc  # document length, in terms of words
    self.batch_size = config.batch_size
    self.avg_len_doc = config.avg_len_doc

    self.corpus_target = config.corpus_target
    self.vocab = corpus_target.vocab  # word2id
    self.rev_vocab = corpus_target.rev_vocab  # id2word
    self.pad_id = corpus_target.pad_id
    self.num_special_vocab = corpus_target.num_special_vocab

    self.dropout_rate = config.dropout
    self.rnn_cell_size = config.rnn_cell_size
    self.num_layers = 1
    self.output_size = config.output_size  # the number of final output class
    self.pad_level = config.pad_level
    self.use_gpu = config.use_gpu
    self.gen_logs = config.gen_logs

    if not hasattr(config, "freeze_step"):
        config.freeze_step = 5000

    ########
    #
    self.encoder_base = Encoder_Main(config, embReader)

    #
    self.sim_cosine = torch.nn.CosineSimilarity(dim=2)

    #
    fc_in_size = self.encoder_base.encoder_out_size
    linear_1_out = fc_in_size // 2
    linear_2_out = linear_1_out // 2

    self.linear_1 = nn.Linear(fc_in_size, linear_1_out)
    nn.init.xavier_uniform_(self.linear_1.weight)

    self.linear_2 = nn.Linear(linear_1_out, linear_2_out)
    nn.init.xavier_uniform_(self.linear_2.weight)

    self.linear_out = nn.Linear(linear_2_out, self.output_size)
    if corpus_target.output_bias is not None:  # bias
        init_mean_val = np.expand_dims(corpus_target.output_bias, axis=1)
        bias_val = (np.log(init_mean_val) - np.log(1 - init_mean_val))
        self.linear_out.bias.data = torch.from_numpy(bias_val).type(torch.FloatTensor)
    nn.init.xavier_uniform_(self.linear_out.weight)
    # nn.init.xavier_normal_(self.linear_out.weight)

    # self.selu = nn.SELU()
    self.elu = nn.ELU()
    self.leak_relu = nn.LeakyReLU()
    self.relu = nn.ReLU()
    self.tanh = nn.Tanh()
    self.sigmoid = nn.Sigmoid()

    self.dropout_layer = nn.Dropout(self.dropout_rate)
    self.softmax = nn.Softmax(dim=1)

    return
def __init__(self, config, corpus_target, embReader):
    super().__init__(config)

    ####
    # init parameters
    self.corpus_target = config.corpus_target
    self.max_num_sents = config.max_num_sents  # document length, in terms of the number of sentences
    self.max_len_sent = config.max_len_sent  # sentence length, in terms of words
    self.max_len_doc = config.max_len_doc  # document length, in terms of words
    self.batch_size = config.batch_size

    self.vocab = corpus_target.vocab  # word2id
    self.rev_vocab = corpus_target.rev_vocab  # id2word
    self.vocab_size = len(self.vocab)
    self.pad_id = corpus_target.pad_id
    self.num_special_vocab = corpus_target.num_special_vocab

    self.embed_size = config.embed_size
    self.dropout_rate = config.dropout
    self.path_pretrained_emb = config.path_pretrained_emb
    self.num_layers = 1
    self.output_size = config.output_size  # the number of final output class
    self.pad_level = config.pad_level
    self.use_gpu = config.use_gpu

    if not hasattr(config, "freeze_step"):
        config.freeze_step = 5000

    config.rnn_bidir = True  ## fix bi-dir to follow original paper of NAACL19
    if config.rnn_bidir:
        self.sem_dim_size = 2 * config.sem_dim_size
    else:
        self.sem_dim_size = config.sem_dim_size
    self.rnn_cell_size = config.rnn_cell_size

    self.pooling_sent = config.pooling_sent  # max or avg
    self.pooling_doc = config.pooling_doc  # max or avg

    ####
    self.encoder_base = Encoder_Main(config, embReader)

    config.rnn_bidir = False
    self.encoder_sent = Encoder_RNN(config, embReader,
                                    self.rnn_cell_size * 2,
                                    self.rnn_cell_size * 2)
    config.rnn_bidir = True

    self.structure_att = StructuredAttention(config)

    #
    fc_in_size = self.encoder_base.encoder_out_size
    linear_1_out = fc_in_size // 2
    linear_2_out = linear_1_out // 2

    self.linear_out = nn.Linear(self.sem_dim_size, self.output_size)
    if corpus_target.output_bias is not None:  # bias
        init_mean_val = np.expand_dims(corpus_target.output_bias, axis=1)
        bias_val = (np.log(init_mean_val) - np.log(1 - init_mean_val))
        self.linear_out.bias.data = torch.from_numpy(bias_val).type(torch.FloatTensor)
    # nn.init.xavier_uniform_(self.linear_out.weight)
    nn.init.xavier_normal_(self.linear_out.weight)

    # self.selu = nn.SELU()
    self.elu = nn.ELU()
    self.leak_relu = nn.LeakyReLU()
    self.relu = nn.ReLU()
    self.tanh = nn.Tanh()
    self.sigmoid = nn.Sigmoid()

    self.dropout_layer = nn.Dropout(self.dropout_rate)
    self.softmax = nn.Softmax(dim=1)

    return
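
# Hedged illustration of why sem_dim_size is doubled when rnn_bidir is True: a
# bidirectional RNN concatenates forward and backward states, so its per-step output
# size is 2 * hidden_size. Toy sizes; Encoder_RNN itself is not used here.
def _demo_bidir_hidden_size():
    import torch
    import torch.nn as nn

    hidden_size = 8
    rnn = nn.LSTM(input_size=4, hidden_size=hidden_size,
                  batch_first=True, bidirectional=True)
    out, _ = rnn(torch.randn(2, 5, 4))                        # (batch, seq_len, 2 * hidden_size)
    return out.shape                                          # torch.Size([2, 5, 16])
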