def __init__(self, hps, embed):
    """CNN sentence encoder: word + position embeddings followed by
    2D convolutions with a range of kernel heights.

    :param hps: hyper-parameters; fields used here:
        word_emb_dim: word embedding dimension
        sent_max_len: max token number in the sentence
        output_channel: output channel for cnn
        min_kernel_size: min kernel size for cnn
        max_kernel_size: max kernel size for cnn
    :param embed: word-embedding module (stored as ``self.embed``)
    """
    super(Encoder, self).__init__()

    self._hps = hps
    self.sent_max_len = hps.sent_max_len

    embed_size = hps.word_emb_dim
    sent_max_len = hps.sent_max_len

    input_channels = 1
    out_channels = hps.output_channel
    min_kernel_size = hps.min_kernel_size
    max_kernel_size = hps.max_kernel_size
    width = embed_size

    # word embedding
    self.embed = embed

    # position embedding: frozen sinusoidal table, index 0 reserved for padding
    self.position_embedding = nn.Embedding.from_pretrained(
        get_sinusoid_encoding_table(sent_max_len + 1, embed_size, padding_idx=0),
        freeze=True)

    # cnn: one Conv2d per kernel height in [min_kernel_size, max_kernel_size]
    self.convs = nn.ModuleList([
        nn.Conv2d(input_channels, out_channels, kernel_size=(height, width))
        for height in range(min_kernel_size, max_kernel_size + 1)
    ])
    # use logger.info for consistency with the other encoders in this file
    logger.info("[INFO] Initing W for CNN.......")
    for conv in self.convs:
        init_weight_value = 6.0
        init.xavier_normal_(conv.weight.data, gain=np.sqrt(init_weight_value))
        fan_in, fan_out = Encoder.calculate_fan_in_and_fan_out(conv.weight.data)
        # NOTE(review): std is computed but not used in the visible code — confirm
        std = np.sqrt(init_weight_value) * np.sqrt(2.0 / (fan_in + fan_out))
def __init__(self, hps):
    """ELMo-based sentence encoder: contextual ELMo embeddings projected down
    to word_emb_dim, plus sinusoidal position embeddings and a multi-kernel CNN.

    :param hps: hyper-parameters (word_emb_dim, sent_max_len, output_channel,
        min_kernel_size, max_kernel_size, cuda)
    """
    super(ELMoEndoer2, self).__init__()
    self._hps = hps
    self._cuda = hps.cuda
    self.sent_max_len = hps.sent_max_len

    from allennlp.modules.elmo import Elmo

    elmo_dim = 1024
    options_file = "/remote-home/dqwang/ELMo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json"
    weight_file = "/remote-home/dqwang/ELMo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5"
    # smaller alternative model:
    # elmo_dim = 512
    # options_file = "/remote-home/dqwang/ELMo/elmo_2x2048_256_2048cnn_1xhighway_options.json"
    # weight_file = "/remote-home/dqwang/ELMo/elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5"

    emb_dim = hps.word_emb_dim
    max_len = hps.sent_max_len
    n_out = hps.output_channel
    k_min, k_max = hps.min_kernel_size, hps.max_kernel_size

    # elmo embedding (one representation layer, no dropout), projected to emb_dim
    self.elmo = Elmo(options_file, weight_file, 1, dropout=0)
    self.embed_proj = nn.Linear(elmo_dim, emb_dim)

    # position embedding: frozen sinusoidal table, index 0 is padding
    self.position_embedding = nn.Embedding.from_pretrained(
        get_sinusoid_encoding_table(max_len + 1, emb_dim, padding_idx=0),
        freeze=True)

    # cnn: one Conv2d per kernel height from k_min to k_max (inclusive)
    self.convs = nn.ModuleList(
        nn.Conv2d(1, n_out, kernel_size=(k, emb_dim))
        for k in range(k_min, k_max + 1))

    logger.info("[INFO] Initing W for CNN.......")
    gain_sq = 6.0
    for conv in self.convs:
        init.xavier_normal_(conv.weight.data, gain=np.sqrt(gain_sq))
        fan_in, fan_out = Encoder.calculate_fan_in_and_fan_out(conv.weight.data)
        std = np.sqrt(gain_sq) * np.sqrt(2.0 / (fan_in + fan_out))
def __init__(self, hps):
    """BERT-based sentence encoder: frozen pre-trained BERT features projected
    to word_emb_dim, plus sinusoidal position embeddings and a multi-kernel CNN.

    :param hps: hyper-parameters (word_emb_dim, sent_max_len, output_channel,
        min_kernel_size, max_kernel_size, cuda)
    """
    super(BertEncoder, self).__init__()
    from pytorch_pretrained_bert.modeling import BertModel

    self._hps = hps
    self.sent_max_len = hps.sent_max_len
    self._cuda = hps.cuda

    emb_dim = hps.word_emb_dim
    max_len = hps.sent_max_len
    n_out = hps.output_channel
    k_min, k_max = hps.min_kernel_size, hps.max_kernel_size

    # frozen pre-trained BERT: eval mode, all gradients disabled
    self._bert = BertModel.from_pretrained(
        "/remote-home/dqwang/BERT/pre-train/uncased_L-24_H-1024_A-16")
    self._bert.eval()
    for p in self._bert.parameters():
        p.requires_grad = False
    # 4096-wide input — presumably several concatenated 1024-d BERT layers;
    # TODO confirm against the forward pass
    self.word_embedding_proj = nn.Linear(4096, emb_dim)

    # position embedding: frozen sinusoidal table, index 0 is padding
    self.position_embedding = nn.Embedding.from_pretrained(
        get_sinusoid_encoding_table(max_len + 1, emb_dim, padding_idx=0),
        freeze=True)

    # cnn: one Conv2d per kernel height from k_min to k_max (inclusive)
    self.convs = nn.ModuleList(
        nn.Conv2d(1, n_out, kernel_size=(k, emb_dim))
        for k in range(k_min, k_max + 1))

    logger.info("[INFO] Initing W for CNN.......")
    gain_sq = 6.0
    for conv in self.convs:
        init.xavier_normal_(conv.weight.data, gain=np.sqrt(gain_sq))
        fan_in, fan_out = Encoder.calculate_fan_in_and_fan_out(conv.weight.data)
        std = np.sqrt(gain_sq) * np.sqrt(2.0 / (fan_in + fan_out))
def __init__(self, hps, embed):
    """Transformer sentence-selection model: CNN sentence encoder followed by
    a stack of transformer encoder layers and a 2-way linear head.

    :param hps: hyper-parameters; fields used here:
        min_kernel_size: min kernel size for cnn encoder
        max_kernel_size: max kernel size for cnn encoder
        output_channel: output_channel number for cnn encoder
        hidden_size: hidden size for transformer
        n_layers: transformer encoder layer count
        n_head: multi head attention for transformer
        ffn_inner_hidden_size: FFN inner hidden size
        atten_dropout_prob: attention dropout prob
        doc_max_timesteps: max sentence number of the document
    :param embed: word-embedding module, passed through to the CNN Encoder
    """
    super(TransformerModel, self).__init__()
    self._hps = hps
    # sentence-level CNN encoder (defined elsewhere in this file)
    self.encoder = Encoder(hps, embed)
    # concatenated CNN feature size: one output_channel per kernel height
    self.sent_embedding_size = (hps.max_kernel_size - hps.min_kernel_size + 1) * hps.output_channel
    self.hidden_size = hps.hidden_size
    self.n_head = hps.n_head
    # per-head key/value dimensions
    self.d_v = self.d_k = int(self.hidden_size / self.n_head)
    self.d_inner = hps.ffn_inner_hidden_size
    self.num_layers = hps.n_layers
    # project CNN sentence features into the transformer hidden size
    self.projection = nn.Linear(self.sent_embedding_size, self.hidden_size)
    # sentence-position embedding: frozen sinusoidal table, index 0 is padding
    self.sent_pos_embed = nn.Embedding.from_pretrained(
        get_sinusoid_encoding_table(hps.doc_max_timesteps + 1, self.hidden_size, padding_idx=0),
        freeze=True)
    self.layer_stack = nn.ModuleList([
        EncoderLayer(self.hidden_size, self.d_inner, self.n_head, self.d_k, self.d_v,
                     dropout=hps.atten_dropout_prob)
        for _ in range(self.num_layers)
    ])
    # 2-way output head (per-sentence select / not-select scores)
    self.wh = nn.Linear(self.hidden_size, 2)