Example #1
    def __init__(self, hps, embed):
        """
        
        :param hps: 
                word_emb_dim: word embedding dimension
                sent_max_len: max token number in the sentence
                output_channel: output channel for cnn
                min_kernel_size: min kernel size for cnn
                max_kernel_size: max kernel size for cnn
                word_embedding: bool, use word embedding or not
                embedding_path: word embedding path
                embed_train: bool, whether to train word embedding
                cuda: bool, use cuda or not
        :param embed: word embedding module (e.g. built from a FastNLP Vocabulary)
        """
        super(Encoder, self).__init__()

        self._hps = hps
        self.sent_max_len = hps.sent_max_len
        embed_size = hps.word_emb_dim

        sent_max_len = hps.sent_max_len

        input_channels = 1
        out_channels = hps.output_channel
        min_kernel_size = hps.min_kernel_size
        max_kernel_size = hps.max_kernel_size
        width = embed_size

        # word embedding
        self.embed = embed

        # position embedding
        self.position_embedding = nn.Embedding.from_pretrained(
            get_sinusoid_encoding_table(sent_max_len + 1,
                                        embed_size,
                                        padding_idx=0),
            freeze=True)

        # cnn
        self.convs = nn.ModuleList([
            nn.Conv2d(input_channels,
                      out_channels,
                      kernel_size=(height, width))
            for height in range(min_kernel_size, max_kernel_size + 1)
        ])
        print("[INFO] Initing W for CNN.......")
        for conv in self.convs:
            init_weight_value = 6.0
            # Xavier-normal init with gain sqrt(6)
            init.xavier_normal_(conv.weight.data,
                                gain=np.sqrt(init_weight_value))
            fan_in, fan_out = Encoder.calculate_fan_in_and_fan_out(
                conv.weight.data)
            # resulting weight std: gain * sqrt(2 / (fan_in + fan_out))
            std = np.sqrt(init_weight_value) * np.sqrt(2.0 /
                                                       (fan_in + fan_out))
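
Examples #1-#3 share this pattern: sinusoid position embeddings added to the word embeddings, then a bank of Conv2d filters with heights min_kernel_size..max_kernel_size and max-pooling over time. The listings stop before the forward pass, so here is a minimal runnable sketch of the convolution-and-pool step; all sizes below are illustrative, not from the source.

import torch
import torch.nn as nn
import torch.nn.functional as F

# illustrative sizes, not from the source
batch, sent_max_len, embed_size = 4, 50, 300
out_channels, min_k, max_k = 50, 1, 7
vocab_size = 10000

embed = nn.Embedding(vocab_size, embed_size, padding_idx=0)
convs = nn.ModuleList([
    nn.Conv2d(1, out_channels, kernel_size=(h, embed_size))
    for h in range(min_k, max_k + 1)
])

tokens = torch.randint(1, vocab_size, (batch, sent_max_len))
x = embed(tokens).unsqueeze(1)  # (batch, 1, L, emb): one input channel
# each filter spans the full embedding width, so it slides only along the token axis
feats = [F.relu(conv(x)).squeeze(3) for conv in convs]           # (batch, C, L - h + 1)
pooled = [F.max_pool1d(f, f.size(2)).squeeze(2) for f in feats]  # (batch, C)
sent_vec = torch.cat(pooled, dim=1)
print(sent_vec.shape)  # (batch, (max_k - min_k + 1) * out_channels) = (4, 350)

The concatenated width is exactly what Example #4 computes as sent_embedding_size.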
Example #2
    def __init__(self, hps):
        super(ELMoEndoer2, self).__init__()

        self._hps = hps
        self._cuda = hps.cuda
        self.sent_max_len = hps.sent_max_len

        from allennlp.modules.elmo import Elmo

        elmo_dim = 1024
        options_file = "/remote-home/dqwang/ELMo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json"
        weight_file = "/remote-home/dqwang/ELMo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5"

        # elmo_dim = 512
        # options_file = "/remote-home/dqwang/ELMo/elmo_2x2048_256_2048cnn_1xhighway_options.json"
        # weight_file = "/remote-home/dqwang/ELMo/elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5"

        embed_size = hps.word_emb_dim
        sent_max_len = hps.sent_max_len

        input_channels = 1
        out_channels = hps.output_channel
        min_kernel_size = hps.min_kernel_size
        max_kernel_size = hps.max_kernel_size
        width = embed_size

        # elmo embedding
        self.elmo = Elmo(options_file, weight_file, 1, dropout=0)
        self.embed_proj = nn.Linear(elmo_dim, embed_size)

        # position embedding
        self.position_embedding = nn.Embedding.from_pretrained(
            get_sinusoid_encoding_table(sent_max_len + 1,
                                        embed_size,
                                        padding_idx=0),
            freeze=True)

        # cnn
        self.convs = nn.ModuleList([
            nn.Conv2d(input_channels,
                      out_channels,
                      kernel_size=(height, width))
            for height in range(min_kernel_size, max_kernel_size + 1)
        ])
        logger.info("[INFO] Initializing weights for CNN...")
        for conv in self.convs:
            init_weight_value = 6.0
            init.xavier_normal_(conv.weight.data,
                                gain=np.sqrt(init_weight_value))
            fan_in, fan_out = Encoder.calculate_fan_in_and_fan_out(
                conv.weight.data)
            std = np.sqrt(init_weight_value) * np.sqrt(2.0 /
                                                       (fan_in + fan_out))
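
A hedged usage sketch for the ELMo pieces above: batch_to_ids is allennlp's helper for turning tokenized sentences into character ids, and the single representation layer requested in the constructor comes back under "elmo_representations". The sentences are illustrative; options_file, weight_file, and embed_size are as defined above.

from allennlp.modules.elmo import Elmo, batch_to_ids
import torch.nn as nn

elmo = Elmo(options_file, weight_file, 1, dropout=0)  # as in __init__ above
embed_proj = nn.Linear(1024, embed_size)              # elmo_dim -> embed_size

sentences = [["The", "cat", "sat", "."], ["Dogs", "bark", "."]]
character_ids = batch_to_ids(sentences)               # (batch, timesteps, 50)

elmo_out = elmo(character_ids)
elmo_emb = elmo_out["elmo_representations"][0]        # (batch, timesteps, 1024)
word_emb = embed_proj(elmo_emb)                       # (batch, timesteps, embed_size)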
Example #3
    def __init__(self, hps):
        super(BertEncoder, self).__init__()

        from pytorch_pretrained_bert.modeling import BertModel

        self._hps = hps
        self.sent_max_len = hps.sent_max_len
        self._cuda = hps.cuda

        embed_size = hps.word_emb_dim
        sent_max_len = hps.sent_max_len

        input_channels = 1
        out_channels = hps.output_channel
        min_kernel_size = hps.min_kernel_size
        max_kernel_size = hps.max_kernel_size
        width = embed_size

        # word embedding
        self._bert = BertModel.from_pretrained(
            "/remote-home/dqwang/BERT/pre-train/uncased_L-24_H-1024_A-16")
        self._bert.eval()
        for p in self._bert.parameters():
            p.requires_grad = False

        self.word_embedding_proj = nn.Linear(4096, embed_size)

        # position embedding
        self.position_embedding = nn.Embedding.from_pretrained(
            get_sinusoid_encoding_table(sent_max_len + 1,
                                        embed_size,
                                        padding_idx=0),
            freeze=True)

        # cnn
        self.convs = nn.ModuleList([
            nn.Conv2d(input_channels,
                      out_channels,
                      kernel_size=(height, width))
            for height in range(min_kernel_size, max_kernel_size + 1)
        ])
        logger.info("[INFO] Initializing weights for CNN...")
        for conv in self.convs:
            init_weight_value = 6.0
            init.xavier_normal_(conv.weight.data,
                                gain=np.sqrt(init_weight_value))
            fan_in, fan_out = Encoder.calculate_fan_in_and_fan_out(
                conv.weight.data)
            std = np.sqrt(init_weight_value) * np.sqrt(2.0 /
                                                       (fan_in + fan_out))
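
The 4096-dimensional input of word_embedding_proj suggests the forward pass concatenates the last four 1024-dim hidden layers of this BERT-large model. The listing does not show that step, so the following is a sketch under that assumption; the method and argument names are placeholders, not from the source.

    def forward(self, input_ids, attention_mask):
        # input_ids, attention_mask: (batch, seq_len)
        with torch.no_grad():
            # pytorch_pretrained_bert returns a list with every encoder layer
            # by default (output_all_encoded_layers=True)
            encoded_layers, _ = self._bert(input_ids,
                                           attention_mask=attention_mask)
        # concatenate the last four layers: 4 * 1024 = 4096 features per token
        bert_features = torch.cat(encoded_layers[-4:], dim=-1)  # (batch, seq_len, 4096)
        return self.word_embedding_proj(bert_features)          # (batch, seq_len, embed_size)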
Example #4
    def __init__(self, hps, embed):
        """

        :param hps:
                min_kernel_size: min kernel size for the CNN encoder
                max_kernel_size: max kernel size for the CNN encoder
                output_channel: output channel number for the CNN encoder
                hidden_size: hidden size of the transformer
                n_layers: number of transformer encoder layers
                n_head: number of attention heads per transformer layer
                ffn_inner_hidden_size: inner hidden size of the FFN sublayer
                atten_dropout_prob: attention dropout probability
                doc_max_timesteps: max number of sentences in a document
        :param embed: word embedding module passed to the CNN Encoder
        """
        super(TransformerModel, self).__init__()

        self._hps = hps

        self.encoder = Encoder(hps, embed)

        self.sent_embedding_size = (hps.max_kernel_size - hps.min_kernel_size +
                                    1) * hps.output_channel
        self.hidden_size = hps.hidden_size

        self.n_head = hps.n_head
        self.d_v = self.d_k = int(self.hidden_size / self.n_head)
        self.d_inner = hps.ffn_inner_hidden_size
        self.num_layers = hps.n_layers

        self.projection = nn.Linear(self.sent_embedding_size, self.hidden_size)
        self.sent_pos_embed = nn.Embedding.from_pretrained(
            get_sinusoid_encoding_table(hps.doc_max_timesteps + 1,
                                        self.hidden_size,
                                        padding_idx=0),
            freeze=True)

        self.layer_stack = nn.ModuleList([
            EncoderLayer(self.hidden_size,
                         self.d_inner,
                         self.n_head,
                         self.d_k,
                         self.d_v,
                         dropout=hps.atten_dropout_prob)
            for _ in range(self.num_layers)
        ])

        self.wh = nn.Linear(self.hidden_size, 2)
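
Example #4 also stops at the constructor. A minimal forward-pass sketch follows; it assumes an EncoderLayer with the common (enc_input, slf_attn_mask=...) interface returning (output, attention), and an input_len 0/1 mask marking real sentences. Both names and the mask convention are assumptions, not from the source.

    def forward(self, input, input_len):
        # input: (batch, N, sent_max_len) token ids; input_len: (batch, N) 0/1 mask
        batch_size, N = input.size(0), input.size(1)

        sent_emb = self.encoder(input.view(batch_size * N, -1))       # (batch*N, sent_embedding_size)
        sent_emb = self.projection(sent_emb).view(batch_size, N, -1)  # (batch, N, hidden_size)

        # sentence positions 1..N for real sentences, 0 (padding_idx) for padding
        sent_pos = torch.arange(1, N + 1, device=input.device).unsqueeze(0) * input_len
        enc_out = sent_emb + self.sent_pos_embed(sent_pos)

        # block attention to padded sentences
        slf_attn_mask = (input_len == 0).unsqueeze(1).expand(-1, N, -1)  # (batch, N, N)
        for layer in self.layer_stack:
            enc_out, _ = layer(enc_out, slf_attn_mask=slf_attn_mask)

        return self.wh(enc_out)  # (batch, N, 2): per-sentence binary logits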