Code example #1
0
File: seq_ch_lstm_conv.py  Project: PyThaiNLP/attacut
    def __init__(self,
                 data_config,
                 model_config="emb:32|conv:48|cell:16|bi:1|l1:16|do:0.1"):
        """Character-embedding model: (bi)LSTM followed by one convolution.

        data_config supplies the vocabulary size under 'num_tokens';
        model_config is a pipe-delimited hyperparameter string parsed by
        utils.parse_model_params ('emb', 'conv', 'cell', 'bi', 'l1', 'do').
        """
        super(Model, self).__init__()

        params = utils.parse_model_params(model_config)
        vocab_size = data_config['num_tokens']

        embedding_dim = params['emb']
        n_filters = params['conv']

        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        self.dropout = torch.nn.Dropout(p=params.get("do", 0))

        use_bidirectional = bool(params.get("bi", 0))

        self.lstm = nn.LSTM(embedding_dim,
                            params['cell'],
                            batch_first=True,
                            bidirectional=use_bidirectional)

        # A bidirectional LSTM doubles the feature dimension it emits.
        lstm_out_dim = params['cell'] * (2 if use_bidirectional else 1)

        self.conv1 = ConvolutionLayer(lstm_out_dim, n_filters, 5)

        self.linear1 = nn.Linear(n_filters, params['l1'])
        self.linear2 = nn.Linear(params['l1'], 1)

        self.model_params = model_config
Code example #2
0
    def __init__(self,
                 data_config,
                 model_config="emb:32|conv:48|l1:16|do:0.1"):
        """Character-embedding model with four parallel convolution branches.

        data_config supplies the vocabulary size under 'num_tokens';
        model_config is a pipe-delimited hyperparameter string parsed by
        utils.parse_model_params ('emb', 'conv', 'l1', 'do').
        """
        super(Model, self).__init__()

        params = utils.parse_model_params(model_config)
        vocab_size = data_config['num_tokens']

        embedding_dim = params['emb']
        n_filters = params['conv']

        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        self.dropout = torch.nn.Dropout(p=params.get("do", 0))

        # Four branches with increasing receptive fields over the embeddings.
        self.conv1 = ConvolutionLayer(embedding_dim, n_filters, 1)
        self.conv2 = ConvolutionLayer(embedding_dim, n_filters, 3)
        self.conv3 = ConvolutionLayer(embedding_dim, n_filters, 5, dilation=3)
        self.conv4 = ConvolutionLayer(embedding_dim, n_filters, 9, dilation=2)

        self.linear1 = nn.Linear(n_filters, params['l1'])
        self.linear2 = nn.Linear(params['l1'], 1)

        self.model_params = model_config
    def __init__(self,
                 data_config,
                 model_config="emb_c:8|emb_t:8|conv:8|l1:6|do:0.1"):
        """Model combining character and character-type embeddings with
        three dilated convolution branches.

        data_config supplies the vocabulary size under 'num_tokens';
        model_config is a pipe-delimited hyperparameter string parsed by
        utils.parse_model_params ('emb_c', 'emb_t', 'conv', 'l1', 'do').
        """
        super(Model, self).__init__()

        params = utils.parse_model_params(model_config)
        vocab_size = data_config['num_tokens']

        char_dim = params['emb_c']
        type_dim = params['emb_t']
        n_filters = params['conv']

        self.ch_embeddings = nn.Embedding(vocab_size, char_dim, padding_idx=0)

        self.ch_type_embeddings = nn.Embedding(
            character_type.TOTAL_CHARACTER_TYPES, type_dim, padding_idx=0)

        self.dropout = torch.nn.Dropout(p=params.get("do", 0))

        # The two embeddings are concatenated before the convolutions.
        combined_dim = char_dim + type_dim

        self.conv1 = ConvolutionLayer(combined_dim, n_filters, 3)
        self.conv2 = ConvolutionLayer(combined_dim, n_filters, 5, dilation=3)
        self.conv3 = ConvolutionLayer(combined_dim, n_filters, 9, dilation=2)

        self.linear1 = nn.Linear(n_filters, params['l1'])
        self.linear2 = nn.Linear(params['l1'], 1)

        self.model_params = model_config
Code example #4
0
File: seq_ch_conv_uniq.py  Project: PyThaiNLP/attacut
    def __init__(self,
                 data_config,
                 model_config="emb:32|conv:48|l1:16|do:0.1"):
        """Character-embedding model with four parallel dilated Conv1d branches.

        Args:
            data_config: dict with vocabulary size under 'num_tokens'.
            model_config: pipe-delimited hyperparameter string parsed by
                utils.parse_model_params ('emb' embedding dim, 'conv'
                filters per branch, 'l1' hidden units, 'do' dropout rate).
        """
        super(Model, self).__init__()

        no_chars = data_config['num_tokens']

        config = utils.parse_model_params(model_config)
        emb_dim = config['emb']
        conv_filters = config['conv']
        dropout_rate = config.get("do", 0)

        self.embeddings = nn.Embedding(no_chars, emb_dim, padding_idx=0)

        self.dropout = torch.nn.Dropout(p=dropout_rate)

        def _same_padding(kernel_size, dilation=1):
            # Padding that keeps output length equal to input length for a
            # stride-1 dilated convolution: (kernel_size // 2) * dilation.
            return (kernel_size // 2) * dilation

        def _branch(kernel_size, dilation=1):
            # One length-preserving convolution branch over the embeddings.
            return nn.Conv1d(emb_dim,
                             conv_filters,
                             kernel_size,
                             stride=1,
                             dilation=dilation,
                             padding=_same_padding(kernel_size, dilation))

        # Four branches with increasing receptive fields.  The original code
        # computed the padding for conv2 but then hard-coded padding=6; the
        # hard-coded value equals the computed one, so applying the formula
        # uniformly here leaves behavior unchanged while removing dead code.
        self.conv0 = _branch(1)
        self.conv1 = _branch(3)
        self.conv2 = _branch(5, dilation=3)
        self.conv3 = _branch(9, dilation=2)

        self.linear1 = nn.Linear(conv_filters, config['l1'])
        self.linear2 = nn.Linear(config['l1'], 1)

        self.model_params = model_config
Code example #5
0
    def __init__(self, data_config, model_config="emb:32|l1:64"):
        """Window-based embedding model with a two-layer linear head.

        data_config supplies 'window_size' and 'num_tokens'; model_config
        is a pipe-delimited hyperparameter string ('emb', 'l1').
        """
        super(Model, self).__init__()

        params = utils.parse_model_params(model_config)
        half_window = data_config['window_size']
        vocab_size = data_config['num_tokens']

        embedding_dim = params['emb']
        hidden_units = params['l1']

        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        # The input is the concatenation of 2*window_size + 1 embeddings.
        context_len = 2 * half_window + 1
        self.linear1 = nn.Linear(context_len * embedding_dim, hidden_units)
        self.linear2 = nn.Linear(hidden_units, 1)

        self.model_params = model_config
Code example #6
0
File: ch_pool.py  Project: PyThaiNLP/attacut
    def __init__(self, data_config, model_config="emb:16|l1:64"):
        """Character-embedding model with max-pooling over the sequence.

        Args:
            data_config: dict with 'window_size', 'num_tokens' and
                'max_seq_length'.
            model_config: pipe-delimited hyperparameter string parsed by
                utils.parse_model_params ('emb' embedding dim, 'l1'
                hidden units).
        """
        super(Model, self).__init__()

        window_size = data_config['window_size']
        no_chars = data_config['num_tokens']
        max_length = data_config['max_seq_length']

        config = utils.parse_model_params(model_config)
        emb_dim = config['emb']

        self.embeddings = nn.Embedding(no_chars, emb_dim, padding_idx=0)

        # One embedding per position in the symmetric (2w + 1) context window.
        self.num_embs = 2 * window_size + 1
        self.pooling = nn.MaxPool1d(max_length)

        self.linear1 = nn.Linear(self.num_embs * emb_dim, config['l1'])
        self.linear2 = nn.Linear(config['l1'], 1)

        # Record the raw config string for bookkeeping, matching every other
        # model constructor in this project (the original omitted it).
        self.model_params = model_config
    def __init__(self, data_config, model_config="embc:16|embs:8|embct:8|conv:16|l1:16|do:0.0"):
        """Model combining character, syllable, and character-type embeddings
        with three batch-normalized dilated convolution branches.

        data_config supplies 'num_char_tokens' and 'num_tokens' (syllables);
        model_config is a pipe-delimited hyperparameter string
        ('embc', 'embs', 'embct', 'conv', 'l1', 'do').
        """
        super(Model, self).__init__()

        no_chars = data_config['num_char_tokens']
        print("We have %d characters" % no_chars)
        no_syllables = data_config['num_tokens']
        print("We have %d syllables" % no_syllables)

        params = utils.parse_model_params(model_config)
        n_filters = params['conv']

        self.ch_embeddings = nn.Embedding(
            no_chars,
            params['embc'],
            padding_idx=0
        )

        self.ch_type_embeddings = nn.Embedding(
            character_type.TOTAL_CHARACTER_TYPES,
            params['embct'],
            padding_idx=0
        )

        self.sy_embeddings = nn.Embedding(
            no_syllables,
            params['embs'],
            padding_idx=0
        )

        self.dropout = torch.nn.Dropout(p=params.get("do", 0))

        # The three embeddings are concatenated before the convolutions.
        combined_dim = params['embc'] + params['embs'] + params['embct']

        self.conv1 = ConvolutionBatchNorm(combined_dim, n_filters, 3)
        self.conv2 = ConvolutionBatchNorm(combined_dim, n_filters, 5, dilation=3)
        self.conv3 = ConvolutionBatchNorm(combined_dim, n_filters, 9, dilation=2)

        self.linear1 = nn.Linear(n_filters, params['l1'])
        self.linear2 = nn.Linear(params['l1'], 1)

        self.model_params = model_config
Code example #8
0
    def __init__(self, data_config, model_config="emb:32|cell:64|l1:64"):
        """Window-based embedding model followed by an LSTM and a linear head.

        data_config supplies 'window_size' and 'num_tokens'; model_config
        is a pipe-delimited hyperparameter string ('emb', 'cell', 'l1').
        """
        super(Model, self).__init__()

        half_window = data_config['window_size']
        vocab_size = data_config['num_tokens']

        # Number of tokens in the symmetric context window.
        self.num_embs = 2 * half_window + 1

        params = utils.parse_model_params(model_config)
        embedding_dim = params['emb']
        hidden_size = params['cell']

        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        self.lstm = nn.LSTM(embedding_dim, hidden_size, batch_first=True)

        self.linear1 = nn.Linear(self.num_embs * hidden_size, params['l1'])
        self.linear2 = nn.Linear(params['l1'], 1)

        self.model_params = model_config
Code example #9
0
    def __init__(self, data_config, model_config="emb:32|c1:16|l1:8|do:0.1"):
        """Stacked small-kernel convolution model.

        Args:
            data_config: dict with vocabulary size under 'num_tokens'.
            model_config: pipe-delimited hyperparameter string parsed by
                utils.parse_model_params ('emb' embedding dim, 'c1' conv
                filters, 'l1' hidden units, 'do' dropout rate).
        """
        super(Model, self).__init__()

        no_chars = data_config['num_tokens']

        config = utils.parse_model_params(model_config)
        emb_dim = config['emb']
        dropout_rate = config.get("do", 0)

        self.embeddings = nn.Embedding(no_chars, emb_dim, padding_idx=0)

        self.dropout = torch.nn.Dropout(p=dropout_rate)

        # Three stacked kernel-size-2 convolutions.
        kernel_size = 2
        self.conv1 = ConvolutionLayer(emb_dim, config['c1'], kernel_size)
        self.conv2 = ConvolutionLayer(config['c1'], config['c1'], kernel_size)
        self.conv3 = ConvolutionLayer(config['c1'], config['c1'], kernel_size)

        # BUG FIX: the original read config['lu1'] here, but the config
        # string defines only 'l1' (used on the very next line), so
        # construction raised KeyError.
        self.linear1 = nn.Linear(config['c1'], config['l1'])
        self.linear2 = nn.Linear(config['l1'], 1)

        self.model_params = model_config