Example 1
def SRUModel(embed_mat, MAX_LEN, num_cls, sru_sz=128):
    ip = Input(shape=(MAX_LEN, ))
    embed = Embedding(embed_mat.shape[0],
                      embed_mat.shape[1],
                      weights=[embed_mat],
                      input_length=MAX_LEN,
                      trainable=False)

    prev_input = embed(ip)
    hidden_states = []
    depth = 2
    if depth > 1:
        for i in range(depth - 1):
            h, h_final, c_final = SRU(sru_sz,
                                      dropout=0.0,
                                      recurrent_dropout=0.0,
                                      return_sequences=True,
                                      return_state=True,
                                      unroll=True)(prev_input)
            prev_input = h
            hidden_states.append(c_final)
    outputs = SRU(sru_sz, dropout=0.0, recurrent_dropout=0.0,
                  unroll=True)(prev_input)
    outputs = Dense(num_cls, activation='softmax')(outputs)
    model = Model(ip, outputs)
    model.summary()
    # try using different optimizers and different optimizer configs
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
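A minimal usage sketch for the builder above (not part of the original example): the embedding matrix, vocabulary size, and labels below are dummy data, and the same Keras/SRU imports as in the snippet are assumed.

import numpy as np

MAX_LEN, VOCAB, EMB_DIM, NUM_CLS = 80, 20000, 100, 5
embed_mat = np.random.uniform(-0.05, 0.05, size=(VOCAB, EMB_DIM))  # stand-in for pretrained vectors

model = SRUModel(embed_mat, MAX_LEN, NUM_CLS, sru_sz=128)
x = np.random.randint(0, VOCAB, size=(32, MAX_LEN))            # padded token-id batch
y = np.eye(NUM_CLS)[np.random.randint(0, NUM_CLS, size=32)]    # one-hot labels for categorical_crossentropy
model.fit(x, y, batch_size=32, epochs=1)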
Example 2
    def __init__(self, trg_vocab_size, with_ln=False, max_out=True):

        super(Decoder, self).__init__()

        self.max_out = max_out
        self.attention = Attention(wargs.dec_hid_size, wargs.align_size)
        self.trg_lookup_table = nn.Embedding(trg_vocab_size, wargs.trg_wemb_size, padding_idx=PAD)
        self.tanh = nn.Tanh()

        if wargs.dec_rnn_type == 'gru':
            self.gru1 = GRU(wargs.trg_wemb_size, wargs.dec_hid_size, with_ln=with_ln)
            self.gru2 = GRU(wargs.enc_hid_size, wargs.dec_hid_size, with_ln=with_ln)
        elif wargs.dec_rnn_type == 'sru':
            self.gru1 = SRU(input_size=wargs.trg_wemb_size, hidden_size=wargs.dec_hid_size,
                    num_layers=wargs.dec_layer_cnt, dropout=0., bidirectional=False)
            self.gru2 = SRU(input_size=2*wargs.enc_hid_size, hidden_size=wargs.dec_hid_size,
                    num_layers=wargs.dec_layer_cnt, dropout=0., bidirectional=False)

        out_size = 2 * wargs.out_size if max_out else wargs.out_size
        self.ls = nn.Linear(wargs.dec_hid_size, out_size)
        self.ly = nn.Linear(wargs.trg_wemb_size, out_size)
        self.lc = nn.Linear(2*wargs.enc_hid_size, out_size)

        self.classifier = Classifier(wargs.out_size, trg_vocab_size,
                                     self.trg_lookup_table if wargs.proj_share_weight is True else None)
Example 3
def eval_imdb():
    max_features = 20000
    maxlen = 80  # cut texts after this number of words (among top max_features most common words)
    batch_size = 128

    depth = 1

    print('Loading data...')
    (x_train, y_train), (x_test,
                         y_test) = imdb.load_data(num_words=max_features)
    print(len(x_train), 'train sequences')
    print(len(x_test), 'test sequences')

    print('Pad sequences (samples x time)')
    x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
    x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    print('Build model...')
    ip = Input(shape=(maxlen, ))
    embed = Embedding(max_features, 128)(ip)

    prev_input = embed
    hidden_states = []

    if depth > 1:
        for i in range(depth - 1):
            h, h_final, c_final = SRU(128,
                                      dropout=0.0,
                                      recurrent_dropout=0.0,
                                      return_sequences=True,
                                      return_state=True,
                                      unroll=True)(prev_input)
            prev_input = h
            hidden_states.append(c_final)

    outputs = SRU(128, dropout=0.0, recurrent_dropout=0.0,
                  unroll=True)(prev_input)
    outputs = Dense(1, activation='sigmoid')(outputs)

    model = Model(ip, outputs)
    model.summary()

    # try using different optimizers and different optimizer configs
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    print('Train...')
    model.fit(x_train,
              y_train,
              batch_size=batch_size,
              epochs=100,
              validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
    print('Test score:', score)
    print('Test accuracy:', acc)
Example 4
    def __init__(self,
                 dict_size,
                 emb_size=1000,
                 hid_size=1000,
                 vis_size=2688,
                 num_filters=10,
                 mixed_size=1000,
                 hid_mixed_size=1005,
                 lang_layers=3,
                 mixed_layers=3,
                 backend='dpn92',
                 mix_we=True,
                 lstm=False,
                 pretrained=True,
                 extra=True,
                 high_res=False):
        super().__init__()
        self.high_res = high_res
        self.vis_size = vis_size
        self.num_filters = num_filters
        if backend == 'dpn92':
            self.base = create_model(backend,
                                     1,
                                     pretrained=pretrained,
                                     extra=extra)
        else:
            self.base = create_model(backend, 1, pretrained=pretrained)

        self.emb = nn.Embedding(dict_size, emb_size)
        self.lang_model = SRU(emb_size, hid_size, num_layers=lang_layers)
        if lstm:
            self.lang_model = nn.LSTM(emb_size,
                                      hid_size,
                                      num_layers=lang_layers)

        self.mix_we = mix_we
        lineal_in = hid_size + emb_size * int(mix_we)
        self.adaptative_filter = nn.Linear(in_features=lineal_in,
                                           out_features=(num_filters *
                                                         (vis_size + 8)))

        self.comb_conv = nn.Conv2d(in_channels=(8 + emb_size + hid_size +
                                                vis_size + num_filters),
                                   out_channels=mixed_size,
                                   kernel_size=1,
                                   padding=0)

        self.mrnn = SRU(mixed_size, hid_mixed_size, num_layers=mixed_layers)
        if lstm:
            self.mrnn = nn.LSTM(mixed_size,
                                hid_mixed_size,
                                num_layers=mixed_layers)

        if not self.high_res:
            self.output_collapse = nn.Conv2d(in_channels=hid_mixed_size,
                                             out_channels=1,
                                             kernel_size=1)
Example 5
def _profile():
    seq_length = 50
    batchsize = 48
    feature_dimension = 128
    data_cpu = np.random.normal(0,
                                1,
                                size=(batchsize, feature_dimension,
                                      seq_length)).astype(np.float32)
    data_gpu = cuda.to_gpu(data_cpu, gpu_device)

    # CPU
    layer = SRU(feature_dimension, feature_dimension)
    for _ in range(100):
        h_cpu, c_cpu = layer(data_cpu)
        layer.reset_state()

    # GPU (define-by-run)
    layer = NaiveSRU(feature_dimension, feature_dimension)
    layer.to_gpu(gpu_device)
    for _ in range(100):
        h, c = layer(data_gpu)
        layer.reset_state()

    # GPU (CUDA Kernel)
    layer = SRU(feature_dimension, feature_dimension)
    layer.to_gpu(gpu_device)
    for _ in range(100):
        h_gpu, c_gpu = layer(data_gpu)
        layer.reset_state()

    # GPU (PyTorch)
    with torch.cuda.device(gpu_device):
        from cuda_functional import SRU as PyTorchSRU
        data_gpu_torch = torch.FloatTensor(seq_length, batchsize,
                                           feature_dimension).cuda()
        rnn = PyTorchSRU(128,
                         128,
                         num_layers=1,
                         dropout=0.0,
                         rnn_dropout=0.0,
                         use_tanh=0,
                         bidirectional=False)
        rnn.cuda()
        for _ in range(100):
            output, hidden = rnn(torch.autograd.Variable(data_gpu_torch))

    # LSTM (Chainer)
    layer = links.LSTM(feature_dimension, feature_dimension)
    layer.to_gpu(gpu_device)
    for _ in range(100):
        for t in range(seq_length):
            h = layer(data_gpu[..., t])
        layer.reset_state()

    print(h_cpu)
    print(h_gpu)
Example 6
    def __init__(self, config: Dict):
        """
        :param config: A dictionary containing the model and training configuration.
        """
        super(Retrieval, self).__init__()

        #self.usecuda = config['cuda']
        self.config = config
        self.device = torch.device('cuda' if config['cuda'] else 'cpu')

        if config['bert']:
            self.context_bert = AutoModel.from_pretrained(config['bert_type'])
            self.response_bert = AutoModel.from_pretrained(config['bert_type'])
            bertsize = self.context_bert.config.hidden_size
            self.output_size = bertsize

        else:
            self.embedding_size = config['embedding_size']
            self.hidden_size = config['hidden_size']
            self.bidirectional = config['bidirectional']
            self.num_layers = config['num_layers']
            self.dropout = config['dropout']
            self.rnn_dropout = config['rnn_dropout']

            self.output_size = self.hidden_size * (1 + self.bidirectional)
            self.dropout_layer = nn.Dropout(self.dropout)
            self.context_rnn = SRU(input_size=self.embedding_size,
                                   hidden_size=self.hidden_size,
                                   num_layers=self.num_layers,
                                   dropout=self.dropout,
                                   rnn_dropout=self.rnn_dropout,
                                   bidirectional=self.bidirectional,
                                   use_tanh=False,
                                   layer_norm=False,
                                   rescale=False)
            self.response_rnn = SRU(input_size=self.embedding_size,
                                    hidden_size=self.hidden_size,
                                    num_layers=self.num_layers,
                                    dropout=self.dropout,
                                    rnn_dropout=self.rnn_dropout,
                                    bidirectional=self.bidirectional,
                                    use_tanh=False,
                                    layer_norm=False,
                                    rescale=False)

            if config['use_attention'] == 'True':
                self.context_attention = SelfAttentiveLayer(
                    self.output_size, config)
                self.response_attention = SelfAttentiveLayer(
                    self.output_size, config)

            if config['use_bilinear'] == 'True':
                self.map = nn.Sequential(
                    nn.Linear(self.output_size, self.output_size), nn.Tanh())
Example 7
def benchmark_chainer_sru(batchsize, seq_length, feature_dimension, repeat=50):
    layer = SRU(feature_dimension)
    x_data = np.random.normal(0,
                              1,
                              size=(batchsize, feature_dimension,
                                    seq_length)).astype(np.float32)
    x_data = cuda.to_gpu(x_data)
    layer.to_gpu()

    # enter both context managers; chaining them with `and` would only apply the second
    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        # forward
        start_time = time.time()
        for i in range(repeat):
            output, cell, last_cell = layer(x_data, None)
        forward_time_mean = (time.time() - start_time) / repeat

    with chainer.using_config("train", True):
        # backward
        start_time = time.time()
        for i in range(repeat):
            output, cell, last_cell = layer(x_data, None)
            layer.cleargrads()
            functions.sum(output).backward()
        backward_time_mean = (time.time() - start_time) / repeat

    return forward_time_mean, backward_time_mean
Example 8
 def __init__(self, nb_layer, dim_in, dim_out, dropout=0.25):
     super(SruEmb, self).__init__()
     self.dim_out = dim_out
     self.rnn = SRU(dim_in, dim_out, num_layers=nb_layer,
                    dropout=dropout, rnn_dropout=dropout,
                    use_tanh=True, has_skip_term=True,
                    v1=True, rescale=False)
Example 9
    def __init__(self,
                 in_dim=1024,
                 hidden_dim=512,
                 n_tags=11,
                 num_layers=2,
                 cell='gru'):
        super(RnnDecoder, self).__init__()
        if cell == 'gru':
            self.rnn = nn.GRU(input_size=in_dim,
                              hidden_size=hidden_dim,
                              num_layers=num_layers,
                              dropout=0.5,
                              bidirectional=True)

        elif cell == 'lstm':
            self.rnn = nn.LSTM(input_size=in_dim,
                               hidden_size=hidden_dim,
                               num_layers=num_layers,
                               dropout=0.5,
                               bidirectional=True)

        elif cell == 'sru':
            from sru import SRU
            self.rnn = SRU(input_size=in_dim,
                           hidden_size=hidden_dim,
                           num_layers=num_layers,
                           dropout=0.5,
                           bidirectional=True)

        self.out = nn.Sequential(nn.ReLU(), nn.Dropout(),
                                 nn.Linear(hidden_dim * 2, n_tags))
Example 10
    def __init__(self,
                 context_len=21,
                 in_dim=1024,
                 out_dim=1024,
                 num_layers=2,
                 cell='gru'):
        super(RnnEncoder, self).__init__()

        self.hidden_dim = out_dim // 2

        if cell == 'gru':
            self.rnn = nn.GRU(input_size=in_dim,
                              hidden_size=self.hidden_dim,
                              num_layers=num_layers,
                              dropout=0.5,
                              bidirectional=True)

        elif cell == 'lstm':
            self.rnn = nn.LSTM(input_size=in_dim,
                               hidden_size=self.hidden_dim,
                               num_layers=num_layers,
                               dropout=0.5,
                               bidirectional=True)

        elif cell == 'sru':
            from sru import SRU
            self.rnn = SRU(input_size=in_dim,
                           hidden_size=self.hidden_dim,
                           num_layers=num_layers,
                           dropout=0.5,
                           bidirectional=True)
Example 11
    def __init__(self,
                 img_dim=512,
                 num_segments=12,
                 hidden_size=1024,
                 num_class=51):
        super(Recurrent_model, self).__init__()
        self.img_dim = img_dim
        self.num_segments = num_segments
        self.num_class = num_class

        self.rnn = SRU(img_dim,
                       hidden_size,
                       num_layers=3,
                       dropout=0.5,
                       bidirectional=False,
                       layer_norm=False,
                       highway_bias=0,
                       rescale=True)

        # self.rnn = nn.LSTM(img_dim, hidden_size,
        #                num_layers = 3,
        #                dropout = 0.5,
        #                bidirectional = False)

        # self.rnn = nn.GRU(img_dim, hidden_size,
        #                num_layers = 3,
        #                dropout = 0.5,
        #                bidirectional = False)

        self.dropout = nn.Dropout()
        self.fc = nn.Linear(hidden_size, self.num_class)
Example 12
 def __init__(self, pretrained_path, vocab, K=620, d=2400, num_stack=4):
     super(TextEncoder, self).__init__()
     self.vocab = vocab
     self.embedding = self.create_emb_layer(
         self.load_dicts(pretrained_path),
         self.load_emb_params(pretrained_path), self.vocab, K)
     self.input_size = K
     self.hidden_size = d
     self.num_layers = num_stack
     # self.sru = nn.GRU(self.input_size, self.hidden_size,
     #                    num_layers = self.num_layers,
     #                    dropout = 0.25)
     self.sru = SRU(
         self.input_size,
         self.hidden_size,
         num_layers=self.num_layers,  # number of stacking RNN layers
         rnn_dropout=0.25,            # variational dropout applied on linear transformation
         use_tanh=1,                  # use tanh?
         use_relu=0,                  # use ReLU?
         use_selu=0,                  # use SeLU?
         bidirectional=False,         # bidirectional RNN ?
         weight_norm=False,           # apply weight normalization on parameters
         layer_norm=False,            # apply layer normalization on the output of each layer
         highway_bias=0               # initial bias of highway gate (<= 0)
     )
Example 13
    def __init__(self,
                 src_vocab_size,
                 input_size,
                 output_size,
                 bidirectional=False,
                 with_ln=False,
                 prefix='Encoder', **kwargs):

        super(Encoder, self).__init__()

        self.output_size = output_size
        f = lambda name: str_cat(prefix, name)  # returns 'Encoder_' + parameter name

        self.src_lookup_table = nn.Embedding(src_vocab_size, wargs.src_wemb_size, padding_idx=PAD)

        if wargs.enc_rnn_type == 'gru':
            self.forw_gru = GRU(input_size, output_size, with_ln=with_ln, prefix=f('Forw'))
            self.back_gru = GRU(output_size, output_size, with_ln=with_ln, prefix=f('Back'))
        elif wargs.enc_rnn_type == 'sru':
            self.rnn = SRU(
                    input_size=input_size,
                    hidden_size=output_size,
                    num_layers=wargs.enc_layer_cnt,
                    dropout=wargs.drop_rate,
                    bidirectional=bidirectional)
Example 14
    def __init__(self, config: Dict):
        """
        :param config: A dictionary containing the model and training configuration.
        """
        super(FAQRetrieval, self).__init__()

        self.device = torch.device('cuda' if config['cuda'] else 'cpu')

        self.embedding_size = config['embedding_size']
        self.hidden_size = config['hidden_size']
        self.bidirectional = config['bidirectional']
        self.num_layers = config['num_layers']
        self.dropout = config['dropout']
        self.rnn_dropout = config['rnn_dropout']
        self.output_size = self.hidden_size * (1 + self.bidirectional)

        self.dropout_layer = nn.Dropout(self.dropout)
        self.rnn = SRU(input_size=self.embedding_size,
                       hidden_size=self.hidden_size,
                       num_layers=self.num_layers,
                       dropout=self.dropout,
                       rnn_dropout=self.rnn_dropout,
                       bidirectional=self.bidirectional,
                       use_tanh=False,
                       layer_norm=False,
                       rescale=False)
        self.config = config
        if self.config['use_attention']:
            self.attention = SelfAttentiveLayer(self.output_size, config)
        self.candi_mat = None
Example 15
    def __init__(self,
                 embed_size,
                 hidden_size,
                 output_size,
                 n_layers=1,
                 padding_index=3,
                 dropout=0.0,
                 embedding_dropout=0.0):
        super(Decoder, self).__init__()
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.dropout = dropout
        self.embedding_dropout = embedding_dropout

        self.embed = nn.Embedding(output_size,
                                  embed_size,
                                  padding_idx=padding_index)
        self.attention = Attention(hidden_size)
        self.gru = SRU(hidden_size + embed_size,
                       hidden_size,
                       num_layers=n_layers,
                       layer_norm=True,
                       dropout=dropout)
        # self.out = nn.Linear(hidden_size*2, output_size)
        self.out = nn.Linear(hidden_size, output_size)
Example 16
    def __init__(self,
                 char_embedder=None,
                 sru=True,
                 char_embedder_params=None,
                 model_params=None):
        super().__init__()

        if char_embedder is None:
            if char_embedder_params is None:
                char_embedder_params = default_char_embedder_params
            self.char_embedder = CharEmbedder(**char_embedder_params)
        else:
            self.char_embedder = char_embedder

        self.use_gpu = False
        self.sru = sru

        if self.sru:
            if model_params is None:
                model_params = default_sru_model_params
            self._language_model = SRU(input_size=model_params['output_dim'],
                                       hidden_size=model_params['output_dim'],
                                       use_tanh=True,
                                       num_layers=model_params['n_layers'])
        else:
            if model_params is None:
                model_params = default_lstm_model_params
            self._language_model = nn.LSTM(
                input_size=model_params['output_dim'],
                hidden_size=model_params['output_dim'],
                num_layers=model_params['n_layers'],
                batch_first=True)
        self.model_params = model_params
Example 17
    def __init__(self, args, dim_in):
        super(SruEmb, self).__init__()

        self.dim_out_rnn = args.dimemb
        self.dim_out = args.dimemb
        self.rnn = SRU(dim_in,
                       self.dim_out_rnn,
                       num_layers=args.sru,
                       dropout=0.25,
                       rnn_dropout=0.25,
                       use_tanh=True)

        #self.rnn = nn.LSTM(dim_in, self.dim_out, num_layers=nb_layer,
        #                    dropout = dropout, bidirectional=True)
        self.attn_hop = args.attn_hop
        self.attn_hidden = args.attn_hidden
        self.ws1 = nn.Linear(self.dim_out, self.attn_hidden, bias=False)
        self.ws2 = nn.Linear(self.attn_hidden, self.attn_hop, bias=False)
        self.tanh = nn.Tanh()
        self.softmax = nn.Softmax(dim=2)
        #Number 1 is p = 0.25
        self.drop = nn.Dropout(p=0.5)
        self.fc = nn.Linear(self.dim_out * self.attn_hop,
                            self.dim_out,
                            bias=True)
Example 18
def test_gru_compatible_state_return():
    N = 5
    max_len = 7
    V = 32
    K = 8
    K_out = 11
    num_layers = 3
    bidirectional = True

    print('N', N, 'max_len', max_len, 'num_layers', num_layers, 'bidirectional', bidirectional, 'K', K, 'K_out', K_out)

    torch.manual_seed(123)
    np.random.seed(123)
    lengths = torch.from_numpy(np.random.choice(max_len, N)) + 1
    tensors = [torch.from_numpy(np.random.choice(V, l, replace=True)) for l in lengths.tolist()]
    embedder = nn.Embedding(V, K)
    tensors = nn.utils.rnn.pad_sequence(tensors)
    embedded = embedder(tensors)

    sru = SRU(K, K_out, nn_rnn_compatible_return=True, bidirectional=bidirectional, num_layers=num_layers)
    out, state = sru(embedded)
    print('out.size()', out.size())
    print('state.size()', state.size())

    gru = nn.GRU(K, K_out, bidirectional=bidirectional, num_layers=num_layers)
    gru_out, gru_state = gru(embedded)
    print('gru_state.size()', gru_state.size())
Example 19
 def __init__(self,
              input_size,
              embed_size,
              hidden_size,
              word2vec,
              n_layers=1,
              padding_index=3,
              dropout=0.0,
              embedding_dropout=0.0):
     super(Encoder, self).__init__()
     self.input_size = input_size
     self.hidden_size = hidden_size
     self.embed_size = embed_size
     self.wv = word2vec.wv
     self.gru = SRU(embed_size,
                    hidden_size,
                    num_layers=n_layers,
                    bidirectional=True,
                    layer_norm=True,
                    dropout=dropout)
     self.linear = nn.Linear(hidden_size * 2, hidden_size)
     self.dropout = dropout
     self.embedding_dropout = embedding_dropout
     # Word2Vec rescale
     self.wv = word2vec.wv
     self.embedding = nn.Embedding(
         input_size, embed_size,
         padding_idx=padding_index)  # embedding layer
Example 20
def benchmark_sru_cpu():
    print('-' * 60)
    print('SRU CPU benchmark:')

    rnn = SRU(input_size=input_size,
              hidden_size=hidden_size,
              bidirectional=(n_directions == 2),
              num_layers=1)

    input = torch.randn(seq_len, batch_size, input_size)
    h0 = torch.randn(n_layers, batch_size, hidden_size * n_directions)
    print('input.shape', input.shape)
    print('h0.shape', h0.shape)
    with torch.no_grad():
        rnn.eval()
        output, hn = rnn(input, h0)
    print('output.shape', output.shape)
    print('hn.shape', hn.shape)

    n_iter = 1000
    start = time.time()
    with torch.no_grad():
        rnn.eval()
        for i in range(n_iter):
            rnn.forward(input)
    print('Time:', round((time.time() - start), 2), 'sec')
Example 21
 def _build_rnn(self, name, input, units, layers):
     if name == 'lstm':
         return torch.nn.LSTM(input, units, layers, batch_first=True)
     if name == 'gru':
         return torch.nn.GRU(input, units, layers, batch_first=True)
     if name == 'sru':
         from sru import SRU
         return SRU(input, units, layers, dropout=0, layer_norm=False)
Example 22
 def __init__(self, num_layers=4, hidden_size=512):
     """
     BiGRU neural model, which finds minimums and maximums of time series with
     :param num_layers: number of GRU layers
     :param hidden_size: size of hidden GRU layers
     """
     super().__init__()
     self.rnn = SRU(1, hidden_size, num_layers, bidirectional=True)
     self.classifier = nn.Linear(2 * hidden_size, 3)
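For reference, a sketch of the tensor shapes the layers above expect, assuming the PyTorch sru package and its (seq_len, batch, input_size) layout; the class's forward pass is not shown in the snippet, so this only wires the two layers together by hand.

import torch
import torch.nn as nn
from sru import SRU

seq_len, batch, hidden_size = 100, 4, 512
rnn = SRU(1, hidden_size, num_layers=4, bidirectional=True)   # same configuration as self.rnn above
classifier = nn.Linear(2 * hidden_size, 3)                    # same configuration as self.classifier above

x = torch.randn(seq_len, batch, 1)   # univariate series as (T, B, C)
h, state = rnn(x)                    # h: (T, B, 2 * hidden_size)
logits = classifier(h)               # per-step scores over the 3 classes
print(logits.shape)                  # torch.Size([100, 4, 3])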
Example 23
    def __init__(self,
                 input_size: int,
                 hidden_size: int,
                 n_layers: int = 1,
                 rnn_type: str = 'lstm',
                 dropout: float = 0,
                 layer_norm: bool = False,
                 highway_bias: float = 0,
                 rescale: bool = True,
                 enforce_sorted: bool = False,
                 attention: Optional[nn.Module] = None,
                 activation: Optional[nn.Module] = None,
                 **kwargs) -> None:
        """Initializes the RNNDecoder object.
        """
        super().__init__()

        self.rnn_type = rnn_type
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.enforce_sorted = enforce_sorted
        if rnn_type in ['lstm', 'gru']:
            if kwargs:
                logger.warning(
                    f"The following '{kwargs}' will be ignored " +
                    "as they are only considered when using 'sru' as " +
                    "'rnn_type'")

            rnn_fn = nn.LSTM if rnn_type == 'lstm' else nn.GRU
            self.rnn = rnn_fn(input_size=input_size,
                              hidden_size=hidden_size,
                              num_layers=n_layers,
                              dropout=dropout)
        elif rnn_type == 'sru':
            from sru import SRU
            try:
                self.rnn = SRU(input_size,
                               hidden_size,
                               num_layers=n_layers,
                               dropout=dropout,
                               layer_norm=layer_norm,
                               rescale=rescale,
                               highway_bias=highway_bias,
                               **kwargs)
            except TypeError:
                raise ValueError(f"Unknown kwargs passed to SRU: {kwargs}")
        else:
            raise ValueError(
                f"Unknown rnn type: {rnn_type}, use of of: gru, sru, lstm")

        self.attention = attention

        if self.attention is not None:
            self.linear = torch.nn.Linear(in_features=hidden_size * 2,
                                          out_features=hidden_size)

        self.activation = activation
Example 24
    def __init__(self, input_size, gru_hidden_size, dropout):
        super(FeatureGeneratorSRU, self).__init__()
        self.dropout = dropout
        self.training = False

        self.rnn = SRU(input_size, gru_hidden_size, num_layers=4,
                       use_tanh=0, use_relu=0, use_selu=1,
                       weight_norm=True,
                       dropout=0.2,
                       bidirectional=True)

        self.norm = nn.LayerNorm(256)
Example 25
    def __init__(self,
                 imgH,
                 nc,
                 nclass,
                 nh,
                 width=48,
                 n_rnn=1,
                 isSRU=True,
                 leakyRelu=False,
                 with_se=False,
                 with_mean_max_pooling=False):
        super(CRNN, self).__init__()
        assert width % 2 == 0
        channel = lambda i: (2**i) * width
        self.cnn = nn.Sequential(
            nn.BatchNorm2d(3, affine=False),
            conv_bn_relu(3, 32, 3, 2, 1),
            nn.MaxPool2d(2, 2, 0, ceil_mode=True),
            res_stageCRNN(32, channel(2), 4, with_se),
            res_stageCRNN(channel(2), channel(3), 8, with_se),
            #res_stageCRNN(channel(3), channel(4), 4, False),
            conv_bn_relu(channel(3), channel(4), 2, 1, 0),
        )

        for name, m in self.named_modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.running_mean, 0)
                nn.init.constant_(m.running_var, 1)
                if m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
        if not isSRU:
            self.rnn = nn.LSTM(channel(4), nh, n_rnn, bidirectional=True)
        else:
            self.rnn = SRU(
                channel(4),
                nh,
                num_layers=n_rnn,    # number of stacking RNN layers
                dropout=0.0,         # dropout applied between RNN layers
                rnn_dropout=0.0,     # variational dropout applied on linear transformation
                use_tanh=1,          # use tanh?
                use_relu=0,          # use ReLU?
                bidirectional=True   # bidirectional RNN ?
            )
        self.embeddingCTC = nn.Linear(nh * 2, nclass)
        self.attention = Attention(nh * 2, nh, nclass, 256)
Example 26
    def __init__(
                self,
                input_dim=257,
                output_dim=257,
                hidden_layers=2,
                hidden_units=512,
                left_context=1,
                right_context=1,
                kernel_size=6,
                kernel_num=9,
                target_mode='MSA',
                dropout=0.2):
        super(SRUC, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_layers = hidden_layers
        self.hidden_units = hidden_units
        self.left_context = left_context
        self.right_context = right_context
        self.kernel_size = kernel_size
        self.kernel_sum = kernel_num
        self.target_mode = target_mode

        self.input_layer = nn.Sequential(
                nn.Linear((left_context+1+right_context)*input_dim, hidden_units),
                nn.Tanh()
            )
        
        self.rnn_layer = SRU(
                    input_size=hidden_units,
                    hidden_size=hidden_units,
                    num_layers=self.hidden_layers,
                    dropout=dropout,
                    rescale=True,
                    bidirectional=False,
                    layer_norm=False
            )
        
        self.conv2d_layer = nn.Sequential(
                #nn.Conv2d(in_channels=1,out_channels=kernel_num,kernel_size=(kernel_size, kernel_size), stride=[1,1],padding=(5,5), dilation=(2,2)),
                modules.Conv2d(in_channels=1, out_channels=kernel_num, kernel_size=(kernel_size, kernel_size)),
                nn.Tanh(),
                nn.MaxPool2d(3,stride=1,padding=(1,1))
            )
        
        self.output_layer = nn.Sequential(
                nn.Linear(hidden_units*kernel_num, (left_context+1+right_context)*self.output_dim),
                nn.Sigmoid()
            )
Example 27
 def __init__(self, nIn, nHidden, nOut, isSRU, nLayer):
     super(BidirectionalLSTM_Embed, self).__init__()
     if not isSRU:
         self.rnn = nn.LSTM(nIn, nHidden, nLayer, bidirectional=True)
     else:
         self.rnn = SRU(
             nIn,
             nHidden,
             num_layers=nLayer,   # number of stacking RNN layers
             dropout=0.0,         # dropout applied between RNN layers
             rnn_dropout=0.0,     # variational dropout applied on linear transformation
             use_tanh=1,          # use tanh?
             use_relu=0,          # use ReLU?
             bidirectional=True   # bidirectional RNN ?
         )
Example 28
def test_packed():
    N = 5
    max_len = 7
    V = 32
    K = 8
    K_out = 11

    print('N', N, 'max_len', max_len, 'K', K, 'K_out', K_out)

    torch.manual_seed(123)
    np.random.seed(123)
    lengths = torch.from_numpy(np.random.choice(max_len, N)) + 1
    tensors = [torch.from_numpy(np.random.choice(V, l, replace=True)) for l in lengths.tolist()]
    embedder = nn.Embedding(V, K)
    tensors = nn.utils.rnn.pad_sequence(tensors)
    print('tensors.size()', tensors.size())
    embedded = embedder(tensors)
    print('embedded.size()', embedded.size())
    packed = nn.utils.rnn.pack_padded_sequence(embedded, lengths, batch_first=False, enforce_sorted=False)
    print(isinstance(packed, nn.utils.rnn.PackedSequence))

    sru = SRU(K, K_out)
    out1, state = sru(packed)
    out1, lengths1 = nn.utils.rnn.pad_packed_sequence(out1)
    print('out1.size()', out1.size())
    assert (lengths != lengths1).sum().item() == 0
    print('out1.sum()', out1.sum().item())

    # change an index in the padding region (masked out); the output sum should be unchanged
    tensors[6, 1] = 3
    embedded = embedder(tensors)
    packed = nn.utils.rnn.pack_padded_sequence(embedded, lengths, batch_first=False, enforce_sorted=False)
    out2, state = sru(packed)
    out2, lengths2 = nn.utils.rnn.pad_packed_sequence(out2)
    assert (lengths != lengths2).sum().item() == 0
    print('out2.sum()', out2.sum().item())
    assert out2.sum().item() == out1.sum().item()

    # change an index in the valid region (not masked out); the output sum should change
    tensors[1, 1] = 3
    embedded = embedder(tensors)
    packed = nn.utils.rnn.pack_padded_sequence(embedded, lengths, batch_first=False, enforce_sorted=False)
    out3, state = sru(packed)
    out3, lengths3 = nn.utils.rnn.pad_packed_sequence(out3)
    assert (lengths != lengths3).sum().item() == 0
    print('out3.sum()', out3.sum().item())
    assert out3.sum().item() != out1.sum().item()
Example 29
    def prepare(self, p):
        # input has length 20, batch size 32 and dimension 128
        self.x = Variable(torch.rand(20, 32, 128).cuda())
        input_size, hidden_size = 128, 128

        self.rnn = SRU(
            input_size,
            hidden_size,
            num_layers=2,        # number of stacking RNN layers
            dropout=0.00001,     # dropout applied between RNN layers
            rnn_dropout=0.0001,  # variational dropout applied on linear transformation
            use_tanh=1,          # use tanh?
            use_relu=0,          # use ReLU?
            bidirectional=False, # bidirectional RNN ?
            use_kernel=p.use_kernel,
        )
        self.rnn.cuda()
Example 30
def run_sru(cpu=0,
            gpu=0,
            jit=False,
            use_kernel=False,
            backward=False,
            warmup=10,
            benchmark=20):
    assert not (jit and use_kernel)
    benchmark_init(0, 0, True)

    # input has length 20, batch size 32 and dimension 128
    x = Variable(torch.rand(20, 32, 128).cuda())
    input_size, hidden_size = 128, 128

    rnn = SRU(
        input_size,
        hidden_size,
        num_layers=2,        # number of stacking RNN layers
        dropout=0.00001,     # dropout applied between RNN layers
        rnn_dropout=0.0001,  # variational dropout applied on linear transformation
        use_tanh=1,          # use tanh?
        use_relu=0,          # use ReLU?
        bidirectional=False, # bidirectional RNN ?
        use_kernel=use_kernel,
        jit=jit,
    )
    rnn.cuda()

    kernel_tag = '_kernel' if use_kernel else ''
    backward_tag = '_training' if backward else '_forward'
    jit_tag = '_jit' if jit else ''
    name = 'sru{}{}{}'.format(backward_tag, kernel_tag, jit_tag)
    iter_timer = Bench(cuda=True, name=name, warmup_iters=warmup)

    for _ in range(warmup + benchmark):
        gc.collect()
        with iter_timer:
            output, hidden = rnn(x)  # forward pass
            if backward:
                output.sum().backward()
        # output is (length, batch size, hidden size * number of directions)
        # hidden is (layers, batch size, hidden size * number of directions)
    return iter_timer
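A possible way to drive the benchmark above, assuming the surrounding harness (benchmark_init, Bench) is importable; the warmup and iteration counts are arbitrary.

# Hypothetical driver: sweep the kernel and backward settings of run_sru above.
for use_kernel in (False, True):
    for backward in (False, True):
        timer = run_sru(use_kernel=use_kernel, backward=backward, warmup=10, benchmark=20)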