Example 1
    def __init__(self, config):
        super(conv_VQ_encoder, self).__init__()
        conv_init = config['conv_init']
        conv = config['conv']
        att = config['att']
        VQ = config['VQ']
        self.max_len = config['max_len']
        # initial conv over the full feature dimension
        self.Conv = nn.Conv1d(in_channels=conv_init['in_channels'],
                              out_channels=conv_init['out_channels'],
                              kernel_size=conv_init['kernel_size'],
                              stride=conv_init['stride'],
                              padding=conv_init['padding'])
        # residual conv blocks
        self.res_convs = nn.ModuleList()
        for x in range(conv['n_layers']):
            self.res_convs.append(
                res_conv(in_ch=conv['in_channels'][x],
                         out_ch=conv['out_channels'][x],
                         ks=conv['kernel_size'][x],
                         stride=conv['stride'][x]))
        # VQ layers
        self.VQ = nn.ModuleList()
        for x in range(VQ['n_layers']):
            self.VQ.append(VQ_EMA_layer(VQ['n_embs'][x], VQ['emb_dim'][x]))

        # final pooling over the full time dimension
        self.att = multi_attention(in_size=att['in_size'],
                                   hidden_size=att['hidden_size'],
                                   n_heads=att['heads'])
        # application order of the VQ and res_conv layers: list with one bool
        # per VQ/res_conv layer.
        self.app_order = config['app_order']
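
For reference, a config of the shape below would satisfy this constructor. Every value is an illustrative assumption rather than a setting from the original repository, and the reading of app_order is inferred from the comment above.

# Illustrative config for conv_VQ_encoder; all sizes are assumptions.
conv_vq_config = {
    'conv_init': {'in_channels': 39, 'out_channels': 64, 'kernel_size': 6,
                  'stride': 2, 'padding': 0},
    'conv': {'n_layers': 2,
             'in_channels': [64, 128], 'out_channels': [128, 256],
             'kernel_size': [6, 6], 'stride': [2, 2]},
    'VQ': {'n_layers': 2, 'n_embs': [512, 512], 'emb_dim': [128, 256]},
    'att': {'in_size': 256, 'hidden_size': 128, 'heads': 1},
    'max_len': 1024,
    # assumed reading of app_order: False -> apply the next res_conv block,
    # True -> apply the next VQ layer
    'app_order': [False, True, False, True],
}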
Example 2
    def __init__(self, config):
        super(audio_rnn_encoder, self).__init__()
        conv = config['conv']
        rnn = config['rnn']
        VQ = config['VQ']
        att = config['att']
        self.max_len = rnn['max_len']
        #self.norm = nn.LayerNorm(conv['in_channels'])
        self.Conv = nn.Conv1d(in_channels=conv['in_channels'],
                              out_channels=conv['out_channels'],
                              kernel_size=conv['kernel_size'],
                              stride=conv['stride'],
                              padding=conv['padding'])
        self.RNN = nn.ModuleList()
        for x in range(len(rnn['n_layers'])):
            self.RNN.append(
                nn.LSTM(input_size=rnn['input_size'][x],
                        hidden_size=rnn['hidden_size'][x],
                        num_layers=rnn['n_layers'][x],
                        batch_first=rnn['batch_first'],
                        bidirectional=rnn['bidirectional'],
                        dropout=rnn['dropout']))
        # VQ layers
        self.VQ = nn.ModuleList()
        for x in range(VQ['n_layers']):
            self.VQ.append(VQ_EMA_layer(VQ['n_embs'][x], VQ['emb_dim'][x]))

        self.att = multi_attention(in_size=att['in_size'],
                                   hidden_size=att['hidden_size'],
                                   n_heads=att['heads'])
        # application order of the VQ and RNN layers: list with one bool per
        # VQ/RNN layer.
        self.app_order = config['app_order']
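
Because the rnn entries are indexed per LSTM block, they are lists; a matching config sketch (sizes are assumptions, with 2 * hidden_size of a bidirectional block feeding the next one) could look like this:

# Illustrative config for the VQ variant of audio_rnn_encoder; all sizes are assumptions.
audio_vq_config = {
    'conv': {'in_channels': 39, 'out_channels': 64, 'kernel_size': 6,
             'stride': 2, 'padding': 0},
    'rnn': {'max_len': 1024,
            'input_size': [64, 2048], 'hidden_size': [1024, 1024], 'n_layers': [1, 1],
            'batch_first': True, 'bidirectional': True, 'dropout': 0},
    'VQ': {'n_layers': 1, 'n_embs': [1024], 'emb_dim': [2048]},
    'att': {'in_size': 2048, 'hidden_size': 128, 'heads': 1},
    # assumed reading: interleave the two LSTM blocks (False) with one VQ layer (True)
    'app_order': [False, True, False],
}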
Example 3
    def __init__(self, config, log = True):
        super(nwp_rnn_att, self).__init__()
        
        embed = config['embed']
        rnn = config['rnn']
        lin = config['lin']
        att = config['att']

        self.max_len = config['max_len']
        self.embed = nn.Embedding(num_embeddings = embed['n_embeddings'], 
                                  embedding_dim = embed['embedding_dim'], 
                                  sparse = embed['sparse'],
                                  padding_idx = embed['padding_idx'])

        self.RNN = nn.GRU(input_size = rnn['in_size'], 
                          hidden_size = rnn['hidden_size'], 
                          num_layers = rnn['n_layers'], 
                          batch_first = rnn['batch_first'],
                          bidirectional = rnn['bidirectional'], 
                          dropout = rnn['dropout'])

        self.att = multi_attention(in_size = rnn['hidden_size'], 
                                   hidden_size = att['hidden_size'], 
                                   n_heads = att['heads'])
        
        self.linear = nn.Sequential(
            nn.Linear(rnn['hidden_size'], lin['hidden_size']),
            nn.ReLU(),
            nn.Linear(lin['hidden_size'], embed['n_embeddings']))
        if log:
            self.log(embed, rnn, lin, att)
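
A config sketch for this constructor; vocabulary size, widths and max_len are assumptions, and bidirectional is kept False so that the GRU output width matches the first nn.Linear input.

# Illustrative config for nwp_rnn_att; all sizes are assumptions.
nwp_config = {
    'max_len': 42,
    'embed': {'n_embeddings': 20000, 'embedding_dim': 400,
              'sparse': False, 'padding_idx': 0},
    'rnn': {'in_size': 400, 'hidden_size': 500, 'n_layers': 1,
            'batch_first': True, 'bidirectional': False, 'dropout': 0},
    'att': {'hidden_size': 128, 'heads': 1},
    'lin': {'hidden_size': 400},
}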
Example 4
    def __init__(self, config):
        super(audio_rnn_encoder, self).__init__()
        conv = config['conv']
        rnn = config['rnn']
        att = config['att']
        # 1D convolution over the input audio features
        self.Conv = nn.Conv1d(in_channels = conv['in_channels'],
                              out_channels = conv['out_channels'],
                              kernel_size = conv['kernel_size'],
                              stride = conv['stride'],
                              padding = conv['padding'])
        # recurrent encoder
        self.RNN = nn.GRU(input_size = rnn['input_size'],
                          hidden_size = rnn['hidden_size'],
                          num_layers = rnn['num_layers'],
                          batch_first = rnn['batch_first'],
                          bidirectional = rnn['bidirectional'],
                          dropout = rnn['dropout'])
        # attention pooling over the time dimension
        self.att = multi_attention(in_size = att['in_size'],
                                   hidden_size = att['hidden_size'],
                                   n_heads = att['heads'])
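
A config sketch for this plain conv + GRU encoder; feature and layer sizes are assumptions, with att['in_size'] set to 2 * hidden_size to match the bidirectional GRU output.

# Illustrative config for audio_rnn_encoder; all sizes are assumptions.
audio_config = {
    'conv': {'in_channels': 39, 'out_channels': 64, 'kernel_size': 6,
             'stride': 2, 'padding': 0},
    'rnn': {'input_size': 64, 'hidden_size': 1024, 'num_layers': 4,
            'batch_first': True, 'bidirectional': True, 'dropout': 0},
    'att': {'in_size': 2048, 'hidden_size': 128, 'heads': 1},
}
# encoder = audio_rnn_encoder(audio_config)  # assuming the class and multi_attention are importable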
Example 5
    def __init__(self, config):
        super(text_rnn_encoder, self).__init__()
        embed = config['embed']
        rnn = config['rnn']
        att = config['att']
        # character embedding layer
        self.embed = nn.Embedding(num_embeddings = embed['num_chars'],
                                  embedding_dim = embed['embedding_dim'],
                                  sparse = embed['sparse'],
                                  padding_idx = embed['padding_idx'])
        # recurrent encoder
        self.RNN = nn.GRU(input_size = rnn['input_size'],
                          hidden_size = rnn['hidden_size'],
                          num_layers = rnn['num_layers'],
                          batch_first = rnn['batch_first'],
                          bidirectional = rnn['bidirectional'],
                          dropout = rnn['dropout'])
        # attention pooling over the time dimension
        self.att = multi_attention(in_size = att['in_size'],
                                   hidden_size = att['hidden_size'],
                                   n_heads = att['heads'])
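
A config sketch for the character-level text encoder; all sizes are assumptions.

# Illustrative config for text_rnn_encoder; all sizes are assumptions.
text_config = {
    'embed': {'num_chars': 100, 'embedding_dim': 20,
              'sparse': False, 'padding_idx': 0},
    'rnn': {'input_size': 20, 'hidden_size': 1024, 'num_layers': 1,
            'batch_first': True, 'bidirectional': True, 'dropout': 0},
    'att': {'in_size': 2048, 'hidden_size': 128, 'heads': 1},
}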