def __init__(self, config):
    super(conv_VQ_encoder, self).__init__()
    conv_init = config['conv_init']
    conv = config['conv']
    att = config['att']
    VQ = config['VQ']
    self.max_len = config['max_len']
    # initial conv over the full feature dimension
    self.Conv = nn.Conv1d(in_channels=conv_init['in_channels'],
                          out_channels=conv_init['out_channels'],
                          kernel_size=conv_init['kernel_size'],
                          stride=conv_init['stride'],
                          padding=conv_init['padding'])
    # residual conv blocks
    self.res_convs = nn.ModuleList()
    for x in range(conv['n_layers']):
        self.res_convs.append(res_conv(in_ch=conv['in_channels'][x],
                                       out_ch=conv['out_channels'][x],
                                       ks=conv['kernel_size'][x],
                                       stride=conv['stride'][x]))
    # VQ layers
    self.VQ = nn.ModuleList()
    for x in range(VQ['n_layers']):
        self.VQ.append(VQ_EMA_layer(VQ['n_embs'][x], VQ['emb_dim'][x]))
    # final pooling over the full time dimension
    self.att = multi_attention(in_size=att['in_size'],
                               hidden_size=att['hidden_size'],
                               n_heads=att['heads'])
    # application order of the VQ and conv layers: a list with one bool per
    # VQ/res_conv layer.
    self.app_order = config['app_order']
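# Illustrative only: a minimal sketch of the config dict this constructor
# reads. The concrete values below are assumptions, not taken from the
# repository. Per-layer settings under 'conv' and 'VQ' are lists indexed by
# layer, and 'app_order' holds one bool per VQ/res_conv layer; its exact
# length and meaning depend on the forward pass, which is not shown here.
example_conv_VQ_config = {'conv_init': {'in_channels': 39, 'out_channels': 128,
                                        'kernel_size': 6, 'stride': 2,
                                        'padding': 0},
                          'conv': {'n_layers': 2,
                                   'in_channels': [128, 128],
                                   'out_channels': [128, 128],
                                   'kernel_size': [6, 6],
                                   'stride': [2, 2]},
                          'VQ': {'n_layers': 2,
                                 'n_embs': [512, 512],
                                 'emb_dim': [128, 128]},
                          'att': {'in_size': 128, 'hidden_size': 128,
                                  'heads': 1},
                          'max_len': 1024,
                          'app_order': [True, True]}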
def __init__(self, config):
    super(audio_rnn_encoder, self).__init__()
    conv = config['conv']
    rnn = config['rnn']
    VQ = config['VQ']
    att = config['att']
    self.max_len = rnn['max_len']
    #self.norm = nn.LayerNorm(conv['in_channels'])
    self.Conv = nn.Conv1d(in_channels=conv['in_channels'],
                          out_channels=conv['out_channels'],
                          kernel_size=conv['kernel_size'],
                          stride=conv['stride'],
                          padding=conv['padding'])
    self.RNN = nn.ModuleList()
    for x in range(len(rnn['n_layers'])):
        self.RNN.append(nn.LSTM(input_size=rnn['input_size'][x],
                                hidden_size=rnn['hidden_size'][x],
                                num_layers=rnn['n_layers'][x],
                                batch_first=rnn['batch_first'],
                                bidirectional=rnn['bidirectional'],
                                dropout=rnn['dropout']))
    # VQ layers
    self.VQ = nn.ModuleList()
    for x in range(VQ['n_layers']):
        self.VQ.append(VQ_EMA_layer(VQ['n_embs'][x], VQ['emb_dim'][x]))
    self.att = multi_attention(in_size=att['in_size'],
                               hidden_size=att['hidden_size'],
                               n_heads=att['heads'])
    # application order of the VQ and RNN layers: a list with one bool per
    # VQ/RNN layer.
    self.app_order = config['app_order']
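# Sketch only, not the forward pass of this repository: one plausible way
# 'app_order' could be consumed, running the LSTM layers in order and
# quantising the hidden states after layer x whenever app_order[x] is True.
# The assumption that a VQ_EMA_layer returns the quantised tensor as the
# first element of its output is hypothetical.
def _sketch_forward(encoder, speech):
    # speech: (batch, features, time) -> conv -> (batch, time, features)
    x = encoder.Conv(speech).permute(0, 2, 1)
    vq_idx = 0
    for layer_idx, rnn_layer in enumerate(encoder.RNN):
        x, _ = rnn_layer(x)
        if encoder.app_order[layer_idx]:
            x = encoder.VQ[vq_idx](x)[0]
            vq_idx += 1
    # attention pooling over the time dimension
    return encoder.att(x)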
def __init__(self, config, log=True):
    super(nwp_rnn_att, self).__init__()
    embed = config['embed']
    rnn = config['rnn']
    lin = config['lin']
    att = config['att']
    self.max_len = config['max_len']
    # token embeddings
    self.embed = nn.Embedding(num_embeddings=embed['n_embeddings'],
                              embedding_dim=embed['embedding_dim'],
                              sparse=embed['sparse'],
                              padding_idx=embed['padding_idx'])
    self.RNN = nn.GRU(input_size=rnn['in_size'],
                      hidden_size=rnn['hidden_size'],
                      num_layers=rnn['n_layers'],
                      batch_first=rnn['batch_first'],
                      bidirectional=rnn['bidirectional'],
                      dropout=rnn['dropout'])
    # attention over the GRU hidden states
    self.att = multi_attention(in_size=rnn['hidden_size'],
                               hidden_size=att['hidden_size'],
                               n_heads=att['heads'])
    # project the states back to the vocabulary size
    self.linear = nn.Sequential(nn.Linear(rnn['hidden_size'],
                                          lin['hidden_size']),
                                nn.ReLU(),
                                nn.Linear(lin['hidden_size'],
                                          embed['n_embeddings']))
    if log:
        self.log(embed, rnn, lin, att)
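# Illustrative only: a minimal config for nwp_rnn_att; the values are
# assumptions, not taken from the repository. rnn['in_size'] has to match
# embed['embedding_dim'], and since both the attention and the output layers
# take rnn['hidden_size'] features, the GRU is used unidirectionally here.
example_nwp_config = {'max_len': 41,
                      'embed': {'n_embeddings': 10000, 'embedding_dim': 400,
                                'sparse': False, 'padding_idx': 0},
                      'rnn': {'in_size': 400, 'hidden_size': 500,
                              'n_layers': 1, 'batch_first': True,
                              'bidirectional': False, 'dropout': 0},
                      'lin': {'hidden_size': 400},
                      'att': {'hidden_size': 128, 'heads': 1}}
# nwp_model = nwp_rnn_att(example_nwp_config, log=False)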
def __init__(self, config):
    super(audio_rnn_encoder, self).__init__()
    conv = config['conv']
    rnn = config['rnn']
    att = config['att']
    # 1d convolution over the input audio features
    self.Conv = nn.Conv1d(in_channels=conv['in_channels'],
                          out_channels=conv['out_channels'],
                          kernel_size=conv['kernel_size'],
                          stride=conv['stride'],
                          padding=conv['padding'])
    # recurrent layers over the convolved features
    self.RNN = nn.GRU(input_size=rnn['input_size'],
                      hidden_size=rnn['hidden_size'],
                      num_layers=rnn['num_layers'],
                      batch_first=rnn['batch_first'],
                      bidirectional=rnn['bidirectional'],
                      dropout=rnn['dropout'])
    # attention pooling over the time dimension
    self.att = multi_attention(in_size=att['in_size'],
                               hidden_size=att['hidden_size'],
                               n_heads=att['heads'])
def __init__(self, config):
    super(text_rnn_encoder, self).__init__()
    embed = config['embed']
    rnn = config['rnn']
    att = config['att']
    # character embeddings
    self.embed = nn.Embedding(num_embeddings=embed['num_chars'],
                              embedding_dim=embed['embedding_dim'],
                              sparse=embed['sparse'],
                              padding_idx=embed['padding_idx'])
    # recurrent layers over the embedded characters
    self.RNN = nn.GRU(input_size=rnn['input_size'],
                      hidden_size=rnn['hidden_size'],
                      num_layers=rnn['num_layers'],
                      batch_first=rnn['batch_first'],
                      bidirectional=rnn['bidirectional'],
                      dropout=rnn['dropout'])
    # attention pooling over the time dimension
    self.att = multi_attention(in_size=att['in_size'],
                               hidden_size=att['hidden_size'],
                               n_heads=att['heads'])
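# Illustrative only: a minimal config for text_rnn_encoder; the values are
# assumptions, not taken from the repository. With a bidirectional GRU the
# recurrent output has 2 * hidden_size features, so att['in_size'] is
# presumably expected to match that (the same holds for audio_rnn_encoder
# above).
example_text_config = {'embed': {'num_chars': 100, 'embedding_dim': 20,
                                 'sparse': False, 'padding_idx': 0},
                       'rnn': {'input_size': 20, 'hidden_size': 1024,
                               'num_layers': 1, 'batch_first': True,
                               'bidirectional': True, 'dropout': 0},
                       'att': {'in_size': 2048, 'hidden_size': 128,
                               'heads': 1}}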