import torch.nn as nn

from train_test_lstm import train, test
from MusicDataset import Musicdata_LSTM

basic_dir = 'D:/OneDrive-UCalgary/OneDrive - University of Calgary/data/cal500/'
data_file = basic_dir + 'music-data-v7.csv'
label_file = basic_dir + 'labels-v5.csv'
record_file = 'record-lstm.txt'
model_path = 'lstm.pt'
net_name = 'lstm'

net = nn.LSTM(16, 18, 2)

train_set = Musicdata_LSTM(data_file=data_file, label_file=label_file, start=0, total=2560)
test_set = Musicdata_LSTM(data_file=data_file, label_file=label_file, start=2560, total=3219)

net = train(net, model_path=model_path, dataset=train_set)
test(net, net_name, dataset=test_set)
def __init__(self, input_size, hidden_size, output_size, num_layer):
    super(net, self).__init__()
    self.layer1 = nn.LSTM(input_size, hidden_size, num_layer)
    self.layer2 = nn.Linear(hidden_size, output_size)
    self.layer3 = nn.Softmax()
def __init__(self, input_size, hidden_size, output_size=1, num_layers=2):
    super(LSTM, self).__init__()
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
    self.out = nn.Linear(hidden_size, output_size)
def __init__(self, n_actions):
    super(Network, self).__init__()
    self.cnn = CNN(n_actions)
    self.rnn = nn.LSTM(1447, 256, 1)
    self.out = nn.Linear(256, n_actions)
    self.to(device)
except OverflowError:
    perplexity = float('inf')
if (epoch + 1) % pred_period == 0:
    print('epoch %d, perplexity %f, time %.2f sec' % (
        epoch + 1, perplexity, time.time() - start))
    for prefix in prefixes:
        print(' -', predict_rnn_pytorch(prefix, pred_len, model, vocab_size,
                                        device, idx_to_char, char_to_idx))


(corpus_indices, char_to_idx, idx_to_char, vocab_size) = load_data_jay_lyrics()

# Initialize model parameters
num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size
print('will use', device)

# Train the model and generate lyrics
num_epochs, num_steps, batch_size, lr, clipping_theta = 160, 35, 32, 1e-2, 1e-2
pred_period, pred_len, prefixes = 40, 50, ['分开', '不分开']

lstm_layer = nn.LSTM(input_size=vocab_size, hidden_size=num_hiddens)
model = RNNModel(lstm_layer, vocab_size)
train_and_predict_rnn_pytorch(model, num_hiddens, vocab_size, device,
                              corpus_indices, idx_to_char, char_to_idx,
                              num_epochs, num_steps, lr, clipping_theta,
                              batch_size, pred_period, pred_len, prefixes)
""" https://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html """ import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim torch.manual_seed(1) lstm = nn.LSTM(3, 4) # Input dim is 3, output dim is 3 inputs = [torch.randn(1, 3) for _ in range(5)] # make a sequence of length 5 # initialize the hidden state. hidden = (torch.randn(1, 1, 4), torch.randn(1, 1, 4)) for i in inputs: # Step through the sequence one element at a time. # after each step, hidden contains the hidden state. out, hidden = lstm(i.view(1, 1, -1), hidden) print(out) print(hidden) print('@') # lstm = nn.LSTM(3, 4) # Input dim is 3, output dim is 3 # alternatively, we can do the entire sequence all at once. # the first value returned by LSTM is all of the hidden states throughout # the sequence. the second is just the most recent hidden state # (compare the last slice of "out" with "hidden" below, they are the same) # The reason for this is that: # "out" will give you access to all hidden states in the sequence
def __init__(self, args):
    super(LSTM, self).__init__()
    self.args = args
    self.lstm = nn.LSTM(args.embed_size, args.lstm_hidden_size,
                        batch_first=True, dropout=args.drop_out_lstm,
                        num_layers=args.lstm_num_layers,
                        bidirectional=args.bidirectional)
def __init__(self, input_dim, hidden_dim, output_dim):
    super(LSTMTagger, self).__init__()
    self.lstm = nn.LSTM(input_dim, hidden_dim)
    self.hidden2tag = nn.Linear(hidden_dim, output_dim)
def __init__(self, in_dim, hidden_dim, n_layer, n_class):
    super(RNN, self).__init__()
    self.n_layer = n_layer
    self.hidden = hidden_dim
    self.lstm = nn.LSTM(in_dim, hidden_dim, n_layer, batch_first=True)
    self.classifier = nn.Linear(hidden_dim, n_class)
def __init__(self, hps, *_):
    super(BiLSTMTagger, self).__init__()

    batch_size = hps['batch_size']
    lstm_hidden_dim = hps['sent_hdim']
    sent_embedding_dim = 3 * hps['sent_edim'] + 1 * hps['pos_edim']
    # for the region mark
    sent_embedding_dim += 1
    role_embedding_dim = hps['role_edim']
    frame_embedding_dim = role_embedding_dim
    vocab_size = hps['vword']

    self.tagset_size = hps['vbio']
    self.pos_size = hps['vpos']
    self.dep_size = hps['vdep']
    self.frameset_size = hps['vframe']
    self.num_layers = hps['rec_layers']
    self.batch_size = batch_size
    self.hidden_dim = lstm_hidden_dim
    self.word_emb_dim = hps['sent_edim']
    self.specific_dep_size = hps['svdep']

    self.word_embeddings = nn.Embedding(vocab_size, hps['sent_edim'])
    self.pos_embeddings = nn.Embedding(self.pos_size, hps['pos_edim'])
    self.dep_embeddings = nn.Embedding(self.dep_size, hps['pos_edim'])
    self.p_lemma_embeddings = nn.Embedding(self.frameset_size, hps['sent_edim'])
    # self.lr_dep_embeddings = nn.Embedding(self.lr_dep_size, hps[])

    self.word_fixed_embeddings = nn.Embedding(vocab_size, hps['sent_edim'])
    self.word_fixed_embeddings.weight.data.copy_(torch.from_numpy(hps['word_embeddings']))

    self.role_embeddings = nn.Embedding(self.tagset_size, role_embedding_dim)
    self.frame_embeddings = nn.Embedding(self.frameset_size, frame_embedding_dim)

    self.hidden2tag_M = nn.Linear(100, 200)
    self.hidden2tag_H = nn.Linear(100, 200)
    self.MLP = nn.Linear(200, self.dep_size)
    self.hidden2tag_spe = nn.Linear(100, 100)
    self.MLP_spe = nn.Linear(100, 4)

    self.word_emb_dropout = nn.Dropout(p=0.3)
    self.hidden_state_dropout = nn.Dropout(p=0.3)
    self.label_dropout = nn.Dropout(p=0.5)
    self.link_dropout = nn.Dropout(p=0.5)

    self.Label_hidden2hidden = nn.Linear(100, 20)

    # The LSTM takes word embeddings as inputs, and outputs hidden states
    # with dimensionality hidden_dim.
    self.num_layers = 2
    self.BiLSTM_share = nn.LSTM(input_size=sent_embedding_dim, hidden_size=lstm_hidden_dim,
                                batch_first=True, bidirectional=True,
                                num_layers=self.num_layers)
    init.orthogonal_(self.BiLSTM_share.all_weights[0][0])
    init.orthogonal_(self.BiLSTM_share.all_weights[0][1])
    init.orthogonal_(self.BiLSTM_share.all_weights[1][0])
    init.orthogonal_(self.BiLSTM_share.all_weights[1][1])

    self.num_layers = 1
    self.BiLSTM_Spe = nn.LSTM(input_size=lstm_hidden_dim * 2, hidden_size=lstm_hidden_dim,
                              batch_first=True, bidirectional=True,
                              num_layers=self.num_layers)
    init.orthogonal_(self.BiLSTM_Spe.all_weights[0][0])
    init.orthogonal_(self.BiLSTM_Spe.all_weights[0][1])
    init.orthogonal_(self.BiLSTM_Spe.all_weights[1][0])
    init.orthogonal_(self.BiLSTM_Spe.all_weights[1][1])

    self.num_layers = 1
    self.BiLSTM_SRL = nn.LSTM(input_size=lstm_hidden_dim * 2 + 20, hidden_size=lstm_hidden_dim,
                              batch_first=True, bidirectional=True,
                              num_layers=self.num_layers)
    init.orthogonal_(self.BiLSTM_SRL.all_weights[0][0])
    init.orthogonal_(self.BiLSTM_SRL.all_weights[0][1])
    init.orthogonal_(self.BiLSTM_SRL.all_weights[1][0])
    init.orthogonal_(self.BiLSTM_SRL.all_weights[1][1])

    # non-linear map to role embedding
    self.role_map = nn.Linear(in_features=role_embedding_dim * 2,
                              out_features=self.hidden_dim * 4)

    # Init hidden state
    self.hidden = self.init_hidden_share()
    self.hidden_2 = self.init_hidden_spe()
    self.hidden_3 = self.init_hidden_spe()
    self.hidden_4 = self.init_hidden_spe()
def create_lstm():
    return [nn.LSTM(*get_size(l), 1) for l in range(n_layers)]
def __init__(self, n_char, char_dim, char_hidden):
    super(char_lstm, self).__init__()
    self.char_embed = nn.Embedding(n_char, char_dim)
    self.lstm = nn.LSTM(char_dim, char_hidden)
def __init__(self, vocab_size, embed_size, hidden_size):
    super().__init__()
    self.embed = nn.Embedding(vocab_size, embed_size)
    self.lstm = nn.LSTM(embed_size, hidden_size, num_layers=2, bidirectional=True)
    self.linear = nn.Linear(hidden_size * 2, 5)
    self.softmax = nn.Softmax(dim=2)
def __init__(self, options):
    super(AileverModel, self).__init__()
    self.lstm = nn.LSTM(2, options.dataset.sequence, 3, batch_first=True)
    self.linear = nn.Linear(options.dataset.sequence, options.dataset.prediction)
def __init__(self, nIn, nHidden, nOut):
    super(BidirectionalLSTM, self).__init__()
    self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
    self.embedding = nn.Linear(nHidden * 2, nOut)
def __init__(self, vocab_size, embedding_dim, hidden_dim):
    super(PoetryModel, self).__init__()
    self.hidden_dim = hidden_dim
    self.embeddings = nn.Embedding(vocab_size, embedding_dim)
    self.lstm = nn.LSTM(embedding_dim, self.hidden_dim, num_layers=2)
    self.linear1 = nn.Linear(self.hidden_dim, vocab_size)
def __init__(self, hidden_size, n_layers=1):
    super(LSTM_Encoder, self).__init__()
    self.n_layers = n_layers
    self.hidden_size = hidden_size
    self.lstm = nn.LSTM(hidden_size, hidden_size)
                                      embedding_dim=50).cuda()
pos_u_embeddings = torch.nn.Embedding(num_embeddings=len(posUni) + 3, embedding_dim=10).cuda()
pos_p_embeddings = torch.nn.Embedding(num_embeddings=len(posFine) + 3, embedding_dim=10).cuda()
state_embeddings = torch.nn.Embedding(num_embeddings=len(itos_state), embedding_dim=50).cuda()

#baseline = torch.nn.Embedding(num_embeddings = vocab_size+3, embedding_dim=1).cuda()
#baseline_upos = torch.nn.Embedding(num_embeddings = len(posUni)+3, embedding_dim=1).cuda()
#baseline_ppos = torch.nn.Embedding(num_embeddings = len(posFine)+3, embedding_dim=1).cuda()

dropout = nn.Dropout(0.3).cuda()

rnn = nn.LSTM(70, 128, 1).cuda()
for name, param in rnn.named_parameters():
    if 'bias' in name:
        nn.init.constant_(param, 0.0)
    elif 'weight' in name:
        nn.init.xavier_normal_(param)

rnn_state = nn.LSTM(50, 128, 1).cuda()
for name, param in rnn_state.named_parameters():
    if 'bias' in name:
        nn.init.constant_(param, 0.0)
    elif 'weight' in name:
        nn.init.xavier_normal_(param)

vocab_size_states = len(itos_state)
def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super(simpleLSTM, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
    self.fc = nn.Linear(hidden_size, num_classes)
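A minimal forward pass to pair with the simpleLSTM constructor above; this is a sketch of the usual batch_first classification pattern, not code taken from the original source:

def forward(self, x):
    # x: (batch, seq_len, input_size) because batch_first=True
    h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
    c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
    out, _ = self.lstm(x, (h0, c0))  # out: (batch, seq_len, hidden_size)
    return self.fc(out[:, -1, :])    # classify from the last time step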
def __init__(self, inputdim, hid_dim, layers):
    super(Decoder, self).__init__()
    self.hidden = hid_dim
    self.n_layers = layers
    self.lstm = nn.LSTM(inputdim, hid_dim, layers)
def __init__(self, embed_size, hidden_size, vocab_size, num_layers=1):
    super(DecoderRNN, self).__init__()
    self.embed = nn.Embedding(vocab_size, embed_size)
    self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
    self.linear = nn.Linear(hidden_size, vocab_size)
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
    """ Init NMT Model.

    @param embed_size (int): Embedding size (dimensionality)
    @param hidden_size (int): Hidden Size, the size of hidden states (dimensionality)
    @param vocab (Vocab): Vocabulary object containing src and tgt languages
                          See vocab.py for documentation.
    @param dropout_rate (float): Dropout probability, for attention
    """
    super(NMT, self).__init__()
    self.model_embeddings = ModelEmbeddings(embed_size, vocab)
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab

    # default values
    self.encoder = None
    self.decoder = None
    self.h_projection = None
    self.c_projection = None
    self.att_projection = None
    self.combined_output_projection = None
    self.target_vocab_projection = None
    self.dropout = None
    # For sanity check only, not relevant to implementation
    self.gen_sanity_check = False
    self.counter = 0

    ### YOUR CODE HERE (~8 Lines)
    ### TODO - Initialize the following variables:
    ###     self.encoder (Bidirectional LSTM with bias)
    ###     self.decoder (LSTM Cell with bias)
    ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
    ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
    ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
    ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
    ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
    ###     self.dropout (Dropout Layer)
    ###
    ### Use the following docs to properly initialize these variables:
    ###     LSTM: https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
    ###     LSTM Cell: https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
    ###     Linear Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
    ###     Dropout Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

    self.encoder = nn.LSTM(input_size=embed_size, hidden_size=self.hidden_size,
                           bias=True, bidirectional=True)
    self.decoder = nn.LSTMCell(input_size=embed_size + hidden_size,
                               hidden_size=self.hidden_size, bias=True)
    self.h_projection = nn.Linear(in_features=2 * self.hidden_size,
                                  out_features=self.hidden_size, bias=False)
    self.c_projection = nn.Linear(in_features=2 * self.hidden_size,
                                  out_features=self.hidden_size, bias=False)
    self.att_projection = nn.Linear(in_features=2 * self.hidden_size,
                                    out_features=self.hidden_size, bias=False)
    self.combined_output_projection = nn.Linear(in_features=3 * self.hidden_size,
                                                out_features=self.hidden_size, bias=False)
    self.target_vocab_projection = nn.Linear(in_features=self.hidden_size,
                                             out_features=len(self.vocab.tgt), bias=False)
    self.dropout = nn.Dropout(p=self.dropout_rate)
def __init__(self, x_tasks, model_config):
    self.criterion = self.avg_sharpe_ratio
    self.X_train_tasks = x_tasks
    self.tsteps = model_config["tsteps"]
    self.tasks_tsteps = model_config["tasks_tsteps"]
    self.batch_size = model_config["batch_size"]
    self.seq_len = model_config["seq_len"]
    self.device = model_config["device"]
    self.export_path = model_config["export_path"]
    self.export_label = model_config["export_label"]

    self.opt_lr = model_config["global_lstm_lstm"]["opt_lr"]
    self.amsgrad = model_config["global_lstm_lstm"]["amsgrad"]
    self.export_model = model_config["global_lstm_lstm"]["export_model"]
    self.in_n_layers = model_config["global_lstm_lstm"]["in_n_layers"]
    self.out_n_layers = model_config["global_lstm_lstm"]["out_n_layers"]
    self.out_nhi = model_config["global_lstm_lstm"]["out_nhi"]
    self.dropout = model_config["global_lstm_lstm"]["drop_rate"]
    self.in_transfer_dim = model_config["global_lstm_lstm"]["in_transfer_dim"]
    self.out_transfer_dim = model_config["global_lstm_lstm"]["out_transfer_dim"]
    self.transfer_layers = model_config["global_lstm_lstm"]["n_layers"]
    self.dropout_transfer = model_config["global_lstm_lstm"]["drop_rate_transfer"]

    self.mtl_list = self.X_train_tasks.keys()
    (
        self.sub_mtl_list,
        self.transfer_lstm_dict,
        self.model_in_dict,
        self.model_out_dict,
        self.model_lin_dict,
        self.opt_dict,
        self.signal_layer,
        self.losses,
    ) = ({}, {}, {}, {}, {}, {}, {}, {})

    self.global_transfer_lstm = (
        nn.LSTM(
            self.in_transfer_dim,
            self.out_transfer_dim,
            self.transfer_layers,
            batch_first=True,
            dropout=self.dropout_transfer,
        )
        .double()
        .to(self.device)
    )

    for tk in self.mtl_list:
        (
            self.model_in_dict[tk],
            self.model_out_dict[tk],
            self.model_lin_dict[tk],
            self.signal_layer[tk],
            self.opt_dict[tk],
            self.losses[tk],
        ) = ({}, {}, {}, {}, {}, {})
        self.sub_mtl_list[tk] = self.X_train_tasks[tk].keys()

        for sub_tk in self.sub_mtl_list[tk]:
            self.losses[tk][sub_tk] = []
            nin = self.X_train_tasks[tk][sub_tk].shape[1]
            nout = self.X_train_tasks[tk][sub_tk].shape[1]
            in_n_layers, out_n_layers, out_nhi = (
                self.in_n_layers,
                self.out_n_layers,
                self.out_nhi,
            )

            self.model_in_dict[tk][sub_tk] = (
                nn.LSTM(nin, self.in_transfer_dim, in_n_layers,
                        batch_first=True, dropout=self.dropout)
                .double()
                .to(self.device)
            )
            self.model_out_dict[tk][sub_tk] = (
                nn.LSTM(self.out_transfer_dim, out_nhi, out_n_layers,
                        batch_first=True, dropout=self.dropout)
                .double()
                .to(self.device)
            )
            self.model_lin_dict[tk][sub_tk] = (
                nn.Linear(out_nhi, nout).double().to(self.device)
            )
            self.signal_layer[tk][sub_tk] = nn.Tanh().to(self.device)

            self.opt_dict[tk][sub_tk] = torch.optim.Adam(
                list(self.model_in_dict[tk][sub_tk].parameters())
                + list(self.model_out_dict[tk][sub_tk].parameters())
                + list(self.model_lin_dict[tk][sub_tk].parameters())
                + list(self.global_transfer_lstm.parameters())
                + list(self.signal_layer[tk][sub_tk].parameters()),
                lr=self.opt_lr,
                amsgrad=self.amsgrad,
            )

            print(
                tk,
                sub_tk,
                self.model_in_dict[tk][sub_tk],
                self.model_out_dict[tk][sub_tk],
                self.model_lin_dict[tk][sub_tk],
                self.global_transfer_lstm,
                self.signal_layer[tk][sub_tk],
                self.opt_dict[tk][sub_tk],
            )
def __init__(self, embed_dim, vocab_len, output_len, lstm_layers):
    super(Decoder1, self).__init__()
    self.embedding = nn.Embedding(vocab_len, embed_dim)
    self.lstm = nn.LSTM(embed_dim, output_len, lstm_layers, bidirectional=True)
    self.final_layer = nn.Linear(2 * output_len, vocab_len)
def __init__(
    self,
    embed_size,
    hidden_size,
    vocab_size=None,
    embeddings=None,
    lstm_reduction="max",
    freeze=False,
    skip_embeddings=False,
    bidirectional=True,
    verbose=True,
    seed=123,
    lstm_num_layers=1,
    **kwargs,
):
    """
    Args:
        embed_size: The (integer) size of the input at each time step; usually
            this is the size of the embeddings
        hidden_size: The size of the hidden layer in the LSTM
        vocab_size: The size of the vocabulary of the embeddings
            If embeddings=None, this helps to set the size of the randomly
            initialized embeddings
            If embeddings!=None, this is used to double check that the provided
            embeddings have the intended size
        embeddings: An optional embedding Tensor
        lstm_reduction: One of ['mean', 'max', 'last', 'attention'] denoting
            what to return as the output of the LSTMLayer
        freeze: If False, allow the embeddings to be updated
        skip_embeddings: If True, directly accept X without using embeddings
    """
    super().__init__()
    self.lstm_reduction = lstm_reduction
    self.output_dim = hidden_size * 2 if bidirectional else hidden_size
    self.verbose = verbose
    self.skip_embeddings = skip_embeddings

    if not self.skip_embeddings:
        # Load provided embeddings or randomly initialize new ones
        if embeddings is None:
            # Note: Need to set seed here for deterministic init
            if seed is not None:
                self._set_seed(seed)
            self.embeddings = nn.Embedding(vocab_size, embed_size)
            if self.verbose:
                print(f"Using randomly initialized embeddings.")
        else:
            self.embeddings = self._load_pretrained(embeddings)
            if self.verbose:
                print(f"Using pretrained embeddings.")

        # Freeze or not
        self.embeddings.weight.requires_grad = not freeze

    if self.verbose:
        if self.skip_embeddings:
            print("Skipping embeddings and using direct input.")
        else:
            print(
                f"Embeddings shape = ({self.embeddings.num_embeddings}, "
                f"{self.embeddings.embedding_dim})"
            )
            print(f"The embeddings are {'' if freeze else 'NOT '}FROZEN")
            print(f"Using lstm_reduction = '{lstm_reduction}'")

    # Create lstm core
    # NOTE: We only pass explicitly-named kwargs here; can always add more!
    self.lstm = nn.LSTM(
        embed_size,
        hidden_size,
        num_layers=lstm_num_layers,
        batch_first=True,
        bidirectional=bidirectional,
    )

    if lstm_reduction == "attention":
        att_size = hidden_size * (self.lstm.bidirectional + 1)
        att_param = nn.Parameter(torch.FloatTensor(att_size, 1))
        nn.init.xavier_normal_(att_param)
        self.attention_param = att_param
def __init__(self, embed_dim, vocab_len, output_len, lstm_layers):
    super(Encoder1, self).__init__()
    self.output_len = output_len
    self.embedding = nn.Embedding(vocab_len, embed_dim)
    self.lstm = nn.LSTM(embed_dim, output_len, lstm_layers, bidirectional=True)
def LSTM(input_size, hidden_size, **kwargs):
    m = nn.LSTM(input_size, hidden_size, **kwargs)
    for name, param in m.named_parameters():
        if 'weight' in name or 'bias' in name:
            param.data.uniform_(-0.1, 0.1)
    return m
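A hedged usage sketch for the factory above; the encoder name and sizes are illustrative, not from the source:

encoder = LSTM(input_size=256, hidden_size=512, num_layers=2, bidirectional=True)
x = torch.randn(35, 8, 256)        # (seq_len, batch, input_size)
outputs, (h_n, c_n) = encoder(x)   # outputs: (35, 8, 1024) because the LSTM is bidirectional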
def __init__(self, hidden_size, n_node):
    super(DynamicScore, self).__init__()
    self.hidden_size = hidden_size
    self.state_lstm = nn.LSTM(self.hidden_size, self.hidden_size)
    self.residual_lstm = nn.LSTM(self.hidden_size, self.hidden_size)
    self.embedding = nn.Embedding(n_node, self.hidden_size)
def define_module(self):
    self.embed = nn.Embedding(self.d_vocab, self.d_text_feature)
    self.rnn_cell = nn.LSTM(self.d_text_feature, self.d_gen_hidden, self.d_gen_layers,
                            dropout=(0 if self.d_gen_layers == 1 else self.gen_dropout),
                            batch_first=True)
    self.drop = nn.Dropout(self.gen_dropout)
    self.fc_logits = nn.Linear(self.d_gen_hidden, self.d_vocab)
    self.log_soft = nn.LogSoftmax(dim=-1)
def __init__(self, ninputs, d_fmaps, kwidth, activation, audio_samples,
             bnorm=True, pooling=4, SND=False, pool_type='none',
             dropout=0, Genc=None, pool_size=8, num_spks=None):
    super(Discriminator, self).__init__(name='Discriminator')
    if Genc is None:
        if not isinstance(activation, list):
            activation = [activation] * len(d_fmaps)
        self.disc = nn.ModuleList()
        for d_i, d_fmap in enumerate(d_fmaps):
            act = activation[d_i]
            if d_i == 0:
                inp = ninputs
            else:
                inp = d_fmaps[d_i - 1]
            self.disc.append(DiscBlock(inp, kwidth, d_fmap, act, pooling=4))
    else:
        print('Assigning Genc to D')
        # Genc and Denc MUST be same dimensions
        self.disc = Genc

    self.pool_type = pool_type
    if pool_type == 'none':
        # resize tensor to fit into FC directly
        pool_size *= d_fmaps[-1]
        if isinstance(act, nn.LeakyReLU):
            '''
            Before feeding the audio to the FC layer module, it is downscaled by 4.
            We adapt the FC to the length of the wav. For example, working with
            1 s of audio at 16000 Hz, we start with 16000 samples and after the
            scaling obtain 16000 / 4 = 4000 -> 4096.
            '''
            # define powers of 2 up to 16384, which fits 4 seconds of audio
            input_dim = [2 ** i for i in range(0, 15)]
            # THIS IS HARDCODED! We have seen we need this input dimension
            # when working with 0.7 s.
            input_dim.append(3072)
            num_neurons = min(input_dim, key=lambda x: abs(x - audio_samples / 4))
            self.fc = nn.Sequential(nn.Linear(num_neurons, 256),
                                    nn.ReLU(inplace=True),
                                    nn.Linear(256, 128),
                                    nn.ReLU(inplace=True),
                                    nn.Linear(128, 128))
        else:
            self.fc = nn.Sequential(nn.Linear(pool_size, 256),
                                    nn.PReLU(256),
                                    nn.Linear(256, 128),
                                    nn.PReLU(128),
                                    nn.Linear(128, 128))
    elif pool_type == 'rnn':
        if bnorm:
            self.ln = LayerNorm()
        pool_size = 128
        self.rnn = nn.LSTM(d_fmaps[-1], pool_size, batch_first=True,
                           bidirectional=True)
        # account for the bidirectional output size
        pool_size *= 2
        self.fc = nn.Linear(pool_size, 1)
    elif pool_type == 'conv':
        self.pool_conv = nn.Conv1d(d_fmaps[-1], 1, 1)
        self.fc = nn.Linear(pool_size, 1)
    else:
        raise TypeError('Unrecognized pool type: ', pool_type)

    outs = 1
    if num_spks is not None:
        outs += num_spks