def __init__(self, num_words, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, activ=ACTIV):
    # Assign to self so the RNN is registered as a submodule instead of being
    # discarded as a local variable.
    self.my_rnn = RNN(
        input_size=num_words,
        hidden_size=hidden_size,
        num_layers=num_layers,
        nonlinearity=activ,
    )
def build_encoder(args, vocab):
    """Builds the encoder from the given args and vocabulary."""
    input_size = len(vocab.source)
    bidirectional = args.encoder_mode == 'bigru'
    dropout = args.rnn_dropout if args.encoder_layers != 1 else 0
    if args.encoder_mode == 'rnn':
        rnn_layer = RNN(args.hidden_size, args.hidden_size,
                        num_layers=args.encoder_layers,
                        dropout=dropout,
                        batch_first=True)
    elif args.encoder_mode in ('gru', 'bigru'):
        rnn_layer = GRU(args.hidden_size, args.hidden_size,
                        num_layers=args.encoder_layers,
                        dropout=dropout,
                        bidirectional=bidirectional,
                        batch_first=True)
    else:
        raise ValueError('Invalid encoder mode: %s' % args.encoder_mode)
    return Encoder(input_size, args.hidden_size, rnn_layer, bidirectional=bidirectional)
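# A minimal, self-contained sketch (not from the project above) of why the 'bigru' branch
# needs the bidirectional flag: a bidirectional GRU doubles the feature dimension of its
# output, which the downstream Encoder has to account for.
import torch
from torch.nn import GRU

gru = GRU(16, 16, num_layers=2, batch_first=True, bidirectional=True)
x = torch.rand(4, 10, 16)   # (batch, seq_len, hidden_size)
out, h_n = gru(x)
print(out.shape)            # torch.Size([4, 10, 32]) -> 2 * hidden_size
print(h_n.shape)            # torch.Size([4, 4, 16])  -> (num_layers * num_directions, batch, hidden)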
def test_dynamic_rnn(sequence_embedding):
    sequence, mask = sequence_embedding
    hidden_size = 4
    batch_size = 3
    sequence_len = 3

    rnn = RNN(input_size=2, hidden_size=4, num_layers=2, batch_first=True, bidirectional=True)
    dynamic_rnn = DynamicRnn(rnn=rnn)
    rnn_output: DynamicRnnOutput = dynamic_rnn(sequence=sequence, mask=mask)
    logging.info(json2str(rnn_output))

    last_layer_h_n: torch.Tensor = rnn_output.last_layer_h_n
    last_layer_h_n_expect_size = (batch_size, hidden_size * 2)
    ASSERT.assertEqual(last_layer_h_n_expect_size, last_layer_h_n.size())

    # A plain RNN has no cell state, so c_n must be None.
    ASSERT.assertTrue(rnn_output.last_layer_c_n is None)

    sequence_encoding_expect_size = (batch_size, sequence_len, hidden_size * 2)
    sequence_encoding = rnn_output.output
    ASSERT.assertEqual(sequence_encoding_expect_size, sequence_encoding.size())
def setUp(self):
    super().setUp()
    self.lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    self.rnn = RNN(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    self.encoder_base = _EncoderBase(stateful=True)

    tensor = torch.rand([5, 7, 3])
    tensor[1, 6:, :] = 0
    tensor[3, 2:, :] = 0
    self.tensor = tensor

    mask = torch.ones(5, 7).bool()
    mask[1, 6:] = False
    mask[2, :] = False  # <= completely masked
    mask[3, 2:] = False
    mask[4, :] = False  # <= completely masked
    self.mask = mask

    self.batch_size = 5
    self.num_valid = 3
    sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
    _, _, restoration_indices, sorting_indices = sort_batch_by_length(tensor, sequence_lengths)
    self.sorting_indices = sorting_indices
    self.restoration_indices = restoration_indices
def __init__(self, vocab_size, emb_dim, hidden_size, weight, kqv_dim,
             rnn_type='gru', bidirectional=False, batch_first=False, padding_idx=None):
    super(ZXOTextEncoder, self).__init__()
    self.embed = nn.Embedding(vocab_size, embedding_dim=emb_dim, _weight=weight)
    if rnn_type == 'rnn':
        self.rnn = RNN(emb_dim, hidden_size, bidirectional=bidirectional,
                       num_layers=6, batch_first=batch_first)
    elif rnn_type == 'gru':
        self.rnn = GRU(emb_dim, hidden_size, bidirectional=bidirectional,
                       num_layers=6, batch_first=batch_first)
    elif rnn_type == 'lstm':
        self.rnn = LSTM(emb_dim, hidden_size, bidirectional=bidirectional,
                        num_layers=6, batch_first=batch_first)
    self.attn = Attn(emb_dim, kqv_dim)
    self.linear = nn.Linear(emb_dim, 2)
def forward(self, x):
    """Forward pass."""
    outs = []
    for l in self.conv1s:
        out = pad_layer(x, l)
        outs.append(out)
    out = torch.cat(outs + [x], dim=1)
    out = F.leaky_relu(out, negative_slope=self.ns)

    # conv layers
    out = self.conv_block(out, [self.conv2], [self.ins_norm1, self.drop1], res=False)
    emb2 = self.emb2(out)
    out = self.conv_block(out, [self.conv3, self.conv4], [self.ins_norm2, self.drop2])
    emb4 = self.emb4(out)
    out = self.conv_block(out, [self.conv5, self.conv6], [self.ins_norm3, self.drop3])
    emb6 = self.emb6(out)
    out = self.conv_block(out, [self.conv7, self.conv8], [self.ins_norm4, self.drop4])
    emb8 = self.emb8(out)

    # dense layers
    out = self.dense_block(out, [self.dense1, self.dense2], [self.ins_norm5, self.drop5], res=True)
    embd2 = self.embd2(out)
    out = self.dense_block(out, [self.dense3, self.dense4], [self.ins_norm6, self.drop6], res=True)
    embd4 = self.embd4(out)

    # rnn layer (RNN here appears to be the project's functional helper, not the torch.nn.RNN constructor)
    out_rnn = RNN(out, self.RNN)
    embrnn = self.embrnn(out)
    out = torch.cat([out, out_rnn], dim=1)

    out = linear(out, self.linear)
    out = F.leaky_relu(out, negative_slope=self.ns)
    return (out, (emb2, emb4, emb6, emb8, embd2, embd4, embrnn))
def __init__(self, idim: int, hdim: int, nlayers: int = 1, enc_type: str = "blstm"):
    """A single RNN layer; uses the packing/padding utils from PyTorch.

    :param int idim: input size of the RNN
    :param int hdim: hidden size of the RNN
    :param int nlayers: number of RNN layers
    :param str enc_type: type of encoder (RNN/GRU/LSTM, prefixed with 'b' for bidirectional)
    """
    super(RNNLayer, self).__init__()
    bidir = enc_type.startswith('b')
    enc_type = enc_type[1:] if bidir else enc_type
    if enc_type == "rnn":
        self.elayer = RNN(idim, hdim, nlayers, batch_first=True, bidirectional=bidir)
    elif enc_type == "lstm":
        self.elayer = LSTM(idim, hdim, nlayers, batch_first=True, bidirectional=bidir)
    else:
        self.elayer = GRU(idim, hdim, nlayers, batch_first=True, bidirectional=bidir)
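# A minimal sketch (standalone tensors; not the project's RNNLayer.forward) of the
# packing/padding utilities the docstring above refers to: variable-length sequences are
# packed before the recurrent layer and unpadded afterwards.
import torch
from torch.nn import GRU
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

x = torch.rand(3, 5, 8)                # (batch, max_len, idim), zero-padded
lengths = torch.tensor([5, 3, 2])      # true lengths, sorted in decreasing order
rnn = GRU(8, 16, num_layers=1, batch_first=True, bidirectional=True)

packed = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=True)
packed_out, h_n = rnn(packed)
out, out_lengths = pad_packed_sequence(packed_out, batch_first=True)
print(out.shape)                       # torch.Size([3, 5, 32])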
def __init__(self):
    super(_TolstoiRNNVersion, self).__init__()
    self.lstm = RNN(
        input_size=self.hidden_dim,
        hidden_size=self.hidden_dim,
        num_layers=self.num_layers,
        dropout=0.36,
        batch_first=True,
    )
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self._args = args
    self._kwargs = kwargs
    RNN = get_rnn_impl("CPU", self.rnn_type, kwargs["layer_norm"])
    self.rnns = nn.ModuleList()
    for i, o in zip(self._is, self._os):
        # r = RNN(i, o, batch_first=self.batch_first, zoneout=ZONEOUT)
        r = RNN(i, o, batch_first=self.batch_first)
        self.rnns.append(r)
def __init__(self, input_size, args):
    super(RNN_model_linear, self).__init__()
    self.RNN = RNN(input_size=input_size,
                   hidden_size=args.hidden_units,
                   num_layers=args.lstm_layer)
    bi_num = 2 if 'bi' in args.model else 1
    self.classifier = nn.Linear(args.hidden_units * bi_num, args.n_label)
    self.args = args
def __init__(self, skill_size, rnn_h_size, rnn_layer_size, dropout_rate):
    """
    :param skill_size: int, number of skills (knowledge concepts)
    :param rnn_h_size: int, number of RNN hidden units
    :param rnn_layer_size: int, number of RNN layers
    :param dropout_rate: float
    """
    super(DktNet, self).__init__()
    self.rnn = RNN(skill_size * 2, rnn_h_size, rnn_layer_size)
    self.dropout = Dropout(p=dropout_rate)
    self.linear = Linear(rnn_h_size, skill_size)
    self.sigmoid = Sigmoid()
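# A hedged usage sketch (DktNet.forward is not shown above, so this only exercises the
# submodules directly). torch.nn.RNN defaults to batch_first=False, so the recurrent layer
# expects input shaped (seq_len, batch, skill_size * 2), i.e. one-hot (skill, correctness) pairs.
import torch

skill_size, rnn_h_size, rnn_layer_size = 10, 32, 1
net = DktNet(skill_size, rnn_h_size, rnn_layer_size, dropout_rate=0.2)

x = torch.rand(50, 4, skill_size * 2)              # (seq_len, batch, skill_size * 2)
h_all, h_last = net.rnn(x)
y = net.sigmoid(net.linear(net.dropout(h_all)))    # (seq_len, batch, skill_size)
print(y.shape)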
def __init__(self, hidden_size, *args, **kwargs):
    super().__init__()
    self.module = RNN(hidden_size=hidden_size, *args, **kwargs, nonlinearity='relu')

    # Terrible temporary solution to an issue regarding compacting weights re: CUDNN RNN
    # I'm not sure what is going on here, this is what weight_drop does so I stick to it
    self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition

    # We need to register it in this module to make it work with weight dropout
    w_hh = FloatTensor(hidden_size).type_as(getattr(self.module, 'weight_hh_l0').data)
    w_hh.uniform_(-1, 1)
    getattr(self.module, 'bias_ih_l0').data.fill_(0)
    getattr(self.module, 'bias_hh_l0').data.fill_(0)
    self.register_parameter(name='weight_hh_l0', param=Parameter(w_hh))
    del self.module._parameters['weight_hh_l0']
def build_rnn_layer(args):
    multiplier = 1
    if args.attention_mode != 'none' and args.encoder_mode == 'bigru':
        multiplier = 3
    elif args.attention_mode != 'none' and args.encoder_mode != 'bigru':
        multiplier = 2
    dropout = args.rnn_dropout if args.decoder_layers != 1 else 0
    if args.decoder_mode == 'rnn':
        return RNN(multiplier * args.hidden_size, args.hidden_size,
                   num_layers=args.decoder_layers,
                   dropout=dropout,
                   batch_first=True)
    elif args.decoder_mode == 'gru':
        return GRU(multiplier * args.hidden_size, args.hidden_size,
                   num_layers=args.decoder_layers,
                   dropout=dropout,
                   batch_first=True)
    else:
        raise ValueError('Invalid decoder mode: %s' % args.decoder_mode)
def forward(self, x, c):
    """Forward pass."""
    # emb = self.emb(c)
    (emb2, emb4, emb6, emb8, embd2, embd4, embrnn) = c

    # conv layers
    out = self.conv_block(x, [self.conv1, self.conv2], self.ins_norm1, embrnn, res=True)
    out = self.conv_block(out, [self.conv3, self.conv4], self.ins_norm2, embd4, res=True)
    out = self.conv_block(out, [self.conv5, self.conv6], self.ins_norm3, embd2, res=True)

    # dense layers
    out = self.dense_block(out, emb8, [self.dense1, self.dense2], self.ins_norm4, res=True)
    out = self.dense_block(out, emb6, [self.dense3, self.dense4], self.ins_norm5, res=True)
    out_appended = append_emb(emb4, out.size(2), out)

    # rnn layer
    out_rnn = RNN(out_appended, self.RNN)
    out = torch.cat([out, out_rnn], dim=1)
    out = append_emb(emb2, out.size(2), out)

    out = linear(out, self.dense5)
    out = F.leaky_relu(out, negative_slope=self.ns)
    out = linear(out, self.linear)
    out = out.exp()
    return out
from backpack.custom_module.permute import Permute
from backpack.custom_module.reduce_tuple import ReduceTuple

SHARED_SETTINGS = SECONDORDER_SETTINGS
LOCAL_SETTINGS = []

##################################################################
#                          RNN settings                          #
##################################################################
LOCAL_SETTINGS += [
    # RNN settings
    {
        "input_fn": lambda: rand(8, 5, 6),
        "module_fn": lambda: Sequential(
            RNN(input_size=6, hidden_size=3, batch_first=True),
            ReduceTuple(index=0),
            Permute(0, 2, 1),
            Flatten(),
        ),
        "loss_function_fn": lambda: MSELoss(),
        "target_fn": lambda: regression_targets((8, 3 * 5)),
    },
    {
        "input_fn": lambda: rand(4, 3, 5),
        "module_fn": lambda: Sequential(
            LSTM(input_size=5, hidden_size=4, batch_first=True),
def __init__(self, args, emb_index, bidirec, initial_mean_value, overal_maxlen=0):
    super(REGRESSION, self).__init__()
    self.dropout_W = 0.5  # default=0.5
    self.dropout_U = 0.1  # default=0.1
    self.args = args
    cnn_border_mode = 'same'

    if initial_mean_value.ndim == 0:
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)

    if args.recurrent_unit == 'lstm':
        from torch.nn import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from torch.nn import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from torch.nn import RNN

    self.embed = Embedding(args.vocab_size, args.emb_dim)
    outputdim = args.emb_dim
    if args.cnn_dim > 0:
        self.conv = Conv1DWithMasking(outputdim, args.cnn_dim, args.cnn_window_size,
                                      1, (args.cnn_window_size - 1) // 2)
        outputdim = args.cnn_dim
    if args.rnn_dim > 0:
        self.rnn = RNN(outputdim, args.rnn_dim, num_layers=1, bias=True,
                       dropout=self.dropout_W, batch_first=True, bidirectional=bidirec)
        outputdim = args.rnn_dim
        if bidirec == 1:
            outputdim = args.rnn_dim * 2
    if args.dropout_prob > 0:
        self.dropout = Dropout(args.dropout_prob)
    if args.aggregation == 'mot':
        self.mot = MeanOverTime()
    elif args.aggregation.startswith('att'):
        self.att = Attention(outputdim, op=args.aggregation, activation='tanh', init_stdev=0.01)
    self.linear = Linear(outputdim, num_outputs)
    # if not args.skip_init_bias:
    #     self.linear.bias.data = (torch.log(initial_mean_value) - torch.log(1 - initial_mean_value)).float()

    self.emb_index = emb_index
    if args.emb_path:
        from .w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        self.embed[emb_index].weight.data = emb_reader.get_emb_matrix_given_vocab(
            vocab, model.layers[model.emb_index].get_weights())
        logger.info('  Done')
def __init__(self, input_size, args):
    super(RNN_model, self).__init__()
    self.RNN = RNN(input_size=input_size,
                   hidden_size=args.hidden_units,
                   num_layers=args.lstm_layer)
    self.args = args
# x = x + 10
# Sx_all = torch.cat((Sx_all, x), dim=0)

# Select training data.
Sx_tr, y_tr = Sx_all[subset == 0], y_all[subset == 0]

# Standardize the features (zero mean, unit variance).
mu_tr = Sx_tr.mean(dim=0)
std_tr = Sx_tr.std(dim=0)
Sx_tr = (Sx_tr - mu_tr) / std_tr

# Design the ML model.
num_inputs = Sx_tr.shape[-1]
num_classes = y_tr.cpu().unique().numel()
model = Sequential(Linear(num_inputs, num_classes), LogSoftmax(dim=1))
rnn = RNN(336, 336)
optimizer = Adam(model.parameters())
criterion = NLLLoss()

if use_cuda:
    model = model.cuda()
    criterion = criterion.cuda()

# Number of signals to use in each gradient descent step (batch).
batch_size = 32
# Number of epochs.
num_epochs = 80
# Learning rate for Adam.
lr = 1e-2

# set number of batches
def train_torch(self, X, y_true, batch_size, learning_rate, num_epochs, print_many, verbose):
    self.batch_size = batch_size
    progresses = {int(num_epochs // (100 / i)): i for i in range(1, 101, 1)}
    t0 = counter()
    durations = []

    device = torch.device('cuda:0')
    rnn = RNN(input_size=self.input_dim, hidden_size=self.hidden_dim, num_layers=1,
              nonlinearity='tanh', bias=True, batch_first=False).to(device)
    fc = FCLayer(self.hidden_dim, self.output_size, bias=True).to(device)
    params = [rnn.parameters(), fc.params()]
    optimizer = SGD(chain(*params), lr=learning_rate)

    for epoch in range(num_epochs):
        epoch_loss = 0
        for i in range(self.max_iters):
            x_batch = X[i * self.batch_size:(i + 1) * self.batch_size]
            x_batch = np.array([x_batch[:, step, :] for step in range(self.time_steps)])
            y_true_batch = y_true[i * self.batch_size:(i + 1) * self.batch_size]
            batch_size_local = x_batch.shape[1]

            # convert to pytorch tensors
            y_true_batch = y_true_batch.astype(np.int64)
            y_true_batch = torch.tensor(y_true_batch, requires_grad=False).to(device)
            x_batch = x_batch.astype(np.float32)
            x_batch = torch.tensor(x_batch, requires_grad=True).to(device)

            # forward pass
            h_stack, h_last = rnn.forward(x_batch, hx=None)
            fc_out = fc.forward(h_last)
            log_y_pred = F.log_softmax(input=fc_out, dim=2)
            log_y_pred = log_y_pred.view(batch_size_local, self.output_size)
            loss = F.nll_loss(input=log_y_pred, target=y_true_batch, reduction='mean')

            # backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            epoch_loss += loss.item()
            optimizer.step()

        durations.append(counter() - t0)
        t0 = counter()
        if (print_many and epoch % 100 == 0) or (not print_many and epoch in progresses):
            print(f"after epoch: {epoch}, epoch_losses: {round(epoch_loss / self.max_iters, 3)}")

    # compute the average unconditionally so the return value is always defined
    avg_epoch_time = sum(durations) / len(durations)
    if verbose > 0:
        print("average epoch time:", round(avg_epoch_time, 3))
    return avg_epoch_time
import torch
from torch.nn import RNN, LSTM

rnn = RNN(input_size=4, hidden_size=5, batch_first=True)
inputs = torch.rand(2, 3, 4)           # (batch, seq_len, input_size)
outputs, hn = rnn(inputs)
print(outputs, outputs.shape)          # (2, 3, 5): per-step hidden states
print(hn, hn.shape)                    # (1, 2, 5): final hidden state per layer

lstm = LSTM(input_size=4, hidden_size=6, batch_first=True)
outputs, (hn, cn) = lstm(inputs)
print(outputs, outputs.shape)          # (2, 3, 6)
print(hn, hn.shape)                    # (1, 2, 6)
print(cn, cn.shape)                    # (1, 2, 6): the LSTM also returns a cell state
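# A short follow-on sketch (continuing the snippet above; `rnn`, `lstm` and `inputs` are the
# objects defined there): both modules also accept an explicit initial state shaped
# (num_layers * num_directions, batch, hidden_size).
h0 = torch.zeros(1, 2, 5)
outputs, hn = rnn(inputs, h0)
print(outputs.shape, hn.shape)         # torch.Size([2, 3, 5]) torch.Size([1, 2, 5])

h0, c0 = torch.zeros(1, 2, 6), torch.zeros(1, 2, 6)
outputs, (hn, cn) = lstm(inputs, (h0, c0))
print(outputs.shape, hn.shape, cn.shape)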
def build(self,
          name: str,
          embedding_dim: int,
          hidden_size: int = 32,
          num_filters: int = 1,
          num_heads: int = 3,
          output_dim: int = 30,
          ngram_filter_sizes: Tuple = (1, 2, 3, 4, 5),
          filters: List[List[int]] = [[1, 4], [2, 8], [3, 16], [4, 32], [5, 64]],
          num_highway: int = 2,
          projection_dim: int = 16) -> Callable[[Tensor, Optional[Tensor]], Tensor]:
    encoder = None
    if name in {'boe'}:
        encoder = BagOfEmbeddingsEncoder(embedding_dim=embedding_dim, averaged=True)
    elif name in {'cnn'}:
        encoder = CnnEncoder(embedding_dim=embedding_dim,
                             num_filters=num_filters,
                             ngram_filter_sizes=ngram_filter_sizes,
                             output_dim=output_dim)
    elif name in {'cnnh'}:
        encoder = CnnHighwayEncoder(embedding_dim=embedding_dim,
                                    filters=filters,
                                    num_highway=num_highway,
                                    projection_dim=projection_dim,
                                    projection_location="after_cnn")
    elif name in {'rnn'}:
        rnn = RNN(input_size=embedding_dim, bidirectional=True,
                  hidden_size=hidden_size, batch_first=True)
        encoder = PytorchSeq2VecWrapper(rnn)
    elif name in {'lstm'}:
        lstm = LSTM(input_size=embedding_dim, bidirectional=True,
                    hidden_size=hidden_size, batch_first=True)
        encoder = PytorchSeq2VecWrapper(lstm)
    elif name in {'gru'}:
        gru = GRU(input_size=embedding_dim, bidirectional=True,
                  hidden_size=hidden_size, batch_first=True)
        encoder = PytorchSeq2VecWrapper(gru)
    elif name in {'intra'}:
        intra = IntraSentenceAttentionEncoder(input_dim=embedding_dim,
                                              projection_dim=output_dim,
                                              combination="1,2")
        aggr = PytorchSeq2VecWrapper(
            LSTM(input_size=embedding_dim + output_dim, bidirectional=True,
                 hidden_size=hidden_size, batch_first=True))
        encoder = lambda x, y: aggr(intra(x, y), y)
    elif name in {'multihead'}:
        sim = MultiHeadedSimilarity(num_heads, embedding_dim)
        multi = IntraSentenceAttentionEncoder(input_dim=embedding_dim,
                                              projection_dim=embedding_dim,
                                              similarity_function=sim,
                                              num_attention_heads=num_heads,
                                              combination="1+2")
        aggr = PytorchSeq2VecWrapper(
            LSTM(input_size=embedding_dim, bidirectional=True,
                 hidden_size=hidden_size, batch_first=True))
        encoder = lambda x, y: aggr(multi(x, y), y)
    assert encoder is not None
    return encoder
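# A standalone sketch (assumes AllenNLP is installed; not part of the class above) of what the
# 'rnn' branch returns: PytorchSeq2VecWrapper turns the RNN into a seq2vec encoder whose
# output is the final hidden state, with 2 * hidden_size features because the RNN is bidirectional.
import torch
from torch.nn import RNN
from allennlp.modules.seq2vec_encoders import PytorchSeq2VecWrapper

rnn = RNN(input_size=20, hidden_size=32, batch_first=True, bidirectional=True)
encoder = PytorchSeq2VecWrapper(rnn)

tokens = torch.rand(4, 9, 20)                  # (batch, seq_len, embedding_dim)
mask = torch.ones(4, 9, dtype=torch.bool)
print(encoder(tokens, mask).shape)             # torch.Size([4, 64])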
def __init__(self, nb_features, nb_frames, nb_layers, hidden_size, bidirectional=False,
             mixture_mean=None, mixture_scale=None, label_mean=None,
             activation_function="relu", recurrent_layer="lstm"):
    super(Generalised_Recurrent_Model, self).__init__()

    # set the hidden size
    self.hidden_size = hidden_size

    # create parameters with torch tensors for mixture mean and scale
    # (named mixture_scale so it is not overwritten by label_scale below)
    self.mixture_mean = Parameter(
        torch.from_numpy(np.copy(mixture_mean).astype(np.float32)))
    self.mixture_scale = Parameter(
        torch.from_numpy(np.copy(mixture_scale).astype(np.float32)))

    # fully connected dense layer for input dimensionality reduction
    self.fc_dr = Linear(in_features=nb_features, out_features=hidden_size)

    # available recurrent layers (note: all three are instantiated, only one is used)
    recurrent_layers = {
        'lstm': LSTM(input_size=hidden_size, hidden_size=hidden_size,
                     num_layers=nb_layers, batch_first=True,
                     bidirectional=bidirectional),
        'gru': GRU(input_size=hidden_size, hidden_size=hidden_size,
                   num_layers=nb_layers, batch_first=True,
                   bidirectional=bidirectional),
        'rnn': RNN(input_size=hidden_size, hidden_size=hidden_size,
                   num_layers=nb_layers, batch_first=True,
                   bidirectional=bidirectional),
    }

    # recurrent layer
    self.recurrent_layer = recurrent_layers[recurrent_layer]
    self.lstm_output = hidden_size * 2 if bidirectional else hidden_size

    # fully connected dense layer for output dimensionality expansion
    self.fc_de = Linear(in_features=self.lstm_output, out_features=nb_features)

    # output label scaling
    self.label_scale = Parameter(torch.ones(nb_features))

    # output label mean
    self.label_mean = Parameter(
        torch.from_numpy(np.copy(label_mean).astype(np.float32)))

    # activation function
    activation_functions = {'relu': F.relu, 'tanh': torch.tanh}
    self.activation_function = activation_functions[activation_function]
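# A hedged instantiation sketch (values are illustrative, not from the original training code).
# It shows the shapes the constructor above expects: mixture_mean, mixture_scale and
# label_mean are per-feature statistics of length nb_features.
import numpy as np

nb_features = 128
model = Generalised_Recurrent_Model(
    nb_features=nb_features,
    nb_frames=64,
    nb_layers=2,
    hidden_size=256,
    bidirectional=True,
    mixture_mean=np.zeros(nb_features, dtype=np.float32),
    mixture_scale=np.ones(nb_features, dtype=np.float32),
    label_mean=np.zeros(nb_features, dtype=np.float32),
    recurrent_layer="gru",
)
print(model.recurrent_layer)   # GRU(256, 256, num_layers=2, batch_first=True, bidirectional=True)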