def __init__(self, input_dim, hidden_dim, batch_dim=1, output_dim=1, num_layers=2):
    super().__init__()
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.batch_dim = batch_dim
    self.output_dim = output_dim
    self.num_layers = num_layers
    self.lstm = LSTM(self.input_dim, self.hidden_dim, self.num_layers)
    self.fc = Linear(self.hidden_dim, self.output_dim)
    self.sigmoid = Sigmoid()
    self.lstm.to('cpu')
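# The snippet above only defines the layers; its forward() is not shown. Below is a
# minimal, self-contained sketch of how such an LSTM -> Linear -> Sigmoid head is
# typically wired, assuming sequence-first input of shape (seq_len, batch, input_dim)
# since the LSTM is built without batch_first=True. The class name and forward logic
# here are illustrative, not taken from the source.
import torch
from torch.nn import LSTM, Linear, Sigmoid, Module

class LSTMRegressor(Module):
    def __init__(self, input_dim, hidden_dim, output_dim=1, num_layers=2):
        super().__init__()
        self.lstm = LSTM(input_dim, hidden_dim, num_layers)   # expects (seq_len, batch, input_dim)
        self.fc = Linear(hidden_dim, output_dim)
        self.sigmoid = Sigmoid()

    def forward(self, x):
        output, (h_n, c_n) = self.lstm(x)        # output: (seq_len, batch, hidden_dim)
        last_step = output[-1]                   # final time step: (batch, hidden_dim)
        return self.sigmoid(self.fc(last_step))  # squash to (0, 1)

x = torch.randn(12, 4, 8)                        # seq_len=12, batch=4, input_dim=8
print(LSTMRegressor(8, 16)(x).shape)             # torch.Size([4, 1])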
def test_forward_pulls_out_correct_tensor_without_sequence_lengths(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=2, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)
    input_tensor = torch.FloatTensor([[[.7, .8], [.1, 1.5]]])
    lstm_output = lstm(input_tensor)
    encoder_output = encoder(input_tensor, None)
    assert_almost_equal(encoder_output.data.numpy(), lstm_output[0].data.numpy())
def __init__(self, bert_model_config: ElectraConfig, args):
    super(DocumentElectraLSTM, self).__init__(bert_model_config)
    self.bert = ElectraModel(bert_model_config)
    self.bert_batch_size = args['bert_batch_size']
    self.dropout = nn.Dropout(p=bert_model_config.hidden_dropout_prob)
    self.lstm = LSTM(bert_model_config.hidden_size, bert_model_config.hidden_size)
    self.classifier = nn.Sequential(
        nn.Dropout(p=bert_model_config.hidden_dropout_prob),
        nn.Linear(bert_model_config.hidden_size, bert_model_config.num_labels),
    )
def __init__(self, config):
    super(MRGIN, self).__init__()
    self.num_features = config.num_features
    self.num_relations = config.num_relations
    self.num_classes = config.nclass
    self.num_layers = config.num_layers  # number of RGCN conv layers
    self.hidden_dim = config.hidden_dim
    self.layer_spec = None if config.layer_spec is None else list(map(int, config.layer_spec.split(',')))
    self.lstm_dim1 = config.lstm_input_dim
    self.lstm_dim2 = config.lstm_output_dim
    self.rgcn_func = FastRGCNConv if config.conv_type == "FastRGCNConv" else RGCNConv
    self.activation = F.relu if config.activation == 'relu' else F.leaky_relu
    self.pooling_type = config.pooling_type
    self.readout_type = config.readout_type
    self.temporal_type = config.temporal_type
    self.dropout = config.dropout
    self.conv = []
    self.pool = []
    total_dim = 0
    if self.layer_spec is None:
        for i in range(self.num_layers):
            if i == 0:
                self.conv.append(self.rgcn_func(self.num_features, self.hidden_dim, self.num_relations).to(config.device))
            else:
                self.conv.append(self.rgcn_func(self.hidden_dim, self.hidden_dim, self.num_relations).to(config.device))
            if self.pooling_type == "sagpool":
                self.pool.append(RGCNSAGPooling(self.hidden_dim, self.num_relations, ratio=config.pooling_ratio, rgcn_func=config.conv_type).to(config.device))
            elif self.pooling_type == "topk":
                self.pool.append(TopKPooling(self.hidden_dim, ratio=config.pooling_ratio).to(config.device))
            total_dim += self.hidden_dim
    else:
        print("using layer specification and ignoring hidden_dim parameter.")
        print("layer_spec: " + str(self.layer_spec))
        for i in range(self.num_layers):
            if i == 0:
                self.conv.append(self.rgcn_func(self.num_features, self.layer_spec[0], self.num_relations).to(config.device))
            else:
                self.conv.append(self.rgcn_func(self.layer_spec[i - 1], self.layer_spec[i], self.num_relations).to(config.device))
            if self.pooling_type == "sagpool":
                self.pool.append(RGCNSAGPooling(self.layer_spec[i], self.num_relations, ratio=config.pooling_ratio, rgcn_func=config.conv_type).to(config.device))
            elif self.pooling_type == "topk":
                self.pool.append(TopKPooling(self.layer_spec[i], ratio=config.pooling_ratio).to(config.device))
            total_dim += self.layer_spec[i]
    self.fc1 = Linear(total_dim, self.lstm_dim1)
    if "lstm" in self.temporal_type:
        self.lstm = LSTM(self.lstm_dim1, self.lstm_dim2, batch_first=True)
        self.attn = Attention(self.lstm_dim2)
    self.fc2 = Linear(self.lstm_dim2, self.num_classes)
def forward(self, input, hx=None, grads=None):
    if grads is not None:
        self.resample_with_sharpening(grads, self.eta)
        weights = self.sampled_sharpen_weights
    elif self.training and self.BBB is True:
        self.sample()
        weights = self.sampled_weights
    else:
        weights = self.means
    # modify weights to pytorch format
    self.all_weights = self.get_all_weights(weights)
    # RNN base code
    is_packed = isinstance(input, PackedSequence)
    if is_packed:
        input, batch_sizes = input
        max_batch_size = batch_sizes[0]
    else:
        batch_sizes = None
        max_batch_size = input.size(0) if self.batch_first else input.size(1)
    if hx is None:
        num_directions = 2 if self.bidirectional else 1
        hx = torch.autograd.Variable(
            input.data.new(self.num_layers * num_directions, max_batch_size, self.hidden_size).zero_(),
            requires_grad=False)
        if self.mode == 'LSTM':
            hx = (hx, hx)
    batch_sizes = 1
    func = LSTM(
        # mode=self.mode,
        input_size=self.input_size,
        hidden_size=self.hidden_size,
        # num_layers=self.num_layers,
        batch_first=self.batch_first,
        dropout=self.dropout)
    # change this line
    input = input.view(batch_sizes, input.shape[-2], input.shape[-1])
    # print('input', input.shape)
    if len(hx) > 1:
        # hx = hx[0]
        hx1, hx2 = hx[0], hx[1]
        hx1 = hx1.view(batch_sizes, hx1.shape[-2], hx1.shape[-1])
        hx2 = hx2.view(batch_sizes, hx2.shape[-2], hx2.shape[-1])
        hx = (hx1, hx2)
    output, hidden = func(input, hx)
    # print('output', output.shape)
    return output.squeeze(0), hidden[0].squeeze(0)
def __init__(self, input_size, hidden_size):
    super(RecurrentEmbedding, self).__init__()
    # Positional args: num_layers=1, bias=True, batch_first=False, dropout=0, bidirectional=False
    self.lstm = LSTM(input_size, hidden_size, 1, True, False, 0, False)
    self.hidden_size = hidden_size
    self.last_h = None
    self.last_c = None
    self.reset()
    self.dbg_t = None
    self.seq = 0
def __init__(self, input_dim, hidden_dim, output_dim, bidirectional=False, cell='LSTM', num_layers=1):
    super(Decoder, self).__init__()
    rnn_hidden_dim = hidden_dim // 2 if bidirectional else hidden_dim
    if cell == 'LSTM':
        self.rnn = LSTM(input_size=input_dim, hidden_size=rnn_hidden_dim, num_layers=num_layers,
                        bidirectional=bidirectional, batch_first=True)
    elif cell == 'GRU':
        self.rnn = GRU(input_size=input_dim, hidden_size=rnn_hidden_dim, num_layers=num_layers,
                       bidirectional=bidirectional, batch_first=True)
    rnn_hidden_output = rnn_hidden_dim * 2 if bidirectional else hidden_dim
    self.output_lin = nn.Linear(rnn_hidden_output, output_dim)
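# Why halve hidden_dim in the bidirectional case: the forward and backward outputs are
# concatenated, so the RNN output comes back to hidden_dim features (assuming hidden_dim
# is even). A quick, standalone shape check of that invariant:
import torch
from torch.nn import LSTM

hidden_dim = 64
rnn = LSTM(input_size=32, hidden_size=hidden_dim // 2, num_layers=1,
           bidirectional=True, batch_first=True)
out, _ = rnn(torch.randn(8, 20, 32))    # (batch=8, seq_len=20, input_dim=32)
print(out.shape)                        # torch.Size([8, 20, 64]) -- both directions concatenated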
def __init__(self, params):
    super(LineDecoderCTC, self).__init__()
    self.use_hidden = params["use_hidden"]
    self.input_size = params["features_size"]
    self.vocab_size = params["vocab_size"]
    if self.use_hidden:
        self.hidden_size = params["hidden_size"]
        self.lstm = LSTM(self.input_size, self.hidden_size, num_layers=1)
        self.end_conv = Conv2d(in_channels=self.hidden_size, out_channels=self.vocab_size + 1, kernel_size=1)
    else:
        self.end_conv = Conv2d(in_channels=self.input_size, out_channels=self.vocab_size + 1, kernel_size=1)
def test_forward_does_not_compress_tensors_padded_to_greater_than_the_max_sequence_length(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)
    tensor = torch.rand([5, 8, 3])
    tensor[:, 7, :] = 0
    mask = torch.ones(5, 8)
    mask[:, 7] = 0
    input_tensor = Variable(tensor)
    mask = Variable(mask)
    encoder_output = encoder(input_tensor, mask)
    assert encoder_output.size(1) == 8
def __init__(
    self,
    hidden_size,
    num_layers,
) -> None:
    super().__init__()
    self.layer = LSTM(
        input_size=hidden_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        batch_first=True,
    )
    set_lstm_spectral_norm(self.layer)
def __init__(self, input_size, hidden_size, seq_len, num_layers=1, bias=True, dropout=0, bidirectional=False):
    super(EncoderLSTM, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bias = bias
    self.dropout = dropout
    self.dropout_state = {}
    self.bidirectional = bidirectional
    self.seq_len = seq_len
    num_directions = 2 if bidirectional else 1
    # note: bias, dropout and bidirectional are stored but not forwarded to the LSTM below
    self.lstm = LSTM(input_size, hidden_size, num_layers)
def __init__(self, bert_model_config: BertConfig):
    super(DocumentBertLSTM, self).__init__(bert_model_config)
    self.bert = BertModel(bert_model_config)
    self.bert_batch_size = self.bert.config.bert_batch_size
    self.dropout = nn.Dropout(p=bert_model_config.hidden_dropout_prob)
    self.lstm = LSTM(
        bert_model_config.hidden_size,
        bert_model_config.hidden_size,
    )
    self.classifier = nn.Sequential(
        nn.Dropout(p=bert_model_config.hidden_dropout_prob),
        nn.Linear(bert_model_config.hidden_size, bert_model_config.num_labels),
        nn.Tanh())
def __init__(self, config, params):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.roberta = RobertaModel(config)
    self.batch_size = params['batch_size']
    self.weights = params['weights']
    self.bert_batch_size = params['max_sentences_per_doc']
    self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
    self.lstm = LSTM(config.hidden_size, config.hidden_size)
    self.classifier = nn.Sequential(
        nn.Dropout(p=config.hidden_dropout_prob),
        nn.Linear(config.hidden_size, config.num_labels),
        nn.Tanh())
    self.init_weights()
def __init__(self, input_size, hidden_size, num_layers=1, bias=True,
             batch_first=True, dropout=0, bidirectional=False):
    super(PackedLSTM, self).__init__()
    self.hidden_size = hidden_size
    self.bidirectional = bidirectional
    self.lstm = LSTM(input_size, hidden_size, num_layers, bias,
                     batch_first, dropout, bidirectional)
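# A PackedLSTM like the one above is normally fed a PackedSequence so that padding is
# skipped during the recurrence. The wrapper's own forward() is not shown; this is a
# minimal standalone sketch of the packing/unpacking round trip on a plain torch.nn.LSTM.
import torch
from torch.nn import LSTM
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

lstm = LSTM(input_size=4, hidden_size=6, num_layers=1, batch_first=True)
batch = torch.randn(3, 5, 4)                      # 3 padded sequences of max length 5
lengths = torch.tensor([5, 3, 2])                 # true lengths of each sequence
packed = pack_padded_sequence(batch, lengths, batch_first=True, enforce_sorted=False)
packed_out, (h_n, c_n) = lstm(packed)             # the LSTM consumes the PackedSequence directly
out, out_lengths = pad_packed_sequence(packed_out, batch_first=True)
print(out.shape, out_lengths)                     # torch.Size([3, 5, 6]) tensor([5, 3, 2])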
def __init__(self, batch):
    super().__init__()
    num_input = len(aic_bones) + 2 * len(aic_bone_pairs)
    self.num_hidden = 48
    self.num_output = 2
    self.batch = batch
    self.rnn = LSTM(input_size=num_input, hidden_size=self.num_hidden)
    self.lin1 = nn.Linear(self.num_hidden, self.num_output)
    self.drop = nn.Dropout(p=0.5)
    self.ckpt_path = Path('checkpoints/lstm.pt')
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.to(self.device, dtype=torch.float32)
def __init__(self, output_vocabulary_size, bert_name='bert-base-uncased',
             lstm_units_size=512, lstm_layers=2, dropout_rate=0.1):
    super(BertLSTM, self).__init__()
    self.bert = BERTEmbedder(bert_name)
    self.lstm = LSTM(input_size=self.bert.embedding_dim, hidden_size=lstm_units_size,
                     num_layers=lstm_layers, bidirectional=True, batch_first=True)
    self.dropout = Dropout(p=dropout_rate)
    self.attention = Attention(in_features=lstm_units_size * 2)
    next_layer_in_features = lstm_units_size * 4
    self.output = Linear(in_features=next_layer_in_features, out_features=output_vocabulary_size)
    if torch.cuda.is_available():
        self.cuda()
def _create_layers(self, conv_layer=GCNConv):
    self.recurrent_layer = LSTM(input_size=self.in_channels,
                                hidden_size=self.in_channels,
                                num_layers=1)
    self.conv_layer = conv_layer(in_channels=self.in_channels,
                                 out_channels=self.in_channels,
                                 improved=self.improved,
                                 cached=self.cached,
                                 normalize=self.normalize,
                                 add_self_loops=self.add_self_loops,
                                 bias=False)
def test_wrapper_can_call_backward_with_zero_length_sequences(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)
    input_tensor = torch.rand([5, 7, 3])
    mask = torch.ones(5, 7)
    mask[0, 3:] = 0
    mask[1, 4:] = 0
    mask[2, 0:] = 0  # zero length sequence
    mask[3, 6:] = 0
    output = encoder(input_tensor, mask)
    output.sum().backward()
def _create_layers(self):
    self.conv_layer = GatedGraphConv(
        out_channels=self.conv_out_channels,
        num_layers=self.conv_num_layers,
        aggr=self.conv_aggr,
        bias=True,
    )
    self.recurrent_layer = LSTM(
        input_size=self.conv_out_channels,
        hidden_size=self.lstm_out_channels,
        num_layers=self.lstm_num_layers,
    )
def __init__(
    self,
    decoding_dim: int,
    target_embedding_dim: int,
    attention: Optional[Attention] = None,
    bidirectional_input: bool = False,
    num_decoder_layers: int = 1,
    accumulate_hidden_states: bool = False,
    dropout: float = 0.2,
) -> None:
    super().__init__(
        decoding_dim=decoding_dim,
        target_embedding_dim=target_embedding_dim,
        decodes_parallel=False,
    )
    # In this type of decoder, the output of the previous step passes directly to the input
    # of the current step. We also assume that the decoder output dimensionality equals the
    # encoder output dimensionality.
    decoder_input_dim = self.target_embedding_dim

    # Attention mechanism applied to the encoder output for each step.
    self._attention = attention
    if self._attention:
        # If using attention, a weighted average over encoder outputs will be concatenated
        # to the previous target embedding to form the input to the decoder at each
        # time step. The encoder output dim will be the same as decoding_dim.
        decoder_input_dim += decoding_dim

    # Ensure that attention is only set during seq2seq setting.
    # if not self._seq2seq_mode and self._attention is not None:
    #     raise ConfigurationError("Attention is only specified in Seq2Seq setting.")

    self._num_decoder_layers = num_decoder_layers
    if self._num_decoder_layers > 1:
        self._decoder_cell = LSTM(
            input_size=decoder_input_dim,
            hidden_size=self.decoding_dim,
            num_layers=self._num_decoder_layers,
            dropout=dropout,
        )
    else:
        # We'll use an LSTM cell as the recurrent cell that produces a hidden state
        # for the decoder at each time step.
        # TODO (pradeep): Do not hardcode decoder cell type.
        self._decoder_cell = LSTMCell(decoder_input_dim, self.decoding_dim)

    self._bidirectional_input = bidirectional_input
    self._accumulate_hidden_states = accumulate_hidden_states
def __init__(
    self,
    latent_features,
    hidden_size,
    output_dim,
):
    super(Decoder, self).__init__()
    self.latent_features = latent_features
    self.hidden_size = hidden_size
    self.output_dim = output_dim
    self.rnn1 = LSTM(
        input_size=self.latent_features,
        hidden_size=self.latent_features,
    )
    self.rnn2 = LSTM(
        input_size=self.latent_features,
        hidden_size=self.hidden_size,
    )
    self.output_layer = Linear(hidden_size, self.output_dim)
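# The Decoder above stacks two LSTMs but does not show its forward(). In sequence
# autoencoders a common pattern is to repeat the latent vector across the target length
# and run it through both LSTMs before the linear output layer. This is a hedged sketch
# of that pattern only; seq_len, batch size and the repeat step are assumptions, not
# taken from the source.
import torch
from torch.nn import LSTM, Linear

latent_features, hidden_size, output_dim, seq_len = 8, 32, 3, 20
rnn1 = LSTM(input_size=latent_features, hidden_size=latent_features)
rnn2 = LSTM(input_size=latent_features, hidden_size=hidden_size)
output_layer = Linear(hidden_size, output_dim)

z = torch.randn(1, latent_features)              # one latent vector per sequence
x = z.unsqueeze(0).repeat(seq_len, 1, 1)         # repeat across time: (seq_len, batch=1, latent_features)
h1, _ = rnn1(x)                                  # (seq_len, 1, latent_features)
h2, _ = rnn2(h1)                                 # (seq_len, 1, hidden_size)
print(output_layer(h2).shape)                    # torch.Size([20, 1, 3])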
def __init__(self, opts):
    super().__init__()
    self.opts = opts
    self.tactic_decoder = TacticDecoder(CFG(opts.tac_grammar, 'tactic_expr'), opts)
    self.term_encoder = TermEncoder(opts)
    self.tactic_embedding = Embedding(opts.num_tactics, 256, padding_idx=0)
    self.tactic_LSTM = LSTM(256, 256, 1, batch_first=True, bidirectional=True)
    self.tac_vocab = pickle.load(open(opts.tac_vocab_file, 'rb'))
    self.cutoff_len = opts.cutoff_len
def __init__(self, mode, channels=None, num_layers=None):
    super().__init__()
    self.mode = mode.lower()
    assert self.mode in ['cat', 'max', 'lstm']
    if self.mode == 'lstm':  # compare against the lower-cased mode, consistent with the assert above
        assert channels is not None, 'channels cannot be None for lstm'
        assert num_layers is not None, 'num_layers cannot be None for lstm'
        self.lstm = LSTM(channels, (num_layers * channels) // 2,
                         bidirectional=True, batch_first=True)
        self.att = Linear(2 * ((num_layers * channels) // 2), 1)
    self.reset_parameters()
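# The 'lstm' mode above scores each layer's node representation with a bidirectional
# LSTM plus a linear layer and takes a softmax-weighted sum across layers
# (jumping-knowledge style aggregation). The source's forward() is not shown; this is
# a hedged, self-contained sketch of that aggregation with made-up sizes.
import torch
import torch.nn.functional as F
from torch.nn import LSTM, Linear

num_nodes, num_layers, channels = 10, 4, 16
xs = torch.stack([torch.randn(num_nodes, channels) for _ in range(num_layers)], dim=1)  # (N, L, C)

lstm = LSTM(channels, (num_layers * channels) // 2, bidirectional=True, batch_first=True)
att = Linear(2 * ((num_layers * channels) // 2), 1)

alpha, _ = lstm(xs)                             # (N, L, num_layers * channels)
alpha = att(alpha).squeeze(-1)                  # (N, L) unnormalized per-layer scores
alpha = F.softmax(alpha, dim=-1)
out = (xs * alpha.unsqueeze(-1)).sum(dim=1)     # (N, C) weighted sum over layers
print(out.shape)                                # torch.Size([10, 16])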
def __init__(self, input_size, hidden_size, num_layers, output_dim, drop=0.1, batch_first=True):
    super(Hybrid_LSTM, self).__init__()
    self.conv1 = Conv1d(24, 24, 2)
    # batch_first must be passed by keyword: positionally, the fourth LSTM argument is `bias`
    self.lstm = LSTM(1, hidden_size, num_layers, batch_first=batch_first)
    self.drop = Dropout(drop)
    self.linear = Linear(hidden_size, hidden_size)
    self.act = Tanh()
    self.linear2 = Linear(hidden_size, output_dim)
def __init__(self, embedding_size, hidden_size, num_layers=1, use_cuda=None):
    super().__init__(hidden_size, use_cuda)
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.lstm = LSTM(embedding_size, hidden_size, num_layers, batch_first=True)
    if use_cuda:
        self.lstm = self.lstm.cuda()
def __init__(self, vocab_sz, config):
    # possible next step: use auto scaling of batch size on GPU
    super().__init__()
    mp = config.model_params
    self.op = config.optim_params
    self.im_vec_dim = mp.im_vec_dim
    self.ans_dim = mp.ans_dim
    self.question_dim = mp.question_dim
    self.n_hidden = mp.n_hidden
    self.n_layers = mp.n_layers  # in case we want a multilayer RNN
    self.i_h = Embedding(vocab_sz, self.n_hidden, padding_idx=0)
    self.h_o = Linear(self.n_hidden, self.question_dim)
    self.h = None
    self.ans_final = Linear(self.n_hidden, self.ans_dim)
    if self.n_layers > 1:
        self.rnn = LSTM(self.n_hidden, self.n_hidden, self.n_layers)
    else:
        self.rnn = LSTM(self.n_hidden, self.n_hidden)
    # for images, we use resnet18 and modify the number of output classes
    self.image_feature_extractor = resnet18(pretrained=False)
    self.image_feature_extractor.fc = Linear(512, self.im_vec_dim)
def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
    super().__init__()
    self.num_classes = num_classes  # number of classes
    self.num_layers = num_layers    # number of layers
    self.input_size = input_size    # input size
    self.hidden_size = hidden_size  # hidden state
    self.seq_length = seq_length    # sequence length
    self.lstm = LSTM(input_size=input_size, hidden_size=hidden_size,
                     num_layers=num_layers, batch_first=True)  # lstm
    self.fc_1 = Linear(hidden_size, 100)  # fully connected 1
    self.fc = Linear(100, num_classes)    # fully connected last layer
    self.relu = ReLU()
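# The classifier above does not show its forward(). A common pattern for this layout
# (batch_first LSTM followed by two linear layers) is to classify from the final hidden
# state of the last layer; the wiring below is an assumption, sketched standalone.
import torch
from torch.nn import LSTM, Linear, ReLU

input_size, hidden_size, num_layers, num_classes = 5, 16, 2, 3
lstm = LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
fc_1, fc, relu = Linear(hidden_size, 100), Linear(100, num_classes), ReLU()

x = torch.randn(4, 30, input_size)       # (batch, seq_len, features)
_, (h_n, _) = lstm(x)                    # h_n: (num_layers, batch, hidden_size)
logits = fc(relu(fc_1(h_n[-1])))         # use the final hidden state of the last layer
print(logits.shape)                      # torch.Size([4, 3])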
def __init__(self, lexicon_size, embedding_dim, padding_idx, lstm_layers, hidden_size, p_dropout, dev):
    super().__init__()
    self.hidden_state = None
    self.hidden_size = hidden_size
    self.device = dev
    self.lstm_layers = lstm_layers
    self.embedding = Embedding(num_embeddings=lexicon_size,
                               embedding_dim=embedding_dim,
                               padding_idx=padding_idx)
    self.lstm = LSTM(input_size=embedding_dim,
                     hidden_size=hidden_size,
                     num_layers=lstm_layers,
                     batch_first=True)
    self.fc = Sequential(Dropout(p=p_dropout, inplace=True),
                         Linear(hidden_size, lexicon_size))
    self.reset_state()
    self.apply(kaiming_init)
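# The model above carries a persistent hidden_state and a reset_state() helper whose
# definitions are not shown. A minimal sketch of that stateful language-model pattern,
# with illustrative names and sizes (not from the source): the hidden state is threaded
# across calls and detached so truncated BPTT does not backprop through earlier batches.
import torch
from torch.nn import Embedding, LSTM, Linear, Module

class TinyCharLM(Module):
    def __init__(self, lexicon_size=100, embedding_dim=16, hidden_size=32, lstm_layers=1):
        super().__init__()
        self.embedding = Embedding(lexicon_size, embedding_dim, padding_idx=0)
        self.lstm = LSTM(embedding_dim, hidden_size, lstm_layers, batch_first=True)
        self.fc = Linear(hidden_size, lexicon_size)
        self.hidden_state = None                   # carried across forward calls

    def reset_state(self):
        self.hidden_state = None                   # start the next sequence from scratch

    def forward(self, tokens):
        emb = self.embedding(tokens)               # (batch, seq_len, embedding_dim)
        out, self.hidden_state = self.lstm(emb, self.hidden_state)
        self.hidden_state = tuple(h.detach() for h in self.hidden_state)
        return self.fc(out)                        # (batch, seq_len, lexicon_size)

lm = TinyCharLM()
print(lm(torch.randint(1, 100, (2, 7))).shape)     # torch.Size([2, 7, 100])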
def _init_layers(self):
    self.lstm = LSTM(self.input_size, self.hidden_state_size, self.nb_lstm_layer, bidirectional=True)
    self.attention_layer = Linear(self.hidden_state_size * 2, 1)
    self.combinaison_layer = Linear(self.hidden_state_size * 2, self.hidden_state_size)
    xavier_normal_(self.attention_layer.weight.data)
    xavier_normal_(self.combinaison_layer.weight.data)
    self.activation = ReLU()
def test_wrapper_works_when_passed_state_with_zero_length_sequences(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)
    input_tensor = torch.rand([5, 7, 3])
    mask = torch.ones(5, 7)
    mask[0, 3:] = 0
    mask[1, 4:] = 0
    mask[2, 0:] = 0
    mask[3, 6:] = 0
    # Initial states are of shape (num_layers * num_directions, batch_size, hidden_dim)
    initial_states = torch.randn(6, 5, 7), torch.randn(6, 5, 7)
    _ = encoder(input_tensor, mask, initial_states)
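# The test above hands the wrapper an (h_0, c_0) pair shaped
# (num_layers * num_directions, batch, hidden). The same shapes apply to a plain
# torch.nn.LSTM, which can be verified directly with the dimensions used in the test:
import torch
from torch.nn import LSTM

lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
x = torch.rand(5, 7, 3)                                 # (batch, seq_len, features)
h0, c0 = torch.randn(6, 5, 7), torch.randn(6, 5, 7)     # (num_layers * num_directions, batch, hidden)
out, (h_n, c_n) = lstm(x, (h0, c0))
print(out.shape, h_n.shape)                             # torch.Size([5, 7, 14]) torch.Size([6, 5, 7])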