def __init__(self, params):
    """Create the two bidirectional LSTM layers and the linear output layer.

    Args:
        params: Configuration object. This constructor reads
            ``image_size`` (``[0]`` is used as the height, ``[1]`` as the
            width), ``hidden_size`` and ``output_size`` from it.
    """
    super(MNIST_Network, self).__init__()

    # Module Parameters
    self.params = params
    print(params)
    cfg = self.params
    height = cfg.image_size[0]
    width = cfg.image_size[1]
    n_hidden = cfg.hidden_size
    n_outputs = cfg.output_size

    # LSTM Layers: both share every setting except the per-step feature size.
    common_lstm_kwargs = dict(
        hidden_size=n_hidden,
        bidirectional=True,
        batch_first=True,
        bias=True,
    )
    self.horizontal_layer = LSTM(input_size=height, **common_lstm_kwargs)
    self.vertical_layer = LSTM(input_size=width, **common_lstm_kwargs)

    # Output Layer
    # NOTE(review): the width does not enter in_features; presumably this
    # assumes square images -- confirm against forward().
    self.output_layer = Linear(
        in_features=4 * n_hidden * height,
        out_features=n_outputs,
    )

    # Initialize Parameters
    self.reset_parameters()
class RecurrentEmbedding(CudaModule):
    """Single-layer LSTM whose hidden and cell state persist between calls.

    Each ``forward`` call starts from the state left by the previous call;
    ``reset`` replaces that state with zeros.
    """

    def __init__(self, input_size, hidden_size):
        super(RecurrentEmbedding, self).__init__()
        self.lstm = LSTM(
            input_size,
            hidden_size,
            num_layers=1,
            bias=True,
            batch_first=False,
            dropout=0,
            bidirectional=False,
        )
        self.hidden_size = hidden_size
        self.last_h = None
        self.last_c = None
        self.reset()
        self.dbg_t = None
        self.seq = 0

    def init_weights(self):
        """Intentionally a no-op."""
        pass

    def reset(self):
        """Replace the carried hidden and cell state with fresh zeros."""
        state_shape = (1, 1, self.hidden_size)
        self.last_h = cuda_var(torch.zeros(*state_shape), self.is_cuda, self.cuda_device)
        self.last_c = cuda_var(torch.zeros(*state_shape), self.is_cuda, self.cuda_device)

    def cuda(self, device=None):
        """Run ``CudaModule.cuda`` and move the wrapped LSTM; returns ``self``."""
        CudaModule.cuda(self, device)
        self.lstm.cuda(device)
        return self

    def forward(self, inputs):
        """Run the LSTM from the stored state, store the new state, return the outputs."""
        sequence_out, (next_h, next_c) = self.lstm(inputs, (self.last_h, self.last_c))
        self.last_h = next_h
        self.last_c = next_c
        return sequence_out
class LSTMAggregation(Aggregation):
    r"""Performs LSTM-style aggregation in which the elements to aggregate are
    interpreted as a sequence, as described in the `"Inductive Representation
    Learning on Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper.

    .. warning::
        :class:`LSTMAggregation` is not a permutation-invariant operator.

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Size of each output sample.
        **kwargs (optional): Additional arguments of :class:`torch.nn.LSTM`.
    """

    def __init__(self, in_channels: int, out_channels: int, **kwargs):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.lstm = LSTM(in_channels, out_channels, batch_first=True, **kwargs)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the wrapped LSTM's parameters."""
        self.lstm.reset_parameters()

    def forward(self, x: Tensor, index: Optional[Tensor] = None,
                ptr: Optional[Tensor] = None, dim_size: Optional[int] = None,
                dim: int = -2) -> Tensor:
        """Densify the input, run the LSTM, and return the last step's output."""
        dense_x, _ = self.to_dense_batch(x, index, ptr, dim_size, dim)
        sequence_out, _ = self.lstm(dense_x)
        return sequence_out[:, -1]

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f'{cls_name}({self.in_channels}, {self.out_channels})'
def __init__(self, **kwargs):
    """Build the stacked-LSTM dynamics model.

    Keyword Args:
        goal_dim: Size of the goal (acoustic state) vector; also the width
            of the ``goal`` output head.
        action_dim: Size of the action vector.
        state_dim: Size of the state vector; also the width of the
            ``state`` output head.
        lstm_layers_size: Sequence with the hidden size of each LSTM.
        linear_layers_size: Sequence with the output size of each linear
            layer that follows the LSTM stack.
    """
    super(DeterministicLstmModelDynamics, self).__init__()
    self.__acoustic_state_dim = kwargs['goal_dim']
    self.__action_dim = kwargs['action_dim']
    self.__state_dim = kwargs['state_dim']
    self.__lstm_sizes = kwargs['lstm_layers_size']
    self.__linears_size = kwargs['linear_layers_size']
    lstm_widths = self.__lstm_sizes
    linear_widths = self.__linears_size

    # The network input is the concatenation of goal, state and action.
    input_size = self.__acoustic_state_dim + self.__state_dim + self.__action_dim
    self.__bn1 = torch.nn.BatchNorm1d(input_size)

    # First LSTM consumes the raw input; each following one consumes the
    # previous LSTM's hidden size.
    self.lstms = ModuleList(
        [LSTM(input_size, lstm_widths[0], batch_first=True)])
    self.lstms.extend([
        LSTM(prev_width, next_width, batch_first=True)
        for prev_width, next_width in zip(lstm_widths, lstm_widths[1:])
    ])
    # One hidden-state slot per LSTM, initially empty.
    self.hiddens = [None] * len(lstm_widths)

    self.linears = ModuleList(
        [Linear(lstm_widths[-1], linear_widths[0])])
    self.linears.extend([
        Linear(prev_width, next_width)
        for prev_width, next_width in zip(linear_widths, linear_widths[1:])
    ])

    # Two output heads on top of the last linear layer.
    head_input = linear_widths[-1]
    self.goal = Linear(head_input, kwargs['goal_dim'])
    self.state = Linear(head_input, kwargs['state_dim'])

    self.relu = ReLU()
    self.tanh = Tanh()

    self.apply(init_weights)  # xavier uniform init
def __init__(self,
             input_size: int,
             hidden_size: int,
             num_layers: int,
             attention: Attention = None,
             training: bool = True) -> None:
    """Build the LSTM and, when attention is supplied, the generation gate.

    Args:
        input_size: Feature size of each input step.
        hidden_size: Hidden size of the LSTM.
        num_layers: Number of stacked LSTM layers.
        attention: Optional attention module. When given, the LSTM input
            is widened by ``hidden_size`` and a sigmoid gate ``_p_gen`` is
            created.
        training: Stored on ``self.training``.
    """
    super().__init__()
    self.vocab = None
    self.training = training
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.input_size = input_size

    if attention is not None:
        self.is_attention = True
        self.attention = attention
        # With attention the LSTM input has hidden_size extra features.
        self.rnn = LSTM(input_size=self.hidden_size + self.input_size,
                        hidden_size=self.hidden_size,
                        num_layers=self.num_layers,
                        batch_first=True)
        gate_in_features = self.hidden_size + self.hidden_size + self.input_size
        self._p_gen = Sequential(
            Linear(gate_in_features, 1, bias=True),
            Sigmoid())
    else:
        self.is_attention = False
        self.rnn = LSTM(input_size=self.input_size,
                        hidden_size=self.hidden_size,
                        num_layers=self.num_layers,
                        batch_first=True)

    self.gen_vocab_dist = None
def setUp(self):
    """Create the recurrent modules, a padded batch and its mask/sort indices."""
    super().setUp()
    recurrent_kwargs = dict(bidirectional=True,
                            num_layers=3,
                            input_size=3,
                            hidden_size=7,
                            batch_first=True)
    self.lstm = LSTM(**recurrent_kwargs)
    self.rnn = RNN(**recurrent_kwargs)
    self.encoder_base = _EncoderBase(stateful=True)

    batch_size, max_len = 5, 7
    tensor = torch.rand([batch_size, max_len, 3])
    mask = torch.ones(batch_size, max_len).bool()

    # Rows that are only partially valid: zero the padding and mask it out.
    for row, valid_len in ((1, 6), (3, 2)):
        tensor[row, valid_len:, :] = 0
        mask[row, valid_len:] = False
    # Rows that are masked from the very first step.
    for row in (2, 4):
        mask[row, :] = False  # <= completely masked

    self.tensor = tensor
    self.mask = mask
    self.batch_size = batch_size
    self.num_valid = 3

    sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
    sorted_batch = sort_batch_by_length(tensor, sequence_lengths)
    _, _, restoration_indices, sorting_indices = sorted_batch
    self.sorting_indices = sorting_indices
    self.restoration_indices = restoration_indices
def __init__(
    self,
    in_channels: Union[int, Tuple[int, int]],
    out_channels: int,
    aggr: str = 'mean',
    normalize: bool = False,
    root_weight: bool = True,
    project: bool = False,
    bias: bool = True,
    **kwargs,
):
    """Set up the linear layers and, for ``aggr='lstm'``, the LSTM.

    Args:
        in_channels: Input size, or a pair of sizes. An ``int`` is
            expanded to ``(in_channels, in_channels)``.
        out_channels: Output size of ``lin_l`` / ``lin_r``.
        aggr: Aggregation name forwarded to the base class, except that
            ``'lstm'`` is forwarded as ``None``.
        normalize: Stored on ``self.normalize``.
        root_weight: When true, ``lin_r`` is created.
        project: When true, ``lin`` is created.
        bias: Whether ``lin_l`` has a bias term.
        **kwargs: Forwarded to the base-class constructor.
    """
    # 'lstm' is handled in this class, so the base class is told "no aggr".
    kwargs['aggr'] = None if aggr == 'lstm' else aggr
    super().__init__(**kwargs)

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.normalize = normalize
    self.root_weight = root_weight
    self.project = project

    if isinstance(in_channels, int):
        in_channels = (in_channels, in_channels)
    first_channels = in_channels[0]

    if self.project:
        self.lin = Linear(first_channels, first_channels, bias=True)

    if self.aggr is None:
        self.fuse = False  # No "fused" message_and_aggregate.
        self.lstm = LSTM(first_channels, first_channels, batch_first=True)

    self.lin_l = Linear(first_channels, out_channels, bias=bias)
    if self.root_weight:
        self.lin_r = Linear(in_channels[1], out_channels, bias=False)

    self.reset_parameters()
def __init__(self, params: SequenceEncoderParams):
    """Create the word embedding, three bidirectional LSTMs and three dropouts.

    Args:
        params: Hyper-parameter object; the ``EMBED_*``, ``LSTM_*`` and
            ``GPU`` attributes are read here.
    """
    super(SequenceEncoderModel, self).__init__()

    # word embed layer
    if params.EMBED_use_pre_trained:
        self._embeddings = self._load_pre_trained(params.EMBED_pre_trained, params.GPU)
    else:
        self._embeddings = Embedding(params.EMBED_vocab_dim, params.EMBED_dim)

    # Bi-LSTM layers
    # Layer 0 takes the word + character embedding; layers 1 and 2 take
    # that plus 2 * LSTM_hidden_dim extra features.
    token_dim = params.EMBED_dim + params.EMBED_chr_dim
    stacked_dim = token_dim + (2 * params.LSTM_hidden_dim)
    bilstm_kwargs = dict(batch_first=True, bidirectional=True)
    self._lstm_layer_0 = LSTM(token_dim, params.LSTM_hidden_dim,
                              params.LSTM_layers, **bilstm_kwargs)
    self._lstm_layer_1 = LSTM(stacked_dim, params.LSTM_hidden_dim,
                              params.LSTM_layers, **bilstm_kwargs)
    self._lstm_layer_2 = LSTM(stacked_dim, params.LSTM_hidden_dim,
                              params.LSTM_layers, **bilstm_kwargs)

    self._dropout_0 = Dropout(p=params.LSTM_dropout_0)
    self._dropout_1 = Dropout(p=params.LSTM_dropout_1)
    self._dropout_2 = Dropout(p=params.LSTM_dropout_2)
def __init__(self, embedding_size, hidden_size, num_layers=1, use_cuda=None):
    """Create a batch-first LSTM, moved to CUDA when ``use_cuda`` is truthy.

    Args:
        embedding_size: Feature size of each input step.
        hidden_size: Hidden size of the LSTM (also passed to the base class).
        num_layers: Number of stacked LSTM layers.
        use_cuda: Passed to the base class; when truthy the LSTM is moved
            to the GPU.
    """
    super().__init__(hidden_size, use_cuda)
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    recurrent = LSTM(embedding_size, hidden_size, num_layers, batch_first=True)
    self.lstm = recurrent.cuda() if use_cuda else recurrent
def __init__(self, d, es_idx, ent_vec_dim, rel_vec_dim, cfg, Evocab=40990, Rvocab=13):
    """Create the embeddings, the TuckER scorer and the two text encoders.

    Args:
        d: Forwarded to ``TuckER``.
        es_idx: Stored on ``self.es_idx``.
        ent_vec_dim: Entity vector size; the entity LSTM uses half of it
            per direction.
        rel_vec_dim: Relation vector size; the relation LSTM uses half of
            it per direction.
        cfg: Configuration; ``cfg.hSize`` is the embedding width.
        Evocab: Number of rows in the entity-side embedding table.
        Rvocab: Number of rows in the relation-side embedding table.
    """
    super(LSTMTuckER, self).__init__()
    embed_width = cfg.hSize
    self.Eembed = nn.Embedding(Evocab, embed_width, padding_idx=0)
    self.Rembed = nn.Embedding(Rvocab, embed_width, padding_idx=0)
    self.tucker = TuckER(d, ent_vec_dim, rel_vec_dim, cfg)
    self.es_idx = es_idx

    # batch_first: If ``True``, then the input and output tensors are
    # provided as (batch, seq, feature). Default: ``False``
    encoder_kwargs = dict(num_layers=2, batch_first=True, dropout=0.2,
                          bidirectional=True)
    self.elstm = LSTM(embed_width, int(ent_vec_dim / 2), **encoder_kwargs)
    self.rlstm = LSTM(embed_width, int(rel_vec_dim / 2), **encoder_kwargs)
    self.loss = torch.nn.BCELoss()
class RNNAdder(Module):
    """Stacked LSTM followed by a linear layer and a sigmoid, applied per step."""

    def __init__(self, input_dim, hidden_dim, batch_dim=1, output_dim=1, num_layers=2):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_dim = batch_dim
        self.output_dim = output_dim
        self.num_layers = num_layers

        self.lstm = LSTM(self.input_dim, self.hidden_dim, self.num_layers)
        self.fc = Linear(self.hidden_dim, self.output_dim)
        self.sigmoid = Sigmoid()
        self.lstm.to('cpu')

    def forward(self, data):
        """Return ``sigmoid(fc(lstm(data)))`` for every step of ``data``."""
        lstm_out, _ = self.lstm(data)
        return self.sigmoid(self.fc(lstm_out))

    def init_hidden(self):
        """Return a zero ``(h0, c0)`` pair of shape (num_layers, batch_dim, hidden_dim)."""
        state_shape = (self.num_layers, self.batch_dim, self.hidden_dim)
        return (torch.zeros(*state_shape), torch.zeros(*state_shape))


def evaluate(model, X_test, y_test):
    """Return the average of ``bitwise_accuracy`` of the model output against ``y_test``."""
    predictions = model(X_test)
    return np.average(bitwise_accuracy(predictions, y_test))
def main():
    """Train an 8-layer LSTM on random targets using two managed channels.

    Channel 0 gets a new-sequence reset every 3rd step and channel 1 every
    5th step; the states returned by the LSTM are pushed back to the
    channels after each forward pass.
    """
    # lstm=LayeredLSTM()
    model = LSTM(input_size=47764, hidden_size=512, num_layers=8, batch_first=True)

    # this has no new sequence reset
    # I wonder if gradient information will increase indefinitely
    # Even so, I think detaching at the beginning of each new sequence is an arbitrary decision.
    optimizer = torch.optim.Adam(model.parameters())
    model.cuda()
    loss_fn = torch.nn.SmoothL1Loss()

    channels = ChannelManager()
    channels.add_channels(2)
    channels.cat_call("init_states")

    # (channel index, reset period in steps)
    reset_schedule = ((0, 3), (1, 5))

    for step in range(2000):
        print(step)
        optimizer.zero_grad()
        target = Variable(torch.rand(2, 1, 512)).cuda()

        batch_input = channels.cat_call("get_input")
        batch_states = channels.cat_call("get_states", 1)
        output, states = model(batch_input, batch_states)
        channels.distribute_call("push_states", states, dim=1)

        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()

        for channel_idx, period in reset_schedule:
            if step % period == 0:
                channels[channel_idx].new_sequence_reset()
def __init__(self, config):
    """Build the relational graph-conv stack, optional pooling and temporal head.

    Args:
        config: Configuration object. The attributes read here are
            ``num_features``, ``num_relations``, ``nclass``, ``num_layers``,
            ``hidden_dim``, ``layer_spec``, ``lstm_input_dim``,
            ``lstm_output_dim``, ``conv_type``, ``activation``,
            ``pooling_type``, ``readout_type``, ``temporal_type``,
            ``dropout``, ``device`` and ``pooling_ratio``.
    """
    super(MRGCN, self).__init__()
    self.num_features = config.num_features
    self.num_relations = config.num_relations
    self.num_classes = config.nclass
    self.num_layers = config.num_layers  # defines number of RGCN conv layers.
    self.hidden_dim = config.hidden_dim
    # layer_spec is a comma-separated string of per-layer widths, or None.
    self.layer_spec = None if config.layer_spec == None else list(map(int, config.layer_spec.split(',')))
    self.lstm_dim1 = config.lstm_input_dim
    self.lstm_dim2 = config.lstm_output_dim
    # Any conv_type other than "FastRGCNConv" selects RGCNConv.
    self.rgcn_func = FastRGCNConv if config.conv_type == "FastRGCNConv" else RGCNConv
    # Any activation other than 'relu' selects leaky_relu.
    self.activation = F.relu if config.activation == 'relu' else F.leaky_relu
    self.pooling_type = config.pooling_type
    self.readout_type = config.readout_type
    self.temporal_type = config.temporal_type

    self.dropout = config.dropout
    # NOTE(review): self.conv is a plain Python list, not a ModuleList.
    # Presumably these conv layers are therefore not registered as
    # submodules (absent from parameters()/state_dict()) -- TODO confirm
    # how the optimizer and checkpointing pick them up.
    self.conv = []
    # Running sum of the widths of all conv layers (or of fc0_5).
    total_dim = 0

    if self.layer_spec == None:
        # Uniform width: every conv layer outputs hidden_dim features.
        if self.num_layers > 0:
            self.conv.append(self.rgcn_func(self.num_features, self.hidden_dim, self.num_relations).to(config.device))
            total_dim += self.hidden_dim
            for i in range(1, self.num_layers):
                self.conv.append(self.rgcn_func(self.hidden_dim, self.hidden_dim, self.num_relations).to(config.device))
                total_dim += self.hidden_dim
        else:
            # NOTE(review): unlike the matching branch below, total_dim is
            # not increased here, so it stays 0 when num_layers == 0 and
            # layer_spec is None -- confirm this is intended.
            self.fc0_5 = Linear(self.num_features, self.hidden_dim)
    else:
        # Explicit per-layer widths taken from layer_spec.
        if self.num_layers > 0:
            print("using layer specification and ignoring hidden_dim parameter.")
            print("layer_spec: " + str(self.layer_spec))
            self.conv.append(self.rgcn_func(self.num_features, self.layer_spec[0], self.num_relations).to(config.device))
            total_dim += self.layer_spec[0]
            # Indexes layer_spec[i] for every i < num_layers, so layer_spec
            # needs at least num_layers entries.
            for i in range(1, self.num_layers):
                self.conv.append(self.rgcn_func(self.layer_spec[i-1], self.layer_spec[i], self.num_relations).to(config.device))
                total_dim += self.layer_spec[i]
        else:
            self.fc0_5 = Linear(self.num_features, self.hidden_dim)
            total_dim += self.hidden_dim

    # pool1 is only created for these two pooling types.
    if self.pooling_type == "sagpool":
        self.pool1 = RGCNSAGPooling(total_dim, self.num_relations, ratio=config.pooling_ratio, rgcn_func=config.conv_type)
    elif self.pooling_type == "topk":
        self.pool1 = TopKPooling(total_dim, ratio=config.pooling_ratio)

    self.fc1 = Linear(total_dim, self.lstm_dim1)

    # Temporal head: LSTM + attention + decoder LSTM when temporal_type
    # contains "lstm", otherwise a single linear layer.
    if "lstm" in self.temporal_type:
        self.lstm = LSTM(self.lstm_dim1, self.lstm_dim2, batch_first=True)
        self.attn = Attention(self.lstm_dim2)
        self.lstm_decoder = LSTM(self.lstm_dim2, self.lstm_dim2, batch_first=True)
    else:
        self.fc1_5 = Linear(self.lstm_dim1, self.lstm_dim2)

    self.fc2 = Linear(self.lstm_dim2, self.num_classes)
def __init__(
        self,
        G,
        content_embedd,
        len_embed,
        word_embed,
        config,
        layer_infos,
        content_size,  # to be produced on the minibatch side
        question_size,
        user_size,
        deg,
        idx2id):
    """Create the embedding tables, the question-answer LSTM and the aggregators.

    Args:
        G: Stored on ``self.G``.
        content_embedd: Array whose shape sizes ``content_embed`` and whose
            values become its frozen weight.
        len_embed: Array whose values become the frozen weight of
            ``content_len_embed``.
        word_embed: Array whose shape sizes ``word_embed`` and whose values
            become its frozen weight.
        config: Configuration; ``batch_size``, ``lstm_input_size``,
            ``lstm_hidden_size`` and ``drop_out`` are read here.
        layer_infos: Sequence of objects with an ``output_dim`` attribute.
        content_size: Stored on ``self.content_size``.
        question_size: Split point of ``deg`` between questions and users.
        user_size: Number of rows in the trainable user embedding.
        deg: Sequence sliced into ``deg_question`` and ``deg_user``.
        idx2id: Stored on ``self.idx2id``.
    """
    super(UnSupervisedGraphSage, self).__init__()
    word_dim = word_embed.shape[1]

    # Frozen lookup tables built from the supplied arrays.
    self.content_embed = nn.Embedding(content_embedd.shape[0],
                                      content_embedd.shape[1])
    self.content_embed.weight = nn.Parameter(
        numpy2tensor_long(content_embedd), requires_grad=False)

    self.word_embed = nn.Embedding(word_embed.shape[0], word_dim)
    self.word_embed.weight = nn.Parameter(
        numpy2tensor_float(word_embed), requires_grad=False)

    # for pad order sort
    self.content_len_embed = nn.Embedding(len_embed.shape[0], 1)
    self.content_len_embed.weight = nn.Parameter(
        numpy2tensor_int(len_embed), requires_grad=False)

    # https://discuss.pytorch.org/t/can-we-use-pre-trained-word-embeddings-for-weight-initialization-in-nn-embedding/1222
    # The user embedding needs initialisation and stays trainable.
    self.user_embed = nn.Embedding(user_size, word_dim)
    init.xavier_uniform_(self.user_embed.weight)
    self.user_embed.weight = nn.Parameter(self.user_embed.weight)

    self.config = config
    self.batch_size = self.config.batch_size

    # question-answer lstm model to generate vector
    self.lstm = LSTM(self.config.lstm_input_size,
                     self.config.lstm_hidden_size,
                     batch_first=True,
                     dropout=self.config.drop_out)

    self.user_answer = UserAnswer()
    self.question_answer = QuestionAnswer()
    self.score = Score()
    self.neg_score = NegScore()

    self.layer_infos = layer_infos
    # Width entering the first layer, followed by every layer's output width.
    self.dims = [word_dim] + [info.output_dim for info in layer_infos]
    self.aggregators = self._init_agg()

    # deg lists questions first, then users.
    self.deg_question = deg[0:question_size]
    self.deg_user = deg[question_size:]
    self.content_size = content_size
    self.idx2id = idx2id
    self.G = G
def __init__(self, input_dim, output_dim, num_blocks, kernel_size, dropout, generated=False):
    """Create the convolutional stack and the bidirectional LSTM.

    Args:
        input_dim: Input size of the first convolutional block.
        output_dim: Output size of every block; the LSTM uses half of it
            per direction, so it must be even.
        num_blocks: Number of convolutional blocks (at least one).
        kernel_size: Kernel size passed to every block.
        dropout: Dropout rate passed to every block.
        generated: Not used in this constructor.
    """
    super(Encoder, self).__init__()
    assert num_blocks > 0, ('There must be at least one convolutional block in the encoder.')
    assert output_dim % 2 == 0, ('Bidirectional LSTM output dimension must be divisible by 2.')

    # Only the first block sees input_dim; the rest map output_dim -> output_dim.
    block_input_dims = [input_dim] + [output_dim] * (num_blocks - 1)
    blocks = [
        ConvBlock(block_in, output_dim, kernel_size, dropout, 'relu')
        for block_in in block_input_dims
    ]
    self._convs = Sequential(*blocks)
    self._lstm = LSTM(output_dim, output_dim // 2, batch_first=True, bidirectional=True)
class BertBiLSTMCRFSLModel(BertPreTrainedModel):
    """BERT encoder followed by a BiLSTM, a linear emission layer and a CRF."""

    def __init__(self, config):
        """Build all sub-layers from ``config``.

        ``config.return_dict`` is used as the default output format when it
        exists; otherwise tuples are returned.
        """
        super().__init__(config)
        self.num_labels = config.num_labels
        self.return_dict = config.return_dict if hasattr(
            config, "return_dict") else False
        self.bert = BertModel(config)
        self.dropout = Dropout(config.hidden_dropout_prob)
        self.lstm = LSTM(input_size=config.hidden_size,
                         hidden_size=config.hidden_size,
                         batch_first=True,
                         bidirectional=True)
        # The BiLSTM concatenates both directions, hence hidden_size * 2.
        self.classifier = Linear(config.hidden_size * 2, config.num_labels)
        self.crf = CRF(num_tags=config.num_labels, batch_first=True)
        self.init_weights()

    def forward(self,
                input_ids=None,
                attention_mask=None,
                token_type_ids=None,
                labels=None,
                return_dict=None):
        """Compute emission logits and, when ``labels`` is given, the CRF loss.

        Args:
            input_ids: Forwarded to the BERT encoder.
            attention_mask: Forwarded to the BERT encoder and, cast to
                ``uint8``, used as the CRF mask. ``None`` is passed through
                to the CRF unchanged.
            token_type_ids: Forwarded to the BERT encoder.
            labels: Optional tag ids; positions equal to ``-100`` are
                replaced by tag ``0`` before the CRF is evaluated.
            return_dict: Per-call override of the output format. ``None``
                falls back to the value chosen in ``__init__``.

        Returns:
            A ``TokenClassifierOutput`` when dict output is selected,
            otherwise the tuple ``(loss?, logits, *extra_bert_outputs)``
            where ``loss`` is present only when ``labels`` was given.
        """
        # Previously the argument was accepted but ignored; an explicit
        # value now wins over the configured default.
        use_return_dict = self.return_dict if return_dict is None else return_dict

        self.lstm.flatten_parameters()
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids,
                            return_dict=use_return_dict)
        sequence_output = self.dropout(outputs[0])
        lstm_output, _ = self.lstm(sequence_output)
        logits = self.classifier(lstm_output)

        loss = None
        if labels is not None:
            ## [TBD] change {label_id:-100 [CLS], [SEP], [PAD]} into {label_id:32 "O"}
            ## It means they contribute loss to loss function, so it need to be improved
            active_idx = labels != -100
            active_labels = torch.where(active_idx, labels,
                                        torch.tensor(0).type_as(labels))
            # attention_mask defaults to None; do not call .type() on it then.
            crf_mask = None if attention_mask is None else attention_mask.type(torch.uint8)
            # The CRF result is negated to obtain a loss to minimise.
            loss = -1 * self.crf(emissions=logits,
                                 tags=active_labels,
                                 mask=crf_mask)

        if use_return_dict:
            return TokenClassifierOutput(
                loss=loss,
                logits=logits,
                hidden_states=outputs.hidden_states,
                attentions=outputs.attentions,
            )

        output = (logits, ) + outputs[2:]
        return ((loss, ) + output) if loss is not None else output
def _init_lstms(self, output_ids, hidden_size, num_shared_layers, num_taskspecific_layers, dropout, shared_embeddings=None):
    """Create the shared BiLSTM and the per-output BiLSTMs.

    Args:
        output_ids: Not read here; the ids are taken from ``self.output_ids``.
        hidden_size: Hidden size per direction of every LSTM.
        num_shared_layers: Layers of the shared LSTM (0 disables it).
        num_taskspecific_layers: Layers of each task LSTM (0 disables them).
        dropout: Dropout passed to every LSTM.
        shared_embeddings: Optional iterable of groups of output ids; all
            ids in one group share a single LSTM instance.

    Returns:
        ``(shared_lstm, task_lstm)`` where either element is ``None`` when
        its layer count is 0 and ``task_lstm`` is otherwise a ``ModuleDict``
        keyed by output id.
    """
    assert num_shared_layers + num_taskspecific_layers > 0, "There must be at least one LSTM layer"

    embedding_dim = self.embeddings_wrapper.embedding_dim

    def build_bilstm(input_size, num_layers):
        # Every LSTM in this model differs only in input size and depth.
        return LSTM(input_size=input_size,
                    hidden_size=hidden_size,
                    num_layers=num_layers,
                    batch_first=True,
                    dropout=dropout,
                    bidirectional=True)

    has_shared = num_shared_layers > 0
    shared_lstm = build_bilstm(embedding_dim, num_shared_layers) if has_shared else None

    # Task LSTMs read the shared BiLSTM output when there is one,
    # otherwise the raw embeddings.
    taskspecific_input_size = 2 * hidden_size if has_shared else embedding_dim

    if not num_taskspecific_layers > 0:
        return shared_lstm, None

    if shared_embeddings is None:
        task_lstm = nn.ModuleDict({
            outp_id: build_bilstm(taskspecific_input_size, num_taskspecific_layers)
            for outp_id in self.output_ids
        })
        return shared_lstm, task_lstm

    task_lstm = nn.ModuleDict()
    for group in shared_embeddings:
        group_lstm = build_bilstm(taskspecific_input_size, num_taskspecific_layers)
        for outp_id in group:
            task_lstm[outp_id] = group_lstm

    # Add LSTMs for all outputs that don't have one yet
    for outp_id in self.output_ids:
        if outp_id not in task_lstm:
            task_lstm[outp_id] = build_bilstm(taskspecific_input_size, num_taskspecific_layers)

    return shared_lstm, task_lstm
def test_get_dimension_is_correct(self):
    """The wrapper reports 2 * hidden_size when bidirectional, else hidden_size."""
    cases = ((True, 14), (False, 7))
    for bidirectional, expected_output_dim in cases:
        lstm = LSTM(bidirectional=bidirectional,
                    num_layers=3,
                    input_size=2,
                    hidden_size=7,
                    batch_first=True)
        encoder = PytorchSeq2SeqWrapper(lstm)
        assert encoder.get_output_dim() == expected_output_dim
        assert encoder.get_input_dim() == 2
def __init__(self, input_dim, hidden_dim, batch_dim=1, output_dim=1, num_layers=2):
    """Create the LSTM, the linear read-out and the sigmoid.

    Args:
        input_dim: Feature size of each input step.
        hidden_dim: Hidden size of the LSTM.
        batch_dim: Stored on ``self.batch_dim``.
        output_dim: Output size of the linear layer.
        num_layers: Number of stacked LSTM layers.
    """
    super().__init__()

    # Keep the hyper-parameters on the instance.
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.batch_dim = batch_dim
    self.output_dim = output_dim
    self.num_layers = num_layers

    recurrent = LSTM(self.input_dim, self.hidden_dim, self.num_layers)
    self.lstm = recurrent
    self.fc = Linear(self.hidden_dim, self.output_dim)
    self.sigmoid = Sigmoid()
    recurrent.to('cpu')
def initmodel(character_encoder, tag_encoder, embedded_dimension):
    """Create the encoder-decoder components and their optimizer.

    Both encoders are shallow-copied and each copy gets an extra ``BD``
    entry whose index is the copy's size before the insertion.

    :param character_encoder: mapping from character to index
    :param tag_encoder: mapping from tag to index
    :param embedded_dimension: size of the character and tag embeddings
    :return: dict with the module list (``model``), the optimizer, the two
        extended encoders, every individual layer, the random initial
        states and the embedding size
    """
    character_encoder = copy(character_encoder)
    tag_encoder = copy(tag_encoder)
    for encoder in (tag_encoder, character_encoder):
        encoder[BD] = len(encoder)
    n_characters = len(character_encoder)

    character_embedding = Embedding(n_characters, embedded_dimension)
    tag_embedding = Embedding(len(tag_encoder), embedded_dimension)

    # Bidirectional encoder with random initial hidden/cell states.
    encoder_part = LSTM(input_size=embedded_dimension,
                        hidden_size=LSTMDIM,
                        num_layers=1,
                        bidirectional=1).type(DTYPE)
    encoder_state_shape = (2, 1, LSTMDIM)
    ench0 = randn(*encoder_state_shape).type(DTYPE)
    encc0 = randn(*encoder_state_shape).type(DTYPE)

    # The decoder input is two LSTMDIM-sized blocks plus one embedding.
    decoder_input_size = 2 * LSTMDIM + embedded_dimension
    decoder_part = LSTM(input_size=decoder_input_size,
                        hidden_size=LSTMDIM,
                        num_layers=1).type(DTYPE)
    # NOTE(review): these tensors use the decoder *input* size as their last
    # dimension and 2 as their first, while the decoder LSTM has one layer,
    # one direction and hidden size LSTMDIM -- confirm how they are used.
    decoder_state_shape = (2, 1, decoder_input_size)
    dech0 = randn(*decoder_state_shape).type(DTYPE)
    decc0 = randn(*decoder_state_shape).type(DTYPE)

    pred = Linear(LSTMDIM, n_characters).type(DTYPE)
    softmax = LogSoftmax().type(DTYPE)

    components = [
        character_embedding, tag_embedding, encoder_part, decoder_part,
        pred, softmax
    ]
    model = ModuleList(components)
    optimizer = Adam(model.parameters(), lr=LEARNINGRATE, betas=BETAS)

    bundle = {
        'model': model,
        'optimizer': optimizer,
        'cencoder': character_encoder,
        'tencoder': tag_encoder,
        'cembedding': character_embedding,
        'tembedding': tag_embedding,
        'enc': encoder_part,
        'ench0': ench0,
        'encc0': encc0,
        'dec': decoder_part,
        'dech0': dech0,
        'decc0': decc0,
        'pred': pred,
        'sm': softmax,
        'embdim': embedded_dimension
    }
    return bundle
def __init__(self, input_size, hidden_size):
    """Create the single-layer LSTM and zero its carried state.

    Args:
        input_size: Feature size of each input step.
        hidden_size: Hidden size of the LSTM and of the carried state.
    """
    super(RecurrentEmbedding, self).__init__()
    # Same settings as the positional form (1, True, False, 0, False).
    self.lstm = LSTM(input_size,
                     hidden_size,
                     num_layers=1,
                     bias=True,
                     batch_first=False,
                     dropout=0,
                     bidirectional=False)
    self.hidden_size = hidden_size
    # Placeholders; reset() below fills them in.
    self.last_h, self.last_c = None, None
    self.reset()
    self.dbg_t = None
    self.seq = 0
class Encoder(torch.nn.Module):
    """Vanilla Tacotron 2 encoder.

    Details:
        stack of 3 conv. layers 5 × 1 with BN and ReLU, dropout
        output is passed into a Bi-LSTM layer

    Arguments:
        input_dim -- size of the input (supposed character embedding)
        output_dim -- number of channels of the convolutional blocks and last Bi-LSTM
        num_blocks -- number of the convolutional blocks (at least one)
        kernel_size -- kernel size of the encoder's convolutional blocks
        dropout -- dropout rate to be applied after each convolutional block
    Keyword arguments:
        generated -- just for convenience
    """

    def __init__(self, input_dim, output_dim, num_blocks, kernel_size, dropout, generated=False):
        super(Encoder, self).__init__()
        assert num_blocks > 0, (
            'There must be at least one convolutional block in the encoder.')
        assert output_dim % 2 == 0, (
            'Bidirectional LSTM output dimension must be divisible by 2.')

        # The first block maps input_dim -> output_dim, the others keep output_dim.
        conv_blocks = []
        for block_idx in range(num_blocks):
            block_in = input_dim if block_idx == 0 else output_dim
            conv_blocks.append(
                ConvBlock(block_in, output_dim, kernel_size, dropout, 'relu'))
        self._convs = Sequential(*conv_blocks)
        self._lstm = LSTM(output_dim, output_dim // 2, batch_first=True, bidirectional=True)

    def forward(self, x, x_lenghts, x_langs=None):
        """Apply the conv stack and the Bi-LSTM; output is padded to the input length."""
        # x_langs argument is there just for convenience
        rnn_utils = torch.nn.utils.rnn

        # The conv stack runs on dims 1 and 2 swapped; swap back afterwards.
        features = self._convs(x.transpose(1, 2)).transpose(1, 2)
        max_len = features.size(1)

        packed = rnn_utils.pack_padded_sequence(features, x_lenghts, batch_first=True)
        self._lstm.flatten_parameters()
        packed_out, _ = self._lstm(packed)
        padded, _ = rnn_utils.pad_packed_sequence(
            packed_out, batch_first=True, total_length=max_len)
        return padded
def __init__(self, mode, channels=None, num_layers=None):
    """Configure the aggregation mode and build the LSTM/attention if needed.

    Args:
        mode: One of ``'cat'``, ``'max'`` or ``'lstm'``, matched
            case-insensitively.
        channels: Feature size per layer; required for ``'lstm'``.
        num_layers: Number of layers being aggregated; required for
            ``'lstm'``.

    Raises:
        AssertionError: If ``mode`` is not a supported value, or if
            ``channels`` / ``num_layers`` is missing for ``'lstm'``.
    """
    super().__init__()
    self.mode = mode.lower()
    assert self.mode in ['cat', 'max', 'lstm']

    # Test the normalised mode: comparing the raw argument let e.g. 'LSTM'
    # pass the assertion above without ever creating the LSTM.
    if self.mode == 'lstm':
        assert channels is not None, 'channels cannot be None for lstm'
        assert num_layers is not None, 'num_layers cannot be None for lstm'
        # Hidden size per direction.
        lstm_hidden = (num_layers * channels) // 2
        self.lstm = LSTM(channels, lstm_hidden,
                         bidirectional=True, batch_first=True)
        # The attention layer reads both directions.
        self.att = Linear(2 * lstm_hidden, 1)

    self.reset_parameters()
def __init__(self, config):
    """Build the BERT encoder, dropout, BiLSTM, emission layer and CRF.

    Args:
        config: Model configuration. ``num_labels``, ``hidden_size`` and
            ``hidden_dropout_prob`` are read here; ``return_dict`` is used
            when present and defaults to ``False`` otherwise.
    """
    super().__init__(config)
    hidden_size = config.hidden_size
    self.num_labels = config.num_labels
    if hasattr(config, "return_dict"):
        self.return_dict = config.return_dict
    else:
        self.return_dict = False

    self.bert = BertModel(config)
    self.dropout = Dropout(config.hidden_dropout_prob)
    self.lstm = LSTM(input_size=hidden_size,
                     hidden_size=hidden_size,
                     batch_first=True,
                     bidirectional=True)
    # Both LSTM directions are concatenated before classification.
    self.classifier = Linear(hidden_size * 2, config.num_labels)
    self.crf = CRF(num_tags=config.num_labels, batch_first=True)

    self.init_weights()
def __init__(self, input_size, hidden_size, wordEmbed):
    """Create two separate LSTMs and the two 2*hidden -> hidden projections.

    Args:
        input_size: Feature size of each input step.
        hidden_size: Hidden size of each LSTM.
        wordEmbed: Stored on ``self.word_embed``.
    """
    super(Encoder, self).__init__()
    self.input_size, self.hidden_size = input_size, hidden_size
    self.word_embed = wordEmbed

    # Two independent unidirectional LSTMs (named forward and backward).
    self.fwd_rnn = LSTM(input_size, hidden_size, batch_first=True)
    self.bkwd_rnn = LSTM(input_size, hidden_size, batch_first=True)

    # Projections from the concatenated width back to hidden_size.
    concat_size = hidden_size * 2
    self.output_cproj = Linear(concat_size, hidden_size)
    self.output_hproj = Linear(concat_size, hidden_size)
def __init__(self, out_vocab, embed_size, hidden_size, in_channel=13):
    """Create a 1-D convolution, three bidirectional LSTMs and the output layer.

    Args:
        out_vocab: Vocabulary size; the output layer has ``out_vocab + 1`` units.
        embed_size: Number of output channels of the convolution.
        hidden_size: Hidden size per direction of every LSTM.
        in_channel: Number of input channels of the convolution.
    """
    super(Model, self).__init__()
    self.conv1d = Conv1d(in_channel,
                         embed_size,
                         kernel_size=3,
                         stride=1,
                         padding=1,
                         bias=False)

    # Each bidirectional LSTM emits 2 * hidden_size features.
    bidir_width = hidden_size * 2
    self.lstm1 = LSTM(embed_size, hidden_size, bidirectional=True)
    self.lstm2 = LSTM(bidir_width, hidden_size, bidirectional=True)
    self.lstm3 = LSTM(bidir_width, hidden_size, bidirectional=True)

    self.predict = False
    self.linear3 = Linear(bidir_width, out_vocab + 1)
def __init__(self, input_size, hidden_size, bilstm_layers, weights_matrix, cam_type, device, context='art',
             pos_dim=100, src_dim=100, pos_quartiles=4, nr_srcs=3):
    """Create the embeddings, the three context BiLSTMs and the classifier head.

    Args:
        input_size: Input size of every context LSTM.
        hidden_size: Hidden size per direction of every context LSTM.
        bilstm_layers: Number of layers of every context LSTM.
        weights_matrix: Pretrained sentence embeddings; its second
            dimension becomes ``emb_size``.
        cam_type: Model variant; ``'cim*'`` and ``'cnm'`` are treated specially.
        device: Device on which the embedding weights are created.
        context: ``'art'`` for one context document; any other value is
            sized for three.
        pos_dim: Width of the position embedding.
        src_dim: Width of the source embedding.
        pos_quartiles: Number of rows in the position embedding.
        nr_srcs: Number of rows in the source embedding.
    """
    super(ContextAwareModel, self).__init__()

    self.input_size = input_size
    self.hidden_size = hidden_size  # + pos_dim + src_dim
    self.bilstm_layers = bilstm_layers
    self.device = device
    self.cam_type = cam_type
    self.context = context

    # Store pretrained embeddings to use as representations of sentences
    self.weights_matrix = torch.tensor(weights_matrix, dtype=torch.float, device=self.device)
    self.embedding = Embedding.from_pretrained(self.weights_matrix)
    # option to embed position of target sentence in article
    self.embedding_pos = Embedding(pos_quartiles, pos_dim)
    self.embedding_src = Embedding(nr_srcs, src_dim)
    self.emb_size = weights_matrix.shape[1]

    # Initialise LSTMS for article and event context
    context_lstm_kwargs = dict(num_layers=self.bilstm_layers, bidirectional=True, dropout=0.2)
    self.lstm_art = LSTM(self.input_size, self.hidden_size, **context_lstm_kwargs)
    self.lstm_ev1 = LSTM(self.input_size, self.hidden_size, **context_lstm_kwargs)
    self.lstm_ev2 = LSTM(self.input_size, self.hidden_size, **context_lstm_kwargs)

    # Attention-related attributes
    # self.attention = BahdanauAttention(self.hidden_size, key_size=self.hidden_size * 2, query_size=self.emb_size)
    # self.rob_squeezer = nn.Linear(self.emb_size, self.hidden_size)

    self.dropout = Dropout(0.6)
    self.num_labels = 2
    self.pad_index = 0

    # Target sentence plus one article ('art') or three articles (otherwise);
    # each article contributes 2 * hidden_size features.
    n_context_docs = 1 if self.context == 'art' else 3
    self.context_rep_dim = self.emb_size + self.hidden_size * (2 * n_context_docs)

    if self.cam_type == 'cim*':
        self.context_rep_dim += src_dim  # add representation of source

    self.half_context_rep_dim = int(self.context_rep_dim*0.5)
    self.dense = nn.Linear(self.context_rep_dim, self.half_context_rep_dim)

    # 'cnm' is the optional Context Naive setting: classify from the
    # sentence embedding alone.
    if self.cam_type == 'cnm':
        classifier_in_features = self.emb_size
    else:
        classifier_in_features = self.half_context_rep_dim  # + self.emb_size + src_dim, 2) #
    self.classifier = Linear(classifier_in_features, self.num_labels)

    # self.sigm = Sigmoid()
def __init__(self, input_size, hidden_size, wordEmbed):
    """Create two separate LSTMs and two DataParallel-wrapped projections.

    Args:
        input_size: Feature size of each input step.
        hidden_size: Hidden size of each LSTM.
        wordEmbed: Stored on ``self.word_embed``.
    """
    super(Encoder, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = 1
    self.word_embed = wordEmbed

    self.fwd_rnn = LSTM(input_size, hidden_size, batch_first=True)
    # self.fwd_rnn = DataParallel(self.fwd_rnn)
    self.bkwd_rnn = LSTM(input_size, hidden_size, batch_first=True)
    # self.bkwd_rnn = DataParallel(self.bkwd_rnn)

    # Since we have a bi-directional lstm we need to map from hidden*2 to hidden for decoder inputs
    doubled = hidden_size * 2
    self.output_cproj = DataParallel(Linear(doubled, hidden_size))
    self.output_hproj = DataParallel(Linear(doubled, hidden_size))
def __init__(self, bert_model_config: DistilBertConfig):
    """Build the DistilBERT encoder, pooler, LSTM and classification head.

    Args:
        bert_model_config: Model configuration; ``hidden_size``,
            ``dropout`` and ``num_labels`` are read here, and
            ``bert_batch_size`` is read back from the encoder's config.
    """
    super(DocumentDistilBertLSTM, self).__init__(bert_model_config)
    cfg = bert_model_config
    width = cfg.hidden_size

    self.distilbert = DistilBertModel(cfg)
    self.pooler = DistilBertPooler(cfg)
    self.bert_batch_size = self.distilbert.config.bert_batch_size
    self.dropout = nn.Dropout(p=cfg.dropout)

    self.lstm = LSTM(width, width)

    head_layers = [
        nn.Dropout(p=cfg.dropout),
        nn.Linear(width, cfg.num_labels),
        nn.Tanh(),
    ]
    self.classifier = nn.Sequential(*head_layers)
    self.init_weights()
def __init__(self,
             input_size,
             hidden_size,
             dropout=0.0,
             layers=1,
             bidirectional=True,
             to_cuda=False,
             conditional_encoding=True):
    """Create the two LSTMs and their initial hidden/cell states.

    Args:
        input_size: Feature size of each input step.
        hidden_size: Hidden size per direction of both LSTMs.
        dropout: Not read here; the LSTMs use ``Config.dropout``.
        layers: Number of stacked layers in each LSTM.
        bidirectional: Whether both LSTMs are bidirectional.
        to_cuda: Not read here; device placement follows ``Config.cuda``.
        conditional_encoding: When true, no separate initial state is
            allocated for the second LSTM (``h02`` / ``c02`` stay ``None``).
    """
    super(TorchPairedBiDirectionalLSTM, self).__init__()
    self.conditional_encoding = conditional_encoding
    use_bias = True
    num_directions = 2 if bidirectional else 1

    # NOTE(review): the ``dropout`` and ``to_cuda`` arguments are ignored in
    # favour of the global Config values; kept so existing callers see no
    # change -- confirm whether the arguments should take effect.
    lstm_kwargs = dict(num_layers=layers,
                       bias=use_bias,
                       batch_first=True,
                       dropout=Config.dropout,
                       bidirectional=bidirectional)
    self.lstm1 = LSTM(input_size, hidden_size, **lstm_kwargs)
    self.lstm2 = LSTM(input_size, hidden_size, **lstm_kwargs)

    state_shape = (num_directions * layers, Config.batch_size, hidden_size)

    def new_state():
        # torch.FloatTensor(*shape) alone returns uninitialised memory,
        # which can hold arbitrary values (including NaN/inf); zero it so
        # the initial state is well defined.
        state = Variable(torch.FloatTensor(*state_shape).zero_())
        return state.cuda() if Config.cuda else state

    # states of both LSTMs
    self.h01 = new_state()
    self.c01 = new_state()
    self.h02 = None
    self.c02 = None
    if not self.conditional_encoding:
        self.h02 = new_state()
        self.c02 = new_state()