def __init__(self, hparams):
    """Create the filler and role recurrent branches with learned initial states.

    The recurrent width is ``hparams.role_size * hparams.filler_size``. Each
    branch (filler, role) gets a zero-initialised learnable initial cell state
    of shape ``(1, hidden_size)``, an ``LSTMCell`` taking
    ``hparams.embedding_dim`` inputs, a bias-free projection from the hidden
    state to ``*_number`` values, and a bias-free "dictionary" layer mapping
    ``*_number`` to ``*_size``. A shared zero-initialised initial hidden state
    of shape ``(hidden_size,)`` is created last.

    Args:
        hparams: Hyper-parameter object; this constructor reads
            ``role_size``, ``filler_size``, ``temperature``,
            ``embedding_dim``, ``filler_number`` and ``role_number``.
    """
    super().__init__()
    width = hparams.role_size * hparams.filler_size
    self.hidden_size = width
    self.temperature = hparams.temperature

    # Filler branch. Construction order is kept as-is so that parameter
    # registration order and random-initialisation order do not change.
    self.zeroth_filler_cell_state = Parameter(empty(1, width).zero_())
    self.filler_cell = LSTMCell(hparams.embedding_dim, width)
    self.filler_hidden_to_number = Linear(
        width, hparams.filler_number, bias=False)
    self.filler_dictionary = Linear(
        hparams.filler_number, hparams.filler_size, bias=False)

    # Role branch, built the same way as the filler branch.
    self.zeroth_role_cell_state = Parameter(empty(1, width).zero_())
    self.role_cell = LSTMCell(hparams.embedding_dim, width)
    self.role_hidden_to_number = Linear(
        width, hparams.role_number, bias=False)
    self.role_dictionary = Linear(
        hparams.role_number, hparams.role_size, bias=False)

    # Single initial hidden state (1-D, unlike the (1, width) cell states).
    self.zeroth_hidden_state = Parameter(empty(width).zero_())
def get_pytorch_lstm(input_size, used_lstm):
    """Load in a PyTorch LSTM that is a copy of the currently used LSTM.

    A ``PytorchLSTM(input_size, 1)`` is created and its weights are
    overwritten in place: ``bias_hh`` is zeroed, ``bias_ih`` takes
    ``used_lstm.bias``, ``weight_hh`` takes ``used_lstm.weight_hh`` and
    ``weight_ih`` takes ``used_lstm.weight_xh``.

    Args:
        input_size: Number of input features of the cell.
        used_lstm: Object exposing ``bias``, ``weight_hh`` and ``weight_xh``
            (array-likes accepted by ``tensor``).

    Returns:
        The ``PytorchLSTM`` instance with the copied weights.
    """
    # Local import so this block does not depend on a file-level change.
    from torch import no_grad

    lstm = PytorchLSTM(input_size, 1)
    # The parameters are leaf tensors that require grad; writing into them
    # in place is rejected by autograd unless gradient tracking is disabled.
    with no_grad():
        lstm.bias_hh[:] = tensor(zeros((4, )), dtype=float64)[:]
        lstm.bias_ih[:] = tensor(used_lstm.bias, dtype=float64)[:]
        lstm.weight_hh[:] = tensor(used_lstm.weight_hh, dtype=float64)[:]
        lstm.weight_ih[:] = tensor(used_lstm.weight_xh, dtype=float64)[:]
    return lstm
def __init__(self, state_size, num_actions, act_lim=1, batch_size=1,
             hidden_size=128, num_layers=2, dropout=0.85):
    """Construct a multilayer LSTM policy that maps a state to an action.

    Per the original design notes, the agent first decides which dimension
    to act on and then the numerical value of the action on that dimension;
    the last action dimension is assumed to be discrete and to mean "do
    nothing".

    Args:
        state_size: Size of the input state (input width of the first cell).
        num_actions: Number of dimensions of the orthogonal action space.
        act_lim: Numerical bound for action values (stored only).
        batch_size: Stored on the instance; not used in this constructor.
        hidden_size: Hidden width of every LSTM cell; should match the size
            of the encoding layer of the encoding network.
        num_layers: Number of stacked LSTM cells (at least one cell is
            always created).
        dropout: Stored on the instance; not used in this constructor.
    """
    super(PolicyNet, self).__init__()

    self.state_size = state_size
    self.num_actions = num_actions
    self.act_lim = act_lim
    self.batch_size = batch_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.dropout = dropout

    # Stacked LSTM cells: the first consumes the state, the rest consume the
    # hidden state of the layer below.
    self.cell_list = nn.ModuleList()
    layer_input_sizes = [state_size] + [hidden_size] * (num_layers - 1)
    for layer_input in layer_input_sizes:
        self.cell_list.append(
            LSTMCell(input_size=layer_input, hidden_size=hidden_size))

    # Logits over action dimensions (intended for a Categorical distribution).
    self.FC_decision = Linear(hidden_size, num_actions)
    # Mean of the action value on each dimension.
    self.FC_values_mean = Linear(hidden_size, num_actions)
    # Log standard deviation of the action value on each dimension.
    self.FC_values_logstd = Linear(hidden_size, num_actions)

    # Per-layer hidden and cell states carried from one time step to the
    # next; meant to be reset to None at the start of each episode.
    self.h_list = None
    self.c_list = None
def __init__(self, channels, h_g, h_l, std, hidden_size, num_classes,
             learned_start):
    """Initialize the recurrent attention model and its components.

    Args:
        channels: Passed to the glimpse network and the illumination
            network (presumably the number of image channels - confirm).
        h_g: Forwarded to ``glimpse_network`` (hidden size of the fc layer
            for `phi`, per the original notes).
        h_l: Forwarded to ``glimpse_network`` (hidden size of the fc layer
            for `l`, per the original notes).
        std: Standard deviation of the Gaussian policy; stored and passed to
            the illumination network.
        hidden_size: Hidden size of the recurrent cell.
        num_classes: Number of output classes of the classifier.
        learned_start: Forwarded to ``glimpse_network``.
    """
    super(RecurrentAttention, self).__init__()
    self.std = std

    # NOTE(review): the recurrent input width is hard-coded to 256; it
    # presumably has to equal the glimpse network's output size - confirm.
    rnn_input_size = 256

    self.sensor = glimpse_network(h_g, h_l, learned_start, channels)
    self.rnn = LSTMCell(rnn_input_size, hidden_size)
    self.decision = decision_network(hidden_size, 2)
    self.illuminator = illumination_network(hidden_size, channels, std)
    self.classifier = action_network(hidden_size, num_classes)
def __init__(
    self,
    decoding_dim: int,
    target_embedding_dim: int,
    attention: Optional[Attention] = None,
    bidirectional_input: bool = False,
) -> None:
    """Set up a single-``LSTMCell`` step decoder with optional attention.

    Args:
        decoding_dim: Hidden size of the decoder cell. The encoder output
            dimensionality is assumed to be the same.
        target_embedding_dim: Size of the previous-target embedding fed to
            the cell at each step.
        attention: Optional attention module applied to the encoder outputs
            at each step.
        bidirectional_input: Stored flag; not used in this constructor.
    """
    super().__init__(
        decoding_dim=decoding_dim,
        target_embedding_dim=target_embedding_dim,
        decodes_parallel=False,
    )

    self._attention = attention

    # The previous step's output is fed straight back in, so the base input
    # width is the target embedding size.
    cell_input_dim = self.target_embedding_dim
    if self._attention:
        # With attention, a weighted average over the encoder outputs (of
        # width decoding_dim) is concatenated to the previous target
        # embedding at every step.
        cell_input_dim = cell_input_dim + decoding_dim

    # Recurrent cell producing the decoder hidden state at each time step.
    self._decoder_cell = LSTMCell(cell_input_dim, self.decoding_dim)
    self._bidirectional_input = bidirectional_input
def __init__(self,
             embed: nn.Embedding = None,
             hidden_size: int = 200,
             dropout: float = 0.1,
             layer: str = "rcnn",
             z_rnn_size: int = 30,
             ):
    """Build the embedding, encoder and recurrent latent-gate components.

    Args:
        embed: Embedding module. Although the default is ``None``, the
            constructor reads ``embed.weight`` immediately, so a real
            embedding has to be supplied.
        hidden_size: Hidden size passed to the encoder; the encoder output
            is taken to be ``2 * hidden_size`` wide.
        dropout: Dropout probability applied after the embedding.
        layer: Encoder type passed to ``get_encoder``; ``"rcnn"`` also
            selects an ``RCNNCell`` for the z recurrence, anything else an
            ``LSTMCell``.
        z_rnn_size: Hidden size of the z recurrence cell.
    """
    super(DependentLatentModel, self).__init__()

    self.layer = layer
    emb_size = embed.weight.shape[1]
    enc_size = 2 * hidden_size

    self.embed_layer = nn.Sequential(embed, nn.Dropout(p=dropout))
    self.enc_layer = get_encoder(layer, emb_size, hidden_size)

    # The cell consumes the encoder state plus one extra feature
    # (presumably the previous z value - confirm against forward()).
    z_cell_cls = RCNNCell if layer == "rcnn" else LSTMCell
    self.z_cell = z_cell_cls(enc_size + 1, z_rnn_size)

    self.z_layer = KumaGate(enc_size + z_rnn_size)

    self.z = None       # z samples
    self.z_dists = []   # z distribution(s)

    self.report_params()
def __init__(self, arch: dict):
    """Build message, update and voting networks from an architecture dict.

    Keys read from ``arch``:
        * ``'latent_dim'``: width of every latent vector.
        * ``'std_T'``: default number of steps.
        * ``'recurrent_block'``: ``'test'`` (MLP updates) or one of
          ``'std_lstm'``, ``'ln_lstm'``, ``'gru'`` (recurrent-cell updates).
          Any other value leaves the message/update networks undefined.
        * ``'classifier'``: ``'NeuroSAT'`` or ``'CircuitSAT-like'``. The
          latter additionally reads ``'tnorm'``, optional ``'tnorm_train'``
          (defaults to ``'tnorm'``), ``'tnorm_temperature'``,
          ``'temp_train'`` and ``'temp_test'``.
    """
    super().__init__()
    dim = arch['latent_dim']
    self.ldim = dim
    self.defaultSteps = arch['std_T']

    # Learnable initial vectors: allocated uninitialised, then drawn from a
    # standard normal.
    self.Cinit = torch.nn.Parameter(torch.FloatTensor(dim))
    torch.nn.init.normal_(self.Cinit)
    self.Linit = torch.nn.Parameter(torch.FloatTensor(dim))
    torch.nn.init.normal_(self.Linit)

    block = arch['recurrent_block']
    self.rec_block = block
    if block == 'test':
        # Pure-MLP variant: depth-2 MLPs with a hidden layer twice as wide.
        hidden, depth = 2 * dim, 2
        self.Cmsg = batchMLP(dim, hidden, dim, depth, False)
        self.Lmsg = batchMLP(dim, hidden, dim, depth, False)
        self.Cu = batchMLP(dim * 2, hidden, dim, depth, False)
        self.Lu = batchMLP(dim * 3, hidden, dim, depth, False)
    elif block in ('std_lstm', 'ln_lstm', 'gru'):
        # Recurrent variant: depth-4 message MLPs plus a recurrent cell for
        # each update; the L update takes a doubled input width.
        hidden, depth = dim, 4
        self.Cmsg = batchMLP(dim, hidden, dim, depth, False)
        self.Lmsg = batchMLP(dim, hidden, dim, depth, False)
        if block == 'std_lstm':
            cell_cls = LSTMCell
        elif block == 'ln_lstm':
            cell_cls = ln_LSTMCell
        else:
            cell_cls = GRUCell
        self.Cu = cell_cls(dim, dim, True)
        self.Lu = cell_cls(dim * 2, dim, True)

    classifier = arch['classifier']
    self.cl = classifier
    if classifier == 'NeuroSAT':
        self.Lvote = batchMLP(dim, 2 * dim, 1, 2, False)
    elif classifier == 'CircuitSAT-like':
        self.tnormf = arch['tnorm']
        # The training t-norm falls back to the evaluation t-norm.
        self.train_tnorm = (
            arch['tnorm_train'] if 'tnorm_train' in arch else self.tnormf)
        self.tnorm_tmp = arch['tnorm_temperature']
        self.train_temp = arch['temp_train']
        self.test_temp = arch['temp_test']
        self.Lvote = batchMLP(dim, 2 * dim, 1, 2, False)
def __init__(self, arch=None):
    """Build the inference, generative and baseline networks.

    :param arch: dictionary, for overriding entries of the default
        architecture (``default_arch`` is deep-copied first, so the module
        default is never mutated).
    """
    nn.Module.__init__(self)

    cfg = deepcopy(default_arch)
    if arch is not None:
        cfg.update(arch)
    self.arch = cfg

    self.T = cfg.max_steps
    self.reinforce_weight = 0.0

    # Recurrent input = input + z_what + 4 extra values ("where + pres").
    lstm_input_size = cfg.input_size + cfg.z_what_size + 4
    self.lstm_cell = LSTMCell(lstm_input_size, cfg.lstm_hidden_size)

    # Predicts z_where and z_pres from the hidden state.
    self.predict = Predict(cfg)
    # Encodes an object into z_what / decodes z_what into an object.
    self.encoder = Encoder(cfg)
    self.decoder = Decoder(cfg)

    # Spatial transformers, one per direction.
    self.image_to_object = SpatialTransformer(cfg.input_shape,
                                              cfg.object_shape)
    self.object_to_image = SpatialTransformer(cfg.object_shape,
                                              cfg.input_shape)

    # Baseline recurrent network and its scalar value head.
    self.bl_rnn = LSTMCell(lstm_input_size, cfg.baseline_hidden_size)
    self.bl_predict = nn.Linear(cfg.baseline_hidden_size, 1)

    # Priors over presence, location and appearance.
    self.pres_prior = Bernoulli(probs=cfg.z_pres_prob_prior)
    self.where_prior = Normal(loc=cfg.z_where_loc_prior,
                              scale=cfg.z_where_scale_prior)
    self.what_prior = Normal(loc=cfg.z_what_loc_prior,
                             scale=cfg.z_what_scale_prior)

    # Module groups: everything except the baseline, and the baseline.
    main_parts = [self.predict, self.lstm_cell, self.encoder, self.decoder]
    self.air_modules = nn.ModuleList(main_parts)
    baseline_parts = [self.bl_rnn, self.bl_predict]
    self.baseline_modules = nn.ModuleList(baseline_parts)
def __init__(self, token_embedder, embed_dim):
    """Wrap a token embedder with an LSTM-based source encoder.

    Args:
        token_embedder: Embedder exposing ``embed_dim`` (word vector size)
            and ``vocab``.
        embed_dim: Hidden size of the LSTM cell given to the encoder.
    """
    super(RNNSequenceEmbedder, self).__init__()
    self._embed_dim = embed_dim

    # The cell reads word vectors and emits states of width embed_dim.
    self.source_encoder = SimpleSourceEncoder(
        LSTMCell(token_embedder.embed_dim, embed_dim))

    self.vocab = token_embedder.vocab
    self.token_embedder = token_embedder
def __init__(self, token_embedder, hidden_dim, input_dim, agenda_dim):
    """Create a single-layer LSTM decoder cell with learned initial states.

    Args:
        token_embedder: Token embedder stored on the cell.
        hidden_dim: Hidden size of the LSTM cell and of ``h0`` / ``c0``.
        input_dim: Width of the per-step input; also the output width of
            the linear projection.
        agenda_dim: Width of the agenda vector; the cell input is
            ``input_dim + agenda_dim`` wide.
    """
    super(SimpleDecoderCell, self).__init__()

    rnn_input_dim = input_dim + agenda_dim
    self.rnn_cell = LSTMCell(rnn_input_dim, hidden_dim)
    self.linear = Linear(hidden_dim, input_dim)

    # Learnable initial hidden and cell states, starting at zero.
    self.h0 = Parameter(torch.zeros(hidden_dim))
    self.c0 = Parameter(torch.zeros(hidden_dim))

    self.softmax = Softmax(dim=1)
    self.token_embedder = token_embedder
def init_network(self):
    """Create the actor-critic networks and initialise them uniformly.

    The model is an actor-critic built on top of an RNN cell: the actor is
    one fully connected layer and the critic is two fully connected layers.
    Every parameter of every layer is re-drawn from the uniform range
    ``self._uniform_init`` right after that layer is created.
    """
    low, high = self._uniform_init[0], self._uniform_init[1]
    hidden = self._hidden_size

    def _reinit(layer):
        # Overwrite the layer's default initialisation in place.
        for weight in layer.parameters():
            uniform_(weight, low, high)
        return layer

    # Each layer is created and re-initialised before the next is built,
    # which keeps the random-number consumption order unchanged.
    self.rnn = _reinit(LSTMCell(self.n_actions, hidden))
    self.actor = _reinit(Linear(hidden, self.n_actions))
    self.middle_critic = _reinit(Linear(hidden, hidden // 2))
    self.critic = _reinit(Linear(hidden // 2, 1))
def __init__(self, embeddings, max_word=32, multi_image=1, multi_merge='att',
             labels=None, aete_s=2000, aete_r=5, lstm_dim=256, lambda_a=0.85,
             teacher_forcing=None, image_model=None, image_pretrained=None,
             finetune_image=False, image_finetune_epoch=None, rl_opts=None,
             word_idxs=None, device='gpu', verbose=False):
    """Build the image encoder, attentive word LSTM, AETE and joint heads.

    Args:
        embeddings: Pre-trained embedding matrix; ``shape[0]`` is used as
            the vocabulary size and ``shape[1]`` as the embedding width.
        max_word, multi_image, multi_merge, teacher_forcing,
        image_finetune_epoch, rl_opts, word_idxs, verbose: Forwarded
            unchanged to the base-class constructor.
        labels: Passed to ``self._load_labels``.
        aete_s: Output width of the first AETE layer.
        aete_r: Output width of the second AETE layer.
        lstm_dim: Hidden size of the word LSTM (also stored as ``feat_dim``).
        lambda_a: Stored weighting coefficient.
        image_model: Image backbone name; ``'resnet50'`` when ``None``.
        image_pretrained: Forwarded to the image feature factory.
        finetune_image: When false, the image factory is called with its
            second argument set to ``True`` (presumably "frozen" - confirm).
        device: Forwarded to the image feature factory.
    """
    super(TieNet, self).__init__(max_word, multi_image, multi_merge,
                                 teacher_forcing, image_finetune_epoch,
                                 rl_opts, word_idxs, verbose)

    # Label statistics
    label_stats = self._load_labels(labels)
    self.chexpert_labels, self.lp, self.ln, self.lq = label_stats

    # Various NN parameters
    self.feat_dim = lstm_dim
    self.lstm_dim = lstm_dim
    self.lambda_a = lambda_a
    self.dropout = Dropout(0.5)

    # Image processes
    backbone = 'resnet50' if image_model is None else image_model
    self.image_feats, image_dim = ImageClassification.image_features(
        backbone, not finetune_image, True, image_pretrained, device)
    self._init_multi_image(image_dim, self.VISUAL_NUM, lstm_dim)
    self.image_proj = Linear(image_dim, lstm_dim)

    # Word processes: state initialisers, attention layers and gate.
    self.init_h = Linear(lstm_dim, lstm_dim)
    self.init_c = Linear(lstm_dim, lstm_dim)
    self.att_v = Linear(image_dim, image_dim)
    self.att_h = Linear(lstm_dim, image_dim)
    self.att_a = Linear(image_dim, 1)
    self.gate = Linear(lstm_dim, image_dim)

    vocab_size, embed_dim = embeddings.shape[0], embeddings.shape[1]
    # The word LSTM consumes an image-sized vector plus a word embedding.
    self.lstm_word = LSTMCell(image_dim + embed_dim, lstm_dim)
    self.embeddings = Embedding.from_pretrained(
        embeddings, freeze=False,
        padding_idx=PretrainedEmbeddings.INDEX_PAD)
    self.embed_num = self.embeddings.num_embeddings
    self.word_dense = Linear(lstm_dim, vocab_size, bias=False)

    # AETE processes
    self.aete1 = Linear(lstm_dim, aete_s)
    self.aete2 = Linear(aete_s, aete_r)

    # Joint
    self.joint = Linear(lstm_dim + image_dim, self.DISEASE_NUM * 2)
def __init__(self, input_size, hidden_size):
    """Create a weight-normalised LSTM cell plus two multiplier projections.

    Args:
        input_size: Width of the per-step input.
        hidden_size: Width of the hidden state.
    """
    super(mLSTMCell, self).__init__()
    self._input_size = input_size
    self._hidden_size = hidden_size

    # Apply weight normalisation to both weight matrices of the LSTM cell,
    # input-to-hidden first and hidden-to-hidden second.
    cell = LSTMCell(input_size, hidden_size)
    for weight_name in ('weight_ih', 'weight_hh'):
        cell = weight_norm(cell, name=weight_name)
    self._lstm_cell = cell

    # Bias-free, weight-normalised projections of the input and of the
    # hidden state, both to hidden_size.
    self._i_multiplier = weight_norm(
        Linear(input_size, hidden_size, bias=False))
    self._h_multiplier = weight_norm(
        Linear(hidden_size, hidden_size, bias=False))
def __init__(self, input_size, output_size, hidden_size):
    """Create an auto-regressive LSTM cell with a softmax output head.

    Args:
        input_size: Width of the external input at each step.
        output_size: Width of the output (and of the fed-back output).
        hidden_size: Hidden size of the LSTM cell.
    """
    super(LSTMAuto, self).__init__()

    self.input_size = input_size
    self.output_size = output_size
    self.hidden_size = hidden_size

    # The previous step's output is appended to the input, hence the
    # combined input width.
    cell_input_size = output_size + input_size
    self.lstmCell = LSTMCell(cell_input_size, hidden_size)

    # Output head. NOTE(review): Softmax is built without an explicit dim,
    # so the axis is chosen implicitly from the input rank.
    self.linear = nn.Linear(hidden_size, output_size)
    self.softmax = nn.Softmax()
def __init__(self, input_shape, id, normalize, nb_hidden):
    """Create the recurrent cell, layer-normalised or plain.

    Args:
        input_shape: Shape of the input; only ``input_shape[0]`` is used,
            as the cell's input width.
        id: Identifier forwarded to the base-class constructor.
        normalize: If truthy, use ``LSTMCellLayerNorm``; otherwise a plain
            ``LSTMCell`` whose two bias vectors are zeroed.
        nb_hidden: Hidden size of the cell.
    """
    super().__init__(input_shape, id)
    self._nb_hidden = nb_hidden

    in_features = input_shape[0]
    if not normalize:
        plain_cell = LSTMCell(in_features, nb_hidden)
        # Start the plain cell with zero biases.
        plain_cell.bias_ih.data.fill_(0)
        plain_cell.bias_hh.data.fill_(0)
        self.lstm = plain_cell
    else:
        self.lstm = LSTMCellLayerNorm(in_features, nb_hidden)
def __init__(self, h_g, h_l, std, hidden_size, num_classes, patch_amount,
             patch_size, scale_factor):
    """Assemble the glimpse sensor, recurrent core and output heads.

    Args:
        h_g: Accepted but not used in this constructor.
        h_l: Accepted but not used in this constructor.
        std: Standard deviation stored on the model and passed to the
            location network.
        hidden_size: Width shared by the sensor output, the recurrent cell
            and the inputs of all heads.
        num_classes: Output size of the classifier head.
        patch_amount: Forwarded to ``GlimpseNetwork``.
        patch_size: Forwarded to ``GlimpseNetwork``.
        scale_factor: Forwarded to ``GlimpseNetwork``.
    """
    super(AdaptiveAttention, self).__init__()
    self.std = std

    sensor_options = dict(patch_amount=patch_amount,
                          patch_size=patch_size,
                          scale_factor=scale_factor)
    self.sensor = GlimpseNetwork(hidden_size, **sensor_options)

    # Recurrent core: input and hidden widths are both hidden_size.
    self.rnn = LSTMCell(hidden_size, hidden_size)

    # Heads that all read the recurrent state.
    self.decider = DecisionNetwork(hidden_size, 2)
    self.locator = LocationNetwork(hidden_size, 2, std)
    self.classifier = ActionNetwork(hidden_size, num_classes)
    self.baseliner = BaselineNetwork(hidden_size, 1)
def __init__(self, emb_dim, classifier_type):
    """Build the message, update and classification layers.

    :param emb_dim: width of the node embeddings.
    :param classifier_type: classifier kind:
        1 - by the state of the top node,
        2 - by the state of all nodes,
        3 - by the state of variable nodes only,
        5 - evaluation with Zadeh logic (implemented by ``GodelEvaluation``),
        6 - evaluation with probabilistic logic,
        7 - evaluation with Lukasiewicz logic.
        Value 4 is accepted as well; like 1-3 it creates no extra evaluator.
    """
    super().__init__()
    self.classifier_type = classifier_type
    self.emb_dim = emb_dim
    dim = emb_dim

    # Fixed (non-trainable) start embedding and one-hot node-type codes,
    # created as plain tensors rather than Parameters.
    self.start_embeddings1 = torch.zeros([dim], requires_grad=False)
    self.con_embeddings1 = torch.tensor([1., 0., 0., 0., ],
                                        requires_grad=False)
    self.dis_embeddings1 = torch.tensor([0., 1., 0., 0., ],
                                        requires_grad=False)
    self.neg_embeddings1 = torch.tensor([0., 0., 1., 0., ],
                                        requires_grad=False)
    self.var_embeddings1 = torch.tensor([0., 0., 0., 1., ],
                                        requires_grad=False)

    # Two message networks; each takes an embedding concatenated with a
    # 4-element type code (hence dim + 4) and maps it back to dim.
    self.msg1_1_f = Linear(dim + 4, 2 * dim, True)
    self.msg1_2_f = Linear(2 * dim, dim, True)
    self.msg2_1_f = Linear(dim + 4, 2 * dim, True)
    self.msg2_2_f = Linear(2 * dim, dim, True)

    # NOTE(review): a third positional argument (dim) is passed to LSTMCell;
    # for torch.nn.LSTMCell that slot is ``bias`` - confirm this is intended.
    self.update_f = LSTMCell(dim, dim, dim)

    self.clf_1 = Linear(dim, dim, True)
    self.clf_2 = Linear(dim, 1, False)

    # Only the logic-based classifier types need an evaluator object.
    kind = self.classifier_type
    if kind == 5:
        self.cl = GodelEvaluation()
    elif kind == 6:
        self.cl = ProbabilisticEvaluation()
    elif kind == 7:
        self.cl = LukasieviczEvaluation()
def __init__(self,
             input_size: int,
             hidden_size: int = 200,
             bias: bool = True,
             dropout: float = 0.):
    """Create an LSTM-cell based component with per-step state storage.

    Args:
        input_size: Width of the cell input.
        hidden_size: Hidden size of the cell; also forwarded to the base
            class and exposed as ``output_dim``.
        bias: Whether the LSTM cell uses bias terms.
        dropout: Dropout probability.
    """
    super().__init__(hidden_size)
    self.output_dim = hidden_size

    # Empty containers that start out with no entries.
    self.states: List[Tensor] = []
    self.items = []

    self.cell = LSTMCell(input_size, hidden_size, bias)
    # TODO: the dropout mask should be the same for every instance.
    self.dropout = Dropout(dropout)
def __init__(self, nb_input_channel, nb_out_channel, normalize):
    """Create the recurrent cell, layer-normalised or plain.

    Args:
        nb_input_channel: Input width of the cell.
        nb_out_channel: Hidden (output) width of the cell.
        normalize: If truthy, use ``LSTMCellLayerNorm``; otherwise a plain
            ``LSTMCell`` whose two bias vectors are zeroed.
    """
    super().__init__()
    self._nb_output_channel = nb_out_channel

    if not normalize:
        plain_cell = LSTMCell(nb_input_channel, self._nb_output_channel)
        # Start the plain cell with zero biases.
        plain_cell.bias_ih.data.fill_(0)
        plain_cell.bias_hh.data.fill_(0)
        self.lstm = plain_cell
    else:
        self.lstm = LSTMCellLayerNorm(nb_input_channel,
                                      self._nb_output_channel)
def __init__(self, params: configargparse.Namespace, att: torch.nn.Module = None):
    """Neural network module for the speller of the sequence-to-sequence LAS model.

    :params configargparse.Namespace params: The training options. Fields
        read here: ``odim``, ``demb_dim``, ``dtype``, ``dhiddens``,
        ``dlayers``, ``eprojs``, ``ddropout``, ``ssprob``, ``char_list``
        and ``text_pad``.
    :params torch.nn.Module att: The attention module
    """
    super(Speller, self).__init__()

    # Embedding layer
    self.embed = Embedding(params.odim, params.demb_dim)

    # Decoder stack and its per-layer dropout modules
    self.decoder = ModuleList()
    self.dropout_dec = ModuleList()
    self.dtype = params.dtype
    self.dunits = params.dhiddens
    self.dlayers = params.dlayers

    # "lstm" selects LSTM cells; any other dtype selects GRU cells.
    cell_cls = LSTMCell if self.dtype == "lstm" else GRUCell

    # First layer: consumes the encoder projection plus the embedding.
    self.decoder.append(
        cell_cls(params.eprojs + params.demb_dim, params.dhiddens))
    self.dropout_dec.append(Dropout(p=params.ddropout))
    self.dropout_emb = Dropout(p=params.ddropout)

    # Remaining layers (only when dlayers > 1); these are paired with
    # LockedDropout, whereas the first layer uses plain Dropout.
    for _ in range(1, params.dlayers):
        self.decoder.append(cell_cls(params.dhiddens, params.dhiddens))
        self.dropout_dec.append(LockedDropout(p=params.ddropout))

    # Projection to the output (softmax) space
    self.projections = Linear(params.dhiddens, params.odim)

    # Attention module
    self.att = att

    # Scheduled sampling probability
    self.sampling_probability = params.ssprob

    # EOS and SOS share the last index of the character list.
    self.eos = len(params.char_list) - 1
    self.sos = self.eos
    self.ignore_id = params.text_pad
def __init__(self,
             input_dim: int,
             hidden_dim: int,
             num_layers: int = 1,
             layer_dropout: float = 0.0,
             recurrent_dropout: float = 0.0):
    """Create a stack of ``num_layers`` LSTM cells.

    The first cell maps ``input_dim`` to ``hidden_dim``; every further cell
    maps ``hidden_dim`` to ``hidden_dim``.

    Args:
        input_dim: Width of the input of the first layer.
        hidden_dim: Hidden size of every layer.
        num_layers: Number of stacked layers; must be at least 1.
        layer_dropout: Dropout rate between layers. Must be 0.0 when there
            is only one layer.
        recurrent_dropout: Stored recurrent dropout rate.

    Raises:
        AssertionError: If ``num_layers`` is smaller than 1.
        ConfigurationError: If ``layer_dropout`` is positive but the stack
            has fewer than two layers.
    """
    # Local import so this block does not depend on a file-level change.
    from torch.nn import ModuleList

    super().__init__(input_dim, hidden_dim)
    self.hidden = None
    self.context = None
    assert num_layers >= 1
    self.layers = num_layers

    self._lstm_cell0 = LSTMCell(input_dim, hidden_dim)
    # A plain Python list would hide these cells from parameters(),
    # state_dict() and .to(); a ModuleList registers them while still
    # supporting indexing, iteration and len().
    self._lstm_cellL = ModuleList(
        [LSTMCell(hidden_dim, hidden_dim) for _ in range(num_layers - 1)])

    self.layer_dropout_rate = layer_dropout
    self.recurrent_dropout_rate = recurrent_dropout
    if self.layer_dropout_rate > 0.0 and num_layers < 2:
        raise ConfigurationError(
            "Layer dropout must be 0.0 if we have only a single layer")
def __init__(self, encoder, hidden_size, num_programs,
             num_non_primary_programs, embedding_dim, encoding_dim,
             indices_non_primary_programs, learning_rate=1e-3,
             temperature=0.1):
    """Build the program-embedding, recurrent core, actor and critic networks.

    Args:
        encoder: Encoder module stored on the policy.
        hidden_size: Hidden size of the LSTM cell and input size of the
            actor and critic heads.
        num_programs: Output size given to the actor head.
        num_non_primary_programs: Number of rows of the program embedding.
        embedding_dim: Width of a program embedding.
        encoding_dim: Width of the encoder output; the LSTM input is
            ``encoding_dim + embedding_dim`` wide.
        indices_non_primary_programs: Iterable of program indices; each is
            mapped to its position in this iterable.
        learning_rate: Learning rate handed to ``init_optimizer``.
        temperature: Stored temperature value.
    """
    super(Policy, self).__init__()

    self._uniform_init = (-0.1, 0.1)

    self._hidden_size = hidden_size
    self.num_programs = num_programs
    self.num_non_primary_programs = num_non_primary_programs

    self.embedding_dim = embedding_dim
    self.encoding_dim = encoding_dim

    # Initialize networks
    self.Mprog = Embedding(num_non_primary_programs, embedding_dim)
    self.encoder = encoder

    lstm_input_dim = self.encoding_dim + self.embedding_dim
    self.lstm = LSTMCell(lstm_input_dim, self._hidden_size)
    self.critic = CriticNet(self._hidden_size)
    self.actor = ContinuousActorNet(self._hidden_size, self.num_programs)

    self.temperature = temperature

    self.init_networks()
    self.init_optimizer(lr=learning_rate)

    # Relative indices of non-primary programs (to deal with task indices):
    # absolute program index -> position in the supplied sequence.
    self.relative_indices = {
        absolute: relative
        for relative, absolute in enumerate(indices_non_primary_programs)
    }
def init_network(self):
    """Create the actor-critic networks, the image encoder and the padding.

    The model is an actor-critic built on top of an RNN cell: the actor is
    one fully connected layer and the critic is two fully connected layers.
    Every parameter of those four layers is re-drawn from the uniform range
    ``self.uniform_init`` right after the layer is created. The ResNet-34
    encoder and the zero-padding layer keep their default initialisation.
    """
    low, high = self.uniform_init[0], self.uniform_init[1]
    hidden = self.hidden_size

    def _reinit(layer):
        # Overwrite the layer's default initialisation in place.
        for weight in layer.parameters():
            uniform_(weight, low, high)
        return layer

    # Each layer is created and re-initialised before the next is built,
    # which keeps the random-number consumption order unchanged.
    self.rnn = _reinit(LSTMCell(self.action_space, hidden))
    self.actor = _reinit(Linear(hidden, self.action_space))
    self.middle_critic = _reinit(Linear(hidden, hidden // 2))
    self.critic = _reinit(Linear(hidden // 2, 1))

    # Encoder whose output size is self.embedding.
    self.encoder = resnet34(num_classes=self.embedding)
    # Pads the last dimension by 30 on the left and 20 on the right.
    self.padding = ZeroPad2d((30, 20, 0, 0))
def __init__(
    self,
    decoding_dim: int,
    target_embedding_dim: int,
    attention: Optional[Attention] = None,
    bidirectional_input: bool = False,
    num_decoder_layers: int = 1,
    accumulate_hidden_states: bool = False,
    dropout: float = 0.2,
) -> None:
    """Set up an LSTM step decoder with optional attention and stacking.

    Args:
        decoding_dim: Hidden size of the decoder. The encoder output
            dimensionality is assumed to be the same.
        target_embedding_dim: Size of the previous-target embedding fed to
            the decoder at each step.
        attention: Optional attention module applied to the encoder outputs
            at each step.
        bidirectional_input: Stored flag; not used in this constructor.
        num_decoder_layers: With more than one layer a multi-layer ``LSTM``
            is used; otherwise a single ``LSTMCell``.
        accumulate_hidden_states: Stored flag; not used in this constructor.
        dropout: Dropout passed to the multi-layer ``LSTM``; ignored for
            the single-cell decoder.
    """
    super().__init__(
        decoding_dim=decoding_dim,
        target_embedding_dim=target_embedding_dim,
        decodes_parallel=False,
    )

    self._attention = attention

    # The previous step's output is fed straight back in, so the base input
    # width is the target embedding size.
    step_input_dim = self.target_embedding_dim
    if self._attention:
        # With attention, a weighted average over the encoder outputs (of
        # width decoding_dim) is concatenated to the previous target
        # embedding at every step.
        step_input_dim = step_input_dim + decoding_dim

    self._num_decoder_layers = num_decoder_layers
    if self._num_decoder_layers <= 1:
        # Single recurrent cell producing the hidden state at each step.
        # TODO (pradeep): Do not hardcode decoder cell type.
        self._decoder_cell = LSTMCell(step_input_dim, self.decoding_dim)
    else:
        self._decoder_cell = LSTM(
            input_size=step_input_dim,
            hidden_size=self.decoding_dim,
            num_layers=self._num_decoder_layers,
            dropout=dropout,
        )

    self._bidirectional_input = bidirectional_input
    self._accumulate_hidden_states = accumulate_hidden_states
def __init__(self, embeddings, max_word=32, multi_image=1, multi_merge='att',
             context_dim=512, lstm_dim=1000, lambda_a=1.0,
             teacher_forcing=None, image_model=None, image_pretrained=None,
             finetune_image=False, image_finetune_epoch=None, rl_opts=None,
             word_idxs=None, device='gpu', verbose=False):
    """Build the image encoder, attentive word LSTM and deep-output layers.

    Args:
        embeddings: Pre-trained embedding matrix; ``shape[0]`` is used as
            the vocabulary size and ``shape[1]`` as the embedding width.
        max_word, multi_image, multi_merge, teacher_forcing,
        image_finetune_epoch, rl_opts, word_idxs, verbose: Forwarded
            unchanged to the base-class constructor.
        context_dim: Width of the projected image context (also stored as
            ``feat_dim``).
        lstm_dim: Hidden size of the word LSTM.
        lambda_a: Stored weighting coefficient.
        image_model: Image backbone name; ``'vgg'`` when ``None``.
        image_pretrained: Forwarded to the image feature factory.
        finetune_image: When false, the image factory is called with its
            second argument set to ``True`` (presumably "frozen" - confirm).
        device: Forwarded to the image feature factory.
    """
    super(ShowAttendAndTell, self).__init__(max_word, multi_image,
                                            multi_merge, teacher_forcing,
                                            image_finetune_epoch, rl_opts,
                                            word_idxs, verbose)
    self.feat_dim = context_dim
    self.lstm_dim = lstm_dim
    self.lambda_a = lambda_a
    self.dropout = Dropout(0.5)

    # Image processes
    backbone = 'vgg' if image_model is None else image_model
    self.image_feats, image_dim = ImageClassification.image_features(
        backbone, not finetune_image, True, image_pretrained, device)
    self._init_multi_image(image_dim, self.VISUAL_NUM, lstm_dim)
    self.image_proj = Linear(image_dim, context_dim)

    # Word processes: state initialisers, attention layers and gate.
    self.init_h = Linear(context_dim, lstm_dim)
    self.init_c = Linear(context_dim, lstm_dim)
    self.att_v = Linear(image_dim, image_dim)
    self.att_h = Linear(lstm_dim, image_dim)
    self.att_a = Linear(image_dim, 1)
    self.gate = Linear(lstm_dim, image_dim)

    vocab_size, embed_dim = embeddings.shape[0], embeddings.shape[1]
    # The word LSTM consumes an image-sized vector plus a word embedding.
    self.lstm_word = LSTMCell(image_dim + embed_dim, lstm_dim)
    self.embeddings = Embedding.from_pretrained(
        embeddings, freeze=False,
        padding_idx=PretrainedEmbeddings.INDEX_PAD)
    self.embed_num = self.embeddings.num_embeddings

    # Deep output: hidden state and image vector are each projected to the
    # embedding width, then mapped to vocabulary logits without a bias.
    self.lh = Linear(lstm_dim, embed_dim)
    self.lz = Linear(image_dim, embed_dim)
    self.lo = Linear(embed_dim, vocab_size, bias=False)
def __init__(self, target_token_embedder, input_dim, agenda_dim, decoder_dim,
             encoder_dim, attn_dim, num_layers, num_inputs, dropout_prob,
             disable_attention):
    """Create a multi-layer LSTM decoder cell with per-input attention.

    Args:
        target_token_embedder: Embedder exposing ``embed_dim``; stored.
        input_dim: Width of the word vectors fed to the first layer.
        agenda_dim: Width of the agenda vector appended to every layer input.
        decoder_dim: Hidden size of every layer and of ``h0`` / ``c0``.
        encoder_dim: Width of one attention context.
        attn_dim: Attention dimension passed to each ``Attention`` module.
        num_layers: Number of stacked LSTM cells.
        num_inputs: Number of attended inputs (one ``Attention`` each).
        dropout_prob: Dropout probability.
        disable_attention: If truthy, attention contexts are left out of
            the layer inputs and of the vocabulary projection input.
    """
    super(AttentionDecoderCell, self).__init__()

    target_dim = target_token_embedder.embed_dim
    self.num_layers = num_layers
    self.num_inputs = num_inputs
    self.disable_attention = disable_attention

    # Every RNN layer input is augmented with the agenda and, unless
    # attention is disabled, num_inputs attention contexts (see `x_augment`
    # in `forward`).
    context_dim = encoder_dim * num_inputs
    augment_dim = agenda_dim if disable_attention else context_dim + agenda_dim

    # The cells live in a plain list, so each one is also registered
    # explicitly under its own name.
    self.rnn_cells = []
    for layer in range(num_layers):
        # Only the first layer takes word vectors.
        base_dim = decoder_dim if layer != 0 else input_dim
        cell = LSTMCell(base_dim + augment_dim, decoder_dim)
        self.add_module('decoder_layer_{}'.format(layer), cell)
        self.rnn_cells.append(cell)

    # Words are predicted from the hidden state h, plus the attention
    # contexts when attention is enabled (see `z` in `forward`).
    z_dim = decoder_dim if disable_attention else decoder_dim + context_dim

    # TODO(kelvin): these big params may need regularization
    self.vocab_projection_pos = Linear(z_dim, target_dim)
    self.vocab_projection_neg = Linear(z_dim, target_dim)
    self.relu = torch.nn.ReLU()

    # Learnable initial hidden and cell states, starting at zero.
    self.h0 = Parameter(torch.zeros(decoder_dim))
    self.c0 = Parameter(torch.zeros(decoder_dim))

    self.vocab_softmax = Softmax()

    # One attention module per input, registered explicitly as well.
    self.input_attentions = []
    for i in range(num_inputs):
        attention = Attention(encoder_dim, decoder_dim, attn_dim)
        self.add_module('input_attention_{}'.format(i), attention)
        self.input_attentions.append(attention)

    self.target_token_embedder = target_token_embedder
    self.dropout = Dropout(dropout_prob)
def __init__(self, hparams):
    """Create the recurrent cell, its learned initial inputs and sub-modules.

    Args:
        hparams: Hyper-parameter object; this constructor reads
            ``position_number``, ``embedding_dim``, ``argument_size``,
            ``relation_size`` and ``position_size`` and passes the whole
            object on to ``Attention`` and ``UnbindingModule``.
    """
    super().__init__()

    slots = 1 + hparams.position_number
    self.input_size = slots * hparams.embedding_dim
    self.hidden_size = (hparams.argument_size
                        * hparams.relation_size
                        * hparams.position_size)

    # The initial tuple is allocated without being filled here; the initial
    # cell state is explicitly zeroed.
    self.zeroth_tuple = Parameter(empty(1, self.input_size))
    self.zeroth_cell_state = Parameter(empty(1, self.hidden_size).zero_())

    self.cell = LSTMCell(self.input_size, self.hidden_size)
    self.attention = Attention(hparams)
    self.unbinding_module = UnbindingModule(hparams)

    self.init_weights()
def __init__(self, output_size, hidden_size, seq_len, num_layers=1,
             bias=True, dropout=0, bidirectional=False):
    """Create a single-cell LSTM decoder with a softmax output head.

    Args:
        output_size: Width of the decoder output; also the cell input width.
        hidden_size: Hidden size of the LSTM cell.
        seq_len: Stored sequence length.
        num_layers: Stored only; a single ``LSTMCell`` is always built.
        bias: Stored only; not forwarded to the cell.
        dropout: Stored only; no dropout layer is created here.
        bidirectional: Stored only.
    """
    super(DecoderLSTM, self).__init__()

    # Configuration kept on the instance.
    self.output_size = output_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bias = bias
    self.dropout = dropout
    self.dropout_state = {}
    self.bidirectional = bidirectional
    self.seq_len = seq_len

    # Recurrent cell and output head. NOTE(review): Softmax is built
    # without an explicit dim, so the axis is chosen implicitly.
    self.lstm = LSTMCell(output_size, hidden_size)
    self.linear = nn.Linear(hidden_size, output_size)
    self.softmax = nn.Softmax()
def __init__(self, input_size, output_size, hidden_size, num_layers, lookup):
    """Create a stack of LSTM cells with a softmax output head.

    Args:
        input_size: Input width of the first LSTM cell.
        output_size: Width of the output produced by the linear head.
        hidden_size: Hidden size of every cell; also the input width of
            every cell after the first.
        num_layers: Number of stacked LSTM cells.
        lookup: Stored on the instance; not used in this constructor.
    """
    super(LSTMAutoParams, self).__init__()

    self.input_size = input_size
    self.output_size = output_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lookup = lookup

    # The cells must live in a ModuleList: a plain Python list would hide
    # them from parameters(), state_dict() and .to(), so they would never
    # be trained, saved or moved to another device.
    self.layers = nn.ModuleList()
    layer_input_size = input_size
    for _ in range(num_layers):
        self.layers.append(LSTMCell(layer_input_size, hidden_size))
        # Every layer after the first reads the previous layer's state.
        layer_input_size = hidden_size

    # Output head. NOTE(review): Softmax is built without an explicit dim,
    # so the axis is chosen implicitly from the input rank.
    self.linear = nn.Linear(hidden_size, output_size)
    self.softmax = nn.Softmax()
def __init__(self, nb_input_channel, nb_out_channel, normalize):
    """Create a linear projection followed by a recurrent cell.

    The output width is fixed to 256 and the linear layer expects 2592
    input features; ``nb_input_channel`` and ``nb_out_channel`` are accepted
    but not used here.

    Args:
        nb_input_channel: Unused in this constructor.
        nb_out_channel: Unused in this constructor.
        normalize: If truthy, use ``LSTMCellLayerNorm`` together with a
            ``BatchNorm1d`` after the linear layer; otherwise an
            ``Identity`` and a plain ``LSTMCell`` with zeroed biases.
    """
    super().__init__()
    width = 256
    self._nb_output_channel = width
    self.linear = Linear(2592, width)

    if not normalize:
        self.bn_linear = Identity()
        plain_cell = LSTMCell(width, width)
        # Start the plain cell with zero biases.
        plain_cell.bias_ih.data.fill_(0)
        plain_cell.bias_hh.data.fill_(0)
        self.lstm = plain_cell
    else:
        # hack for experiment
        self.lstm = LSTMCellLayerNorm(width, width)
        self.bn_linear = BatchNorm1d(width)