Example #1
    def __init__(self, hparams):
        super().__init__()
        self.hidden_size = hparams.role_size * hparams.filler_size
        self.temperature = hparams.temperature

        self.zeroth_filler_cell_state = Parameter(
            empty(1, self.hidden_size).zero_())
        self.filler_cell = LSTMCell(hparams.embedding_dim, self.hidden_size)

        self.filler_hidden_to_number = Linear(self.hidden_size,
                                              hparams.filler_number,
                                              bias=False)
        self.filler_dictionary = Linear(hparams.filler_number,
                                        hparams.filler_size,
                                        bias=False)

        self.zeroth_role_cell_state = Parameter(
            empty(1, self.hidden_size).zero_())
        self.role_cell = LSTMCell(hparams.embedding_dim, self.hidden_size)

        self.role_hidden_to_number = Linear(self.hidden_size,
                                            hparams.role_number,
                                            bias=False)
        self.role_dictionary = Linear(hparams.role_number,
                                      hparams.role_size,
                                      bias=False)

        self.zeroth_hidden_state = Parameter(empty(self.hidden_size).zero_())
Example #2
def get_pytorch_lstm(input_size, used_lstm):
    """Load in a PyTorch LSTM cell that is a copy of the currently used LSTM."""
    lstm = PytorchLSTM(input_size, 1)
    # Copy the parameters without tracking gradients; writing into parameters
    # in-place outside of no_grad() raises a runtime error.
    with no_grad():
        lstm.bias_hh[:] = tensor(zeros((4,)), dtype=float64)
        lstm.bias_ih[:] = tensor(used_lstm.bias, dtype=float64)
        lstm.weight_hh[:] = tensor(used_lstm.weight_hh, dtype=float64)
        lstm.weight_ih[:] = tensor(used_lstm.weight_xh, dtype=float64)
    return lstm
Example #3
    def __init__(self,
                 state_size,
                 num_actions,
                 act_lim=1,
                 batch_size=1,
                 hidden_size=128,
                 num_layers=2,
                 dropout=0.85):
        """ Construct a multilayer LSTM that computes the action given the state

            The agent will first decide which dimension to act on and then decide the numerical value of the aciton on that dimension

            - shape of input state is given by state_size
            - dimensions of the orthogonal action space is given by num_actions, whereas act_lim gives the numerical bound for action values
                Note: the last action dimension is assumed to be discrete, meaning the agent "does nothing".
            - hidden_size should match that of the encoding network (i.e. the size of the encoding layer)

        """
        super(PolicyNet, self).__init__()

        self.state_size = state_size
        self.num_actions = num_actions
        self.act_lim = act_lim
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout

        # Create multilayer LSTM cells
        self.cell_list = nn.ModuleList()
        self.cell_list.append(
            LSTMCell(input_size=state_size, hidden_size=hidden_size))
        for i in range(1, num_layers):
            self.cell_list.append(
                LSTMCell(input_size=hidden_size, hidden_size=hidden_size))

        # Linear layer that decides the dimension the agent wants to act on.
        #   Return the logits to be used to construct a Categorical distribution
        self.FC_decision = Linear(hidden_size, num_actions)
        # Linear layer that computes the mean value of the agent's action on each dimension
        self.FC_values_mean = Linear(hidden_size, num_actions)
        # Linear layer that computes the log standard deviation of the agent's action on each dimension
        self.FC_values_logstd = Linear(hidden_size, num_actions)

        # Variables to store lists of hidden states and cell states at the end of each time step so as to be used as
        #   the input values to the next time step
        # Reset to None at the start of each episode
        self.h_list = None
        self.c_list = None
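
A minimal sketch of how this module might be stepped for one time step. The helper name policy_step is hypothetical, and it assumes h_list and c_list were initialized to zero tensors of shape (batch_size, hidden_size) at the start of the episode:

import torch

def policy_step(policy, state):
    # Thread the state through the stacked LSTMCells, one layer at a time.
    x = state
    for i, cell in enumerate(policy.cell_list):
        policy.h_list[i], policy.c_list[i] = cell(
            x, (policy.h_list[i], policy.c_list[i]))
        x = policy.h_list[i]  # output of layer i is the input to layer i + 1
    # Heads: logits over action dimensions, plus mean/log-std of action values.
    decision_logits = policy.FC_decision(x)
    # Bounding the mean with tanh * act_lim is an assumption for illustration.
    value_mean = policy.act_lim * torch.tanh(policy.FC_values_mean(x))
    value_logstd = policy.FC_values_logstd(x)
    return decision_logits, value_mean, value_logstd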
Example #4
    def __init__(self, channels, h_g, h_l, std, hidden_size, num_classes,
                 learned_start):
        """
        Initialize the recurrent attention model and its
        different components.

        Args
        ----
        - g: size of the square patches in the glimpses extracted
          by the retina.
        - k: number of patches to extract per glimpse.
        - s: scaling factor that controls the size of successive patches.
        - c: number of num_channels in each image.
        - h_g: hidden layer size of the fc layer for `phi`.
        - h_l: hidden layer size of the fc layer for `l`.
        - std: standard deviation of the Gaussian policy.
        - hidden_size: hidden size of the rnn.
        - num_classes: number of num_classes in the dataset.
        - num_glimpses: number of glimpses to take per image,
          i.e. number of BPTT steps.
        """
        super(RecurrentAttention, self).__init__()
        self.std = std

        self.sensor = glimpse_network(h_g, h_l, learned_start, channels)
        self.rnn = LSTMCell(256, hidden_size)
        self.decision = decision_network(hidden_size, 2)
        self.illuminator = illumination_network(hidden_size, channels, std)
        self.classifier = action_network(hidden_size, num_classes)
Example #5
    def __init__(
        self,
        decoding_dim: int,
        target_embedding_dim: int,
        attention: Optional[Attention] = None,
        bidirectional_input: bool = False,
    ) -> None:

        super().__init__(
            decoding_dim=decoding_dim,
            target_embedding_dim=target_embedding_dim,
            decodes_parallel=False,
        )

        # In this particular type of decoder, the output of the previous step feeds
        # directly into the input of the current step. We also assume that the decoder
        # output dimensionality is equal to the encoder output dimensionality.
        decoder_input_dim = self.target_embedding_dim

        # Attention mechanism applied to the encoder output for each step.
        self._attention = attention

        if self._attention:
            # If using attention, a weighted average over encoder outputs will be concatenated
            # to the previous target embedding to form the input to the decoder at each
            # time step. The encoder output dim will be the same as decoding_dim.
            decoder_input_dim += decoding_dim

        # We'll use an LSTM cell as the recurrent cell that produces a hidden state
        # for the decoder at each time step.
        self._decoder_cell = LSTMCell(decoder_input_dim, self.decoding_dim)
        self._bidirectional_input = bidirectional_input
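
For context, one decode step with this cell might look like the sketch below; encoder_outputs, source_mask, the (h, c) state tuple, and the decode_step name are assumptions for illustration, not part of the snippet above:

import torch

def decode_step(self, last_embedding, encoder_outputs, source_mask, state):
    decoder_input = last_embedding
    if self._attention:
        # Attention weights over the encoder outputs, then a weighted average
        # that is concatenated to the previous target embedding.
        weights = self._attention(state[0], encoder_outputs, source_mask)
        attended = torch.bmm(weights.unsqueeze(1), encoder_outputs).squeeze(1)
        decoder_input = torch.cat((attended, last_embedding), dim=-1)
    # One LSTMCell step produces the next (hidden, cell) state.
    return self._decoder_cell(decoder_input, state)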
Example #6
    def __init__(self,
                 embed:       nn.Embedding = None,
                 hidden_size: int = 200,
                 dropout:     float = 0.1,
                 layer:       str = "rcnn",
                 z_rnn_size:  int = 30,
                 ):

        super(DependentLatentModel, self).__init__()

        self.layer = layer
        emb_size = embed.weight.shape[1]
        enc_size = hidden_size * 2

        self.embed_layer = nn.Sequential(embed, nn.Dropout(p=dropout))
        self.enc_layer = get_encoder(layer, emb_size, hidden_size)

        if layer == "rcnn":
            self.z_cell = RCNNCell(enc_size + 1, z_rnn_size)
        else:
            self.z_cell = LSTMCell(enc_size + 1, z_rnn_size)

        self.z_layer = KumaGate(enc_size + z_rnn_size)

        self.z = None      # z samples
        self.z_dists = []  # z distribution(s)

        self.report_params()
Example #7
File: nnet.py Project: Apich238/mySAT
    def __init__(self, arch: dict):
        super().__init__()
        self.ldim = arch['latent_dim']
        self.defaultSteps = arch['std_T']
        self.Cinit = torch.nn.Parameter(torch.FloatTensor(self.ldim))
        torch.nn.init.normal_(self.Cinit)
        self.Linit = torch.nn.Parameter(torch.FloatTensor(self.ldim))
        torch.nn.init.normal_(self.Linit)

        self.rec_block = arch['recurrent_block']

        if self.rec_block == 'test':
            # self.block = BiPartialTestBlock(self.ldim, self.ldim, 2 * self.ldim, 2)
            Ldim, Cdim, Hdim, Dpth = self.ldim, self.ldim, 2 * self.ldim, 2
            self.Cmsg = batchMLP(Cdim, Hdim, Cdim, Dpth, False)
            self.Lmsg = batchMLP(Ldim, Hdim, Ldim, Dpth, False)

            self.Cu = batchMLP(Cdim * 2, Hdim, Cdim, Dpth, False)
            self.Lu = batchMLP(Ldim * 3, Hdim, Ldim, Dpth, False)
        elif self.rec_block in ['std_lstm', 'ln_lstm', 'gru']:
            Ldim, Cdim, Hdim, Dpth = self.ldim, self.ldim, self.ldim, 4
            self.Cmsg = batchMLP(Cdim, Hdim, Cdim, Dpth, False)
            self.Lmsg = batchMLP(Ldim, Hdim, Ldim, Dpth, False)

            if self.rec_block == 'std_lstm':
                self.Cu = LSTMCell(self.ldim, self.ldim, True)
                self.Lu = LSTMCell(self.ldim * 2, self.ldim, True)
            elif self.rec_block == 'ln_lstm':
                self.Cu = ln_LSTMCell(self.ldim, self.ldim, True)
                self.Lu = ln_LSTMCell(self.ldim * 2, self.ldim, True)
            elif self.rec_block == 'gru':
                self.Cu = GRUCell(self.ldim, self.ldim, True)
                self.Lu = GRUCell(self.ldim * 2, self.ldim, True)

        self.cl = arch['classifier']
        if self.cl == 'NeuroSAT':
            self.Lvote = batchMLP(self.ldim, 2 * self.ldim, 1, 2, False)
        elif self.cl == 'CircuitSAT-like':
            self.tnormf = arch['tnorm']
            if 'tnorm_train' in arch:
                self.train_tnorm = arch['tnorm_train']
            else:
                self.train_tnorm = self.tnormf
            self.tnorm_tmp = arch['tnorm_temperature']
            self.train_temp = arch['temp_train']
            self.test_temp = arch['temp_test']
            self.Lvote = batchMLP(self.ldim, 2 * self.ldim, 1, 2, False)
Example #8
    def __init__(self, arch=None):
        """
        :param arch: dictionary, for overriding default architecture
        """
        nn.Module.__init__(self)
        self.arch = deepcopy(default_arch)
        if arch is not None:
            self.arch.update(arch)

        self.T = self.arch.max_steps
        self.reinforce_weight = 0.0

        # 4: where + pres
        lstm_input_size = self.arch.input_size + self.arch.z_what_size + 4
        self.lstm_cell = LSTMCell(lstm_input_size, self.arch.lstm_hidden_size)

        # predict z_where, z_pres from h
        self.predict = Predict(self.arch)
        # encode object into what
        self.encoder = Encoder(self.arch)
        # decode what into object
        self.decoder = Decoder(self.arch)

        # spatial transformers
        self.image_to_object = SpatialTransformer(self.arch.input_shape,
                                                  self.arch.object_shape)
        self.object_to_image = SpatialTransformer(self.arch.object_shape,
                                                  self.arch.input_shape)

        # baseline RNN
        self.bl_rnn = LSTMCell(lstm_input_size, self.arch.baseline_hidden_size)
        # predict baseline value
        self.bl_predict = nn.Linear(self.arch.baseline_hidden_size, 1)

        # priors
        self.pres_prior = Bernoulli(probs=self.arch.z_pres_prob_prior)
        self.where_prior = Normal(loc=self.arch.z_where_loc_prior,
                                  scale=self.arch.z_where_scale_prior)
        self.what_prior = Normal(loc=self.arch.z_what_loc_prior,
                                 scale=self.arch.z_what_scale_prior)

        # modules excluding baseline rnn
        self.air_modules = nn.ModuleList(
            [self.predict, self.lstm_cell, self.encoder, self.decoder])

        self.baseline_modules = nn.ModuleList([self.bl_rnn, self.bl_predict])
Example #9
 def __init__(self, token_embedder, embed_dim):
     super(RNNSequenceEmbedder, self).__init__()
     self._embed_dim = embed_dim
     word_dim = token_embedder.embed_dim
     rnn_cell = LSTMCell(word_dim, embed_dim)
     self.source_encoder = SimpleSourceEncoder(rnn_cell)
     self.vocab = token_embedder.vocab
     self.token_embedder = token_embedder
Example #10
 def __init__(self, token_embedder, hidden_dim, input_dim, agenda_dim):
     super(SimpleDecoderCell, self).__init__()
     self.rnn_cell = LSTMCell(input_dim + agenda_dim, hidden_dim)
     self.linear = Linear(hidden_dim, input_dim)
     self.h0 = Parameter(torch.zeros(hidden_dim))
     self.c0 = Parameter(torch.zeros(hidden_dim))
     self.softmax = Softmax(dim=1)
     self.token_embedder = token_embedder
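
The learned h0 and c0 would typically be tiled across the batch at the start of decoding; a minimal sketch, where the helper name initial_state is an assumption:

def initial_state(self, batch_size):
    # Tile the learned start states across the batch; expand avoids a copy.
    return (self.h0.unsqueeze(0).expand(batch_size, -1),
            self.c0.unsqueeze(0).expand(batch_size, -1))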
Example #11
    def init_network(self):
        """Initialize network parameters. This is an actor-critic build on top of a RNN cell. The
        actor is a fully connected layer, and the critic consists of two fully connected layers"""
        self.rnn = LSTMCell(self.n_actions, self._hidden_size)
        for p in self.rnn.parameters():
            uniform_(p, self._uniform_init[0], self._uniform_init[1])

        self.actor = Linear(self._hidden_size, self.n_actions)
        for p in self.actor.parameters():
            uniform_(p, self._uniform_init[0], self._uniform_init[1])

        self.middle_critic = Linear(self._hidden_size, self._hidden_size // 2)
        for p in self.middle_critic.parameters():
            uniform_(p, self._uniform_init[0], self._uniform_init[1])

        self.critic = Linear(self._hidden_size // 2, 1)
        for p in self.critic.parameters():
            uniform_(p, self._uniform_init[0], self._uniform_init[1])
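
A sketch of how this actor-critic might be evaluated for one step; the forward logic, including the ReLU between the two critic layers, is an assumption for illustration:

import torch.nn.functional as F

def actor_critic_step(self, prev_action, h, c):
    # One recurrent step, then the actor head and the two-layer critic head.
    h, c = self.rnn(prev_action, (h, c))
    action_logits = self.actor(h)
    value = self.critic(F.relu(self.middle_critic(h)))
    return action_logits, value, (h, c)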
Example #12
 def __init__(self,
              embeddings,
              max_word=32,
              multi_image=1,
              multi_merge='att',
              labels=None,
              aete_s=2000,
              aete_r=5,
              lstm_dim=256,
              lambda_a=0.85,
              teacher_forcing=None,
              image_model=None,
              image_pretrained=None,
              finetune_image=False,
              image_finetune_epoch=None,
              rl_opts=None,
              word_idxs=None,
              device='gpu',
              verbose=False):
     super(TieNet, self).__init__(max_word, multi_image, multi_merge,
                                  teacher_forcing, image_finetune_epoch,
                                  rl_opts, word_idxs, verbose)
     # Label statistics
     self.chexpert_labels, self.lp, self.ln, self.lq = self._load_labels(
         labels)
     # Various NN parameters
     self.feat_dim = lstm_dim
     self.lstm_dim = lstm_dim
     self.lambda_a = lambda_a
     self.dropout = Dropout(0.5)
     # Image processes
     if image_model is None:
         image_model = 'resnet50'
     self.image_feats, image_dim = ImageClassification.image_features(
         image_model, not finetune_image, True, image_pretrained, device)
     self._init_multi_image(image_dim, self.VISUAL_NUM, lstm_dim)
     self.image_proj = Linear(image_dim, lstm_dim)
     # Word processes
     self.init_h = Linear(lstm_dim, lstm_dim)
     self.init_c = Linear(lstm_dim, lstm_dim)
     self.att_v = Linear(image_dim, image_dim)
     self.att_h = Linear(lstm_dim, image_dim)
     self.att_a = Linear(image_dim, 1)
     self.gate = Linear(lstm_dim, image_dim)
     input_dim = image_dim + embeddings.shape[1]
     self.lstm_word = LSTMCell(input_dim, lstm_dim)
     self.embeddings = Embedding.from_pretrained(
         embeddings,
         freeze=False,
         padding_idx=PretrainedEmbeddings.INDEX_PAD)
     self.embed_num = self.embeddings.num_embeddings
     self.word_dense = Linear(lstm_dim, embeddings.shape[0], bias=False)
     # AETE processes
     self.aete1 = Linear(lstm_dim, aete_s)
     self.aete2 = Linear(aete_s, aete_r)
     # Joint
     self.joint = Linear(lstm_dim + image_dim, self.DISEASE_NUM * 2)
Example #13
 def __init__(self, 
         input_size,
         hidden_size):
     super(mLSTMCell, self).__init__()
     self._input_size = input_size
     self._hidden_size = hidden_size
     self._lstm_cell = weight_norm(LSTMCell(input_size, hidden_size), name='weight_ih')
     self._lstm_cell = weight_norm(self._lstm_cell, name='weight_hh')
     self._i_multiplier = weight_norm(Linear(input_size, hidden_size, bias=False))
     self._h_multiplier = weight_norm(Linear(hidden_size, hidden_size, bias=False))
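
The two weight-normalized linear layers suggest a multiplicative LSTM (Krause et al., 2016), where an intermediate state m = (W_mx x) * (W_mh h) stands in for the previous hidden state in the gate computations; a sketch of the forward step under that assumption:

def forward(self, x, state):
    h, c = state
    # Multiplicative intermediate state m replaces h in the LSTM update.
    m = self._i_multiplier(x) * self._h_multiplier(h)
    return self._lstm_cell(x, (m, c))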
Example #14
 def __init__(self, input_size, output_size, hidden_size):
     super(LSTMAuto, self).__init__()
     self.input_size = input_size
     self.output_size = output_size
     self.hidden_size = hidden_size
     # Output of previous iteration appended to input
     self.lstmCell = LSTMCell(output_size + input_size, hidden_size)
     # Softmax variables
     self.linear = nn.Linear(hidden_size, output_size)
     self.softmax = nn.Softmax(dim=1)  # explicit dim; the implicit-dim form is deprecated
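
Decoding with this cell is autoregressive, since the previous output is appended to the input; a minimal sketch, assuming zero-initialized states and a zero vector standing in for the first "previous output":

import torch

def unroll(model, inputs):
    # inputs: (seq_len, batch, input_size)
    batch = inputs.size(1)
    h = inputs.new_zeros(batch, model.hidden_size)
    c = inputs.new_zeros(batch, model.hidden_size)
    prev = inputs.new_zeros(batch, model.output_size)
    outputs = []
    for x in inputs:
        # The previous iteration's output is appended to the current input.
        h, c = model.lstmCell(torch.cat((prev, x), dim=1), (h, c))
        prev = model.softmax(model.linear(h))
        outputs.append(prev)
    return torch.stack(outputs)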
Example #15
    def __init__(self, input_shape, id, normalize, nb_hidden):
        super().__init__(input_shape, id)
        self._nb_hidden = nb_hidden

        if normalize:
            self.lstm = LSTMCellLayerNorm(input_shape[0], nb_hidden)
        else:
            self.lstm = LSTMCell(input_shape[0], nb_hidden)
            self.lstm.bias_ih.data.fill_(0)
            self.lstm.bias_hh.data.fill_(0)
Example #16
 def __init__(self, h_g, h_l, std, hidden_size, num_classes, patch_amount, patch_size, scale_factor):
     super(AdaptiveAttention, self).__init__()
     self.std = std
     self.sensor = GlimpseNetwork(hidden_size, patch_amount=patch_amount, patch_size=patch_size,
                                  scale_factor=scale_factor)
     self.rnn = LSTMCell(hidden_size, hidden_size)
     self.decider = DecisionNetwork(hidden_size, 2)
     self.locator = LocationNetwork(hidden_size, 2, std)
     self.classifier = ActionNetwork(hidden_size, num_classes)
     self.baseliner = BaselineNetwork(hidden_size, 1)
Example #17
    def __init__(self, emb_dim, classifier_type):
        """

        :param emb_dim:
        :param classifier_type: тип классификатора: 1 - по состоянию верхней вершины, 2 - по состоянию всех вершин, 3 - по состоянию только вершин-переменных, 5 - вычисление по логике Заде, 6 - вычисление по вероятностной логике, 7 - вычисление по логике Лукашевича
        """
        super().__init__()
        self.classifier_type = classifier_type
        self.emb_dim = emb_dim

        self.start_embeddings1 = torch.zeros([self.emb_dim], requires_grad=False)
        # self.start_embeddings = torch.nn.Parameter(torch.FloatTensor(self.emb_dim))
        # torch.nn.init.normal_(self.start_embeddings, 0, 1)

        self.con_embeddings1 = torch.tensor([1., 0., 0., 0., ], requires_grad=False)
        # self.con_embeddings = torch.nn.Parameter(torch.FloatTensor(self.emb_dim))
        # torch.nn.init.normal_(self.con_embeddings, 0, 1)

        self.dis_embeddings1 = torch.tensor([0., 1., 0., 0., ], requires_grad=False)
        # self.con_embeddings = torch.nn.Parameter(torch.FloatTensor(self.emb_dim))
        # torch.nn.init.normal_(self.con_embeddings, 0, 1)

        self.neg_embeddings1 = torch.tensor([0., 0., 1., 0., ], requires_grad=False)
        # self.neg_embeddings = torch.nn.Parameter(torch.FloatTensor(self.emb_dim))
        # torch.nn.init.normal_(self.neg_embeddings, 0, 1)

        self.var_embeddings1 = torch.tensor([0., 0., 0., 1., ], requires_grad=False)
        # self.var_embeddings = torch.nn.Parameter(torch.FloatTensor(self.emb_dim))
        # torch.nn.init.normal_(self.var_embeddings, 0, 1)

        # self.msg1_1_f = Linear(2 * self.emb_dim, 2 * self.emb_dim, True)
        self.msg1_1_f = Linear(self.emb_dim + 4, 2 * self.emb_dim, True)
        self.msg1_2_f = Linear(2 * self.emb_dim, self.emb_dim, True)

        # self.msg2_1_f = Linear(2 * self.emb_dim, 2 * self.emb_dim, True)
        self.msg2_1_f = Linear(self.emb_dim + 4, 2 * self.emb_dim, True)
        self.msg2_2_f = Linear(2 * self.emb_dim, self.emb_dim, True)

        self.update_f = LSTMCell(self.emb_dim, self.emb_dim)  # (input_size, hidden_size); a third positional argument would be the bias flag, not a size

        self.clf_1 = Linear(self.emb_dim, self.emb_dim, True)
        self.clf_2 = Linear(self.emb_dim, 1, False)

        if self.classifier_type in [1, 2, 3, 4]:
            pass
            # self.cl = Linear(self.emb_dim, 1, False)
        elif self.classifier_type == 5:
            # self.tvalue = Linear(self.emb_dim, 1, False)
            self.cl = GodelEvaluation()
        elif self.classifier_type == 6:
            # self.tvalue = Linear(self.emb_dim, 1, False)
            self.cl = ProbabilisticEvaluation()
        elif self.classifier_type == 7:
            # self.tvalue = Linear(self.emb_dim, 1, False)
            self.cl = LukasieviczEvaluation()
Example #18
 def __init__(self,
              input_size: int,
              hidden_size: int = 200,
              bias: bool = True,
              dropout: float = 0.):
     super().__init__(hidden_size)
     self.output_dim = hidden_size
     self.states: List[Tensor] = list()
     self.items = list()
     self.cell = LSTMCell(input_size, hidden_size, bias)
     self.dropout = Dropout(dropout)  # TODO: the dropout mask should be consistent within each instance
Example #19
    def __init__(self, nb_input_channel, nb_out_channel, normalize):
        super().__init__()
        self._nb_output_channel = nb_out_channel

        if normalize:
            self.lstm = LSTMCellLayerNorm(nb_input_channel,
                                          self._nb_output_channel)
        else:
            self.lstm = LSTMCell(nb_input_channel, self._nb_output_channel)
            self.lstm.bias_ih.data.fill_(0)
            self.lstm.bias_hh.data.fill_(0)
Example #20
 def __init__(self, params: configargparse.Namespace, att: torch.nn.Module = None):
     """
     Neural network module for the sequence-to-sequence LAS model
     :param configargparse.Namespace params: the training options
     :param torch.nn.Module att: the attention module
     """
     super(Speller, self).__init__()
     ## Embedding Layer
     self.embed = Embedding(params.odim,params.demb_dim)
     ## Decoder with LSTM Cells
     self.decoder = ModuleList()
     self.dropout_dec = ModuleList()
     self.dtype = params.dtype
     self.dunits = params.dhiddens
     self.dlayers = params.dlayers
     self.decoder += [
             LSTMCell(params.eprojs + params.demb_dim, params.dhiddens)
             if self.dtype == "lstm"
             else GRUCell(params.eprojs + params.demb_dim, params.dhiddens)
         ]
     self.dropout_dec += [Dropout(p=params.ddropout)]
     self.dropout_emb = Dropout(p=params.ddropout)
     ## Other decoder layers if > 1 decoder layer
     for i in range(1, params.dlayers):
         self.decoder += [
             LSTMCell(params.dhiddens, params.dhiddens)
             if self.dtype == "lstm"
             else GRUCell(params.dhiddens, params.dhiddens)
         ]
         self.dropout_dec += [LockedDropout(p=params.ddropout)] # Dropout
     
     ## Project to Softmax Space- Output
     self.projections = Linear(params.dhiddens, params.odim)
     ## Attention Module
     self.att = att
     ## Scheduled Sampling
     self.sampling_probability = params.ssprob
     ## Initialize EOS, SOS
     self.eos = len(params.char_list) - 1
     self.sos = self.eos
     self.ignore_id = params.text_pad
Example #21
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 num_layers: int = 1,
                 layer_dropout: float = 0.0,
                 recurrent_dropout: float = 0.0):
        super().__init__(input_dim, hidden_dim)
        self.hidden = None
        self.context = None
        assert num_layers >= 1
        self.layers = num_layers
        self._lstm_cell0 = LSTMCell(input_dim, hidden_dim)
        # Wrap the extra layers in a ModuleList so they register as submodules.
        self._lstm_cellL = nn.ModuleList(
            LSTMCell(hidden_dim, hidden_dim) for _ in range(num_layers - 1)
        )
        self.layer_dropout_rate = layer_dropout
        self.recurrent_dropout_rate = recurrent_dropout

        if self.layer_dropout_rate > 0.0 and num_layers < 2:
            raise ConfigurationError(
                "Layer dropout must be 0.0 if we have only a single layer")
Example #22
    def __init__(self,
                 encoder,
                 hidden_size,
                 num_programs,
                 num_non_primary_programs,
                 embedding_dim,
                 encoding_dim,
                 indices_non_primary_programs,
                 learning_rate=1e-3,
                 temperature=0.1):

        super(Policy, self).__init__()

        self._uniform_init = (-0.1, 0.1)

        self._hidden_size = hidden_size
        self.num_programs = num_programs
        self.num_non_primary_programs = num_non_primary_programs

        self.embedding_dim = embedding_dim
        self.encoding_dim = encoding_dim

        # Initialize networks
        self.Mprog = Embedding(num_non_primary_programs, embedding_dim)
        self.encoder = encoder

        self.lstm = LSTMCell(self.encoding_dim + self.embedding_dim,
                             self._hidden_size)
        self.critic = CriticNet(self._hidden_size)
        self.actor = ContinuousActorNet(self._hidden_size, self.num_programs)

        self.temperature = temperature

        self.init_networks()
        self.init_optimizer(lr=learning_rate)

        # Compute relative indices of non primary programs (to deal with task indices)
        self.relative_indices = dict(
            (prog_idx, relat_idx)
            for relat_idx, prog_idx in enumerate(indices_non_primary_programs))
Example #23
    def init_network(self):
        """Initialize network parameters. This is an actor-critic build on top of a RNN cell. The
        actor is a fully connected layer, and the critic consists of two fully connected layers"""
        self.rnn = LSTMCell(self.action_space, self.hidden_size)
        for p in self.rnn.parameters():
            uniform_(p, self.uniform_init[0], self.uniform_init[1])

        self.actor = Linear(self.hidden_size, self.action_space)
        for p in self.actor.parameters():
            uniform_(p, self.uniform_init[0], self.uniform_init[1])

        self.middle_critic = Linear(self.hidden_size, self.hidden_size // 2)
        for p in self.middle_critic.parameters():
            uniform_(p, self.uniform_init[0], self.uniform_init[1])

        self.critic = Linear(self.hidden_size // 2, 1)
        for p in self.critic.parameters():
            uniform_(p, self.uniform_init[0], self.uniform_init[1])

        self.encoder = resnet34(**{"num_classes": self.embedding})

        self.padding = ZeroPad2d((30, 20, 0, 0))
Example #24
    def __init__(
        self,
        decoding_dim: int,
        target_embedding_dim: int,
        attention: Optional[Attention] = None,
        bidirectional_input: bool = False,
        num_decoder_layers: int = 1,
        accumulate_hidden_states: bool = False,
        dropout: float = 0.2,
    ) -> None:

        super().__init__(
            decoding_dim=decoding_dim,
            target_embedding_dim=target_embedding_dim,
            decodes_parallel=False,
        )

        # In this particular type of decoder, the output of the previous step feeds
        # directly into the input of the current step. We also assume that the decoder
        # output dimensionality is equal to the encoder output dimensionality.
        decoder_input_dim = self.target_embedding_dim

        # Attention mechanism applied to the encoder output for each step.
        self._attention = attention

        if self._attention:
            # If using attention, a weighted average over encoder outputs will be concatenated
            # to the previous target embedding to form the input to the decoder at each
            # time step. The encoder output dim will be the same as decoding_dim.
            decoder_input_dim += decoding_dim

        # Ensure that attention is only set during seq2seq setting.
        # if not self._seq2seq_mode and self._attention is not None:
        #     raise ConfigurationError("Attention is only specified in Seq2Seq setting.")

        self._num_decoder_layers = num_decoder_layers
        if self._num_decoder_layers > 1:
            self._decoder_cell = LSTM(
                input_size=decoder_input_dim,
                hidden_size=self.decoding_dim,
                num_layers=self._num_decoder_layers,
                dropout=dropout,
            )
        else:
            # We'll use an LSTM cell as the recurrent cell that produces a hidden state
            # for the decoder at each time step.
            # TODO (pradeep): Do not hardcode decoder cell type.
            self._decoder_cell = LSTMCell(decoder_input_dim, self.decoding_dim)

        self._bidirectional_input = bidirectional_input

        self._accumulate_hidden_states = accumulate_hidden_states
Example #25
File: sat.py Project: TCBpenta8/ifcc-1
    def __init__(self,
                 embeddings,
                 max_word=32,
                 multi_image=1,
                 multi_merge='att',
                 context_dim=512,
                 lstm_dim=1000,
                 lambda_a=1.0,
                 teacher_forcing=None,
                 image_model=None,
                 image_pretrained=None,
                 finetune_image=False,
                 image_finetune_epoch=None,
                 rl_opts=None,
                 word_idxs=None,
                 device='gpu',
                 verbose=False):
        super(ShowAttendAndTell,
              self).__init__(max_word, multi_image, multi_merge,
                             teacher_forcing, image_finetune_epoch, rl_opts,
                             word_idxs, verbose)
        self.feat_dim = context_dim
        self.lstm_dim = lstm_dim
        self.lambda_a = lambda_a

        self.dropout = Dropout(0.5)
        # Image processes
        if image_model is None:
            image_model = 'vgg'
        self.image_feats, image_dim = ImageClassification.image_features(
            image_model, not finetune_image, True, image_pretrained, device)
        self._init_multi_image(image_dim, self.VISUAL_NUM, lstm_dim)
        self.image_proj = Linear(image_dim, context_dim)
        # Word processes
        self.init_h = Linear(context_dim, lstm_dim)
        self.init_c = Linear(context_dim, lstm_dim)
        self.att_v = Linear(image_dim, image_dim)
        self.att_h = Linear(lstm_dim, image_dim)
        self.att_a = Linear(image_dim, 1)
        self.gate = Linear(lstm_dim, image_dim)
        input_dim = image_dim + embeddings.shape[1]
        self.lstm_word = LSTMCell(input_dim, lstm_dim)
        self.embeddings = Embedding.from_pretrained(
            embeddings,
            freeze=False,
            padding_idx=PretrainedEmbeddings.INDEX_PAD)
        self.embed_num = self.embeddings.num_embeddings
        # Deep output
        self.lh = Linear(lstm_dim, embeddings.shape[1])
        self.lz = Linear(image_dim, embeddings.shape[1])
        self.lo = Linear(embeddings.shape[1], embeddings.shape[0], bias=False)
Example #26
    def __init__(self, target_token_embedder, input_dim, agenda_dim,
                 decoder_dim, encoder_dim, attn_dim, num_layers, num_inputs,
                 dropout_prob, disable_attention):
        super(AttentionDecoderCell, self).__init__()

        target_dim = target_token_embedder.embed_dim
        self.num_layers = num_layers
        self.num_inputs = num_inputs
        self.disable_attention = disable_attention

        if disable_attention:
            augment_dim = agenda_dim
        else:
            # see definition of `x_augment` in `forward` method
            # we augment the input to each RNN layer with num_inputs attention contexts + the agenda
            augment_dim = encoder_dim * num_inputs + agenda_dim

        self.rnn_cells = []
        for layer in range(num_layers):
            in_dim = input_dim if layer == 0 else decoder_dim  # first layer takes word vectors
            out_dim = decoder_dim
            rnn_cell = LSTMCell(in_dim + augment_dim, out_dim)
            self.add_module('decoder_layer_{}'.format(layer), rnn_cell)
            self.rnn_cells.append(rnn_cell)

        if disable_attention:
            z_dim = decoder_dim
        else:
            # see definition of `z` in `forward` method
            # to predict words, we condition on the hidden state h + num_inputs attention context
            z_dim = decoder_dim + encoder_dim * num_inputs

        # TODO(kelvin): these big params may need regularization
        self.vocab_projection_pos = Linear(z_dim, target_dim)
        self.vocab_projection_neg = Linear(z_dim, target_dim)
        self.relu = torch.nn.ReLU()

        self.h0 = Parameter(torch.zeros(decoder_dim))
        self.c0 = Parameter(torch.zeros(decoder_dim))
        self.vocab_softmax = Softmax(dim=-1)  # explicit dim; the implicit-dim form is deprecated

        self.input_attentions = []
        for i in range(num_inputs):
            attn = Attention(encoder_dim, decoder_dim, attn_dim)
            self.add_module('input_attention_{}'.format(i), attn)
            self.input_attentions.append(attn)

        self.target_token_embedder = target_token_embedder

        self.dropout = Dropout(dropout_prob)
Example #27
    def __init__(self, hparams):
        super().__init__()

        self.input_size = (1 + hparams.position_number) * hparams.embedding_dim
        self.hidden_size = hparams.argument_size * hparams.relation_size * hparams.position_size

        self.zeroth_tuple = Parameter(empty(1, self.input_size))

        self.zeroth_cell_state = Parameter(empty(1, self.hidden_size).zero_())
        self.cell = LSTMCell(self.input_size, self.hidden_size)

        self.attention = Attention(hparams)
        self.unbinding_module = UnbindingModule(hparams)

        self.init_weights()
Example #28
 def __init__(self, output_size, hidden_size, seq_len, num_layers=1, bias=True, dropout=0, bidirectional=False):
     super(DecoderLSTM, self).__init__()
     self.output_size = output_size
     self.hidden_size = hidden_size
     self.num_layers = num_layers
     self.bias = bias
     self.dropout = dropout
     self.dropout_state = {}
     self.bidirectional = bidirectional
     self.seq_len = seq_len
     num_directions = 2 if bidirectional else 1
     
     self.lstm = LSTMCell(output_size, hidden_size)
     self.linear = nn.Linear(hidden_size, output_size)
     self.softmax = nn.Softmax(dim=1)  # explicit dim; the implicit-dim form is deprecated
Example #29
 def __init__(self, input_size, output_size, hidden_size, num_layers, lookup):
     super(LSTMAutoParams, self).__init__()
     self.input_size = input_size
     self.output_size = output_size
     self.hidden_size = hidden_size
     self.num_layers = num_layers
     self.lookup = lookup
     # Output of previous iteration appended to input
     self.layers = nn.ModuleList()  # ModuleList so the stacked cells register as submodules
     for i in range(num_layers):
         self.layers.append(LSTMCell(input_size, hidden_size))
         input_size = hidden_size
     # Softmax variables
     self.linear = nn.Linear(hidden_size, output_size)
     self.softmax = nn.Softmax(dim=1)  # explicit dim; the implicit-dim form is deprecated
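
One time step through this stack might look like the following sketch; the step helper and the per-layer state list are assumptions for illustration:

def step(model, x, states):
    # states: one (h, c) pair per stacked cell.
    for i, cell in enumerate(model.layers):
        states[i] = cell(x, states[i])
        x = states[i][0]  # hidden state of layer i feeds layer i + 1
    return model.softmax(model.linear(x)), states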
Example #30
    def __init__(self, nb_input_channel, nb_out_channel, normalize):
        super().__init__()
        self._nb_output_channel = 256
        self.linear = Linear(2592, self._nb_output_channel)

        if normalize:
            self.lstm = LSTMCellLayerNorm(
                self._nb_output_channel,
                self._nb_output_channel)  # hack for experiment
            self.bn_linear = BatchNorm1d(self._nb_output_channel)
        else:
            self.bn_linear = Identity()
            self.lstm = LSTMCell(self._nb_output_channel,
                                 self._nb_output_channel)
            self.lstm.bias_ih.data.fill_(0)
            self.lstm.bias_hh.data.fill_(0)