Example 1
    def __init__(self, args, vocab, transition_system):
        super(Parser, self).__init__()

        self.args = args
        self.vocab = vocab

        self.transition_system = transition_system
        self.grammar = self.transition_system.grammar

        # Embedding layers
        self.src_embed = nn.Embedding(len(vocab.source), args.embed_size)
        self.production_embed = nn.Embedding(
            len(transition_system.grammar) + 1, args.action_embed_size)
        self.primitive_embed = nn.Embedding(len(vocab.primitive),
                                            args.action_embed_size)
        self.field_embed = nn.Embedding(len(transition_system.grammar.fields),
                                        args.field_embed_size)
        self.type_embed = nn.Embedding(len(transition_system.grammar.types),
                                       args.type_embed_size)

        nn.init.xavier_normal_(self.src_embed.weight)
        nn.init.xavier_normal_(self.production_embed.weight)
        nn.init.xavier_normal_(self.primitive_embed.weight)
        nn.init.xavier_normal_(self.field_embed.weight)
        nn.init.xavier_normal_(self.type_embed.weight)

        # LSTMs
        if args.lstm == 'lstm':
            self.encoder_lstm = nn.LSTM(args.embed_size,
                                        args.hidden_size // 2,
                                        bidirectional=True)
            self.decoder_lstm = nn.LSTMCell(
                args.action_embed_size +  # previous action
                args.action_embed_size + args.field_embed_size +
                args.type_embed_size +  # frontier info
                args.hidden_size +  # parent hidden state
                args.hidden_size,  # input feeding
                args.hidden_size)
        else:
            from .lstm import LSTM, LSTMCell
            self.encoder_lstm = LSTM(args.embed_size,
                                     args.hidden_size // 2,
                                     bidirectional=True,
                                     dropout=args.dropout)
            self.decoder_lstm = LSTMCell(
                args.action_embed_size +  # previous action
                args.action_embed_size + args.field_embed_size +
                args.type_embed_size +  # frontier info
                args.hidden_size + args.hidden_size,  # parent hidden state
                args.hidden_size,
                dropout=args.dropout)

        # pointer net
        self.src_pointer_net = PointerNet(args.hidden_size, args.hidden_size)

        self.primitive_predictor = nn.Linear(args.hidden_size, 2)

        # initialize the decoder's state and cells with encoder hidden states
        self.decoder_cell_init = nn.Linear(args.hidden_size, args.hidden_size)

        # attention: dot product attention
        # project source encoding to decoder rnn's h space
        self.att_src_linear = nn.Linear(args.hidden_size,
                                        args.hidden_size,
                                        bias=False)

        # transformation of decoder hidden states and context vectors before reading out target words
        # this produces the `attentional vector` in (Luong et al., 2015)
        self.att_vec_linear = nn.Linear(args.hidden_size + args.hidden_size,
                                        args.hidden_size,
                                        bias=False)

        # readout layers
        self.query_vec_to_embed = nn.Linear(args.hidden_size,
                                            args.embed_size,
                                            bias=False)
        self.production_readout_b = nn.Parameter(
            torch.FloatTensor(len(transition_system.grammar) + 1).zero_())
        self.tgt_token_readout_b = nn.Parameter(
            torch.FloatTensor(len(vocab.primitive)).zero_())
        self.production_readout = self.production_readout_func
        self.tgt_token_readout = self.tgt_token_readout_func
        # self.production_readout = nn.Linear(args.hidden_size, len(transition_system.grammar) + 1)
        # self.tgt_token_readout = nn.Linear(args.hidden_size, len(vocab.primitive))

        # dropout layer
        self.dropout = nn.Dropout(args.dropout)

        if args.cuda:
            self.new_long_tensor = torch.cuda.LongTensor
            self.new_tensor = torch.cuda.FloatTensor
        else:
            self.new_long_tensor = torch.LongTensor
            self.new_tensor = torch.FloatTensor
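Unlike nn.LSTM, the nn.LSTMCell decoder above is advanced one timestep at a time, with (h, c) threaded through explicitly and the per-step input built by concatenating several embeddings. A minimal sketch of that stepping pattern, with placeholder sizes standing in for the args values (this is not the Parser's actual decode loop):

import torch
import torch.nn as nn

action_embed, field_embed, type_embed, hidden = 32, 16, 16, 64  # placeholder sizes
decoder = nn.LSTMCell(
    action_embed + action_embed + field_embed + type_embed  # prev action + frontier info
    + hidden + hidden,                                       # parent state + input feeding
    hidden)

batch = 4
h = torch.zeros(batch, hidden)
c = torch.zeros(batch, hidden)
for _ in range(5):                               # a few dummy decoding steps
    x = torch.randn(batch, decoder.input_size)   # concatenated per-step features
    h, c = decoder(x, (h, c))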
Example 2
 def __init__(self, input_size, hidden_size, noisin=0):
     super().__init__()
     self.cell = nn.LSTMCell(input_size=input_size, hidden_size=hidden_size)
     self.reset_parameters()
Example 3
 def __init__(self):
     super(Sequence, self).__init__()
     self.lstm1 = nn.LSTMCell(1, 51)
     self.lstm2 = nn.LSTMCell(51, 51)
     self.linear = nn.Linear(51, 1)
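For reference, two stacked cells like lstm1/lstm2 above are normally chained inside forward, one timestep at a time. A minimal sketch under assumed shapes (one scalar feature per step; this is not the Sequence class's actual forward):

import torch
import torch.nn as nn

lstm1, lstm2, linear = nn.LSTMCell(1, 51), nn.LSTMCell(51, 51), nn.Linear(51, 1)
x = torch.randn(4, 10)                       # (batch, time)
h1 = c1 = torch.zeros(4, 51)
h2 = c2 = torch.zeros(4, 51)
outputs = []
for x_t in x.split(1, dim=1):                # x_t: (batch, 1)
    h1, c1 = lstm1(x_t, (h1, c1))
    h2, c2 = lstm2(h1, (h2, c2))
    outputs.append(linear(h2))
outputs = torch.cat(outputs, dim=1)          # (batch, time)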
Example 4
    def __init__(self, args):
        print("Encoder model --- LSTMCELL")
        super(Encoder_WordLstm, self).__init__()
        self.args = args

        # random
        self.char_embed = nn.Embedding(
            self.args.embed_char_num,
            self.args.embed_char_dim,
            sparse=False,
            padding_idx=self.args.create_alphabet.char_PaddingID)
        # for index in range(self.args.embed_char_dim):
        #     self.char_embed.weight.data[self.args.create_alphabet.char_PaddingID][index] = 0
        self.char_embed.weight.requires_grad = True

        self.bichar_embed = nn.Embedding(
            self.args.embed_bichar_num,
            self.args.embed_bichar_dim,
            sparse=False,
            padding_idx=self.args.create_alphabet.bichar_PaddingID)
        # for index in range(self.args.embed_bichar_dim):
        #     self.bichar_embed.weight.data[self.args.create_alphabet.bichar_PaddingID][index] = 0
        self.bichar_embed.weight.requires_grad = True

        # fix the word embedding
        self.static_char_embed = nn.Embedding(
            self.args.static_embed_char_num,
            self.args.embed_char_dim,
            sparse=False,
            padding_idx=self.args.create_static_alphabet.char_PaddingID)
        init.uniform_(self.static_char_embed.weight,
                      a=-np.sqrt(3 / self.args.embed_char_dim),
                      b=np.sqrt(3 / self.args.embed_char_dim))
        self.static_bichar_embed = nn.Embedding(
            self.args.static_embed_bichar_num,
            self.args.embed_bichar_dim,
            sparse=False,
            padding_idx=self.args.create_static_alphabet.bichar_PaddingID)
        init.uniform_(self.static_bichar_embed.weight,
                      a=-np.sqrt(3 / self.args.embed_bichar_dim),
                      b=np.sqrt(3 / self.args.embed_bichar_dim))

        # load external word embedding
        if args.char_Embedding is True:
            print("char_Embedding")
            pretrained_char_weight = np.array(args.pre_char_word_vecs)
            self.static_char_embed.weight.data.copy_(
                torch.from_numpy(pretrained_char_weight))
            for index in range(self.args.embed_char_dim):
                self.static_char_embed.weight.data[
                    self.args.create_static_alphabet.char_PaddingID][index] = 0
        self.static_char_embed.weight.requires_grad = False

        if args.bichar_Embedding is True:
            print("bichar_Embedding")
            pretrained_bichar_weight = np.array(args.pre_bichar_word_vecs)
            self.static_bichar_embed.weight.data.copy_(
                torch.from_numpy(pretrained_bichar_weight))
            # print(self.static_bichar_embed.weight.data[self.args.create_static_alphabet.bichar_PaddingID])
            # print(self.static_bichar_embed.weight.data[self.args.create_static_alphabet.bichar_UnkID])
            for index in range(self.args.embed_bichar_dim):
                self.static_bichar_embed.weight.data[
                    self.args.create_static_alphabet.
                    bichar_PaddingID][index] = 0
        self.static_bichar_embed.weight.requires_grad = False

        self.lstm_left = nn.LSTMCell(input_size=self.args.hidden_size,
                                     hidden_size=self.args.rnn_hidden_dim,
                                     bias=True)
        self.lstm_right = nn.LSTMCell(input_size=self.args.hidden_size,
                                      hidden_size=self.args.rnn_hidden_dim,
                                      bias=True)

        # init lstm weight and bias
        init.xavier_uniform_(self.lstm_left.weight_ih)
        init.xavier_uniform_(self.lstm_left.weight_hh)
        init.xavier_uniform_(self.lstm_right.weight_ih)
        init.xavier_uniform_(self.lstm_right.weight_hh)
        value = np.sqrt(6 / (self.args.rnn_hidden_dim + 1))
        self.lstm_left.bias_hh.data.uniform_(-value, value)
        self.lstm_left.bias_ih.data.uniform_(-value, value)
        self.lstm_right.bias_hh.data.uniform_(-value, value)
        self.lstm_right.bias_ih.data.uniform_(-value, value)

        self.hidden_l = self.init_hidden_cell(self.args.batch_size)
        self.hidden_r = self.init_hidden_cell(self.args.batch_size)

        self.dropout = nn.Dropout(self.args.dropout)
        self.dropout_embed = nn.Dropout(self.args.dropout_embed)

        self.input_dim = (self.args.embed_char_dim +
                          self.args.embed_bichar_dim) * 2
        if self.args.use_cuda is True:
            self.liner = nn.Linear(in_features=self.input_dim,
                                   out_features=self.args.hidden_size,
                                   bias=True).cuda()
        else:
            self.liner = nn.Linear(in_features=self.input_dim,
                                   out_features=self.args.hidden_size,
                                   bias=True)

        # init linear
        init.xavier_uniform_(self.liner.weight)
        init_linear_value = np.sqrt(6 / (self.args.hidden_size + 1))
        self.liner.bias.data.uniform_(-init_linear_value, init_linear_value)
Example 5
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        ### TODO - Initialize the following variables:
        ###     self.encoder (Bidirectional LSTM with bias)
        ###     self.decoder (LSTM Cell with bias)
        ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        ###     self.dropout (Dropout Layer)

        # the encoder is fed the word embeddings of the source sentence and yields hidden and cell states for both the forward and backward LSTMs
        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=hidden_size,
                               bidirectional=True,
                               bias=True)

        # the decoder is initialized with a linear projection of the encoder's final hidden and cell states, and is fed the matching target-sentence word embeddings
        self.decoder = nn.LSTMCell(input_size=embed_size + hidden_size,
                                   hidden_size=hidden_size,
                                   bias=True)

        self.h_projection = nn.Linear(in_features=hidden_size * 2,
                                      out_features=hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(in_features=hidden_size * 2,
                                      out_features=hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(in_features=hidden_size * 2,
                                        out_features=hidden_size,
                                        bias=False)

        # transformation of decoder hidden states and context vectors before reading out target words
        # this produces the `attentional vector` in (Luong et al., 2015)
        self.combined_output_projection = nn.Linear(
            in_features=hidden_size * 2 + hidden_size,
            out_features=hidden_size,
            bias=False)

        # prediction layer of the target vocabulary
        self.target_vocab_projection = nn.Linear(in_features=hidden_size,
                                                 out_features=len(vocab.tgt),
                                                 bias=False)
        self.dropout = nn.Dropout(dropout_rate)
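To make the wiring concrete, here is a hedged sketch of how layers with these shapes are typically connected: the bidirectional encoder's final forward/backward states are concatenated and projected to initialize the LSTMCell decoder. Sizes are placeholders and this is not presented as the assignment's reference solution:

import torch
import torch.nn as nn

embed_size, hidden_size, batch, src_len = 64, 128, 4, 10      # placeholder sizes
encoder = nn.LSTM(embed_size, hidden_size, bidirectional=True, bias=True)
decoder = nn.LSTMCell(embed_size + hidden_size, hidden_size, bias=True)
h_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)
c_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)

src = torch.randn(src_len, batch, embed_size)                 # (src_len, batch, embed)
enc_hiddens, (last_h, last_c) = encoder(src)                  # last_h, last_c: (2, batch, hidden)
init_h = h_projection(torch.cat([last_h[0], last_h[1]], dim=1))
init_c = c_projection(torch.cat([last_c[0], last_c[1]], dim=1))
dec_state = (init_h, init_c)

y_t = torch.randn(batch, embed_size + hidden_size)            # target embedding concatenated with prev. combined output
dec_state = decoder(y_t, dec_state)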
Example 6
 def __init__(self):
     super(LstmCell, self).__init__()
     self.LstmCell = nn.LSTMCell(input_size=256, hidden_size=256)
Example 7
 def __init__(self, input_dim, process_step=4):
     super(QueryEncoder, self).__init__()
     self.input_dim = input_dim
     self.process_step = process_step
     # self.batch_size = batch_size
     self.process = nn.LSTMCell(input_dim, 2*input_dim)
Example 8
    def __init__(self, in_channels, frame_channels, r, attn_type, attn_win,
                 attn_norm, prenet_type, prenet_dropout, forward_attn,
                 trans_agent, forward_attn_mask, location_attn, attn_K,
                 separate_stopnet):
        super(Decoder, self).__init__()
        self.frame_channels = frame_channels
        self.r_init = r
        self.r = r
        self.encoder_embedding_dim = in_channels
        self.separate_stopnet = separate_stopnet
        self.max_decoder_steps = 1000
        self.stop_threshold = 0.5

        # model dimensions
        self.query_dim = 1024
        self.decoder_rnn_dim = 1024
        self.prenet_dim = 256
        self.attn_dim = 128
        self.p_attention_dropout = 0.1
        self.p_decoder_dropout = 0.1

        # memory -> |Prenet| -> processed_memory
        prenet_dim = self.frame_channels
        self.prenet = Prenet(prenet_dim,
                             prenet_type,
                             prenet_dropout,
                             out_features=[self.prenet_dim, self.prenet_dim],
                             bias=False)

        self.attention_rnn = nn.LSTMCell(self.prenet_dim + in_channels,
                                         self.query_dim,
                                         bias=True)

        self.attention = init_attn(attn_type=attn_type,
                                   query_dim=self.query_dim,
                                   embedding_dim=in_channels,
                                   attention_dim=128,
                                   location_attention=location_attn,
                                   attention_location_n_filters=32,
                                   attention_location_kernel_size=31,
                                   windowing=attn_win,
                                   norm=attn_norm,
                                   forward_attn=forward_attn,
                                   trans_agent=trans_agent,
                                   forward_attn_mask=forward_attn_mask,
                                   attn_K=attn_K)

        self.decoder_rnn = nn.LSTMCell(self.query_dim + in_channels,
                                       self.decoder_rnn_dim,
                                       bias=True)

        self.linear_projection = Linear(self.decoder_rnn_dim + in_channels,
                                        self.frame_channels * self.r_init)

        self.stopnet = nn.Sequential(
            nn.Dropout(0.1),
            Linear(self.decoder_rnn_dim + self.frame_channels * self.r_init,
                   1,
                   bias=True,
                   init_gain='sigmoid'))
        self.memory_truncated = None
Example 9
    def __init__(self,
                 lstm_settings_dict,
                 feature_size_dict={
                     'acous': 0,
                     'visual': 0
                 },
                 batch_size=32,
                 seq_length=200,
                 prediction_length=60,
                 embedding_info=[],
                 seq_wind=10):
        super(LSTMPredictor, self).__init__()

        # General model settings
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.feature_size_dict = feature_size_dict
        self.prediction_length = prediction_length

        # lstm_settings_dict
        self.lstm_settings_dict = lstm_settings_dict
        self.feature_size_dict['master'] = 0
        if self.lstm_settings_dict['no_subnets']:
            for act_mod in self.lstm_settings_dict['active_modalities']:
                self.feature_size_dict['master'] += self.feature_size_dict[
                    act_mod]
        else:
            for act_mod in self.lstm_settings_dict['active_modalities']:
                self.feature_size_dict['master'] += self.lstm_settings_dict[
                    'hidden_dims'][act_mod]
        self.num_layers = lstm_settings_dict['layers']

        # embedding settings
        self.embedding_info = embedding_info
        self.embeddings = {'acous': [], 'visual': []}
        self.embedding_indices = {'acous': [], 'visual': []}
        self.embed_delete_index_list = {'acous': [], 'visual': []}
        self.embed_data_types = {'acous': [], 'visual': []}
        self.len_output_of_embeddings = {'acous': 0, 'visual': 0}
        self.embedding_flags = {}

        # attention
        self.attn = nn.Linear(
            self.feature_size_dict['visual'] +
            self.lstm_settings_dict['hidden_dims']['acous'] * 2,
            self.lstm_settings_dict['hidden_dims']['acous']).type(dtype)
        self.seq_wind = seq_wind
        # m = nn.Linear(20, 30); input = torch.randn(128, 20); output = m(input)
        #####################################

        for modality in self.embedding_info.keys():
            self.embedding_flags[modality] = bool(
                len(self.embedding_info[modality]))
            if self.embedding_flags[modality]:
                for embedding in self.embedding_info[modality]:
                    self.len_output_of_embeddings[
                        modality] += 2 * embedding['embedding_out_dim']
                for emb_func_indx in range(len(self.embedding_info[modality])):
                    if self.embedding_info[modality][emb_func_indx][
                            'embedding_use_func']:
                        self.embeddings[modality].append(
                            nn.Embedding(
                                self.embedding_info[modality][emb_func_indx]
                                ['embedding_num'],
                                self.embedding_info[modality][emb_func_indx]
                                ['embedding_out_dim']).type(dtype))
                        self.embedding_func = self.embeddings[modality][-1]
                        self.embed_data_types[modality].append(dtype_long)
                    elif self.embedding_info[modality][emb_func_indx][
                            'use_glove']:
                        embed_tab_path = self.embedding_info[modality][
                            emb_func_indx]['glove_embed_table']
                        glove_embed_table = pickle.load(
                            open(embed_tab_path, 'rb'))
                        glove_embed_table[0] = np.random.normal(
                            0, 1e5, 300)  # need this to deal with BCE error
                        self.embeddings[modality].append(
                            nn.Embedding.from_pretrained(
                                torch.FloatTensor(glove_embed_table).type(
                                    dtype),
                                freeze=self.lstm_settings_dict['freeze_glove'])
                        )
                        self.embedding_func = self.embeddings[modality][-1]
                        self.embed_data_types[modality].append(dtype_long)
                        print('using glove embeddings')
                    else:
                        self.embeddings[modality].append(
                            nn.Linear(self.embedding_info[modality]
                                      [emb_func_indx]['embedding_num'],
                                      self.embedding_info[modality]
                                      [emb_func_indx]['embedding_out_dim'],
                                      bias=True).type(dtype))
                        self.embedding_linear = self.embeddings[modality][-1]
                        self.embed_data_types[modality].append(dtype)
                    self.embedding_indices[modality].append(
                        self.embedding_info[modality][emb_func_indx]
                        ['emb_indices'])  # two tuples for start and end
                for emb_func_indx in range(len(self.embedding_info[modality])):
                    self.embed_delete_index_list[modality] += list(
                        range(
                            self.embedding_indices[modality][emb_func_indx][0]
                            [0], self.embedding_indices[modality]
                            [emb_func_indx][0][1]))
                    self.embed_delete_index_list[modality] += list(
                        range(
                            self.embedding_indices[modality][emb_func_indx][1]
                            [0], self.embedding_indices[modality]
                            [emb_func_indx][1][1]))

        # Initialize LSTMs
        self.lstm_dict = {}
        if self.lstm_settings_dict['no_subnets']:
            if not (len(self.lstm_settings_dict['active_modalities']) == 1):
                raise ValueError('Can only have one modality if no subnets')
            else:
                self.lstm_settings_dict['is_irregular'][
                    'master'] = self.lstm_settings_dict['is_irregular'][
                        self.lstm_settings_dict['active_modalities'][0]]
                if self.lstm_settings_dict['is_irregular']['master']:
                    # self.lstm_dict['master'] = nn.LSTMCell(self.feature_size_dict['master'],
                    #                                        self.lstm_settings_dict['hidden_dims']['master']).type(dtype)
                    self.lstm_dict['master'] = nn.LSTMCell(
                        self.feature_size_dict['master'],
                        self.lstm_settings_dict['hidden_dims']['master']).type(
                            dtype)
                    self.lstm_master = self.lstm_dict['master']
                else:
                    self.lstm_dict['master'] = nn.LSTM(
                        self.feature_size_dict['master'],
                        self.lstm_settings_dict['hidden_dims']['master']).type(
                            dtype)
                    self.lstm_master = self.lstm_dict['master']
        else:  # Two subnets
            self.lstm_settings_dict['is_irregular']['master'] = False
            self.lstm_dict['master'] = nn.LSTM(
                self.feature_size_dict['master'],
                self.lstm_settings_dict['hidden_dims']['master']).type(dtype)
            self.lstm_master = self.lstm_dict['master']
            for lstm in self.lstm_settings_dict['active_modalities']:
                if self.lstm_settings_dict['is_irregular'][lstm]:
                    # self.lstm_dict[lstm] = nn.LSTMCell(self.feature_size_dict[lstm],
                    #                                    self.lstm_settings_dict['hidden_dims'][lstm]).type(dtype)
                    self.lstm_dict[lstm] = nn.LSTMCell(
                        self.feature_size_dict[lstm],
                        self.lstm_settings_dict['hidden_dims'][lstm]).type(
                            dtype)
                    if lstm == 'acous':
                        self.lstm_cell_acous = self.lstm_dict[lstm]
                    else:
                        self.lstm_cell_visual = self.lstm_dict[lstm]
                else:
                    self.lstm_dict[lstm] = nn.LSTM(
                        self.feature_size_dict[lstm],
                        self.lstm_settings_dict['hidden_dims'][lstm]).type(
                            dtype)
                    if lstm == 'acous':
                        self.lstm_acous = self.lstm_dict[lstm]
                    else:
                        self.lstm_visual = self.lstm_dict[lstm]

            if self.lstm_settings_dict['visual_as_id']['visual']:
                #                self.lstm_cell_visual = self.lstm_dict['acous'].copy()
                self.lstm_visual = self.lstm_dict['acous']

        # init dropout layers
        self.dropout_dict = {}
        for drop_key, drop_val in self.lstm_settings_dict['dropout'].items():
            self.dropout_dict[drop_key] = nn.Dropout(drop_val)
            setattr(self, 'dropout_' + str(drop_key),
                    self.dropout_dict[drop_key])

        self.out = nn.Linear(self.lstm_settings_dict['hidden_dims']['master'],
                             prediction_length).type(dtype)
        self.init_hidden()
Example 10
    def __init__(self):
        super(Decoder, self).__init__()

        self.lstm = nn.LSTMCell(conf('emb-size'), conf('dec-hidden-size'))

        self.y_concat = nn.Linear(2 * conf('enc-hidden-size') + conf('emb-size'), conf('emb-size'))
Example 11
trainloader = DataLoader(traindataset,
                         batch_size=BATCH_SIZE,
                         shuffle=True,
                         num_workers=4)

testdataset = MyDataset('test.csv')
testloader = DataLoader(testdataset,
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        num_workers=4)

mlp1 = MLP(EMB_SIZE).to(device)
mlp2 = MLP(2 * HIDDEN_SIZE).to(device)
mlp3 = MLP(2 * HIDDEN_SIZE).to(device)
r = Pred(HIDDEN_SIZE).to(device)
lstm = nn.LSTMCell(HIDDEN_SIZE, HIDDEN_SIZE).to(device)
embed = torch.nn.functional.one_hot

optimizer_mlp1 = torch.optim.Adam(mlp1.parameters(),
                                  lr=2e-4,
                                  weight_decay=1e-4)
optimizer_mlp2 = torch.optim.Adam(mlp2.parameters(),
                                  lr=2e-4,
                                  weight_decay=1e-4)
optimizer_mlp3 = torch.optim.Adam(mlp3.parameters(),
                                  lr=2e-4,
                                  weight_decay=1e-4)
optimizer_r = torch.optim.Adam(r.parameters(), lr=2e-4, weight_decay=1e-4)
optimizer_lstm = torch.optim.Adam(lstm.parameters(),
                                  lr=2e-4,
                                  weight_decay=1e-4)
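As a rough sketch of how an nn.LSTMCell and its Adam optimizer like the ones above are typically driven for one update (dummy data and a dummy loss, placeholder sizes; the MLP/Pred parts are omitted):

import torch
import torch.nn as nn

HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN = 64, 8, 5   # placeholder sizes
lstm = nn.LSTMCell(HIDDEN_SIZE, HIDDEN_SIZE)
optimizer_lstm = torch.optim.Adam(lstm.parameters(), lr=2e-4, weight_decay=1e-4)

x = torch.randn(SEQ_LEN, BATCH_SIZE, HIDDEN_SIZE)
h = c = torch.zeros(BATCH_SIZE, HIDDEN_SIZE)
for t in range(SEQ_LEN):
    h, c = lstm(x[t], (h, c))
loss = h.pow(2).mean()                        # dummy loss, just to show the update
optimizer_lstm.zero_grad()
loss.backward()
optimizer_lstm.step()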
Example 12
  def __init__(self, pc_embedder, address_embedder, cache_line_embedder,
               positional_embedder, lstm_hidden_size, max_attention_history,
               loss_fns=None, cache_pc_embedder=None):
    """Constructs a model to predict evictions from a EvictionEntries history.

    At each timestep t, receives:
      - pc_t: program counter of t-th memory access.
      - a_t: (cache-aligned) address of t-th memory access.
      - [l^0_t, ..., l^N_t]: the cache lines present in the cache set accessed
        by a_t. Each cache line consists of the cache-aligned address and the pc
        of the last access to that address.

    Computes:
      c_0, h_0 = zeros(lstm_hidden_size)
      c_{t + 1}, h_{t + 1} = LSTM([e(pc_t); e(a_t)], c_t, h_t)
      h^i = attention([h_{t - K}, ..., h_t], query=e(l^i_t)) for i = 1, ..., N
      eviction_score s^i = softmax(f(h^i))

    The line with the highest eviction score is evicted.

    Args:
      pc_embedder (embed.Embedder): embeds the program counter.
      address_embedder (embed.Embedder): embeds the address.
      cache_line_embedder (embed.Embedder): embeds the cache line.
      positional_embedder (embed.Embedder): embeds positions of the access
        history.
      lstm_hidden_size (int): dimension of output of LSTM (h and c).
      max_attention_history (int): maximum number of past hidden states to
        attend over (K in the equation above).
      loss_fns (dict): maps a name (str) to a loss function (LossFunction).
        The name is used in the loss method. Defaults to top_1_log_likelihood.
      cache_pc_embedder (embed.Embedder | None): embeds the pc of each cache
        line, if provided. Otherwise cache line pcs are not embedded.
    """
    super(EvictionPolicyModel, self).__init__()
    self._pc_embedder = pc_embedder
    self._address_embedder = address_embedder
    self._cache_line_embedder = cache_line_embedder
    self._cache_pc_embedder = cache_pc_embedder
    self._lstm_cell = nn.LSTMCell(
        pc_embedder.embed_dim + address_embedder.embed_dim, lstm_hidden_size)
    self._positional_embedder = positional_embedder

    query_dim = cache_line_embedder.embed_dim
    if cache_pc_embedder is not None:
      query_dim += cache_pc_embedder.embed_dim
    self._history_attention = attention.MultiQueryAttention(
        attention.GeneralAttention(query_dim, lstm_hidden_size))
    # f(h, e(l))
    self._cache_line_scorer = nn.Linear(
        lstm_hidden_size + self._positional_embedder.embed_dim, 1)

    self._reuse_distance_estimator = nn.Linear(
        lstm_hidden_size + self._positional_embedder.embed_dim, 1)

    # Needs to be capped because of limited GPU memory
    self._max_attention_history = max_attention_history

    if loss_fns is None:
      loss_fns = {"log_likelihood": LogProbLoss()}
    self._loss_fns = loss_fns
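The recurrence in the docstring maps onto a single nn.LSTMCell call per memory access; a minimal sketch with placeholder embedding sizes (not the model's actual forward):

import torch
import torch.nn as nn

pc_dim, addr_dim, hidden = 16, 16, 32         # placeholder embedding sizes
lstm_cell = nn.LSTMCell(pc_dim + addr_dim, hidden)

h = torch.zeros(1, hidden)                    # h_0
c = torch.zeros(1, hidden)                    # c_0
pc_emb = torch.randn(1, pc_dim)               # stands in for e(pc_t)
addr_emb = torch.randn(1, addr_dim)           # stands in for e(a_t)
h, c = lstm_cell(torch.cat([pc_emb, addr_emb], dim=-1), (h, c))   # h_{t+1}, c_{t+1}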
Example 13
    def __init__(self,
                 num_steps,
                 x_size,
                 window_size,
                 z_what_size,
                 rnn_hidden_size,
                 encoder_net=[],
                 decoder_net=[],
                 predict_net=[],
                 embed_net=None,
                 bl_predict_net=[],
                 non_linearity='ReLU',
                 decoder_output_bias=None,
                 decoder_output_use_sigmoid=False,
                 use_masking=True,
                 use_baselines=True,
                 baseline_scalar=None,
                 scale_prior_mean=3.0,
                 scale_prior_sd=0.1,
                 pos_prior_mean=0.0,
                 pos_prior_sd=1.0,
                 likelihood_sd=0.3,
                 use_cuda=False):

        super().__init__()

        self.num_steps = num_steps
        self.x_size = x_size
        self.window_size = window_size
        self.z_what_size = z_what_size
        self.rnn_hidden_size = rnn_hidden_size
        self.use_masking = use_masking
        self.use_baselines = use_baselines
        self.baseline_scalar = baseline_scalar
        self.likelihood_sd = likelihood_sd
        self.use_cuda = use_cuda
        prototype = torch.tensor(0.).cuda() if use_cuda else torch.tensor(0.)
        self.options = dict(dtype=prototype.dtype, device=prototype.device)

        self.z_pres_size = 1
        self.z_where_size = 3
        # By making these parameters they will be moved to the gpu
        # when necessary. (They are not registered with pyro for
        # optimization.)
        self.z_where_loc_prior = nn.Parameter(
            torch.FloatTensor([scale_prior_mean, pos_prior_mean, pos_prior_mean]),
            requires_grad=False)
        self.z_where_scale_prior = nn.Parameter(
            torch.FloatTensor([scale_prior_sd, pos_prior_sd, pos_prior_sd]),
            requires_grad=False)

        # Create nn modules.
        rnn_input_size = x_size ** 2 if embed_net is None else embed_net[-1]
        rnn_input_size += self.z_where_size + z_what_size + self.z_pres_size
        nl = getattr(nn, non_linearity)

        self.rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
        self.encode = Encoder(window_size ** 2, encoder_net, z_what_size, nl)
        self.decode = Decoder(window_size ** 2, decoder_net, z_what_size,
                              decoder_output_bias, decoder_output_use_sigmoid, nl)
        self.predict = Predict(rnn_hidden_size, predict_net, self.z_pres_size, self.z_where_size, nl)
        self.embed = Identity() if embed_net is None else MLP(x_size ** 2, embed_net, nl, True)

        self.bl_rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
        self.bl_predict = MLP(rnn_hidden_size, bl_predict_net + [1], nl)
        self.bl_embed = Identity() if embed_net is None else MLP(x_size ** 2, embed_net, nl, True)

        # Create parameters.
        self.h_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
        self.c_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
        self.bl_h_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
        self.bl_c_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
        self.z_where_init = nn.Parameter(torch.zeros(1, self.z_where_size))
        self.z_what_init = nn.Parameter(torch.zeros(1, self.z_what_size))

        if use_cuda:
            self.cuda()
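A note on the learned initial states above: parameters such as h_init/c_init are typically expanded over the batch dimension before the first cell step. A small sketch under assumed sizes (not taken from this model's actual model/guide code):

import torch
import torch.nn as nn

rnn_input_size, rnn_hidden_size, batch = 32, 64, 4   # placeholder sizes
rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
h_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
c_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))

h = h_init.expand(batch, -1)                         # broadcast to the batch
c = c_init.expand(batch, -1)
h, c = rnn(torch.randn(batch, rnn_input_size), (h, c))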
Example 14
    def __init__(self, args):
        super(JointNS, self).__init__(args)
        self.image_feature_size = 512
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3

        self.loss_function = args.loss
        self.number_of_cp = args.number_of_cp
        self.environment = args.instance_environment
        self.sequence_length = args.sequence_length
        self.gpu_ids = args.gpu_ids
        self.all_obj_names = args.object_list
        self.use_gt_cp = args.use_gt_cp
        self.clean_force = True

        # configs w.r.t. two losses
        self.joint_two_losses = args.joint_two_losses
        self.loss1_or_loss2 = None
        if args.loss1_w < 0.00001:
            self.loss1_or_loss2 = False  # update loss2 only
        elif args.loss2_w < 0.00001:
            self.loss1_or_loss2 = True  # update loss1 only
        self.loss1_optim, self.loss2_optim, self.joint_optim = None, None, None

        # neural force simulator
        self.use_image_feature = True
        if not self.use_image_feature:
            self.one_ns_layer = MLPNS(hidden_size=64, layer_norm=False)
        else:
            self.one_ns_layer = NSWithImageFeature(hidden_size=64,
                                                   layer_norm=False,
                                                   image_feature_dim=512)
        # self.ns_layer = {obj_name: MLPNS(hidden_size=64, layer_norm=False) for obj_name in self.all_obj_names}

        # force predictor networks.
        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc
        self.feature_extractor.eval()
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3
        self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)
        self.contact_point_image_embed = combine_block_w_do(
            512, 64, args.dropout_ratio)

        input_object_embed_size = torch.Tensor(
            [3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)
        self.contact_point_input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)

        state_embed_size = torch.Tensor([
            NoGradEnvState.total_size + self.cp_feature_size, 100,
            self.object_feature_size
        ])
        self.state_embed = input_embedding_net(
            state_embed_size.long().tolist(), dropout=args.dropout_ratio)

        self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                    hidden_size=self.hidden_size,
                                    batch_first=True,
                                    num_layers=self.num_layers)

        self.contact_point_encoder = nn.LSTM(input_size=self.hidden_size +
                                             64 * 7 * 7,
                                             hidden_size=self.hidden_size,
                                             batch_first=True,
                                             num_layers=self.num_layers)
        contact_point_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])
        self.contact_point_decoder = input_embedding_net(
            contact_point_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2,
                                        hidden_size=self.hidden_size)

        forces_directions_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])

        self.forces_directions_decoder = input_embedding_net(
            forces_directions_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        assert args.batch_size == 1, 'have not been implemented yet, because of the environment'

        assert self.number_of_cp == 5  # for five fingers
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
            args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()

        self.force_predictor_modules = [
            self.feature_extractor, self.image_embed,
            self.contact_point_image_embed, self.input_object_embed,
            self.contact_point_input_object_embed, self.state_embed,
            self.lstm_encoder, self.contact_point_encoder,
            self.contact_point_decoder, self.forces_directions_decoder
        ]

        # see gradients for debugging
        self.vis_grad = args.vis_grad
        self.grad_vis = None

        self.train_res = args.train_res or self.vis_grad
Example 15
    def __init__(self,
                 embed_size,
                 hidden_size,
                 vocab,
                 dropout_rate=0.2,
                 input_feed=True,
                 label_smoothing=0.):
        super(NMT, self).__init__()

        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab
        self.input_feed = input_feed

        self.src_code_embed = nn.Embedding(len(vocab.src_code),
                                           embed_size,
                                           padding_idx=vocab.src_code['<pad>'])
        self.src_nl_embed = nn.Embedding(len(vocab.src_nl),
                                         embed_size,
                                         padding_idx=vocab.src_nl['<pad>'])

        self.tgt_embed = nn.Embedding(len(vocab.tgt),
                                      embed_size,
                                      padding_idx=vocab.tgt['<pad>'])

        self.code_encoder_lstm = nn.LSTM(embed_size,
                                         hidden_size,
                                         bidirectional=True)
        self.nl_encoder_lstm = nn.LSTM(embed_size,
                                       hidden_size,
                                       bidirectional=True)

        decoder_lstm_input = embed_size + (
            4 * hidden_size) if self.input_feed else embed_size

        self.decoder_lstm = nn.LSTMCell(decoder_lstm_input, hidden_size)

        # attention: dot product attention
        # project source encodings to the decoder rnn's state space
        self.att_src_code_linear = nn.Linear(hidden_size * 2,
                                             hidden_size,
                                             bias=False)
        self.att_src_nl_linear = nn.Linear(hidden_size * 2,
                                           hidden_size,
                                           bias=False)

        # transformation of decoder hidden states and context vectors before reading out target words
        # this produces the `attentional vector` in (Luong et al., 2015)
        self.att_vec_linear = nn.Linear(hidden_size * 2 * 2 + hidden_size,
                                        hidden_size,
                                        bias=False)

        # prediction layer of the target vocabulary
        self.readout = nn.Linear(hidden_size, len(vocab.tgt), bias=False)

        # dropout layer
        self.dropout = nn.Dropout(self.dropout_rate)

        # initialize the decoder's state and cells with encoder hidden states
        self.decoder_cell_init = nn.Linear(hidden_size * 2, hidden_size)

        # copy related layers
        self.p_gen_linear = nn.Linear(hidden_size * 9 + embed_size, 3)

        self.label_smoothing = label_smoothing
        if label_smoothing > 0.:
            self.label_smoothing_loss = LabelSmoothingLoss(
                label_smoothing,
                tgt_vocab_size=len(vocab.tgt),
                padding_idx=vocab.tgt['<pad>'])
Example 16
    def __init__(self, agent_params, **kwargs):
        # call the super-class init
        super(ActorCritic, self).__init__()
        self.gamma = agent_params['gamma']  # discount factor
        self.input_dims = agent_params['input_dims']
        self.action_dims = agent_params['action_dims']

        if 'rfsize' not in agent_params.keys():
            self.rfsize = kwargs.get('rfsize', 4)
        else:
            self.rfsize = agent_params['rfsize']
        if 'padding' not in agent_params.keys():
            self.padding = kwargs.get('padding', 1)
        else:
            self.padding = agent_params['padding']
        if 'dilation' not in agent_params.keys():
            self.dilation = 1
        else:
            self.dilation = kwargs.get('dilation', 1)
        if 'stride' not in agent_params.keys():
            self.stride = kwargs.get('stride', 1)
        else:
            self.stride = agent_params['stride']
        if 'batch_size' not in agent_params.keys():
            self.batch_size = kwargs.get('batch_size', 1)
        else:
            self.batch_size = agent_params['batch_size']

        self.use_SR = kwargs.get('use_SR', True)

        if 'hidden_types' in agent_params.keys():

            if len(agent_params['hidden_dims']) != len(
                    agent_params['hidden_types']):
                raise Exception(
                    'Incorrect specification of hidden layer dimensions')

            hidden_types = agent_params['hidden_types']
            # create lists for tracking hidden layers
            self.hidden = nn.ModuleList()
            self.hidden_dims = agent_params['hidden_dims']

            self.hx = []
            self.cx = []
            # calculate dimensions for each layer
            for ind, htype in enumerate(hidden_types):
                if htype not in ['linear', 'lstm', 'gru', 'conv', 'pool']:
                    raise Exception(
                        f'Unrecognized type for hidden layer {ind}')
                if ind == 0:
                    input_d = self.input_dims
                else:
                    if hidden_types[ind - 1] in [
                            'conv', 'pool'
                    ] and not htype in ['conv', 'pool']:
                        input_d = int(np.prod(self.hidden_dims[ind - 1]))

                    else:
                        input_d = self.hidden_dims[ind - 1]

                if htype in ['conv', 'pool']:
                    output_d = tuple(self.conv_output(input_d))
                    self.hidden_dims[ind] = output_d

                else:
                    output_d = self.hidden_dims[ind]

                # construct the layer
                if htype == 'linear':
                    self.hidden.append(nn.Linear(input_d, output_d))
                    self.hx.append(None)
                    self.cx.append(None)
                elif htype == 'lstm':
                    self.hidden.append(nn.LSTMCell(input_d, output_d))
                    self.hx.append(
                        Variable(torch.zeros(self.batch_size, output_d)))
                    self.cx.append(
                        Variable(torch.zeros(self.batch_size, output_d)))
                elif htype == 'gru':
                    self.hidden.append(nn.GRUCell(input_d, output_d))
                    self.hx.append(
                        Variable(torch.zeros(self.batch_size, output_d)))
                    self.cx.append(None)
                elif htype == 'conv':
                    in_channels = input_d[0]
                    out_channels = output_d[0]
                    self.hidden.append(
                        nn.Conv2d(in_channels,
                                  out_channels,
                                  kernel_size=self.rfsize,
                                  padding=self.padding,
                                  stride=self.stride,
                                  dilation=self.dilation))
                    self.hx.append(None)
                    self.cx.append(None)
                elif htype == 'pool':
                    self.hidden.append(
                        nn.MaxPool2d(kernel_size=self.rfsize,
                                     padding=self.padding,
                                     stride=self.stride,
                                     dilation=self.dilation))
                    self.hx.append(None)
                    self.cx.append(None)

            # create the actor and critic layers
            self.layers = [self.input_dims
                           ] + self.hidden_dims + [self.action_dims]
            self.output = nn.ModuleList([
                nn.Linear(output_d, self.action_dims),  #actor
                nn.Linear(output_d, 1)  #critic
            ])
            if self.use_SR:
                self.SR = nn.Linear(output_d, output_d)  # psi

        else:
            self.layers = [self.input_dims, self.action_dims]
            self.output = nn.ModuleList([
                nn.Linear(self.input_dims, self.action_dims),  # ACTOR
                nn.Linear(self.input_dims, 1)
            ])  # CRITIC
        self.output_d = self.hidden_dims[-1]

        self.saved_actions = []
        self.saved_rewards = []
        self.saved_phi = []
        self.saved_psi = []
        '''
		main_params = []
		SR_params = []
		for name, para in self.named_parameters():
			if name[0:2] == 'SR':
				SR_params.append(para)
			else:
				main_params.append(para)

		self.SR_opt = opt([{'params': SR_params,
							'lr': 0.01 * agent_params.eta}])  # opt([{'params': freeze, 'lr': 0.0}, {'params': unfreeze, 'lr': agent_params['eta']}], lr=0.0)
		self.optimizer = opt(main_params, lr=agent_params.eta)
		'''
        self.optimizer = optim.Adam(self.parameters(), lr=agent_params['eta'])
Example 17
def LSTMCell(input_size, hidden_size, **kwargs):
    m = nn.LSTMCell(input_size, hidden_size, **kwargs)
    for name, param in m.named_parameters():
        if 'weight' in name or 'bias' in name:
            param.data.uniform_(-0.1, 0.1)
    return m
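Hypothetical usage of the factory above, with arbitrary sizes (the returned cell behaves like any nn.LSTMCell, just with weights and biases re-initialized uniformly in [-0.1, 0.1]):

import torch

cell = LSTMCell(64, 128)
x = torch.randn(8, 64)                                        # (batch, input_size)
h, c = cell(x, (torch.zeros(8, 128), torch.zeros(8, 128)))    # each (batch, hidden_size)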
Example 18
 def test_lstm_cell(self):
     model = nn.LSTMCell(RNN_INPUT_SIZE, RNN_HIDDEN_SIZE)
     input = torch.randn(BATCH_SIZE, RNN_INPUT_SIZE)
     h0 = torch.randn(BATCH_SIZE, RNN_HIDDEN_SIZE)
     c0 = torch.randn(BATCH_SIZE, RNN_HIDDEN_SIZE)
     self.run_model_test(model, train=False, batch_size=BATCH_SIZE, input=(input, (h0, c0)), use_gpu=False)
Example 19
    def __init__(self, num_inputs, action_space):
        super(ActorCritic, self).__init__()
        self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1)

        self.lstm = nn.LSTMCell(32 * 3 * 3, 256)

        num_outputs = action_space.n
        self.critic_linear = nn.Linear(256, 1)
        self.actor_linear = nn.Linear(256, num_outputs)

        ################################################################
        self.icm_conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1)
        self.icm_conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.icm_conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.icm_conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1)

        # self.icm_lstm = nn.LSTMCell(32 * 3 * 3, 256)

        self.inverse_linear1 = nn.Linear(288 + 288, 256)
        self.inverse_linear2 = nn.Linear(256, num_outputs)

        self.forward_linear1 = nn.Linear(288 + num_outputs, 256)
        self.forward_linear2 = nn.Linear(256, 288)

        # self.inverse_linear1 = nn.Linear(256 + 256, 256)
        # self.inverse_linear2 = nn.Linear(256, num_outputs)

        # self.forward_linear1 = nn.Linear(256 + num_outputs, 256)
        # self.forward_linear2 = nn.Linear(256, 256)
        ################################################################
        self.apply(weights_init)
        self.inverse_linear1.weight.data = normalized_columns_initializer(
            self.inverse_linear1.weight.data, 0.01)
        self.inverse_linear1.bias.data.fill_(0)
        self.inverse_linear2.weight.data = normalized_columns_initializer(
            self.inverse_linear2.weight.data, 1.0)
        self.inverse_linear2.bias.data.fill_(0)

        self.forward_linear1.weight.data = normalized_columns_initializer(
            self.forward_linear1.weight.data, 0.01)
        self.forward_linear1.bias.data.fill_(0)
        self.forward_linear2.weight.data = normalized_columns_initializer(
            self.forward_linear2.weight.data, 1.0)
        self.forward_linear2.bias.data.fill_(0)

        '''
        self.icm_lstm.bias_ih.data.fill_(0)
        self.icm_lstm.bias_hh.data.fill_(0)
        '''
        ################################################################

        self.actor_linear.weight.data = normalized_columns_initializer(
            self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = normalized_columns_initializer(
            self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        self.train()
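The 32 * 3 * 3 LSTMCell input size above implies the conv stack flattens to 288 features before the recurrent step. A hedged sketch of that per-frame forward pass, assuming single-channel 42x42 observations (which the four stride-2 convs reduce to a 3x3 map) and an assumed ELU activation; this is not the class's actual forward:

import torch
import torch.nn as nn
import torch.nn.functional as F

num_inputs, num_actions = 1, 4                # placeholder sizes
convs = nn.ModuleList(
    [nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1)] +
    [nn.Conv2d(32, 32, 3, stride=2, padding=1) for _ in range(3)])
lstm = nn.LSTMCell(32 * 3 * 3, 256)
actor, critic = nn.Linear(256, num_actions), nn.Linear(256, 1)

x = torch.randn(1, num_inputs, 42, 42)
for conv in convs:
    x = F.elu(conv(x))                        # activation choice is an assumption
hx, cx = lstm(x.view(x.size(0), -1),
              (torch.zeros(1, 256), torch.zeros(1, 256)))
logits, value = actor(hx), critic(hx)         # policy logits and state value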
Example 20
 def test_lstm_cell_is_half(self):
     cell = nn.LSTMCell(self.h, self.h)
     self.run_cell_test(cell, state_tuple=True)
Example 21
    def __init__(self, args):
        super(Graph_MFN, self).__init__()
        # print("Graph_MFN initialization ....")
        # print(args)
        self.d_l, self.d_a, self.d_v = args.feature_dims
        self.dh_l, self.dh_a, self.dh_v = args.hidden_dims_l, args.hidden_dims_a, args.hidden_dims_v
        total_h_dim = self.dh_l + self.dh_a + self.dh_v
        self.mem_dim = args.memsize
        self.inner_node_dim = args.inner_node_dim

        self.singleton_l_size = args.hidden_dims_l
        self.singleton_a_size = args.hidden_dims_a
        self.singleton_v_size = args.hidden_dims_v

        # Here Changed! (rm window_dim)
        # window_dim = args.windowsize
        output_dim = args.num_classes
        # Here Changed! (rm attInShape, use inner_node_dim instead)
        # attInShape = total_h_dim * window_dim
        # gammaInShape = attInShape + self.mem_dim
        gammaInShape = self.inner_node_dim + self.mem_dim  # TODO: we need to get inner_node_dim from args.
        final_out = total_h_dim + self.mem_dim
        # h_att1 = args.NN1Config_shapes
        h_att2 = args.NNConfig_shapes
        h_gamma1 = args.gamma1Config_shapes
        h_gamma2 = args.gamma2Config_shapes
        h_out = args.outConfig_shapes
        # att1_dropout = args.NN1Config_drop
        att2_dropout = args.NNConfig_drop
        gamma1_dropout = args.gamma1Config_drop
        gamma2_dropout = args.gamma2Config_drop
        out_dropout = args.outConfig_drop

        self.lstm_l = nn.LSTMCell(self.d_l, self.dh_l)
        self.lstm_a = nn.LSTMCell(self.d_a, self.dh_a)
        self.lstm_v = nn.LSTMCell(self.d_v, self.dh_v)

        # Here Changed! Todo : add Arg param singleton_l singleton_a singleton_v
        self.l_transform = nn.Linear(self.dh_l * 2, self.singleton_l_size)
        self.a_transform = nn.Linear(self.dh_a * 2, self.singleton_a_size)
        self.v_transform = nn.Linear(self.dh_v * 2, self.singleton_v_size)

        # Here Changed! (initialize the DFG part) Todo : add Arg param inner node dimension.
        pattern_model = nn.Sequential(nn.Linear(100, self.inner_node_dim)).to(
            args.device)
        efficacy_model = nn.Sequential(nn.Linear(100, self.inner_node_dim)).to(
            args.device
        )  # Note : actually here inner_node_dim can change arbitrarily
        self.graph_mfn = DynamicFusionGraph(pattern_model, [
            self.singleton_l_size, self.singleton_a_size, self.singleton_v_size
        ], self.inner_node_dim, efficacy_model, args.device).to(args.device)
        # Here Changed!  (delete att1 )
        # self.att1_fc1 = nn.Linear(attInShape, h_att1)
        # self.att1_fc2 = nn.Linear(h_att1, attInShape)
        # self.att1_dropout = nn.Dropout(att1_dropout)

        # Here Changed! (alter the dim param.)
        self.att2_fc1 = nn.Linear(
            self.inner_node_dim, h_att2
        )  # Note: might (inner_node_dim = self.mem_dim) is a common choice.
        self.att2_fc2 = nn.Linear(h_att2, self.mem_dim)
        self.att2_dropout = nn.Dropout(att2_dropout)

        self.gamma1_fc1 = nn.Linear(gammaInShape, h_gamma1)
        self.gamma1_fc2 = nn.Linear(h_gamma1, self.mem_dim)
        self.gamma1_dropout = nn.Dropout(gamma1_dropout)

        self.gamma2_fc1 = nn.Linear(gammaInShape, h_gamma2)
        self.gamma2_fc2 = nn.Linear(h_gamma2, self.mem_dim)
        self.gamma2_dropout = nn.Dropout(gamma2_dropout)

        self.out_fc1 = nn.Linear(final_out, h_out)
        self.out_fc2 = nn.Linear(h_out, output_dim)
        self.out_dropout = nn.Dropout(out_dropout)
Example 22
 def __init__(self, ins = 2, es = 8, hs = 16):
     super(EncoderRNN, self).__init__()
     self.hs = hs
     self.linear1 = nn.Linear(ins, es)
     self.lstm1 = nn.LSTMCell(es, hs)
     self.gru1 = nn.GRUCell(es, hs)
Example 23
    def __init__(self,
                 obs_space,
                 action_space,
                 use_memory=False,
                 use_text=False):
        super().__init__()

        # Decide which components are enabled
        self.use_text = use_text
        self.use_memory = use_memory
        self.recurrent = use_memory

        # Define image embedding
        image_chans = obs_space["image"][2]
        self.image_conv = nn.Sequential(
            nn.Conv2d(image_chans, 16, (2, 2)),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
            nn.Conv2d(16, 32, (2, 2)),
            nn.ReLU()  #,
            # nn.Conv2d(32, 64, (2, 2)),
            # nn.ReLU()
        )
        n = obs_space["image"][0]
        m = obs_space["image"][1]
        # self.image_embedding_size = ((n-1)//2-2)*((m-1)//2-2)*64   # original. image_embedding_size is basically the number of elements at output of self.image_conv(x), not accounting for batch size.
        self.image_embedding_size = 32 * 6  # 32 is outchan, 6 is h*w

        # Define memory
        if self.use_memory:
            self.memory_rnn = nn.LSTMCell(self.image_embedding_size,
                                          self.semi_memory_size)

        # Define text embedding
        if self.use_text:
            self.word_embedding_size = 32
            self.word_embedding = nn.Embedding(obs_space["text"],
                                               self.word_embedding_size)
            self.text_embedding_size = 128
            self.text_rnn = nn.GRU(self.word_embedding_size,
                                   self.text_embedding_size,
                                   batch_first=True)

        # Resize image embedding
        self.embedding_size = self.semi_memory_size
        if self.use_text:
            self.embedding_size += self.text_embedding_size

        # Define actor's model
        if isinstance(action_space, gym.spaces.Discrete):
            self.actor = nn.Sequential(nn.Linear(self.embedding_size, 16),
                                       nn.Tanh(),
                                       nn.Linear(16, action_space.n))
        else:
            raise ValueError("Unknown action space: " + str(action_space))

        # Define critic's model
        self.critic = nn.Sequential(nn.Linear(self.embedding_size, 16),
                                    nn.Tanh(), nn.Linear(16, 1))

        # Initialize parameters correctly
        self.apply(initialize_parameters)
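The constructor above reads self.semi_memory_size, which is not defined in the snippet. A sketch of the convention this kind of actor-critic model usually follows (an assumption, not shown in the original): the recurrent memory stores both the LSTMCell hidden and cell states, so it is twice the per-state size, and the per-state size matches the image embedding.

class MemorySizeMixin:
    image_embedding_size: int  # set in __init__, as in the example above

    @property
    def semi_memory_size(self):
        return self.image_embedding_size

    @property
    def memory_size(self):
        return 2 * self.semi_memory_size  # (h, c) concatenated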
    def __init__(self, input_size, hidden_size):
        super(AdaptiveLSTMCell, self).__init__()
        self.lstm_cell = nn.LSTMCell(input_size, hidden_size)
        self.x_gate = nn.Linear(input_size, hidden_size)
        self.h_gate = nn.Linear(hidden_size, hidden_size)
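The extra x_gate / h_gate layers suggest a sentinel-style gate layered on top of the plain LSTMCell, as used in adaptive-attention models. A hedged sketch of what the matching forward step could look like; the gate formula and the returned sentinel vector are assumptions, not taken from the original snippet.

import torch
import torch.nn as nn

class AdaptiveLSTMCell(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.lstm_cell = nn.LSTMCell(input_size, hidden_size)
        self.x_gate = nn.Linear(input_size, hidden_size)
        self.h_gate = nn.Linear(hidden_size, hidden_size)

    def forward(self, x, states):
        h_prev, c_prev = states
        h, c = self.lstm_cell(x, (h_prev, c_prev))
        # Assumed sentinel gate: mixes the current input with the previous hidden state.
        g = torch.sigmoid(self.x_gate(x) + self.h_gate(h_prev))
        s = g * torch.tanh(c)  # sentinel vector
        return h, c, s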
Example no. 25
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        ### YOUR CODE HERE (~8 Lines)
        ### TODO - Initialize the following variables:
        ###     self.encoder (Bidirectional LSTM with bias)
        ###     self.decoder (LSTM Cell with bias)
        ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        ###     self.dropout (Dropout Layer)
        ###
        ### Use the following docs to properly initialize these variables:
        ###     LSTM:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
        ###     LSTM Cell:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
        ###     Linear Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        ###     Dropout Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

        self.encoder = nn.LSTM(embed_size,
                               hidden_size,
                               bias=True,
                               dropout=self.dropout_rate,
                               bidirectional=True)  # note: dropout has no effect on a single-layer nn.LSTM and triggers a warning
        self.decoder = nn.LSTMCell(
            embed_size + hidden_size, hidden_size,
            bias=True)  # input feeding: the previous combined output (size h) is concatenated with the current target embedding
        self.h_projection = nn.Linear(
            hidden_size * 2, hidden_size,
            bias=False)  # projects the final bidirectional encoder state (R^{2h}) to R^h
        self.c_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False)
        self.att_projection = nn.Linear(hidden_size * 2,
                                        hidden_size,
                                        bias=False)
        self.combined_output_projection = nn.Linear(
            hidden_size * 3, hidden_size,
            bias=False)  # applied to the concatenation of the decoder hidden state and the attention context (R^{3h})
        self.target_vocab_projection = nn.Linear(
            hidden_size, len(vocab.tgt), bias=False)  # projects to target-vocabulary logits for the final softmax
        self.dropout = nn.Dropout(self.dropout_rate)
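For context on the two points noted in the comments above (the embed_size + hidden_size decoder input and the 2h-to-h projections), here is a hedged sketch of how such layers are typically wired: the bidirectional encoder's final states are projected down to initialize the decoder, and input feeding concatenates the previous combined output with the current target embedding. Tensor names and sizes are illustrative assumptions, not taken from the assignment code.

import torch
import torch.nn as nn

embed_size, hidden_size, batch = 64, 128, 4
encoder = nn.LSTM(embed_size, hidden_size, bias=True, bidirectional=True)
decoder = nn.LSTMCell(embed_size + hidden_size, hidden_size, bias=True)
h_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)
c_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)

src = torch.randn(10, batch, embed_size)    # (src_len, batch, embed)
enc_out, (h_n, c_n) = encoder(src)          # h_n, c_n: (2, batch, hidden)

# Concatenate forward/backward final states, then project 2h -> h.
dec_h = h_projection(torch.cat((h_n[0], h_n[1]), dim=1))
dec_c = c_projection(torch.cat((c_n[0], c_n[1]), dim=1))

# Input feeding: the previous combined output o_prev (size h) is concatenated
# with the current target embedding, hence the embed_size + hidden_size input.
o_prev = torch.zeros(batch, hidden_size)
y_emb = torch.randn(batch, embed_size)
dec_h, dec_c = decoder(torch.cat((y_emb, o_prev), dim=1), (dec_h, dec_c))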
Example no. 26
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None

        ### YOUR CODE HERE (~8 Lines)
        ### TODO - Initialize the following variables:
        ###     self.encoder (Bidirectional LSTM with bias)
        ###     self.decoder (LSTM Cell with bias)
        ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        ###     self.dropout (Dropout Layer)
        ###
        ### Use the following docs to properly initialize these variables:
        ###     LSTM:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
        ###     LSTM Cell:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
        ###     Linear Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        ###     Dropout Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

        self.encoder = nn.LSTM(embed_size,
                               self.hidden_size,
                               num_layers=1,
                               bias=True,
                               bidirectional=True)
        self.decoder = nn.LSTMCell(
            embed_size + self.hidden_size,
            self.hidden_size,
            bias=True,
        )
        self.h_projection = nn.Linear(2 * self.hidden_size, self.hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(2 * self.hidden_size, self.hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(2 * self.hidden_size, self.hidden_size,
                                        bias=False)
        self.combined_output_projection = nn.Linear(3 * self.hidden_size,
                                                    self.hidden_size,
                                                    bias=False)
        self.target_vocab_projection = nn.Linear(
            self.hidden_size, len(self.vocab.tgt), bias=False
        )  # these layers are called projections because each maps a vector from its input dimension to its output dimension
        self.dropout = nn.Dropout(dropout_rate)
Example no. 27
    def __init__(self, latents, actions, hiddens, gaussians):
        super().__init__(latents, actions, hiddens, gaussians)
        self.rnn = nn.LSTMCell(latents + actions, hiddens)
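A minimal usage sketch for a cell like the one above: the latent and action vectors are concatenated and fed to the LSTMCell one step at a time. Dimensions are illustrative assumptions, not taken from the original snippet.

import torch
import torch.nn as nn

latents, actions, hiddens, batch = 32, 3, 256, 8
rnn = nn.LSTMCell(latents + actions, hiddens)

z = torch.randn(batch, latents)   # latent state
a = torch.randn(batch, actions)   # action
h = torch.zeros(batch, hiddens)
c = torch.zeros(batch, hiddens)
h, c = rnn(torch.cat((z, a), dim=1), (h, c))  # one recurrence step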
Example no. 28
    def __init__(self,
                 cell='gru',
                 use_baseline=True,
                 n_actions=10,
                 n_units=64,
                 fusion_dim=128,
                 n_input=76,
                 n_hidden=128,
                 demo_dim=17,
                 n_output=1,
                 dropout=0.0,
                 lamda=0.5,
                 device='cpu'):
        super(Agent, self).__init__()

        self.cell = cell
        self.use_baseline = use_baseline
        self.n_actions = n_actions
        self.n_units = n_units
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.n_output = n_output
        self.dropout = dropout
        self.lamda = lamda
        self.fusion_dim = fusion_dim
        self.demo_dim = demo_dim
        self.device = device

        self.agent1_action = []
        self.agent1_prob = []
        self.agent1_entropy = []
        self.agent1_baseline = []
        self.agent2_action = []
        self.agent2_prob = []
        self.agent2_entropy = []
        self.agent2_baseline = []

        self.agent1_fc1 = nn.Linear(self.n_hidden + self.demo_dim,
                                    self.n_units)
        self.agent2_fc1 = nn.Linear(self.n_input + self.demo_dim, self.n_units)
        self.agent1_fc2 = nn.Linear(self.n_units, self.n_actions)
        self.agent2_fc2 = nn.Linear(self.n_units, self.n_actions)
        if use_baseline:
            self.agent1_value = nn.Linear(self.n_units, 1)
            self.agent2_value = nn.Linear(self.n_units, 1)

        if self.cell == 'lstm':
            self.rnn = nn.LSTMCell(self.n_input, self.n_hidden)
        else:
            self.rnn = nn.GRUCell(self.n_input, self.n_hidden)

        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.0)
            elif 'weight' in name:
                nn.init.orthogonal_(param)

        if dropout > 0.0:
            self.nn_dropout = nn.Dropout(p=dropout)
        self.init_h = nn.Linear(self.demo_dim, self.n_hidden)
        self.init_c = nn.Linear(self.demo_dim, self.n_hidden)
        self.fusion = nn.Linear(self.n_hidden + self.demo_dim, self.fusion_dim)
        self.output = nn.Linear(self.fusion_dim, self.n_output)

        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=-1)  # dim=-1 assumed (action dimension); omitting dim is deprecated
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()
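The init_h / init_c layers above suggest that the recurrent state is initialized from the static demographic vector rather than from zeros. A hedged sketch of that step; the tanh squashing and the variable names are assumptions, not taken from the original snippet.

import torch
import torch.nn as nn

demo_dim, n_input, n_hidden, batch = 17, 76, 128, 4
init_h = nn.Linear(demo_dim, n_hidden)
init_c = nn.Linear(demo_dim, n_hidden)
rnn = nn.LSTMCell(n_input, n_hidden)

demo = torch.randn(batch, demo_dim)   # static per-patient features
h = torch.tanh(init_h(demo))          # initial hidden state
c = torch.tanh(init_c(demo))          # initial cell state
x_t = torch.randn(batch, n_input)     # one timestep of observations
h, c = rnn(x_t, (h, c))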
Example no. 29
    def _set_cell(self):
        return nn.LSTMCell(input_size=self.in_dims + self.pb_dims,
                           hidden_size=self.unit_nums)
Example no. 30
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size, the size of hidden states (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None
        # For sanity check only, not relevant to implementation
        self.gen_sanity_check = False
        self.counter = 0

        ### YOUR CODE HERE (~8 Lines)
        ### TODO - Initialize the following variables:
        ###     self.encoder (Bidirectional LSTM with bias)
        ###     self.decoder (LSTM Cell with bias)
        ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        ###     self.dropout (Dropout Layer)
        ###
        ### Use the following docs to properly initialize these variables:
        ###     LSTM:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
        ###     LSTM Cell:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
        ###     Linear Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        ###     Dropout Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=self.hidden_size,
                               bias=True,
                               bidirectional=True)
        self.decoder = nn.LSTMCell(input_size=embed_size + self.hidden_size,
                                   hidden_size=self.hidden_size,
                                   bias=True)
        self.h_projection = nn.Linear(self.hidden_size * 2,
                                      self.hidden_size,
                                      bias=False)  # W_{h}
        self.c_projection = nn.Linear(self.hidden_size * 2,
                                      self.hidden_size,
                                      bias=False)  # W_{c}
        self.att_projection = nn.Linear(self.hidden_size * 2,
                                        self.hidden_size,
                                        bias=False)  #  W_{attProj}
        self.combined_output_projection = nn.Linear(self.hidden_size * 3,
                                                    self.hidden_size,
                                                    bias=False)  # W_{u}
        self.target_vocab_projection = nn.Linear(self.hidden_size,
                                                 len(self.vocab.tgt),
                                                 bias=False)  # W_vocab
        self.dropout = nn.Dropout(p=self.dropout_rate)