def __init__(self):
    super(CaptionNet, self).__init__()

    # Make VGG net
    self.vgg = VGG(make_layers(VGG_MODEL_CFG))
    self.vgg.load_state_dict(torch.load(VGG_MODEL_FILE))

    # Freeze all VGG layers
    for param in self.vgg.parameters():
        param.requires_grad = False

    self.vgg_to_hidden = nn.Sequential(
        nn.Linear(2 * 4096, RNN_HIDDEN_SIZE),
        nn.ReLU(True),
        nn.Dropout(),
    )

    # Recurrent layer
    self.gru_cell = ForgetfulGRUCell(
        input_size=WORDVEC_SIZE,
        hidden_size=RNN_HIDDEN_SIZE,
    )

    # Linear layer to convert hidden layer to word in vocab
    self.hidden_to_vocab = nn.Linear(RNN_HIDDEN_SIZE, VOCABULARY_SIZE)

    self.word_embeddings = word_embedding.WordEmbedding()
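# A minimal sketch (an assumption, not the original forward()) of how the modules
# above could be wired for a single decoding step: the concatenated VGG features
# initialise the hidden state, each word embedding advances the GRU cell, and the
# hidden state is projected to vocabulary logits.
def _decode_step_sketch(self, vgg_features, word_vec, hidden=None):
    # vgg_features: (batch, 2*4096), word_vec: (batch, WORDVEC_SIZE)
    if hidden is None:
        hidden = self.vgg_to_hidden(vgg_features)
    hidden = self.gru_cell(word_vec, hidden)
    logits = self.hidden_to_vocab(hidden)
    return logits, hidden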
def __init__(self, mode):
    self.word_embeddings = word_embedding.WordEmbedding()
    self.mode = mode
    if mode == 'train':
        self.json_file = TRAIN_JSON
    else:
        self.json_file = VALID_JSON
    with open(self.json_file) as jsonf:
        data = json.load(jsonf)
    self.captions = data['annotations']
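# A hedged sketch of how one caption record might be consumed downstream; the
# 'image_id'/'caption' keys follow the COCO annotation format and the
# get_word_embedding() lookup is a hypothetical method name, both assumptions
# rather than code taken from this repository.
def _caption_record_sketch(self, index):
    annotation = self.captions[index]
    tokens = annotation['caption'].lower().split()
    vectors = [self.word_embeddings.get_word_embedding(tok) for tok in tokens]
    return annotation['image_id'], vectors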
def __init__(self, data_folder, vocab_size, max_in_seq_len, max_data_size, embed_size=-1):
    self.max_in_seq_len = max_in_seq_len
    self.max_data_size = max_data_size
    self.batch_index = 0

    # Get words in the form of numbers: 0..vocab_size, with 0 for unk
    data = data_helper.get_tokenized_data(data_folder, vocab_size)
    tokenized_in_seq_path_train, tokenized_label_path_train = data[0]
    tokenized_in_seq_path_valid, tokenized_label_path_valid = data[1]
    tokenized_in_seq_path_test, tokenized_label_path_test = data[2]
    in_vocab_path, label_vocab_path = data[3]

    # Read the data in the form of numbers into memory
    self.in_seq_train = self.read_data_into_memory(tokenized_in_seq_path_train)
    self.labels_train = self.read_data_into_memory(tokenized_label_path_train)
    self.in_seq_valid = self.read_data_into_memory(tokenized_in_seq_path_valid)
    self.labels_valid = self.read_data_into_memory(tokenized_label_path_valid)
    self.in_seq_test = self.read_data_into_memory(tokenized_in_seq_path_test)
    self.labels_test = self.read_data_into_memory(tokenized_label_path_test)

    self.in_vocab_path = in_vocab_path
    self.vocab_size = self.get_vocab_size()

    if len(self.in_seq_train) != len(self.labels_train):
        raise ValueError("Number of train labels != Number of train inputs: %d != %d"
                         % (len(self.in_seq_train), len(self.labels_train)))
    if len(self.in_seq_valid) != len(self.labels_valid):
        raise ValueError("Number of valid labels != Number of valid inputs: %d != %d"
                         % (len(self.in_seq_valid), len(self.labels_valid)))
    if len(self.in_seq_test) != len(self.labels_test):
        raise ValueError("Number of test labels != Number of test inputs: %d != %d"
                         % (len(self.in_seq_test), len(self.labels_test)))

    self.no_of_class_labels = self.get_number_of_labels()
    self.embed_size = embed_size
    self.word_embedding = None

    # Construct word embedding if required by the model
    if embed_size > 0:
        self.word_embedding = word_embedding.WordEmbedding(
            self.in_seq_train,
            self.in_seq_test,
            self.vocab_size,
            embed_size,
            2,  # context_window
            1   # context_size
        )
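# A minimal sketch (an assumption; the real helper may differ) of the
# read_data_into_memory() call used above: each line of a tokenized file is
# assumed to hold space-separated integer ids, truncated to max_in_seq_len and
# capped at max_data_size sequences.
def read_data_into_memory_sketch(self, path):
    sequences = []
    with open(path) as f:
        for line in f:
            ids = [int(tok) for tok in line.split()]
            sequences.append(ids[:self.max_in_seq_len])
            if 0 < self.max_data_size <= len(sequences):
                break
    return sequences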
def __init__(self, keep_pos=['NN', 'NNS', 'JJ', 'VB']):
    self.wordEmbeder = word_embedding.WordEmbedding()
    # Part-of-speech tags to keep (Penn Treebank: nouns, adjectives, base-form verbs)
    self.keep_pos = keep_pos
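# A hedged sketch of how the keep_pos filter might be applied; nltk.pos_tag is
# used here purely as an illustrative tagger (it needs the
# averaged_perceptron_tagger data) and is an assumption about the surrounding
# code, not taken from it.
import nltk

def _filter_tokens_sketch(self, tokens):
    kept = []
    for word, tag in nltk.pos_tag(tokens):
        # Keep only the configured Penn Treebank tags
        if tag in self.keep_pos:
            kept.append(word)
    return kept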
    # Self-attention over the input token sequence
    attention_output = self.attention(input=input, memory=input)

    # Position-wise feed-forward network layer:
    # a fully connected FFN applied independently at each position of the sequence
    ffn_output = self.ffn(input=attention_output)

    # Apply layer normalization to the final layer and return the output
    return self.output_normalization(ffn_output)


if __name__ == '__main__':
    vocab_size = 10
    emb_dim = 2
    dor = 0.1

    # Token sequences
    inputs = tf.constant([[0, 2, 3], [1, 4, 3]])

    # Processing up to the embedding
    import word_embedding
    emb = word_embedding.WordEmbedding(vocab_size=vocab_size, embedding_dim=emb_dim)
    enc_inputs = emb(inputs)
    # print(enc_inputs)

    # Encoder
    encoder = Encoder(vocab_size=10, hidden_dim=2, dropout_rate=dor)
    enc_output = encoder(enc_inputs)
    print(enc_output)
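# A minimal sketch (an assumption, not the original word_embedding module) of the
# WordEmbedding layer used in the __main__ block above: a thin Keras wrapper
# around tf.keras.layers.Embedding that maps integer token ids to dense vectors.
import tensorflow as tf

class WordEmbeddingSketch(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

    def call(self, inputs):
        # (batch, seq_len) integer ids -> (batch, seq_len, embedding_dim) vectors
        return self.embedding(inputs)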