def __init__(self, embed_matrix):
    self.graph = tf.Graph()
    with self.graph.as_default():
        # Input
        self.input_matrix = tf.constant(embed_matrix, name="embed_matrix")
        self.word_ids = tf.placeholder(tf.int32, shape=[None], name="word_ids")
        # Distributed representations to compress: (batch_size, embed_size)
        self.input_embeds = tf.nn.embedding_lookup(
            self.input_matrix, self.word_ids, name="input_embeds")

        # Codebooks: M codebooks of K vectors each, stored as one flat matrix
        self.codebooks = tf.get_variable("codebook", [hp.M * hp.K, hp.embed_size])

        # Encoding
        self.logits = encode(self.input_embeds)  # (batch_size, M, K)

        # Discretization via Gumbel-softmax
        self.D = gumbel_softmax(self.logits, hp.tau_value)  # (batch_size, M, K)
        self.gumbel_output = tf.reshape(self.D, [-1, hp.M * hp.K])  # (batch_size, M * K)
        self.maxp = tf.reduce_mean(tf.reduce_max(self.D, axis=2))

        # Decoding
        self.output_embeds = decode(
            self.gumbel_output,
            self.codebooks)  # (batch_size, M*K) x (M*K, embed_size) -> (batch_size, embed_size)

        # Loss: squared reconstruction error against the original embeddings
        self.loss = tf.reduce_mean(
            0.5 * tf.reduce_sum((self.output_embeds - self.input_embeds) ** 2, axis=1),
            name="loss")

        # Optimization
        self.train_vars = tf.trainable_variables()
        self.grads, self.global_norm = tf.clip_by_global_norm(
            tf.gradients(self.loss, self.train_vars), clip_norm=0.001)
        self.global_norm = tf.identity(self.global_norm, name="global_norm")
        self.optimizer = tf.train.AdamOptimizer(0.0001)
        self.train_op = self.optimizer.apply_gradients(
            zip(self.grads, self.train_vars), name="train_op")
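The graph above only defines the ops; it still has to be driven by a session. Below is a minimal TF 1.x training-loop sketch, not the project's actual runner: the class name EmbeddingCompressor, the vocabulary size, the batch size, and the random dummy data are all assumptions, and hp, encode, decode, and gumbel_softmax are taken to be defined elsewhere in the project.

# A minimal training-loop sketch (assumptions: TF 1.x, enclosing class
# named EmbeddingCompressor, 10000-word vocabulary, 300-dim embeddings).
import numpy as np
import tensorflow as tf

embed_matrix = np.random.randn(10000, 300).astype(np.float32)  # stand-in for pretrained embeddings
model = EmbeddingCompressor(embed_matrix)

with model.graph.as_default():
    init_op = tf.global_variables_initializer()

with tf.Session(graph=model.graph) as sess:
    sess.run(init_op)
    for step in range(1000):
        batch_ids = np.random.randint(0, 10000, size=64)  # random word ids as dummy data
        _, loss, maxp = sess.run(
            [model.train_op, model.loss, model.maxp],
            feed_dict={model.word_ids: batch_ids})
        if step % 100 == 0:
            print("step %d: loss=%.4f maxp=%.3f" % (step, loss, maxp))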
def __init__(self, embed_matrix):
    self.graph = tf.Graph()
    with self.graph.as_default():
        # Input
        self.input_matrix = tf.constant(embed_matrix, name="embed_matrix")
        self.word_ids = tf.placeholder(tf.int32, shape=[None], name="word_ids")
        # Distributed representations to compress: (batch_size, embed_size)
        self.input_embeds = tf.nn.embedding_lookup(
            self.input_matrix, self.word_ids, name="input_embeds")

        # Codebooks: M codebooks of K vectors each, stored as one flat matrix
        self.codebooks = tf.get_variable("codebook", [hp.M * hp.K, hp.embed_size])

        # Encoding: at inference time, take the hard argmax instead of Gumbel-softmax
        self.logits = encode(self.input_embeds)  # (batch_size, M, K)
        self.codes = tf.cast(tf.argmax(self.logits, axis=2), tf.int32)  # (B, M)

        # Reconstruct: pick one row from each codebook block and sum them
        self.offset = tf.range(hp.M, dtype="int32") * hp.K
        self.codes_with_offset = self.codes + self.offset[None, :]
        self.selected_vectors = tf.gather(self.codebooks, self.codes_with_offset)  # (B, M, H)
        self.reconstructed_embed = tf.reduce_sum(self.selected_vectors, axis=1)  # (B, H)
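The offset trick addresses the flat (M*K, embed_size) codebook matrix as M blocks of K rows: code m for example b selects row m*K + codes[b, m]. A NumPy sketch of the same reconstruction, with shapes assumed from the comments above:

# NumPy equivalent of the reconstruction step (assumed shapes:
# codebooks is (M*K, H), codes is (B, M) with entries in [0, K)).
import numpy as np

def reconstruct(codebooks, codes, M, K):
    offset = np.arange(M) * K            # first row of each codebook block
    rows = codes + offset[None, :]       # (B, M) absolute row indices
    return codebooks[rows].sum(axis=1)   # gather (B, M, H), then sum over M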
modules.yaz("\nKullanabileceğiniz komutlar şunlardır:") modules.yaz(""" cikis, c : yazilimi durdurmak icin kullanilir. decode, d: sifreli yaziyi cozmek icin kullanılır. (Aktif degil) encode, e: sifrelemek icin kullanılır. yardim, y: yardim yazisini gosterir. """) yardim() while True: #Dongu baslangaci komut = input("\n>>> ") if komut in ["cikis", "c"]: break elif komut in ["encode", "e"]: text = input("Encode edilecek yazi: ") modules.yaz(modules.encode(text)) elif komut in ["decode", "d"]: text = input("Decode edilecek yazi: ") modules.yaz(modules.decode(text)) elif komut in ["yardim", "y"]: yardim() else: modules.yaz("\nVar olan bir komutu yazmadınız.\n")
def test_EncoderString_CorrectEncoding(self):
    y = ['cat', 'dog', 'dog']
    cdict = {'cat': np.array([1, 0]), 'dog': np.array([0, 1])}
    y1 = encode(y, cdict)
    y2 = np.array([[1, 0], [0, 1], [0, 1]])
    self.assertTrue(np.allclose(y1, y2))
def test_Encoder2dim_CorrectEncoding(self):
    y = np.array([[1], [2], [2]])
    cdict = {1: np.array([1, 0]), 2: np.array([0, 1])}
    y1 = encode(y, cdict)
    y2 = np.array([[1, 0], [0, 1], [0, 1]])
    self.assertTrue(np.allclose(y1, y2))
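Both tests exercise the same contract: encode(y, cdict) looks up each label in the code dictionary and stacks the resulting rows. A minimal implementation that would satisfy both tests, assuming every label in y has an entry in cdict:

# A minimal encode() consistent with both tests above (assumption: every
# label appears in cdict). ravel() flattens the (n, 1) integer array from
# the second test; the flat string list passes through unchanged.
import numpy as np

def encode(y, cdict):
    labels = np.asarray(y).ravel()
    return np.stack([cdict[label] for label in labels])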