예제 #1
0
    def __init__(self, args, is_training=True, batch=True):
        self.args = args

        if not is_training:    
            args.seq_length = 1

        if not batch:
            args.batch_size = 1

        if args.model == 'rnn':
            cell = rnn_cell.BasicRNNCell(args.rnn_size)
        elif args.model == 'gru':
            cell = rnn_cell.GRUCell(args.rnn_size)
        elif args.model == 'lstm':
            cell = rnn_cell.BasicLSTMCell(args.rnn_size)
        else:
            raise Exception("model type not supported: {}".format(args.model))

        self.cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        self.input_data     = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets        = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state  = self.cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnn'):
            softmax_w = tf.get_variable('softmax_w', [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable('softmax_b', [args.vocab_size])
            
            with tf.device('/cpu:0'):
                embedding = tf.get_variable('embedding', [args.vocab_size, args.rnn_size])
                inputs    = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
                inputs    = [tf.squeeze(i, [1]) for i in inputs]

        def loop(prev, _):
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, 
            self.cell, loop_function=None if is_training else loop, scope='rnn')
        output      = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs  = tf.nn.softmax(self.logits)
        loss        = seq2seq.sequence_loss_by_example([self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])],
            args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr       = tf.Variable(0.0, trainable = False)
        tvars         = tf.trainable_variables()
        grads, _      = tf.clip_by_global_norm(tf.gradients(self.cost, tvars, aggregation_method=2), args.grad_clip)
        optimizer     = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
예제 #2
0
    def __init__(self, args, is_training=True):
        self.args = args

        if not is_training:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            self.cell = rnn_cell.BasicRNNCell(args.rnn_size)
        elif args.model == 'gru':
            self.cell = rnn_cell.GRUCell(args.rnn_size)
        elif args.model == 'lstm':
            self.cell = rnn_cell.BasicLSTMCell(args.rnn_size)
        else:
            raise Exception('model type not supported: {}'.format(args.model))

        self.cell = rnn_cell.MultiRNNCell([self.cell] * args.num_layers)

        self.input_data    = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets       = tf.placeholder(tf.int32, [args.batch_size, args.seq_length]) # Target replication
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnn'):
            softmax_w = tf.get_variable('softmax_w', [args.rnn_size, 2])
            softmax_b = tf.get_variable('softmax_b', [2])

            with tf.device('/cpu:0'):
                embedding = tf.get_variable('embedding', [args.vocab_size, args.rnn_size])
                inputs    = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
                inputs    = [tf.squeeze(i, [1]) for i in inputs]

            outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, 
                self.cell, loop_function=None)

        output_tf   = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.nn.xw_plus_b(output_tf, softmax_w, softmax_b)
        self.probs  = tf.nn.softmax(self.logits)
        
        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])])

        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length

        self.final_state = last_state
        self.lr          = tf.Variable(0.0, trainable = False)
        tvars            = tf.trainable_variables()
        grads, _         = tf.clip_by_global_norm(tf.gradients(self.cost, tvars, aggregation_method=2), args.grad_clip)
        optimizer        = tf.train.AdamOptimizer(self.lr)
        self.train_op    = optimizer.apply_gradients(zip(grads, tvars))
예제 #3
0
    def __init__(self, config, is_training=False):
        self.config = config
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.hidden_size = hidden_size = config.hidden_size
        self.num_layers = 1
        vocab_size = config.vocab_size
        self.max_grad_norm = config.max_grad_norm
        self.use_lstm = config.use_lstm

        # Placeholders for inputs.
        self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self.targets = tf.placeholder(tf.int32, [batch_size, num_steps])
        self.initial_state = array_ops.zeros(
            array_ops.pack([self.batch_size, self.num_steps]),
            dtype=tf.float32).set_shape([None, self.num_steps])

        embedding = tf.get_variable(
            'embedding', [self.config.vocab_size, self.config.hidden_size])

        # Set up ACT cell and inner rnn-type cell for use inside the ACT cell.
        with tf.variable_scope("rnn"):
            if self.use_lstm:
                inner_cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
            else:
                inner_cell = rnn_cell.GRUCell(self.config.hidden_size)

        with tf.variable_scope("ACT"):

            act = ACTCell(self.config.hidden_size,
                          inner_cell,
                          config.epsilon,
                          max_computation=config.max_computation,
                          batch_size=self.batch_size)

        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        inputs = [
            tf.squeeze(single_input, [1])
            for single_input in tf.split(1, self.config.num_steps, inputs)
        ]

        self.outputs, final_state = rnn(act, inputs, dtype=tf.float32)

        # Softmax to get probability distribution over vocab.
        output = tf.reshape(tf.concat(1, self.outputs), [-1, hidden_size])
        softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        self.logits = tf.matmul(
            output,
            softmax_w) + softmax_b  # dim (numsteps*batchsize, vocabsize)

        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([batch_size * num_steps])], vocab_size)

        # Add up loss and retrieve batch-normalised ponder cost: sum N + sum Remainder.
        ponder_cost = act.calculate_ponder_cost(
            time_penalty=self.config.ponder_time_penalty)
        self.cost = (tf.reduce_sum(loss) / batch_size) + ponder_cost
        self.final_state = self.outputs[-1]

        if is_training:
            self.lr = tf.Variable(0.0, trainable=False)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                              self.max_grad_norm)
            optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
예제 #4
0
	def __init__(self, args, is_training=True):

		if not is_training:
			seq_length = 1
		else:
			seq_length = args.seq_length

		if args.model == 'rnn':
			cell_gen = rnn_cell.BasicRNNCell(args.rnn_size)
			cell_dis = rnn_cell.BasicRNNCell(args.rnn_size)
		elif args.model == 'gru':
			cell_gen = rnn_cell.GRUCell(args.rnn_size)
			cell_dis = rnn_cell.GRUCell(args.rnn_size)
		elif args.model == 'lstm':
			cell_gen = rnn_cell.BasicLSTMCell(args.rnn_size)
			cell_dis = rnn_cell.BasicLSTMCell(args.rnn_size)
		else:
			raise Exception('model type not supported: {}'.format(args.model))

		# Pass the generated sequences and targets (1)
		with tf.name_scope('input'):
			with tf.name_scope('data'):
				self.input_data  = tf.placeholder(tf.int32, [args.batch_size, seq_length])
			with tf.name_scope('targets'):
				self.targets     = tf.placeholder(tf.int32, [args.batch_size, seq_length])

		############
		# Generator
		############
		with tf.variable_scope('generator'):
			self.cell_gen = rnn_cell.MultiRNNCell([cell_gen] * args.num_layers)
			self.initial_state_gen = self.cell_gen.zero_state(args.batch_size, tf.float32)	

			with tf.variable_scope('rnn'):
				softmax_w = tf.get_variable('softmax_w', [args.rnn_size, args.vocab_size])
				softmax_b = tf.get_variable('softmax_b', [args.vocab_size])
				
				with tf.device('/cpu:0'):
					embedding  = tf.get_variable('embedding', [args.vocab_size, args.rnn_size])
					inputs_gen = tf.split(1, seq_length, tf.nn.embedding_lookup(
						embedding, self.input_data))
					inputs_gen = [tf.squeeze(i, [1]) for i in inputs_gen]

			outputs_gen, last_state_gen = seq2seq.rnn_decoder(inputs_gen, self.initial_state_gen, 
				self.cell_gen, loop_function=None)
			
			self.logits_sequence = []
			for output_gen in outputs_gen:
				logits_gen  = tf.nn.xw_plus_b(output_gen, softmax_w, softmax_b)
				self.logits_sequence.append(logits_gen)

			self.final_state_gen = last_state_gen

		################
		# Discriminator
		################
		with tf.variable_scope('discriminator'):
			self.cell_dis = rnn_cell.MultiRNNCell([cell_dis] * args.num_layers)
			self.initial_state_dis = self.cell_dis.zero_state(args.batch_size, tf.float32)

			with tf.variable_scope('rnn'):
				softmax_w = tf.get_variable('softmax_w', [args.rnn_size, 2])
				softmax_b = tf.get_variable('softmax_b', [2])

				inputs_dis = []
				embedding  = tf.get_variable('embedding', [args.vocab_size, args.rnn_size])
				for logit in self.logits_sequence:
					inputs_dis.append(tf.matmul(logit, embedding))
					# inputs_dis.append(tf.matmul(tf.nn.softmax(logit), embedding))
					
				outputs_dis, last_state_dis = seq2seq.rnn_decoder(inputs_dis,
					self.initial_state_dis, self.cell_dis, loop_function=None)

			probs, logits = [], []
			for output_dis in outputs_dis:
				logit = tf.nn.xw_plus_b(output_dis, softmax_w, softmax_b)
				prob = tf.nn.softmax(logit)
				logits.append(logit)
				probs.append(prob)

			with tf.name_scope('summary'):
				probs      = tf.pack(probs)
				probs_real = tf.slice(probs, [0,0,1], [args.seq_length, args.batch_size, 1])
				variable_summaries(probs_real, 'probability of real')

			self.final_state_dis = last_state_dis

		#########
		# Train
		#########
		with tf.name_scope('train'):
			gen_loss = seq2seq.sequence_loss_by_example(
				logits,
				tf.unpack(tf.transpose(self.targets)), 
				tf.unpack(tf.transpose(tf.ones_like(self.targets, dtype=tf.float32))))

			self.gen_cost = tf.reduce_sum(gen_loss) / args.batch_size
			tf.scalar_summary('training loss', self.gen_cost)
			self.lr_gen = tf.Variable(0.0, trainable = False)		
			self.tvars 	= tf.trainable_variables()
			gen_vars    = [v for v in self.tvars if not v.name.startswith("discriminator/")]

			if is_training:
				gen_grads            = tf.gradients(self.gen_cost, gen_vars)
				self.all_grads       = tf.gradients(self.gen_cost, self.tvars)
				gen_grads_clipped, _ = tf.clip_by_global_norm(gen_grads, args.grad_clip)
				gen_optimizer        = tf.train.AdamOptimizer(self.lr_gen)
				self.gen_train_op    = gen_optimizer.apply_gradients(
											zip(gen_grads_clipped, gen_vars))				

		with tf.name_scope('summary'):
			with tf.name_scope('weight_summary'):
				for v in self.tvars:
					variable_summaries(v, v.op.name)
			if is_training:
				with tf.name_scope('grad_summary'):
					for var, grad in zip(self.tvars, self.all_grads):
						variable_summaries(grad, 'grad/' + var.op.name)

		self.merged = tf.merge_all_summaries()