Example #1
def interactive_generate(self, initial_text, generation_length, *args):
    vocab = get_character(self.dataset)
    # encode the seed text as vocabulary indices (time x batch, batch = 1)
    initial_code = []
    for char in initial_text:
        initial_code += [np.where(vocab == char)[0]]
    initial_code = np.array(initial_code)
    inputs_ = initial_code
    all_output_probabilities = []
    logger.info("\nGeneration:")
    for _ in range(generation_length):
        # time x batch x features (1 x 1 x vocab_size)
        last_output = self.generate(inputs_)[-1][-1:, :, :]
        # batch x features (1 x vocab_size); '[0]' drops the time dim
        last_output_probabilities = softmax(last_output[0])
        all_output_probabilities += [last_output_probabilities]
        argmax = (self.softmax_sampling == 'argmax')
        # 1 x 1: the sampled index for the next character
        last_output_sample = sample(last_output_probabilities, argmax)
        inputs_ = np.vstack([inputs_, last_output_sample])
    # time x batch
    whole_sentence_code = inputs_
    # decode the generated indices back into characters
    whole_sentence = ''
    for char in vocab[whole_sentence_code[:, 0]]:
        whole_sentence += char
    logger.info(whole_sentence[:initial_code.shape[0]] + ' ...')
    logger.info(whole_sentence)
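
This example leans on module-level `softmax` and `sample` helpers (and the `get_character` vocabulary lookup) defined elsewhere in the project. A minimal sketch of what the two sampling helpers could look like, matching the shapes used above; the bodies here are assumptions, not the project's code:

import numpy as np


def softmax(x):
    # Numerically stable softmax over the last axis.
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)


def sample(probabilities, argmax=False):
    # probabilities: batch x vocab_size. Returns indices with shape
    # (1 x batch) so np.vstack can append them as one more time step.
    if argmax:
        idx = np.argmax(probabilities, axis=-1)
    else:
        idx = np.array([np.random.choice(len(p), p=p)
                        for p in probabilities])
    return idx[None, :]

With `argmax=True` generation is greedy; otherwise the next character is drawn from the predicted categorical distribution, which gives more varied samples.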
Example #2
def visualize_gradients_flow_pie(hidden_states, updates,
                                 args, text='[done]. Finally'):
    unfolding_length = len(text)
    variables = ComputationGraph(hidden_states).variables

    if args.rnn_type == 'lstm':
        rnn_type = 'lstm'
    elif args.rnn_type == 'simple':
        rnn_type = 'simplerecurrent'
    else:
        raise NotImplementedError(args.rnn_type)
    states = []
    for d in range(args.layers):
        states.append([variable for variable in variables
                       if variable.name == (rnn_type + '_' +
                                            str(d) + '_apply_states')][1])
        # [1] is because there are two '*_apply_states' in variables.
    pre_rnns = [variable for variable in variables
                if ((variable.name is not None) and
                    ('pre_rnn' in variable.name))]

    grads = []
    for i in range(unfolding_length):
        grads.append(tensor.sum(tensor.abs_(tensor.grad(
            tensor.mean(tensor.abs_(pre_rnns[0][i])),
            pre_rnns[0:1])), axis=0))

        for layer, state in enumerate(states):
            grads.append(tensor.sum(tensor.abs_(tensor.grad(
                tensor.mean(tensor.abs_(state[i])),
                pre_rnns[0:layer + 1])), axis=0))

    # Handle the theano shared variables for the state
    state_vars = [theano.shared(
        v[0:1, :].zeros_like().eval(), v.name + '-gen')
        for v, _ in updates]
    givens = [(v, x) for (v, _), x in zip(updates, state_vars)]
    f_updates = [(x, upd) for x, (_, upd) in zip(state_vars, updates)]

    # Compile the function
    logger.info("The compilation of the function has started")
    compiled_functions = [theano.function(
        inputs=ComputationGraph(grad).inputs,
        outputs=grad,
        givens=givens, updates=f_updates,
        mode=Mode(optimizer=None)) for grad in grads]
    logger.info("The function has been compiled")

    # input text
    vocab = get_character(args.dataset)
    code = []
    for char in text:
        code += [np.where(vocab == char)[0]]
    code = np.array(code)

    res = [f(code) for f in compiled_functions]
    all_time_steps = []
    for i in range(unfolding_length):
        temp = []
        for d in range(args.layers + 1):
            temp.append(np.sum(np.abs(res[i * (args.layers + 1) + d]),
                               axis=(1, 2)))
        all_values = np.vstack([layer / np.sum(layer, axis=0)
                                for layer in temp])
        all_time_steps += [all_values.T[:, ::-1]]
    # +1 is to show inputs as well
    plot_pie_charts(data=all_time_steps, layers=args.layers + 1,
                    time_steps=unfolding_length,
                    path=args.save_path + '/pie.png',
                    text=text)
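
`plot_pie_charts` is a project-specific plotting helper that is not shown in this example. A rough sketch of how the normalized gradient shares computed above could be rendered with matplotlib; the layout, figure size, and labelling are assumptions, not the original helper:

import matplotlib.pyplot as plt


def plot_pie_charts(data, layers, time_steps, path, text):
    # data: one 2-D array per unfolded step; each column holds the
    # normalized gradient shares for one layer (inputs included),
    # with one slice per earlier time step.
    fig, axes = plt.subplots(layers, time_steps,
                             figsize=(1.5 * time_steps, 1.5 * layers),
                             squeeze=False)
    for t in range(time_steps):
        for l in range(layers):
            axes[l][t].pie(data[t][:, l])
        axes[-1][t].set_xlabel(text[t])
    fig.savefig(path, bbox_inches='tight')
    plt.close(fig)

Each pie then shows, for one layer at one unfolded step, how the gradient magnitude is distributed over the preceding time steps, which is how the function exposes vanishing or exploding gradient flow.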