Ejemplo n.º 1
0
def build_train_graph_for_RVAE(rvae_block, look_behind_length=0):
    """Wrap an RVAE block in a composition that reports its training losses.

    The composition maps its input sequence to vectors, feeds them to
    ``rvae_block`` and publishes two metrics, ``cross_entropy_loss`` and
    ``kl_loss``. The composition itself outputs nothing (``td.Void``).

    Args:
        rvae_block: block that is wired to the vectorised input sequence.
            Item 0 of its output is passed, zipped with the input sequence,
            to ``softmax_crossentropy``; item 1 is passed to
            ``kl_divergence``.
        look_behind_length: number of leading sequence elements sliced off
            the input before it is compared with the block's output.

    Returns:
        The assembled ``td.Composition``.
    """
    vector_size = get_size_of_input_vecotrs(rvae_block)

    graph = td.Composition()
    with graph.scope():
        padded_tokens = td.Map(td.Vector(vector_size)).reads(graph.input)
        rvae_block.reads(padded_tokens)

        token_scores = td.GetItem(0).reads(rvae_block)
        latent_params = td.GetItem(1).reads(rvae_block)

        # Skip the first `look_behind_length` elements so that the targets
        # start where the block's predictions start.
        target_tokens = td.Slice(start=look_behind_length).reads(padded_tokens)

        # TODO: metric that output of rnn is the same as input sequence
        reconstruction = (
            td.ZipWith(td.Function(softmax_crossentropy)) >> td.Mean()
        ).reads(token_scores, target_tokens)
        divergence = td.Function(kl_divergence).reads(latent_params)

        td.Metric('cross_entropy_loss').reads(reconstruction)
        td.Metric('kl_loss').reads(divergence)

        graph.output.reads(td.Void())

    return graph
Ejemplo n.º 2
0
def add_metrics(is_root):
    """Build a block that publishes loss, logits, prediction and label metrics.

    The block reads item 0 of its input as the labels and items 0 and 1 of
    input item 1 as the logits and the state. Its output is the state.

    Every quantity is reported under an ``all_*`` metric; when ``is_root`` is
    true it is additionally reported under the matching ``root_*`` metric.

    Args:
        is_root: whether to also emit the ``root_*`` metrics.

    Returns:
        The assembled ``td.Composition``.
    """
    block = td.Composition(name='predict(is_root=%s)' % (is_root))
    with block.scope():
        labels = block.input[0]
        logits = td.GetItem(0).reads(block.input[1])
        state = td.GetItem(1).reads(block.input[1])

        def publish(all_name, root_name, source):
            # The same source block feeds both the "all" and the "root" metric.
            td.Metric(all_name).reads(source)
            if is_root:
                td.Metric(root_name).reads(source)

        publish('all_loss', 'root_loss',
                td.Function(tf_node_loss).reads(logits, labels))
        publish('all_logits', 'root_logits',
                td.Function(tf_logits).reads(logits))
        # reserve pred and labels
        publish('all_pred', 'root_pred',
                td.Function(tf_pred).reads(logits))
        # NOTE: the root metric name is singular ('root_label') while the
        # all-nodes one is plural ('all_labels'); kept as is.
        publish('all_labels', 'root_label',
                td.Function(tf_label).reads(labels))

        block.output.reads(state)
    return block
Ejemplo n.º 3
0
    def set_metrics(self, train=True):
        """Build a block that records the root loss and root hits metrics.

        The block reads item 0 of its input as the labels and item 1 as the
        logits, publishes ``root_loss`` and ``root_hits`` computed from them,
        and outputs the logits.

        Args:
            train: accepted for interface compatibility; not used here.

        Returns:
            The assembled ``td.Composition`` named ``'predict'``.
        """
        block = td.Composition(name='predict')
        with block.scope():
            # input is (labels, logits)
            labels, logits = block.input[0], block.input[1]

            # loss metric
            node_loss = td.Function(self.tf_node_loss).reads(logits, labels)
            td.Metric('root_loss').reads(node_loss)

            # hits metric
            node_hits = td.Function(self.tf_fine_grained_hits).reads(
                logits, labels)
            td.Metric('root_hits').reads(node_hits)

            block.output.reads(logits)
        return block
Ejemplo n.º 4
0
def build_token_level_RVAE(z_size, token_emb_size, look_behind_length):
    """Assemble a token-level recurrent VAE as a ``td.Composition``.

    The composition takes a sequence of float32 vectors of length
    ``token_emb_size``. The sequence with its first ``look_behind_length``
    elements removed is run through the encoder RNN; the last element of the
    RNN's first output is fed to the resampling block. The decoder reads the
    decoder input sequence together with the resampled value.

    Two metrics are published: ``encoder_sequence_length`` and
    ``decoder_sequence_length``.

    Args:
        z_size: size passed to ``resampling_block`` and to the decoder GRU
            cell; the encoder GRU cell is built with ``2 * z_size``.
        token_emb_size: length of each input vector; also passed to
            ``build_program_decoder``.
        look_behind_length: when positive, the decoder input is built from
            concatenated n-grams of this length taken over the input with
            its last element dropped; otherwise every decoder input element
            is an empty tensor.

    Returns:
        A composition whose output is the pair
        ``(decoder output, encoder summary)``.
    """
    rvae = td.Composition()
    rvae.set_input_type(
        td.SequenceType(td.TensorType(([token_emb_size]), 'float32')))
    with rvae.scope():
        padded_tokens = rvae.input

        # Encoder: run the RNN and keep the last element of its first output.
        encoder_cell = build_program_encoder(default_gru_cell(2 * z_size))
        encoder_summary = (
            td.RNN(encoder_cell) >> td.GetItem(0) >> td.GetItem(-1))

        resampler = resampling_block(z_size)

        if look_behind_length > 0:
            # Concatenated windows of preceding elements, last element dropped.
            decoder_inputs = (
                td.Slice(stop=-1)
                >> td.NGrams(look_behind_length)
                >> td.Map(td.Concat()))
        else:
            # No look-behind: one zero-length tensor per input element.
            decoder_inputs = td.Map(
                td.Void() >> td.FromTensor(tf.zeros((0, ))))

        # Decoder
        decoder = build_program_decoder(
            token_emb_size, default_gru_cell(z_size), just_tokens=True)

        # remove padding for input sequence
        unpadded_tokens = td.Slice(start=look_behind_length).reads(
            padded_tokens)

        encoder_summary.reads(unpadded_tokens)
        resampler.reads(encoder_summary)

        decoder_inputs.reads(padded_tokens)
        td.Metric('encoder_sequence_length').reads(
            td.Length().reads(unpadded_tokens))
        td.Metric('decoder_sequence_length').reads(
            td.Length().reads(decoder_inputs))
        decoder.reads(decoder_inputs, resampler)

        rvae.output.reads(decoder, encoder_summary)
    return rvae
Ejemplo n.º 5
0
def bidirectional_dynamic_FC(fw_cell, bw_cell, hidden):
    """Build a bidirectional RNN block with a one-unit linear output layer.

    Input item 0 is run through ``fw_cell`` as is and through ``bw_cell`` in
    reversed order; the backward outputs are reversed again so they line up
    with the forward ones. Each aligned pair is concatenated and passed
    through ``td.FC(1, activation=None)``, and the result is published as
    the ``logits`` metric. Every element of input item 1 is published as the
    ``labels`` metric.

    Args:
        fw_cell: cell for the forward ``td.RNN``.
        bw_cell: cell for the backward ``td.RNN``.
        hidden: not used by this function.

    Returns:
        The assembled ``td.Composition``; its output is the zipped sequence
        produced after the ``logits`` metric.
    """
    net = td.Composition()
    with net.scope():
        forward_input = td.Identity().reads(net.input[0])

        # Side branch: report the labels as a metric and discard the result.
        label_branch = (
            td.GetItem(1) >> td.Map(td.Metric("labels")) >> td.Void())
        label_branch.reads(net.input)

        backward_input = td.Slice(step=-1).reads(forward_input)

        forward_states = (td.RNN(fw_cell) >> td.GetItem(0)).reads(
            forward_input)
        backward_states = (td.RNN(bw_cell) >> td.GetItem(0)).reads(
            backward_input)
        # Undo the reversal so both directions share the same ordering.
        backward_aligned = td.Slice(step=-1).reads(backward_states)

        projection = td.FC(1, activation=None)

        per_step = td.Concat() >> projection >> td.Metric('logits')
        merged = td.ZipWith(per_step).reads(forward_states, backward_aligned)

        net.output.reads(merged)
    return net
Ejemplo n.º 6
0
    def add_metrics(is_root, is_neutral):
        """Build a block that publishes loss and hit metrics.

        The block's input is destructured as ``(labels, (logits, state))``
        and its output is the state.

        ``all_loss`` and ``all_hits`` are always published; the binary-hits
        metrics are only created when ``is_neutral`` is false. Each metric
        gets a ``root_*`` twin when ``is_root`` is true.
        """
        block_name = 'predict(is_root=%s, is_neutral=%s)' % (is_root,
                                                             is_neutral)
        block = td.Composition(name=block_name)
        with block.scope():
            # unpack (labels, (logits, state))
            labels = block.input[0]
            logits = td.GetItem(0).reads(block.input[1])
            state = td.GetItem(1).reads(block.input[1])

            # loss
            node_loss = td.Function(tf_node_loss).reads(logits, labels)
            td.Metric('all_loss').reads(node_loss)
            if is_root:
                td.Metric('root_loss').reads(node_loss)

            # fine-grained hits
            fine_hits = td.Function(tf_fine_grained_hits).reads(logits, labels)
            td.Metric('all_hits').reads(fine_hits)
            if is_root:
                td.Metric('root_hits').reads(fine_hits)

            # binary hits are only computed for non-neutral labels
            if not is_neutral:
                coarse_hits = td.Function(tf_binary_hits).reads(logits, labels)
                td.Metric('all_binary_hits').reads(coarse_hits)
                if is_root:
                    td.Metric('root_binary_hits').reads(coarse_hits)

            # the state is the block's output, to be read by the parent's
            # LSTM cell
            block.output.reads(state)
        return block
Ejemplo n.º 7
0
    def _compile(self):
        """Build the TensorFlow Fold network, the loss and the training ops.

        Reads from ``self``: ``sess``, ``labels``, ``window_size``,
        ``char_embedding_size``, ``char_feature_embedding_size``,
        ``num_filters``, ``char_buckets``, ``word_vocab``, ``embedding_size``,
        ``word_embeddings``, ``rnn_dim``, ``lrate``, ``decay``, ``optimizer``
        and ``embedding_factor``.

        Sets on ``self``: ``keep_prob``, ``compiler``, ``y_out``, ``y_true``,
        ``y_loss``, ``y_prob``, ``y_true_idx``, ``y_pred_idx``, ``y_pred``,
        ``epoch_step_op``, ``opt``, ``train_step`` and ``saver``. It also runs
        the global variable initializer in ``self.sess``.

        Raises:
            Exception: if ``self.optimizer`` is not ``'adam'``, ``'adagrad'``
                or ``'rmsprop'``.
            AssertionError: if the word-embedding variable is not among the
                variables returned by ``compute_gradients``.
        """
        with self.sess.as_default():
            import tensorflow_fold as td

        output_size = len(self.labels)
        # Defaults to 1.0; it is passed to the output layer as
        # input_keep_prob and can be overridden by feeding this tensor.
        self.keep_prob = tf.placeholder_with_default(tf.constant(1.0), shape=None)

        # Filter bank: one row per element of a concatenated character
        # window, one column per filter.
        fshape = (self.window_size * (self.char_embedding_size
                                      + self.char_feature_embedding_size),
                  self.num_filters)
        filt_w3 = tf.Variable(tf.random_normal(fshape, stddev=0.05))

        def CNN_Window3(filters):
            # The lambda takes exactly three arguments, so this presumably
            # only works when self.window_size is 3 -- TODO confirm.
            return td.Function(lambda a, b, c: cnn_operation([a, b, c], filters))

        def cnn_operation(window_sequences, filters):
            # Concatenate the window, multiply element-wise against every
            # filter column and sum over the window axis.
            windows = tf.concat(window_sequences, axis=-1)
            products = tf.multiply(tf.expand_dims(windows, axis=-1), filters)
            return tf.reduce_sum(products, axis=-2)

        char_emb = td.Embedding(num_buckets=self.char_buckets,
                                num_units_out=self.char_embedding_size)

        cnn_layer = (td.NGrams(self.window_size)
                     >> td.Map(CNN_Window3(filt_w3))
                     >> td.Max())

        # --------- char features

        def charfeature_lookup(c):
            # Bucket a character: 0 lowercase, 1 uppercase, 2 punctuation,
            # 3 anything else.
            # string.lowercase / string.uppercase only exist on Python 2;
            # the ascii_* constants are available on Python 2 and 3.
            if c in string.ascii_lowercase:
                return 0
            elif c in string.ascii_uppercase:
                return 1
            elif c in string.punctuation:
                return 2
            else:
                return 3

        char_input = td.Map(td.InputTransform(lambda c: ord(c.lower()))
                            >> td.Scalar('int32') >> char_emb)

        char_features = td.Map(td.InputTransform(charfeature_lookup)
                               >> td.Scalar(dtype='int32')
                               >> td.Embedding(num_buckets=4,
                                               num_units_out=self.char_feature_embedding_size))

        # Each word is wrapped in '~' markers on both sides before the
        # character pipelines run.
        charlevel = (td.InputTransform(lambda s: ['~'] + [c for c in s] + ['~'])
                     >> td.AllOf(char_input, char_features) >> td.ZipWith(td.Concat())
                     >> cnn_layer)

        # --------- word features

        word_emb = td.Embedding(num_buckets=len(self.word_vocab),
                                num_units_out=self.embedding_size,
                                initializer=self.word_embeddings)

        # Words missing from the vocabulary map to index 0.
        wordlookup = lambda w: (self.word_vocab.index(w.lower())
                                if w.lower() in self.word_vocab else 0)

        wordinput = (td.InputTransform(wordlookup)
                     >> td.Scalar(dtype='int32')
                     >> word_emb)

        def wordfeature_lookup(w):
            # Casing bucket: 0 all lowercase, 1 capitalised, 2 all uppercase,
            # 3 other purely alphabetic, 4 everything else.
            if re.match('^[a-z]+$', w):
                return 0
            elif re.match('^[A-Z][a-z]+$', w):
                return 1
            elif re.match('^[A-Z]+$', w):
                return 2
            elif re.match('^[A-Za-z]+$', w):
                return 3
            else:
                return 4

        wordfeature = (td.InputTransform(wordfeature_lookup)
                       >> td.Scalar(dtype='int32')
                       >> td.Embedding(num_buckets=5,
                                       num_units_out=32))

        #-----------

        rnn_fwdcell = td.ScopedLayer(tf.contrib.rnn.LSTMCell(
                        num_units=self.rnn_dim), 'lstm_fwd')
        fwdlayer = td.RNN(rnn_fwdcell) >> td.GetItem(0)

        rnn_bwdcell = td.ScopedLayer(tf.contrib.rnn.LSTMCell(
                        num_units=self.rnn_dim), 'lstm_bwd')
        # Reverse the sequence, run the RNN, then reverse the outputs back so
        # they line up with the forward outputs.
        bwdlayer = (td.Slice(step=-1) >> td.RNN(rnn_bwdcell)
                    >> td.GetItem(0) >> td.Slice(step=-1))

        rnn_layer = td.AllOf(fwdlayer, bwdlayer) >> td.ZipWith(td.Concat())

        output_layer = td.FC(output_size,
                             input_keep_prob=self.keep_prob,
                             activation=None)

        wordlevel = td.AllOf(wordinput, wordfeature) >> td.Concat()

        # The per-element outputs are exposed through the 'y_out' metric; the
        # block's own output is discarded.
        network = (td.Map(td.AllOf(wordlevel, charlevel) >> td.Concat())
                   >> rnn_layer
                   >> td.Map(output_layer)
                   >> td.Map(td.Metric('y_out'))) >> td.Void()

        groundlabels = td.Map(td.Vector(output_size, dtype=tf.int32)
                              >> td.Metric('y_true')) >> td.Void()

        self.compiler = td.Compiler.create((network, groundlabels))

        self.y_out = self.compiler.metric_tensors['y_out']
        self.y_true = self.compiler.metric_tensors['y_true']

        self.y_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.y_out, labels=self.y_true))

        self.y_prob = tf.nn.softmax(self.y_out)
        self.y_true_idx = tf.argmax(self.y_true, axis=-1)
        self.y_pred_idx = tf.argmax(self.y_prob, axis=-1)

        self.y_pred = tf.one_hot(self.y_pred_idx, depth=output_size, dtype=tf.int32)

        # Counter driving the learning-rate decay; it only advances when
        # epoch_step_op is run.
        epoch_step = tf.Variable(0, trainable=False)
        self.epoch_step_op = tf.assign(epoch_step, epoch_step + 1)

        lrate_decay = tf.train.exponential_decay(self.lrate, epoch_step, 1, self.decay)

        if self.optimizer == 'adam':
            self.opt = tf.train.AdamOptimizer(learning_rate=lrate_decay)
        elif self.optimizer == 'adagrad':
            self.opt = tf.train.AdagradOptimizer(learning_rate=lrate_decay,
                                                 initial_accumulator_value=1e-08)
        elif self.optimizer == 'rmsprop':
            self.opt = tf.train.RMSPropOptimizer(learning_rate=lrate_decay,
                                                 epsilon=1e-08)
        else:
            raise Exception(('The optimizer {} is not in list of available '
                            + 'optimizers: default, adam, adagrad, rmsprop.')
                            .format(self.optimizer))

        # apply the learning-rate multiplier to the word-embedding gradients
        embeds = [word_emb.weights]
        grads_and_vars = self.opt.compute_gradients(self.y_loss)
        found = 0
        for i, (grad, var) in enumerate(grads_and_vars):
            if var in embeds:
                found += 1
                grad = tf.scalar_mul(self.embedding_factor, grad)
                grads_and_vars[i] = (grad, var)

        assert found == len(embeds)  # internal consistency check
        self.train_step = self.opt.apply_gradients(grads_and_vars)

        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(max_to_keep=100)
Ejemplo n.º 8
0
    # Build the VAE block and wire it to the input sequence.
    # NOTE(review): the meaning of the literal 54 is not visible here --
    # presumably the size of the token vectors; confirm against build_VAE.
    network_output = build_VAE(Z_SIZE, 54)

    network_output.reads(input_sequence)

    # Item 0 of the block's output feeds the cross-entropy loss, item 1 the
    # KL divergence.
    un_normalised_token_probs = td.GetItem(0).reads(network_output)
    mus_and_log_sigs = td.GetItem(1).reads(network_output)

    # Element-wise cross-entropy between the outputs and the input sequence,
    # averaged with td.Mean().
    cross_entropy_loss = td.ZipWith(td.Function(softmax_crossentropy)) >> td.Mean()
    cross_entropy_loss.reads(
        un_normalised_token_probs,
        input_sequence
    )
    kl_loss = td.Function(kl_divergence)
    kl_loss.reads(mus_and_log_sigs)

    # Both losses are exposed as metrics; the composition `c` (defined
    # outside this excerpt) outputs nothing.
    td.Metric('cross_entropy_loss').reads(cross_entropy_loss)
    td.Metric('kl_loss').reads(kl_loss)

    c.output.reads(td.Void())



# Tokenised version of my code: a flat array of 72 integer token ids,
# laid out 12 per row.
example_input = np.array([
    1, 2, 51, 16, 4, 17, 52, 3, 53, 16, 5, 38,
    6, 37, 6, 37, 6, 37, 6, 38, 6, 37, 6, 37,
    6, 38, 53, 16, 8, 9, 10, 11, 12, 13, 14, 7,
    51, 17, 11, 48, 11, 8, 52, 53, 17, 5, 37, 6,
    38, 6, 37, 6, 38, 6, 38, 53, 17, 8, 9, 10,
    11, 7, 51, 9, 26, 51, 20, 9, 9, 52, 52, 0,
])