Example no. 1
 def linearLSTM_over_TreeLstm(self, num_classes, sent_lstm_num_units):
     self.sent_cell = td.ScopedLayer(tf.contrib.rnn.BasicLSTMCell(
         num_units=sent_lstm_num_units), name_or_scope = self._sent_lstm_default_scope_name)
     sent_lstm = (td.Map(self.tree_lstm.tree_lstm()
                         >> td.Concat()) >> td.RNN(self.sent_cell))
     self.output_layer = td.FC(
         num_classes, activation=None, name=self._output_layer_default_scope_name)
     return (td.Scalar('int32'), sent_lstm >> td.GetItem(1)
             >> td.GetItem(0) >> self.output_layer) \
         >> self.set_metrics()
Example no. 2
    def __init__(self, weight_matrix, word_idx, ModelConfig):

        self.ModelConfig = ModelConfig

        self.word_embedding = td.Embedding(*weight_matrix.shape,
                                           initializer=weight_matrix,
                                           name='word_embedding')
        self.word_idx = word_idx

        self.keep_prob_ph = tf.placeholder_with_default(1.0, [])
        self.tree_lstm = td.ScopedLayer(tf.contrib.rnn.DropoutWrapper(
            BinaryTreeLSTMCell(self.ModelConfig.lstm_num_units,
                               keep_prob=self.keep_prob_ph),
            input_keep_prob=self.keep_prob_ph,
            output_keep_prob=self.keep_prob_ph),
                                        name_or_scope='tree_lstm')
        self.output_layer = td.FC(self.ModelConfig.num_classes,
                                  activation=None,
                                  name='output_layer')

        self.embed_subtree = td.ForwardDeclaration(name='embed_subtree')
        self.model = self.embed_tree(is_root=True)
        self.embed_subtree.resolve_to(self.embed_tree(is_root=False))

        self.compiler = td.Compiler.create(self.model)
        print('input type: %s' % self.model.input_type)
        print('output type: %s' % self.model.output_type)

        self.metrics = {
            k: tf.reduce_mean(v)
            for k, v in self.compiler.metric_tensors.items()
        }

        self.loss = tf.reduce_sum(self.compiler.metric_tensors['all_loss'])
        opt = tf.train.AdagradOptimizer(ModelConfig.learning_rate)

        grads_and_vars = opt.compute_gradients(self.loss)
        found = 0
        for i, (grad, var) in enumerate(grads_and_vars):
            if var == self.word_embedding.weights:
                found += 1
                grad = tf.scalar_mul(ModelConfig.embedding_learning_rate, grad)
                grads_and_vars[i] = (grad, var)
        assert found == 1  # internal consistency check
        self.train_op = opt.apply_gradients(grads_and_vars)

        self.saver = tf.train.Saver()
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
def build_program_decoder_for_analysis(token_emb_size, rnn_cell):
    """
    Does the same as build_program_decoder, but also returns
        the final hidden state of the decoder
    """
    decoder_rnn = td.ScopedLayer(rnn_cell, 'decoder')
    decoder_rnn_output = td.RNN(decoder_rnn,
                                initial_state_from_input=True) >> td.GetItem(0)

    fc_layer = td.FC(token_emb_size,
                     activation=tf.nn.relu,
                     initializer=tf.contrib.layers.xavier_initializer(),
                     name='encoder_fc')
    # decoder_rnn_output.reads()
    un_normalised_token_probs = td.Map(fc_layer)
    return decoder_rnn_output >> td.AllOf(un_normalised_token_probs,
                                          td.Identity())
def build_program_decoder(token_emb_size, rnn_cell, just_tokens=False):
    """
    Used for blind or 'look-behind' decoders
    """
    decoder_rnn = td.ScopedLayer(rnn_cell, 'decoder')
    decoder_rnn_output = td.RNN(decoder_rnn,
                                initial_state_from_input=True) >> td.GetItem(0)

    fc_layer = td.FC(
        token_emb_size,
        activation=tf.nn.relu,
        initializer=tf.contrib.layers.xavier_initializer(),
        name='encoder_fc'  # this is fantastic
    )

    # un_normalised_token_probs = decoder_rnn_output >> td.Map(fc_layer)
    if just_tokens:
        return decoder_rnn_output >> td.Map(fc_layer)
    else:
        return decoder_rnn_output >> td.AllOf(td.Map(fc_layer), td.Identity())
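
A hedged wiring sketch (not from the source; the GRU cell, just_tokens=True, and the sizes below are illustrative assumptions): because the decoder RNN is built with initial_state_from_input=True, the returned block consumes a (token-embedding sequence, initial state) pair.

import tensorflow as tf
import tensorflow_fold as td

token_emb_size, state_size = 54, 32  # illustrative sizes
decoder = build_program_decoder(token_emb_size,
                                tf.contrib.rnn.GRUCell(num_units=state_size),
                                just_tokens=True)
# Pair block: the first element embeds the token sequence, the second supplies
# the decoder's initial state; the decoder then emits one un-normalised token
# distribution per step.
model = (td.Map(td.Vector(token_emb_size)),
         td.Vector(state_size)) >> decoder
# `model` can now be composed further (e.g. with td.Metric losses) and
# compiled with td.Compiler.create, as in the surrounding examples.
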
Example no. 5
 def __init__(self,
              weights,
              vocab,
              tree_lstm_num_units,
              tree_binarizer=None):
     if not tree_binarizer:
         tree_binarizer = TreeBinarizer(vocab, dict())
     self.tree_binarizer = tree_binarizer
     self.tree_lstm_keep_prob_ph = tf.placeholder_with_default(1.0, [])
     self.tree_lstm_cell = td.ScopedLayer(
         tf.contrib.rnn.DropoutWrapper(
             BinaryTreeLSTMCell(tree_lstm_num_units,
                                self.tree_lstm_keep_prob_ph),
             self.tree_lstm_keep_prob_ph, self.tree_lstm_keep_prob_ph),
         name_or_scope=self._tree_lstm_cell_default_scope_name)
     self.word_embedding = td.Embedding(
         *weights.shape,
         initializer=weights,
         name=self._word_embedding_default_scope_name)
     self.embed_subtree = td.ForwardDeclaration(name='embed_subtree')
     self.vocab = vocab
Example no. 6
                    new_c += c * tf.sigmoid(fg_list[i] + self.forget_bias)
            new_c += tf.sigmoid(ig) * jg

            new_h = tf.tanh(new_c) * tf.sigmoid(og)
            resultH = tf.concat([new_h], 1)

            resultH = tf.nn.dropout(resultH, self._keep_prob)
            return resultH, [new_c, new_h]


# dropout keep probability, with a default of 1 (for eval).
keep_prob_ph = tf.placeholder_with_default(1.0, [])

lstm_num_units = 256  # Tai et al. used 150
tree_lstm = td.ScopedLayer(binaryTreeLSTMCell(lstm_num_units,
                                              keep_prob=keep_prob_ph),
                           name_or_scope='tree_lstm')
NUM_CLASSES = 6  # number of distinct labels
output_layer = td.FC(NUM_CLASSES, activation=None, name='output_layer')

word_embedding = td.Embedding(*weight_matrix.shape,
                              initializer=weight_matrix,
                              name='word_embedding',
                              trainable=False)

# declare recursive model
embed_subtree = td.ForwardDeclaration(name='embed_subtree')


def makeContextMat(input1):
    input1 = int(input1)
Example no. 7
def run(write_to, batch_size_setting):
    startTime = time.time()

    data_dir = "../senti/"
    """
  def download_and_unzip(url_base, zip_name, *file_names):
    zip_path = os.path.join(data_dir, zip_name)
    url = url_base + zip_name
    print('downloading %s to %s' % (url, zip_path))
    urllib.request.urlretrieve(url, zip_path)
    out_paths = []
    with zipfile.ZipFile(zip_path, 'r') as f:
      for file_name in file_names:
        print('extracting %s' % file_name)
        out_paths.append(f.extract(file_name, path=data_dir))
    return out_paths

  def download(url_base, zip_name):
    zip_path = os.path.join(data_dir, zip_name)
    url = url_base + zip_name
    print('downloading %s to %s' % (url, zip_path))
    urllib.request.urlretrieve(url, zip_path)


  full_glove_path, = download_and_unzip(
    'http://nlp.stanford.edu/data/', 'glove.840B.300d.zip',
    'glove.840B.300d.txt')

  train_path, dev_path, test_path = download_and_unzip(
    'http://nlp.stanford.edu/sentiment/', 'trainDevTestTrees_PTB.zip',
    'trees/train.txt', 'trees/dev.txt', 'trees/test.txt')


  filtered_glove_path = os.path.join(data_dir, 'filtered_glove.txt')

  def filter_glove():
    vocab = set()
    # Download the full set of unlabeled sentences separated by '|'.
    sentence_path, = download_and_unzip(
      'http://nlp.stanford.edu/~socherr/', 'stanfordSentimentTreebank.zip',
      'stanfordSentimentTreebank/SOStr.txt')
    with codecs.open(sentence_path, encoding='utf-8') as f:
      for line in f:
        # Drop the trailing newline and strip backslashes. Split into words.
        vocab.update(line.strip().replace('\\', '').split('|'))
    nread = 0
    nwrote = 0
    with codecs.open(full_glove_path, encoding='utf-8') as f:
      with codecs.open(filtered_glove_path, 'w', encoding='utf-8') as out:
        for line in f:
          nread += 1
          line = line.strip()
          if not line: continue
          if line.split(u' ', 1)[0] in vocab:
            out.write(line + '\n')
            nwrote += 1
    print('read %s lines, wrote %s' % (nread, nwrote))
  #filter_glove()
  """

    dev_glove_path = os.path.join('./', 'small_glove.txt')

    def load_embeddings(embedding_path):
        """Loads embedings, returns weight matrix and dict from words to indices."""
        print('loading word embeddings from %s' % embedding_path)
        weight_vectors = []
        word_idx = {}
        with codecs.open(embedding_path, encoding='utf-8') as f:
            for line in f:
                word, vec = line.split(u' ', 1)
                word_idx[word] = len(weight_vectors)
                weight_vectors.append(np.array(vec.split(), dtype=np.float32))
        # Annoying implementation detail; '(' and ')' are replaced by '-LRB-' and
        # '-RRB-' respectively in the parse-trees.
        #word_idx[u'-LRB-'] = word_idx.pop(u'(')
        #word_idx[u'-RRB-'] = word_idx.pop(u')')
        # Random embedding vector for unknown words.
        weight_vectors.append(
            np.random.uniform(-0.05, 0.05,
                              weight_vectors[0].shape).astype(np.float32))
        return np.stack(weight_vectors), word_idx

    weight_matrix, word_idx = load_embeddings(dev_glove_path)

    def load_trees(filename):
        with codecs.open(filename, encoding='utf-8') as f:
            # Drop the trailing newline and strip backslashes.
            trees = [line.strip().replace('\\', '') for line in f]
            print('loaded %s trees from %s' % (len(trees), filename))
            return trees

    #train_path = './senti/trees/train.txt'
    #train_path =  os.path.join(data_dir, 'trees/dev.txt')
    train_path = './dev.txt'
    #dev_path = './senti/trees/dev.txt'
    #test_path = './senti/trees/test.txt'

    train_trees = load_trees(train_path)
    trainSIZE = len(train_trees)

    #dev_trees = load_trees(dev_path)
    #test_trees = load_trees(test_path)

    class BinaryTreeLSTMCell(tf.contrib.rnn.BasicLSTMCell):
        """LSTM with two state inputs.

    This is the model described in section 3.2 of 'Improved Semantic
    Representations From Tree-Structured Long Short-Term Memory
    Networks' <http://arxiv.org/pdf/1503.00075.pdf>, with recurrent
    dropout as described in 'Recurrent Dropout without Memory Loss'
    <http://arxiv.org/pdf/1603.05118.pdf>.
    """
        def __init__(self, num_units, keep_prob=1.0):
            """Initialize the cell.

      Args:
        num_units: int, The number of units in the LSTM cell.
        keep_prob: Keep probability for recurrent dropout.
      """
            super(BinaryTreeLSTMCell, self).__init__(num_units)
            self._keep_prob = keep_prob

        def __call__(self, inputs, state, scope=None):
            with tf.variable_scope(scope or type(self).__name__):
                lhs, rhs = state
                c0, h0 = lhs
                c1, h1 = rhs
                concat = tf.contrib.layers.linear(
                    tf.concat([inputs, h0, h1], 1), 5 * self._num_units)

                # i = input_gate, j = new_input, f = forget_gate, o = output_gate
                i, j, f0, f1, o = tf.split(value=concat,
                                           num_or_size_splits=5,
                                           axis=1)

                j = self._activation(j)
                if not isinstance(self._keep_prob,
                                  float) or self._keep_prob < 1:
                    j = tf.nn.dropout(j, self._keep_prob)

                new_c = (c0 * tf.sigmoid(f0 + self._forget_bias) +
                         c1 * tf.sigmoid(f1 + self._forget_bias) +
                         tf.sigmoid(i) * j)
                new_h = self._activation(new_c) * tf.sigmoid(o)

                new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)

                return new_h, new_state

    keep_prob_ph = tf.placeholder_with_default(1.0, [])

    lstm_num_units = 150  # Tai et al. used 150
    tree_lstm = td.ScopedLayer(tf.contrib.rnn.DropoutWrapper(
        BinaryTreeLSTMCell(lstm_num_units, keep_prob=keep_prob_ph),
        input_keep_prob=keep_prob_ph,
        output_keep_prob=keep_prob_ph),
                               name_or_scope='tree_lstm')

    NUM_CLASSES = 5  # number of distinct sentiment labels
    output_layer = td.FC(NUM_CLASSES, activation=None, name='output_layer')

    word_embedding = td.Embedding(*weight_matrix.shape,
                                  initializer=weight_matrix,
                                  name='word_embedding',
                                  trainable=False)

    embed_subtree = td.ForwardDeclaration(name='embed_subtree')

    def logits_and_state():
        """Creates a block that goes from tokens to (logits, state) tuples."""
        unknown_idx = len(word_idx)
        lookup_word = lambda word: word_idx.get(word, unknown_idx)

        word2vec = (td.GetItem(0) >> td.InputTransform(lookup_word) >>
                    td.Scalar('int32') >> word_embedding)

        pair2vec = (embed_subtree(), embed_subtree())

        # Trees are binary, so the tree layer takes two states as its input_state.
        zero_state = td.Zeros((tree_lstm.state_size, ) * 2)
        # Input is a word vector.
        zero_inp = td.Zeros(word_embedding.output_type.shape[0])

        word_case = td.AllOf(word2vec, zero_state)
        pair_case = td.AllOf(zero_inp, pair2vec)

        tree2vec = td.OneOf(len, [(1, word_case), (2, pair_case)])

        return tree2vec >> tree_lstm >> (output_layer, td.Identity())

    def tf_node_loss(logits, labels):
        return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=labels)

    def tf_fine_grained_hits(logits, labels):
        predictions = tf.cast(tf.argmax(logits, 1), tf.int32)
        return tf.cast(tf.equal(predictions, labels), tf.float64)

    def tf_binary_hits(logits, labels):
        softmax = tf.nn.softmax(logits)
        binary_predictions = (softmax[:, 3] + softmax[:, 4]) > (softmax[:, 0] +
                                                                softmax[:, 1])
        binary_labels = labels > 2
        return tf.cast(tf.equal(binary_predictions, binary_labels), tf.float64)

    def add_metrics(is_root, is_neutral):
        """A block that adds metrics for loss and hits; output is the LSTM state."""
        c = td.Composition(name='predict(is_root=%s, is_neutral=%s)' %
                           (is_root, is_neutral))
        with c.scope():
            # destructure the input; (labels, (logits, state))
            labels = c.input[0]
            logits = td.GetItem(0).reads(c.input[1])
            state = td.GetItem(1).reads(c.input[1])

            # calculate loss
            loss = td.Function(tf_node_loss)
            td.Metric('all_loss').reads(loss.reads(logits, labels))
            if is_root: td.Metric('root_loss').reads(loss)

            # calculate fine-grained hits
            hits = td.Function(tf_fine_grained_hits)
            td.Metric('all_hits').reads(hits.reads(logits, labels))
            if is_root: td.Metric('root_hits').reads(hits)

            # calculate binary hits, if the label is not neutral
            if not is_neutral:
                binary_hits = td.Function(tf_binary_hits).reads(logits, labels)
                td.Metric('all_binary_hits').reads(binary_hits)
                if is_root: td.Metric('root_binary_hits').reads(binary_hits)

            # output the state, which will be read by our parent's LSTM cell
            c.output.reads(state)
        return c

    def tokenize(s):
        label, phrase = s[1:-1].split(None, 1)
        return label, sexpr.sexpr_tokenize(phrase)

    def embed_tree(logits_and_state, is_root):
        """Creates a block that embeds trees; output is tree LSTM state."""
        return td.InputTransform(tokenize) >> td.OneOf(
            key_fn=lambda pair: pair[0] == '2',  # label 2 means neutral
            case_blocks=(add_metrics(is_root, is_neutral=False),
                         add_metrics(is_root, is_neutral=True)),
            pre_block=(td.Scalar('int32'), logits_and_state))

    model = embed_tree(logits_and_state(), is_root=True)

    embed_subtree.resolve_to(embed_tree(logits_and_state(), is_root=False))

    compiler = td.Compiler.create(model)
    print('input type: %s' % model.input_type)
    print('output type: %s' % model.output_type)

    metrics = {
        k: tf.reduce_mean(v)
        for k, v in compiler.metric_tensors.items()
    }

    LEARNING_RATE = 0.05
    KEEP_PROB = 1.0
    BATCH_SIZE = batch_size_setting  #20
    EPOCHS = 6
    EMBEDDING_LEARNING_RATE_FACTOR = 0

    train_feed_dict = {keep_prob_ph: KEEP_PROB}
    loss = tf.reduce_sum(compiler.metric_tensors['all_loss'])
    opt = tf.train.AdagradOptimizer(LEARNING_RATE)

    grads_and_vars = opt.compute_gradients(loss)
    found = 0
    for i, (grad, var) in enumerate(grads_and_vars):
        if var == word_embedding.weights:
            found += 1
            grad = tf.scalar_mul(EMBEDDING_LEARNING_RATE_FACTOR, grad)
            grads_and_vars[i] = (grad, var)
    #assert found == 1  # internal consistency check
    train = opt.apply_gradients(grads_and_vars)
    saver = tf.train.Saver()

    sess = tf.Session()  # TF session used for training below
    sess.run(tf.global_variables_initializer())

    def train_step(batch):
        train_feed_dict[compiler.loom_input_tensor] = batch
        _, batch_loss = sess.run([train, loss], train_feed_dict)
        return batch_loss

    def train_epoch(train_set):
        return sum(
            train_step(batch)
            for batch in td.group_by_batches(train_set, BATCH_SIZE))

    train_set = compiler.build_loom_inputs(train_trees)
    """
  dev_feed_dict = compiler.build_feed_dict(dev_trees)

  def dev_eval(epoch, train_loss):
    dev_metrics = sess.run(metrics, dev_feed_dict)
    dev_loss = dev_metrics['all_loss']
    dev_accuracy = ['%s: %.2f' % (k, v * 100) for k, v in
                    sorted(dev_metrics.items()) if k.endswith('hits')]
    print('epoch:%4d, train_loss: %.3e, dev_loss_avg: %.3e, dev_accuracy:\n  [%s]'
          % (epoch, train_loss, dev_loss, ' '.join(dev_accuracy)))
    return dev_metrics['root_hits']
  """

    best_accuracy = 0.0
    save_path = os.path.join(data_dir, 'sentiment_model')

    loopTime = time.time()
    #print('prepare time %s ' % (loopTime - startTime))

    loss_save = []
    time_save = []
    epoch_start_time = loopTime
    for epoch, shuffled in enumerate(td.epochs(train_set, EPOCHS), 1):
        train_loss = train_epoch(shuffled)
        av_loss = train_loss / trainSIZE
        epoch_end_time = time.time()
        epoch_time = epoch_end_time - epoch_start_time
        time_save.append(epoch_time)
        epoch_start_time = epoch_end_time
        print('train loss is %s at time %s' % (av_loss, epoch_time))
        loss_save.append(av_loss)
        #accuracy = dev_eval(epoch, train_loss)
        #if accuracy > best_accuracy:
        #  best_accuracy = accuracy
        #  checkpoint_path = saver.save(sess, save_path, global_step=epoch)
        #  print('model saved in file: %s' % checkpoint_path)

    loopEndTime = time.time()
    #print('loop time %s ' % (loopEndTime - loopTime))
    prepareTime = loopTime - startTime
    loopTime = loopEndTime - loopTime
    timePerEpoch = loopTime / EPOCHS

    # use median time instead
    time_save.sort()
    median_time = time_save[int(EPOCHS / 2)]

    with open(write_to, "w") as f:
        f.write("unit: " + "1 epoch\n")
        for loss in loss_save:
            f.write(str(loss) + "\n")
        f.write("run time: " + str(prepareTime) + " " + str(median_time) +
                "\n")
Example no. 8
    def _compile(self):
        with self.sess.as_default(): 
            import tensorflow_fold as td
        
        output_size = len(self.labels)
        self.keep_prob = tf.placeholder_with_default(tf.constant(1.0),shape=None)

        fshape = (self.window_size * (self.char_embedding_size + self.char_feature_embedding_size), self.num_filters)
        filt_w3 = tf.Variable(tf.random_normal(fshape, stddev=0.05))

        def CNN_Window3(filters):
            return td.Function(lambda a, b, c: cnn_operation([a,b,c],filters))

        def cnn_operation(window_sequences,filters):
            windows = tf.concat(window_sequences,axis=-1)
            products = tf.multiply(tf.expand_dims(windows,axis=-1),filters)
            return tf.reduce_sum(products,axis=-2)

        char_emb = td.Embedding(num_buckets=self.char_buckets, 
                                num_units_out=self.char_embedding_size)
        
        cnn_layer = (td.NGrams(self.window_size) 
                        >> td.Map(CNN_Window3(filt_w3)) 
                        >> td.Max())

        # --------- char features
        
        def charfeature_lookup(c):
            if c in string.ascii_lowercase:
                return 0
            elif c in string.ascii_uppercase:
                return 1
            elif c in string.punctuation:
                return 2
            else:
                return 3

        char_input = td.Map(td.InputTransform(lambda c: ord(c.lower())) 
                            >> td.Scalar('int32') >> char_emb)
                            
        char_features = td.Map(td.InputTransform(charfeature_lookup) 
                            >> td.Scalar(dtype='int32') 
                            >> td.Embedding(num_buckets=4,
                                            num_units_out=self.char_feature_embedding_size))

        charlevel = (td.InputTransform(lambda s: ['~'] + [ c for c in s ] + ['~']) 
                        >> td.AllOf(char_input,char_features) >> td.ZipWith(td.Concat()) 
                        >> cnn_layer)        

        # --------- word features
        
        word_emb = td.Embedding(num_buckets=len(self.word_vocab),
                                num_units_out=self.embedding_size,
                                initializer=self.word_embeddings)
        
        wordlookup = lambda w: (self.word_vocab.index(w.lower()) 
                                if w.lower() in self.word_vocab else 0)
        
        wordinput = (td.InputTransform(wordlookup) 
                        >> td.Scalar(dtype='int32') 
                        >> word_emb)
        
        def wordfeature_lookup(w):
            if re.match('^[a-z]+$',w):
                return 0
            elif re.match('^[A-Z][a-z]+$',w):
                return 1
            elif re.match('^[A-Z]+$',w):
                return 2
            elif re.match('^[A-Za-z]+$',w):
                return 3
            else:
                return 4
        
        wordfeature = (td.InputTransform(wordfeature_lookup) 
                        >> td.Scalar(dtype='int32') 
                        >> td.Embedding(num_buckets=5,
                                num_units_out=32))
        
        #-----------
        
        rnn_fwdcell = td.ScopedLayer(tf.contrib.rnn.LSTMCell(
                        num_units=self.rnn_dim), 'lstm_fwd')
        fwdlayer = td.RNN(rnn_fwdcell) >> td.GetItem(0)
        
        rnn_bwdcell = td.ScopedLayer(tf.contrib.rnn.LSTMCell(
                        num_units=self.rnn_dim), 'lstm_bwd')
        bwdlayer = (td.Slice(step=-1) >> td.RNN(rnn_bwdcell) 
                    >> td.GetItem(0) >> td.Slice(step=-1))
        
        rnn_layer = td.AllOf(fwdlayer, bwdlayer) >> td.ZipWith(td.Concat())
        
        output_layer = td.FC(output_size, 
                             input_keep_prob=self.keep_prob, 
                             activation=None)
        
        wordlevel = td.AllOf(wordinput,wordfeature) >> td.Concat()
        
        network = (td.Map(td.AllOf(wordlevel,charlevel) >> td.Concat()) 
                        >> rnn_layer 
                        >> td.Map(output_layer) 
                        >> td.Map(td.Metric('y_out'))) >> td.Void()
    
        groundlabels = td.Map(td.Vector(output_size,dtype=tf.int32) 
                                >> td.Metric('y_true')) >> td.Void()
    
        self.compiler = td.Compiler.create((network, groundlabels))
        
        self.y_out = self.compiler.metric_tensors['y_out']
        self.y_true = self.compiler.metric_tensors['y_true']
        
        self.y_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
            logits=self.y_out,labels=self.y_true))

        self.y_prob = tf.nn.softmax(self.y_out)
        self.y_true_idx = tf.argmax(self.y_true,axis=-1)
        self.y_pred_idx = tf.argmax(self.y_prob,axis=-1)
        
        self.y_pred = tf.one_hot(self.y_pred_idx,depth=output_size,dtype=tf.int32)
        
        epoch_step = tf.Variable(0, trainable=False)
        self.epoch_step_op = tf.assign(epoch_step, epoch_step+1)
            
        lrate_decay = tf.train.exponential_decay(self.lrate, epoch_step, 1, self.decay)
            
        if self.optimizer == 'adam':
            self.opt = tf.train.AdamOptimizer(learning_rate=lrate_decay)
        elif self.optimizer == 'adagrad':
            self.opt = tf.train.AdagradOptimizer(learning_rate=lrate_decay,
                                                initial_accumulator_value=1e-08)
        elif self.optimizer == 'rmsprop':
            self.opt = tf.train.RMSPropOptimizer(learning_rate=lrate_decay,
                                                 epsilon=1e-08)
        else:
            raise Exception(('The optimizer {} is not in list of available ' 
                            + 'optimizers: default, adam, adagrad, rmsprop.')
                            .format(self.optimizer))
        
        # apply learning multiplier on on embedding learning rate
        embeds = [word_emb.weights]
        grads_and_vars = self.opt.compute_gradients(self.y_loss)
        found = 0
        for i, (grad, var) in enumerate(grads_and_vars):
            if var in embeds:
                found += 1
                grad = tf.scalar_mul(self.embedding_factor, grad)
                grads_and_vars[i] = (grad, var)
        
        assert found == len(embeds)  # internal consistency check
        self.train_step = self.opt.apply_gradients(grads_and_vars)        
        
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(max_to_keep=100)
Example no. 9
    def __init__(self,
                 config,
                 kb,
                 text_seq_batch,
                 seq_length_batch,
                 num_vocab_txt,
                 num_vocab_nmn,
                 EOS_idx,
                 num_choices,
                 decoder_sampling,
                 use_gt_layout=None,
                 gt_layout_batch=None,
                 scope='neural_module_network',
                 reuse=None):
        with tf.variable_scope(scope, reuse=reuse):
            # Part 1: Seq2seq RNN to generate module layout tokens

            embedding_mat = tf.get_variable(
                'embedding_mat', [num_vocab_txt, config.embed_dim_txt],
                initializer=tf.contrib.layers.xavier_initializer())

            with tf.variable_scope('layout_generation'):
                att_seq2seq = netgen_att.AttentionSeq2Seq(
                    config, text_seq_batch, seq_length_batch, num_vocab_txt,
                    num_vocab_nmn, EOS_idx, decoder_sampling, embedding_mat,
                    use_gt_layout, gt_layout_batch)
                self.att_seq2seq = att_seq2seq
                predicted_tokens = att_seq2seq.predicted_tokens
                token_probs = att_seq2seq.token_probs
                word_vecs = att_seq2seq.word_vecs
                neg_entropy = att_seq2seq.neg_entropy
                self.atts = att_seq2seq.atts

                self.predicted_tokens = predicted_tokens
                self.token_probs = token_probs
                self.word_vecs = word_vecs
                self.neg_entropy = neg_entropy

                # log probability of each generated sequence
                self.log_seq_prob = tf.reduce_sum(tf.log(token_probs), axis=0)

            # Part 2: Neural Module Network
            with tf.variable_scope('layout_execution'):
                modules = Modules(config, kb, word_vecs, num_choices,
                                  embedding_mat)
                self.modules = modules
                # Recursion of modules
                att_shape = [len(kb)]
                # Forward declaration of module recursion
                att_expr_decl = td.ForwardDeclaration(td.PyObjectType(),
                                                      td.TensorType(att_shape))
                # _key_find
                case_key_find = td.Record([
                    ('time_idx', td.Scalar(dtype='int32')),
                    ('batch_idx', td.Scalar(dtype='int32'))
                ])
                case_key_find = case_key_find >> td.ScopedLayer(
                    modules.KeyFindModule, name_or_scope='KeyFindModule')
                # _key_filter
                case_key_filter = td.Record([('input_0', att_expr_decl()),
                                             ('time_idx', td.Scalar('int32')),
                                             ('batch_idx', td.Scalar('int32'))
                                             ])
                case_key_filter = case_key_filter >> td.ScopedLayer(
                    modules.KeyFilterModule, name_or_scope='KeyFilterModule')
                recursion_cases = td.OneOf(td.GetItem('module'), {
                    '_key_find': case_key_find,
                    '_key_filter': case_key_filter
                })
                att_expr_decl.resolve_to(recursion_cases)
                # _val_desc: output scores for choice (for valid expressions)
                predicted_scores = td.Record([('input_0', recursion_cases),
                                              ('time_idx', td.Scalar('int32')),
                                              ('batch_idx', td.Scalar('int32'))
                                              ])
                predicted_scores = predicted_scores >> td.ScopedLayer(
                    modules.ValDescribeModule,
                    name_or_scope='ValDescribeModule')

                # For invalid expressions, define a dummy answer
                # so that all answers have the same form
                INVALID = assembler.INVALID_EXPR
                dummy_scores = td.Void() >> td.FromTensor(
                    np.zeros(num_choices, np.float32))
                output_scores = td.OneOf(td.GetItem('module'), {
                    '_val_desc': predicted_scores,
                    INVALID: dummy_scores
                })

                # compile and get the output scores
                self.compiler = td.Compiler.create(output_scores)
                self.scores = self.compiler.output_tensors[0]

            # Regularization: Entropy + L2
            self.entropy_reg = tf.reduce_mean(neg_entropy)
            module_weights = [
                v for v in tf.trainable_variables()
                if (scope in v.op.name and v.op.name.endswith('weights'))
            ]
            self.l2_reg = tf.add_n([tf.nn.l2_loss(v) for v in module_weights])
Example no. 10
def convLSTM_cell(kernel_size, out_features=64):
    convlstm = Conv1DLSTMCell(input_shape=[vsize, 1],
                              output_channels=out_features,
                              kernel_shape=[kernel_size])
    return td.ScopedLayer(convlstm)
Example no. 11
def multi_FC_cell(units_list):
    return td.ScopedLayer(
        tf.contrib.rnn.MultiRNNCell([
            tf.contrib.rnn.LSTMCell(num_units=units) for units in units_list
        ]))
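
A minimal usage sketch (an illustration with assumed sizes and toy data, not taken from the source): stack multi_FC_cell into td.RNN, keep the per-step outputs of the top layer, and max-pool them over time, mirroring the td.Max() reductions used elsewhere on this page.

import tensorflow as tf
import tensorflow_fold as td

# Each input example is a list of 4-dimensional feature vectors (illustrative).
seq_encoder = (td.Map(td.Vector(4))
               >> td.RNN(multi_FC_cell([16, 8]))  # two stacked LSTM layers
               >> td.GetItem(0)                   # per-step outputs of the top layer
               >> td.Max())                       # element-wise max over time
compiler = td.Compiler.create(seq_encoder)
(pooled,) = compiler.output_tensors               # shape [batch, 8]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = compiler.build_feed_dict([[[0.1, 0.2, 0.3, 0.4],
                                      [0.5, 0.6, 0.7, 0.8]]])
    print(sess.run(pooled, feed))
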
Example no. 12
def build_VAE(z_size, token_emb_size):
    c = td.Composition()
    c.set_input_type(td.SequenceType(td.TensorType(([token_emb_size]), 'float32')))
    with c.scope():
        # input_sequence = td.Map(td.Vector(token_emb_size)).reads(c.input)
        input_sequence = c.input

        # encoder composition TODO: refactor this out
        # rnn_cell = td.ScopedLayer(
        #     tf.contrib.rnn.LSTMCell(
        #         num_units=2*z_size,
        #         initializer=tf.contrib.layers.xavier_initializer(),
        #         activation=tf.tanh
        #     ),
        #     'encoder'
        # )
        encoder_rnn_cell = td.ScopedLayer(
            tf.contrib.rnn.GRUCell(
                num_units=2*z_size,
                # initializer=tf.contrib.layers.xavier_initializer(),
                activation=tf.tanh
            ),
            'encoder'
        )
        output_sequence = td.RNN(encoder_rnn_cell) >> td.GetItem(0)
        mus_and_log_sigs = output_sequence >> td.GetItem(-1)

        # reparam_z = mus_and_log_sigs >> td.Function(resampling)
        reparam_z = td.Function(resampling, name='resampling')
        reparam_z.set_input_type(td.TensorType((2 * z_size,)))
        reparam_z.set_output_type(td.TensorType((z_size,)))

        #  A list of same length of input_sequence, but with empty values
        #  this is used for the decoder to map over
        list_of_nothing = td.Map(
            td.Void() >> td.FromTensor(tf.zeros((0,)))
        )

        # decoder composition
        # TODO: refactor this out
        # decoder_rnn = td.ScopedLayer(
        #     tf.contrib.rnn.LSTMCell(
        #         num_units=z_size,
        #         initializer=tf.contrib.layers.xavier_initializer(),
        #         activation=tf.tanh
        #     ),
        #     'decoder'
        # )
        decoder_rnn = td.ScopedLayer(
            tf.contrib.rnn.GRUCell(
                num_units=z_size,
                # initializer=tf.contrib.layers.xavier_initializer(),
                activation=tf.tanh
            ),
            'decoder'
        )
        decoder_rnn_output = td.RNN(
            decoder_rnn,
            initial_state_from_input=True
        ) >> td.GetItem(0)

        fc_layer = td.FC(
            token_emb_size,
            activation=tf.nn.relu,
            initializer=tf.contrib.layers.xavier_initializer()
        )

        un_normalised_token_probs = decoder_rnn_output >> td.Map(fc_layer)

        # reparam_z.reads(input_sequence)
        mus_and_log_sigs.reads(input_sequence)
        reparam_z.reads(mus_and_log_sigs)
        list_of_nothing.reads(input_sequence)
        un_normalised_token_probs.reads(list_of_nothing, reparam_z)

        c.output.reads(un_normalised_token_probs, mus_and_log_sigs)
    return c
Example no. 13
def multi_convLSTM_cell(kernel_sizes, out_features):
    return td.ScopedLayer(
        tf.contrib.rnn.MultiRNNCell([
            convLSTM_cell(kernel, features)
            for (kernel, features) in zip(kernel_sizes, out_features)
        ]))
Example no. 14
def build_program_encoder(rnn_cell):
    return td.ScopedLayer(rnn_cell, 'encoder')
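
A hedged one-liner usage of build_program_encoder (the GRU cell and the 54-dimensional token embeddings are illustrative assumptions, not from the source):

encoder_cell = build_program_encoder(tf.contrib.rnn.GRUCell(num_units=128))
# Run the encoder over a sequence of token embeddings and keep the final state.
encode_block = td.Map(td.Vector(54)) >> td.RNN(encoder_cell) >> td.GetItem(1)
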
    def __init__(self,
                 image_batch,
                 text_seq_batch,
                 seq_length_batch,
                 T_decoder,
                 num_vocab_txt,
                 embed_dim_txt,
                 num_vocab_nmn,
                 embed_dim_nmn,
                 lstm_dim,
                 num_layers,
                 EOS_idx,
                 encoder_dropout,
                 decoder_dropout,
                 decoder_sampling,
                 num_choices,
                 use_gt_layout=None,
                 gt_layout_batch=None,
                 scope='neural_module_network',
                 reuse=None):

        with tf.variable_scope(scope, reuse=reuse):
            # Part 0: Visual feature from CNN
            with tf.variable_scope('image_feature_cnn'):
                image_feat_grid = shapes_convnet(image_batch)
                self.image_feat_grid = image_feat_grid

            # Part 1: Seq2seq RNN to generate module layout tokens
            with tf.variable_scope('layout_generation'):
                att_seq2seq = nmn3_netgen_att.AttentionSeq2Seq(
                    text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
                    embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim,
                    num_layers, EOS_idx, encoder_dropout, decoder_dropout,
                    decoder_sampling, use_gt_layout, gt_layout_batch)
                self.att_seq2seq = att_seq2seq
                predicted_tokens = att_seq2seq.predicted_tokens
                token_probs = att_seq2seq.token_probs
                word_vecs = att_seq2seq.word_vecs
                neg_entropy = att_seq2seq.neg_entropy
                self.atts = att_seq2seq.atts

                self.predicted_tokens = predicted_tokens
                self.token_probs = token_probs
                self.word_vecs = word_vecs
                self.neg_entropy = neg_entropy

                # log probability of each generated sequence
                self.log_seq_prob = tf.reduce_sum(tf.log(token_probs), axis=0)

            # Part 2: Neural Module Network
            with tf.variable_scope('layout_execution'):
                modules = Modules(image_feat_grid, word_vecs, num_choices)
                self.modules = modules
                # Recursion of modules
                att_shape = image_feat_grid.get_shape().as_list()[1:-1] + [1]
                # Forward declaration of module recursion
                att_expr_decl = td.ForwardDeclaration(td.PyObjectType(),
                                                      td.TensorType(att_shape))
                # _Find
                case_find = td.Record([('time_idx', td.Scalar(dtype='int32')),
                                       ('batch_idx', td.Scalar(dtype='int32'))
                                       ])
                case_find = case_find >> \
                    td.ScopedLayer(modules.FindModule, name_or_scope='FindModule')
                # _Transform
                case_transform = td.Record([('input_0', att_expr_decl()),
                                            ('time_idx', td.Scalar('int32')),
                                            ('batch_idx', td.Scalar('int32'))])
                case_transform = case_transform >> \
                    td.ScopedLayer(modules.TransformModule, name_or_scope='TransformModule')
                # _And
                case_and = td.Record([('input_0', att_expr_decl()),
                                      ('input_1', att_expr_decl()),
                                      ('time_idx', td.Scalar('int32')),
                                      ('batch_idx', td.Scalar('int32'))])
                case_and = case_and >> \
                    td.ScopedLayer(modules.AndModule, name_or_scope='AndModule')

                recursion_cases = td.OneOf(
                    td.GetItem('module'), {
                        '_Find': case_find,
                        '_Transform': case_transform,
                        '_And': case_and
                    })
                att_expr_decl.resolve_to(recursion_cases)
                # _Answer: output scores for choice (for valid expressions)
                predicted_scores = td.Record([('input_0', recursion_cases),
                                              ('time_idx', td.Scalar('int32')),
                                              ('batch_idx', td.Scalar('int32'))
                                              ])
                predicted_scores = predicted_scores >> \
                    td.ScopedLayer(modules.AnswerModule, name_or_scope='AnswerModule')

                # For invalid expressions, define a dummy answer
                # so that all answers have the same form
                INVALID = nmn3_assembler.INVALID_EXPR
                dummy_scores = td.Void() >> td.FromTensor(
                    np.zeros(num_choices, np.float32))
                output_scores = td.OneOf(td.GetItem('module'), {
                    '_Answer': predicted_scores,
                    INVALID: dummy_scores
                })

                # compile and get the output scores
                self.compiler = td.Compiler.create(output_scores)
                self.scores = self.compiler.output_tensors[0]

            # Regularization: Entropy + L2
            self.entropy_reg = tf.reduce_mean(neg_entropy)
            module_weights = [
                v for v in tf.trainable_variables()
                if (scope in v.op.name and v.op.name.endswith('weights'))
            ]
            self.l2_reg = tf.add_n([tf.nn.l2_loss(v) for v in module_weights])
            new_c = (c0 * tf.sigmoid(f0 + self._forget_bias) +
                     c1 * tf.sigmoid(f1 + self._forget_bias) +
                     tf.sigmoid(i) * j)
            new_h = self._activation(new_c) * tf.sigmoid(o)

            new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)

            return new_h, new_state


keep_prob_ph = tf.placeholder_with_default(1.0, [])

lstm_num_units = 300
tree_lstm = td.ScopedLayer(tf.contrib.rnn.DropoutWrapper(
    BinaryTreeLSTMCell(lstm_num_units, keep_prob=keep_prob_ph),
    input_keep_prob=keep_prob_ph,
    output_keep_prob=keep_prob_ph),
                           name_or_scope='tree_lstm')

NUM_CLASSES = 5  # number of distinct sentiment labels
output_layer = td.FC(NUM_CLASSES, activation=None, name='output_layer')

word_embedding = td.Embedding(*weight_matrix.shape,
                              initializer=weight_matrix,
                              name='word_embedding')

embed_subtree = td.ForwardDeclaration(name='embed_subtree')


def logits_and_state():
    def _compile(self):
        with self.sess.as_default(): 
            import tensorflow_fold as td
        
        output_size = len(self.labels)
        self.keep_prob = tf.placeholder_with_default(tf.constant(1.0),shape=None)

        char_emb = td.Embedding(num_buckets=self.char_buckets, 
                                num_units_out=self.embedding_size)
                                #initializer=tf.truncated_normal_initializer(stddev=0.15))
        char_cell = td.ScopedLayer(tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'char_cell')

        char_lstm = (td.InputTransform(lambda s: [ord(c) for c in s]) 
                    >> td.Map(td.Scalar('int32') >> char_emb) 
                    >> td.RNN(char_cell) >> td.GetItem(1) >> td.GetItem(1))        
        
        rnn_fwdcell = td.ScopedLayer(tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'lstm_fwd')
        fwdlayer = td.RNN(rnn_fwdcell) >> td.GetItem(0)
        
        rnn_bwdcell = td.ScopedLayer(tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'lstm_bwd')
        bwdlayer = (td.Slice(step=-1) >> td.RNN(rnn_bwdcell) 
                        >> td.GetItem(0) >> td.Slice(step=-1))
        
        pos_emb = td.Embedding(num_buckets=300,
                    num_units_out=32,
                    initializer=tf.truncated_normal_initializer(stddev=0.1))
        
        pos_x = (td.InputTransform(lambda x: x + 150)
                    >> td.Scalar(dtype='int32') 
                    >> pos_emb)
        
        pos_y = (td.InputTransform(lambda x: x + 150)
                    >> td.Scalar(dtype='int32') 
                    >> pos_emb)
        
        input_layer = td.Map(td.Record((char_lstm,pos_x,pos_y)) >> td.Concat())
        
        maxlayer = (td.AllOf(fwdlayer, bwdlayer) 
                    >> td.ZipWith(td.Concat()) 
                    >> td.Max())
        
        output_layer = (input_layer >> 
                        maxlayer >> td.FC(output_size, 
                                         input_keep_prob=self.keep_prob, 
                                         activation=None))

        self.compiler = td.Compiler.create((output_layer, 
                        td.Vector(output_size,dtype=tf.int32)))
                        
        self.y_out, self.y_true = self.compiler.output_tensors
        self.y_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
            logits=self.y_out,labels=self.y_true))

        self.y_prob = tf.nn.softmax(self.y_out)
        self.y_true_idx = tf.argmax(self.y_true,axis=1)
        self.y_pred_idx = tf.argmax(self.y_prob,axis=1)
        
        self.y_pred = tf.one_hot(self.y_pred_idx,depth=output_size,dtype=tf.int32)

        epoch_step = tf.Variable(0, trainable=False)
        self.epoch_step_op = tf.assign(epoch_step, epoch_step+1)
            
        lrate_decay = tf.train.exponential_decay(self.lrate, epoch_step, 1, self.decay)
            
        if self.optimizer == 'adam':
            self.opt = tf.train.AdamOptimizer(learning_rate=lrate_decay)
        elif self.optimizer == 'adagrad':
            self.opt = tf.train.AdagradOptimizer(learning_rate=lrate_decay,
                                                initial_accumulator_value=1e-08)
        elif self.optimizer == 'rmsprop' or self.optimizer == 'default':
            self.opt = tf.train.RMSPropOptimizer(learning_rate=lrate_decay,
                                                 epsilon=1e-08)
        else:
            raise Exception(('The optimizer {} is not in list of available ' 
                            + 'optimizers: default, adam, adagrad, rmsprop.')
                            .format(self.optimizer))
        
        # apply learning multiplier on on embedding learning rate
        embeds = [pos_emb.weights, char_emb.weights]
        grads_and_vars = self.opt.compute_gradients(self.y_loss)
        found = 0
        for i, (grad, var) in enumerate(grads_and_vars):
            if var in embeds:
                found += 1
                grad = tf.scalar_mul(self.embedding_factor, grad)
                grads_and_vars[i] = (grad, var)
        
        assert found == len(embeds)  # internal consistency check
        self.train_step = self.opt.apply_gradients(grads_and_vars)        
        
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(max_to_keep=100)
Example no. 18
def FC_cell(units):
    return td.ScopedLayer(tf.contrib.rnn.LSTMCell(num_units=units))
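
A small end-to-end sketch built on the FC_cell definition above (assumes TensorFlow 1.x with tf.contrib and tensorflow_fold; the sizes and the toy input are illustrative). It compiles a model that runs the LSTM over a sequence of 3-dimensional vectors and keeps the final hidden state, following the td.GetItem(1) >> td.GetItem(1) idiom used for the char_lstm block earlier on this page.

import tensorflow as tf
import tensorflow_fold as td

model = (td.Map(td.Vector(3))       # sequence of 3-d input vectors
         >> td.RNN(FC_cell(16))     # -> (output sequence, (c, h))
         >> td.GetItem(1)           # LSTM state tuple (c, h)
         >> td.GetItem(1))          # final hidden state h
compiler = td.Compiler.create(model)
(final_h,) = compiler.output_tensors  # shape [batch, 16]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = compiler.build_feed_dict([[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]])
    print(sess.run(final_h, feed))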