Example #1
def tri_combined(idx, pclen, depth, max_depth):
    """TF function, input: idx, pclen, depth, max_depth as batch (1D Tensor)
    Output: weight tensor (3D Tensor), first dim is batch
    """
    Wconvt = param.get('Wconvt')
    Wconvl = param.get('Wconvl')
    Wconvr = param.get('Wconvr')

    dim = tf.unstack(tf.shape(Wconvt))[0]
    batch_shape = tf.shape(idx)

    tmp = (idx - 1) / (pclen - 1)
    # when pclen == 1, replace nan items with 0.5
    tmp = tf.where(tf.is_nan(tmp), tf.ones_like(tmp) * 0.5, tmp)

    t = (max_depth - depth) / max_depth
    r = (1 - t) * tmp
    l = (1 - t) * (1 - r)

    lb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * l)
    rb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * r)
    tb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * t)

    lb = tf.reshape(lb, [-1, dim])
    rb = tf.reshape(rb, [-1, dim])
    tb = tf.reshape(tb, [-1, dim])

    tmp = tf.matmul(lb, Wconvl) + tf.matmul(rb, Wconvr) + tf.matmul(tb, Wconvt)

    tmp = tf.reshape(tmp, [-1, hyper.word_dim, hyper.conv_dim])
    return tmp
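
The weighting follows the continuous binary tree scheme: t grows toward the patch root, and the remainder is split into r and l by sibling position. A minimal NumPy sketch of the same formulas, handy for checking single values outside the graph (the function name and sample values are made up):

import numpy as np

def tri_weights(idx, pclen, depth, max_depth):
    # sibling position in [0, 1]; 0.5 when there is only one child (pclen == 1)
    pos = 0.5 if pclen == 1 else (idx - 1) / (pclen - 1)
    t = (max_depth - depth) / max_depth  # "top" weight, largest near the root
    r = (1 - t) * pos                    # "right" share of the remainder
    l = (1 - t) * (1 - r)                # "left" weight, as in the TF code above
    return l, r, t

print(tri_weights(idx=1, pclen=3, depth=1, max_depth=2))  # (0.5, 0.0, 0.5)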
Example #2
def linear_combine(clen, pclen, idx):
    Wl = param.get('Wl')
    Wr = param.get('Wr')

    dim = tf.unstack(tf.shape(Wl))[0]
    batch_shape = tf.shape(clen)

    f = clen / pclen
    l = (pclen - idx - 1) / (pclen - 1)
    r = (idx) / (pclen - 1)
    # when pclen == 1, replace nan items with 0.5
    l = tf.where(tf.is_nan(l), tf.ones_like(l) * 0.5, l)
    r = tf.where(tf.is_nan(r), tf.ones_like(r) * 0.5, r)

    lb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * l)
    rb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * r)
    fb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * f)

    lb = tf.reshape(lb, [-1, hyper.word_dim])
    rb = tf.reshape(rb, [-1, hyper.word_dim])

    tmp = tf.matmul(lb, Wl) + tf.matmul(rb, Wr)

    tmp = tf.reshape(tmp, [-1, hyper.word_dim, hyper.word_dim])

    return tf.matmul(fb, tmp)
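
Both functions rely on the same trick: a batched identity matrix is scaled by a per-example coefficient, so the following matmul effectively scales the shared weight matrix independently per example. A small NumPy sketch of just that trick (dimensions and values are arbitrary):

import numpy as np

dim, batch = 3, 2
coeff = np.array([0.25, 0.75])                    # one scalar per example
eye = np.tile(np.eye(dim), (batch, 1, 1))         # [batch, dim, dim]
scaled = np.transpose(np.transpose(eye) * coeff)  # scale each identity matrix
W = np.random.rand(dim, dim)
out = scaled.reshape(-1, dim) @ W                 # [(batch * dim), dim]
assert np.allclose(out.reshape(batch, dim, dim), coeff[:, None, None] * W)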
Example #3
def composed_embed_blk():
    leaf_case = direct_embed_blk()
    nonleaf_case = td.Composition(name='composed_embed_nonleaf')
    with nonleaf_case.scope():
        children = td.GetItem('children').reads(nonleaf_case.input)
        clen = td.Scalar().reads(td.GetItem('clen').reads(nonleaf_case.input))
        cclens = td.Map(td.GetItem('clen') >> td.Scalar()).reads(children)
        fchildren = td.Map(direct_embed_blk()).reads(children)

        initial_state = td.Composition()
        with initial_state.scope():
            initial_state.output.reads(
                td.FromTensor(tf.zeros(hyper.word_dim)),
                td.FromTensor(tf.zeros([])),
            )
        summed = td.Zip().reads(fchildren, cclens, td.Broadcast().reads(clen))
        summed = td.Fold(continous_weighted_add_blk(),
                         initial_state).reads(summed)[0]
        added = td.Function(tf.add, name='add_bias').reads(
            summed, td.FromTensor(param.get('B')))
        normed = clip_by_norm_blk().reads(added)

        act_fn = tf.nn.relu if hyper.use_relu else tf.nn.tanh
        activated = td.Function(act_fn).reads(normed)
        nonleaf_case.output.reads(activated)

    return td.OneOf(lambda node: node['clen'] == 0, {
        True: leaf_case,
        False: nonleaf_case
    })
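
For reference, an illustrative node dict in the shape these blocks appear to consume; the field names come from the GetItem calls above, while the concrete values are made up:

sample_node = {
    'name': 4,    # node type id, used for the embedding lookup
    'clen': 2,    # number of children; clen == 0 selects leaf_case
    'children': [
        {'name': 7, 'clen': 0, 'children': []},
        {'name': 2, 'clen': 0, 'children': []},
    ],
}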
Example #4
def feature_detector_blk(max_depth=2):
    """Input: node dict
    Output: TensorType([hyper.conv_dim, ])
    Single patch of the conv. Depth is max_depth
    """
    blk = td.Composition()
    with blk.scope():
        nodes_in_patch = collect_node_for_conv_patch_blk(
            max_depth=max_depth).reads(blk.input)

        # map from python object to tensors
        mapped = td.Map(
            td.Record((coding_blk(), td.Scalar(), td.Scalar(), td.Scalar(),
                       td.Scalar()))).reads(nodes_in_patch)
        # mapped = [(feature, idx, pclen, depth, max_depth), ...]

        # compute weighted feature for each elem
        weighted = td.Map(weighted_feature_blk()).reads(mapped)
        # weighted = [fea, fea, fea, ...]

        # add together
        added = td.Reduce(td.Function(tf.add)).reads(weighted)
        # added = TensorType([hyper.conv_dim, ])

        # add bias
        biased = td.Function(tf.add).reads(added,
                                           td.FromTensor(param.get('Bconv')))
        # biased = TensorType([hyper.conv_dim, ])

        # tanh
        tanh = td.Function(tf.nn.tanh).reads(biased)
        # tanh = TensorType([hyper.conv_dim, ])

        blk.output.reads(tanh)
    return blk
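
dynamic_pooling_blk, used by build_model below, is not part of this listing. A hedged sketch of how such a detector might be pooled over all patches of a tree; collect_patches_blk is a hypothetical helper yielding one node dict per patch root:

def dynamic_pooling_sketch():
    # map the detector over every patch, then max-pool element-wise
    return (collect_patches_blk()                      # hypothetical helper
            >> td.Map(feature_detector_blk(max_depth=2))
            >> td.Reduce(td.Function(tf.maximum)))     # TensorType([hyper.conv_dim, ])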
Example #5
def coding_blk():
    """Input: node dict
    Output: TensorType([1, hyper.word_dim])
    """

    blk = td.Composition()
    with blk.scope():
        direct = embedding.direct_embed_blk().reads(blk.input)
        composed = embedding.composed_embed_blk().reads(blk.input)
        Wcomb1 = td.FromTensor(param.get('Wcomb1'))
        Wcomb2 = td.FromTensor(param.get('Wcomb2'))

        direct = td.Function(embedding.batch_mul).reads(direct, Wcomb1)
        composed = td.Function(embedding.batch_mul).reads(composed, Wcomb2)

        added = td.Function(tf.add).reads(direct, composed)
        blk.output.reads(added)
    return blk
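
embedding.batch_mul is not shown in this listing. Given that both operands here are an embedding row and a shared square matrix, a plausible definition is a plain matrix product (an assumption, not the confirmed implementation):

def batch_mul(vec, mat):
    # embedding rows times a shared [word_dim, word_dim] matrix
    return tf.matmul(vec, mat)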
Example #6
def build_model():
    # create model variables
    param.initialize_tbcnn_weights()

    # Compile the block and append fc layers
    tree_pooling = dynamic_pooling_blk()
    compiler = td.Compiler.create((tree_pooling, td.Scalar(dtype='int64')))
    (pooled, batched_labels) = compiler.output_tensors

    fc1 = tf.nn.relu(
        tf.add(tf.matmul(pooled, param.get('FC1/weight')),
               param.get('FC1/bias')))
    fc2 = tf.nn.relu(
        tf.add(tf.matmul(fc1, param.get('FC2/weight')), param.get('FC2/bias')))

    # prediction outputs with accuracy calculation; tf.nn.softmax yields class
    # probabilities, while fc2 carries the unscaled logits used for the loss
    # in train_with_val
    probs = tf.nn.softmax(fc2)
    correct_prediction = tf.equal(tf.argmax(probs, 1), batched_labels)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    batch_size_op = tf.unstack(tf.shape(batched_labels))[0]

    return compiler, fc2, probs, batched_labels, accuracy, batch_size_op
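
Note that taking the argmax of the softmax output is equivalent to taking it over fc2 directly, since softmax is monotonic; a quick NumPy check with arbitrary values:

import numpy as np

x = np.array([[2.0, -1.0, 0.5]])                      # fake fc2 logits
p = np.exp(x) / np.exp(x).sum(axis=1, keepdims=True)  # softmax by hand
assert (np.argmax(p, axis=1) == np.argmax(x, axis=1)).all()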
Example #7
def train_with_val(unscaled_logits, batched_labels, train_accuracy):
    global_step = tf.Variable(0, trainable=False, name='global_step')

    # calculate weight decay loss
    decay_names = ['Wl', 'Wr', 'Wconvl', 'Wconvr', 'Wconvt']
    decay_loss = tf.reduce_sum(
        input_tensor=hyper.weight_decay *
        tf.stack([tf.nn.l2_loss(param.get(n)) for n in decay_names]),
        name='weights_norm')

    # Calculate loss
    batched_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=unscaled_logits, labels=batched_labels)
    loss = tf.reduce_mean(batched_loss) + decay_loss

    # Exponential decay learning rate
    decayed_rate = tf.train.exponential_decay(hyper.learning_rate,
                                              global_step,
                                              200,
                                              0.65,
                                              staircase=True)
    opt = tf.train.AdamOptimizer(learning_rate=decayed_rate)

    # Apply optimizer
    train_step = opt.minimize(loss, global_step=global_step)

    # Attach summaries
    tf.summary.scalar('learning_rate', decayed_rate)

    tf.summary.histogram('Wl', param.get('Wl'))
    tf.summary.histogram('Wr', param.get('Wr'))
    tf.summary.histogram('B', param.get('B'))
    tf.summary.histogram('Wconvl', param.get('Wconvl'))
    tf.summary.histogram('Wconvr', param.get('Wconvr'))
    tf.summary.histogram('Wconvt', param.get('Wconvt'))
    tf.summary.histogram('Bconv', param.get('Bconv'))
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('train_accuracy', train_accuracy)
    summary_op = tf.summary.merge_all()

    return loss, global_step, train_step, summary_op
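
With staircase=True, tf.train.exponential_decay multiplies the base rate by 0.65 once per 200 global steps. A quick sketch of the first few values, assuming hyper.learning_rate == 0.01 (the real value is not shown in this listing):

base_rate = 0.01
for step in (0, 199, 200, 400, 600):
    print(step, base_rate * 0.65 ** (step // 200))
# steps 0 and 199 keep the base rate; 200 drops to 0.0065, 400 to 0.004225, ...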
Example #8
def write_embedding_metadata(writer, word2int):
    metadata_path = os.path.join(hyper.train_dir, 'embedding_meta.tsv')
    # dump embedding mapping
    items = sorted(word2int.items(), key=operator.itemgetter(1))
    with open(metadata_path, 'w') as f:
        for item in items:
            print(item[0], file=f)

    config = projector.ProjectorConfig()
    config.model_checkpoint_dir = hyper.train_dir
    # The line above doesn't work yet: TF doesn't support model_checkpoint_dir,
    # so create a symlink from train_dir to log_dir instead.
    os.symlink(os.path.join(hyper.train_dir, 'checkpoint'),
               os.path.join(hyper.log_dir, 'checkpoint'))

    embedding = config.embeddings.add()
    embedding.tensor_name = param.get('We').name
    # Link this tensor to its metadata file (e.g. labels).
    embedding.metadata_path = metadata_path
    # Saves a configuration file that TensorBoard will read during startup.
    projector.visualize_embeddings(writer, config)
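
The resulting embedding_meta.tsv holds one node-type name per line, ordered by integer id, so line k labels embedding row k. A tiny sketch of the ordering with a made-up word2int:

import operator

word2int = {'For': 1, 'If': 0, 'While': 2}
print([w for w, _ in sorted(word2int.items(), key=operator.itemgetter(1))])
# -> ['If', 'For', 'While']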
Example #9
def direct_embed_blk():
    return (td.GetItem('name') >> td.Scalar('int32') >>
            td.Function(lambda x: tf.nn.embedding_lookup(param.get('We'), x))
            >> clip_by_norm_blk())
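
A rough eager-style equivalent of this pipeline for a single node dict; clip_by_norm_blk is not part of this listing, so tf.clip_by_norm with an assumed limit of 1.0 stands in for it:

def direct_embed_eager(node):
    vec = tf.nn.embedding_lookup(param.get('We'), node['name'])
    return tf.clip_by_norm(vec, 1.0)  # assumed norm limit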
Example #10
def main():
    apputil.initialize(variable_scope='embedding')

    # load data early so we can initialize hyper parameters accordingly
    ds = data.load_dataset('../data/statements')
    hyper.node_type_num = len(ds.word2int)

    hyper.dump()

    # create model variables
    param.initialize_embedding_weights()

    # Compile the block
    tree_sum = td.GetItem(0) >> tree_sum_blk(l2loss_blk)
    compiler = td.Compiler.create(tree_sum)
    (batched_loss, ) = compiler.output_tensors
    loss = tf.reduce_mean(batched_loss)
    opt = tf.train.AdamOptimizer(learning_rate=hyper.learning_rate)

    global_step = tf.Variable(0, trainable=False, name='global_step')
    train_step = opt.minimize(loss, global_step=global_step)

    # Attach summaries
    tf.summary.histogram('Wl', param.get('Wl'))
    tf.summary.histogram('Wr', param.get('Wr'))
    tf.summary.histogram('B', param.get('B'))
    tf.summary.histogram('Embedding', param.get('We'))
    tf.summary.scalar('loss', loss)

    summary_op = tf.summary.merge_all()

    # create missing dir
    if not os.path.exists(hyper.train_dir):
        os.makedirs(hyper.train_dir)

    # train loop
    saver = tf.train.Saver()
    train_set = compiler.build_loom_inputs(ds.get_split('all')[1])
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        summary_writer = tf.summary.FileWriter(hyper.log_dir, graph=sess.graph)
        write_embedding_metadata(summary_writer, ds.word2int)

        for epoch, shuffled in enumerate(
                td.epochs(train_set, hyper.num_epochs), 1):
            for step, batch in enumerate(
                    td.group_by_batches(shuffled, hyper.batch_size), 1):
                train_feed_dict = {compiler.loom_input_tensor: batch}

                start_time = default_timer()
                _, loss_value, summary, gstep = sess.run(
                    [train_step, loss, summary_op, global_step],
                    train_feed_dict)
                duration = default_timer() - start_time

                logger.info(
                    'global %d epoch %d step %d loss = %.2f (%.1f samples/sec; %.3f sec/batch)',
                    gstep, epoch, step, loss_value,
                    hyper.batch_size / duration, duration)
                if gstep % 10 == 0:
                    summary_writer.add_summary(summary, gstep)
                    saver.save(sess,
                               os.path.join(hyper.train_dir, "model.ckpt"),
                               global_step=gstep)
Example #11
def do_evaluation():
    # load data early to get node_type_num
    ds = data.load_dataset('data/statements')
    hyper.node_type_num = len(ds.word2int)

    (compiler, _, _, _, raw_accuracy, batch_size_op) = build_model()

    # restorer for embedding matrix
    embedding_path = tf.train.latest_checkpoint(hyper.embedding_dir)
    if embedding_path is None:
        raise ValueError('Path to embedding checkpoint is incorrect: ' +
                         hyper.embedding_dir)

    # restorer for other variables
    checkpoint_path = tf.train.latest_checkpoint(hyper.train_dir)
    if checkpoint_path is None:
        raise ValueError('Path to tbcnn checkpoint is incorrect: ' +
                         hyper.train_dir)

    restored_vars = tf.get_collection_ref('restored')
    restored_vars.append(param.get('We'))
    restored_vars.extend(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
    embedding_restorer = tf.train.Saver({'embedding/We': param.get('We')})
    restorer = tf.train.Saver(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))

    # train loop
    total_size, test_gen = ds.get_split('test')
    test_set = compiler.build_loom_inputs(test_gen)
    with tf.Session() as sess:
        # Restore embedding matrix first
        embedding_restorer.restore(sess, embedding_path)
        # Restore others
        restorer.restore(sess, checkpoint_path)
        # Initialize other variables
        gvariables = [
            v for v in tf.global_variables()
            if v not in tf.get_collection('restored')
        ]
        sess.run(tf.variables_initializer(gvariables))

        num_epochs = 1 if not hyper.warm_up else 3
        for shuffled in td.epochs(test_set, num_epochs):
            logger.info('')
            logger.info(
                '======================= Evaluation ===================================='
            )
            accumulated_accuracy = 0.
            start_time = default_timer()
            for step, batch in enumerate(
                    td.group_by_batches(shuffled, hyper.batch_size), 1):
                feed_dict = {compiler.loom_input_tensor: batch}
                accuracy_value, actual_bsize = sess.run(
                    [raw_accuracy, batch_size_op], feed_dict)
                accumulated_accuracy += accuracy_value * actual_bsize
                logger.info(
                    'evaluation in progress: running accuracy = %.2f, processed = %d / %d',
                    accuracy_value,
                    (step - 1) * hyper.batch_size + actual_bsize, total_size)
            duration = default_timer() - start_time
            total_accuracy = accumulated_accuracy / total_size
            logger.info(
                'evaluation accumulated accuracy = %.2f%% (%.1f samples/sec; %.2f seconds)',
                total_accuracy * 100, total_size / duration, duration)
            logger.info(
                '======================= Evaluation End ================================='
            )
            logger.info('')
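
The accumulation above weights each batch by its actual size, which matters because the last batch may be short; a toy check:

batch_acc = [0.75, 0.50, 1.00]  # per-batch accuracies
batch_n = [4, 4, 2]             # actual batch sizes
weighted = sum(a * n for a, n in zip(batch_acc, batch_n)) / sum(batch_n)
print(weighted)                 # 0.70, vs. 0.75 for the unweighted mean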
Example #12
def do_train():
    # load data early to get node_type_num
    ds = data.load_dataset('../data/statements')
    hyper.node_type_num = len(ds.word2int)

    hyper.dump()

    (compiler, unscaled_logits, logits, batched_labels, raw_accuracy,
     batch_size_op) = build_model()

    (loss, global_step, train_step,
     summary_op) = train_with_val(unscaled_logits, batched_labels,
                                  raw_accuracy)

    val_summary_op = tf.summary.scalar('val_accuracy', raw_accuracy)

    # create missing dir
    if not os.path.exists(hyper.train_dir):
        os.makedirs(hyper.train_dir)

    # restorer for embedding matrix
    restorer = tf.train.Saver({'embedding/We': param.get('We')})
    embedding_path = tf.train.latest_checkpoint(hyper.embedding_dir)
    if embedding_path is None:
        raise ValueError('Path to embedding checkpoint is incorrect: ' +
                         hyper.embedding_dir)

    # train loop
    saver = tf.train.Saver()
    train_set = compiler.build_loom_inputs(ds.get_split('train')[1])
    val_set = compiler.build_loom_inputs(ds.get_split('val')[1])
    with tf.Session() as sess:
        # Restore embedding matrix first
        restorer.restore(sess, embedding_path)
        # Initialize other variables
        gvariables = tf.global_variables()
        gvariables.remove(param.get('We'))  # exclude We
        sess.run(tf.variables_initializer(gvariables))

        summary_writer = tf.summary.FileWriter(hyper.log_dir, graph=sess.graph)

        val_step_counter = 0
        shuffled = zip(td.epochs(train_set, hyper.num_epochs),
                       td.epochs(val_set, hyper.num_epochs))
        for epoch, (train_shuffled, val_shuffled) in enumerate(shuffled, 1):
            for step, batch in enumerate(
                    td.group_by_batches(train_shuffled, hyper.batch_size), 1):
                train_feed_dict = {compiler.loom_input_tensor: batch}

                start_time = default_timer()
                (_, loss_value, summary, gstep, actual_bsize) = sess.run(
                    [train_step, loss, summary_op, global_step, batch_size_op],
                    train_feed_dict)
                duration = default_timer() - start_time

                logger.info(
                    'global %d epoch %d step %d loss = %.2f (%.1f samples/sec; %.3f sec/batch)',
                    gstep, epoch, step, loss_value, actual_bsize / duration,
                    duration)
                if gstep % 10 == 0:
                    summary_writer.add_summary(summary, gstep)

            # do a validation test
            logger.info('')
            logger.info(
                '======================= Validation ===================================='
            )
            accumulated_accuracy = 0.
            total_size = 0
            start_time = default_timer()
            for batch in td.group_by_batches(val_shuffled, hyper.batch_size):
                feed_dict = {compiler.loom_input_tensor: batch}
                accuracy_value, actual_bsize, val_summary = sess.run(
                    [raw_accuracy, batch_size_op, val_summary_op], feed_dict)
                summary_writer.add_summary(val_summary, val_step_counter)
                accumulated_accuracy += accuracy_value * actual_bsize
                total_size += actual_bsize
                val_step_counter += 1
                logger.info(
                    'validation step, accuracy = %.2f, current batch = %d, processed = %d',
                    accuracy_value, actual_bsize, total_size)
            duration = default_timer() - start_time
            total_accuracy = accumulated_accuracy / total_size
            logger.info(
                'validation acc = %.2f%% (%.1f samples/sec; %.2f seconds)',
                total_accuracy * 100, total_size / duration, duration)
            saved_path = saver.save(sess,
                                    os.path.join(hyper.train_dir,
                                                 "model.ckpt"),
                                    global_step=gstep)
            logger.info('validation saved path: %s', saved_path)
            logger.info(
                '======================= Validation End ================================='
            )
            logger.info('')