Example #1
def main(_):

    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy')
    print('reading entity embedding')
    ent_embedding = np.load(export_path + 'ent_embedding.npy')
    print('reading relation embedding')
    rel_embedding = np.load(export_path + 'rel_embedding.npy')
    print('reading test data')
    test_instance_triple = np.load(export_path + 'test_instance_triple.npy')
    test_instance_scope = np.load(export_path + 'test_instance_scope.npy')
    test_len = np.load(export_path + 'test_len.npy')
    test_label = np.load(export_path + 'test_label.npy')
    test_word = np.load(export_path + 'test_word.npy')
    test_pos1 = np.load(export_path + 'test_pos1.npy')
    test_pos2 = np.load(export_path + 'test_pos2.npy')
    test_mask = np.load(export_path + 'test_mask.npy')
    test_head = np.load(export_path + 'test_head.npy')
    test_tail = np.load(export_path + 'test_tail.npy')
    test_desc_tail = np.load(export_path + 'test_desc_tail.npy')
    test_desc_head = np.load(export_path + 'test_desc_head.npy')
    print('reading finished')
    print('mentions 		: %d' % (len(test_instance_triple)))
    print('sentences		: %d' % (len(test_len)))
    print('relations		: %d' % (FLAGS.num_classes))
    print('word size		: %d' % (len(word_vec[0])))
    print('position size 	: %d' % (FLAGS.pos_size))
    print('hidden size		: %d' % (FLAGS.hidden_size))
    print('reading finished')
    # desc = {}
    # with open(export_path + 'desc.txt') as f:
    # 	for content in f:
    # 		en_id, en_desc = content.strip().split('\t')
    # 		en_desc = en_desc.strip().split(',')
    # 		en_desc = [int(word) for word in en_desc]
    # 		desc[int(en_id)] = en_desc
    print('building network...')
    sess_db = tf.Session()
    # sess_db = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess_db.add_tensor_filter('has_inf_or_nan',tf_debug.has_inf_or_nan)
    merged_summary = tf.summary.merge_all()
    global_step = tf.Variable(0, name='global_step', trainable=False)
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=False,
                            word_embeddings=word_vec,
                            ent_embedding=ent_embedding,
                            rel_embedding=rel_embedding)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=False,
                             word_embeddings=word_vec,
                             ent_embedding=ent_embedding,
                             rel_embedding=rel_embedding)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=False,
                            word_embeddings=word_vec,
                            cell_name="LSTM",
                            simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=False,
                            word_embeddings=word_vec,
                            cell_name="GRU",
                            simple_position=True)
    elif FLAGS.model.lower() == "bi-lstm" or FLAGS.model.lower() == "bilstm":
        model = network.BiRNN(is_training=False,
                              word_embeddings=word_vec,
                              cell_name="LSTM",
                              simple_position=True)
    elif FLAGS.model.lower() == "bi-gru" or FLAGS.model.lower() == "bigru":
        model = network.BiRNN(is_training=False,
                              word_embeddings=word_vec,
                              cell_name="GRU",
                              simple_position=True)
    sess_db.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    def test_step(head, tail, word, pos1, pos2, mask, leng, label_index, label,
                  scope, head_desc, tail_desc):
        feed_dict = {
            model.head_index: head,
            model.tail_index: tail,
            model.word: word,
            model.pos1: pos1,
            model.pos2: pos2,
            model.mask: mask,
            model.len: leng,
            model.label_index: label_index,
            model.label: label,
            model.scope: scope,
            model.keep_prob: FLAGS.keep_prob,
            model.head_description: head_desc,
            model.tail_description: tail_desc
        }

        if FLAGS.katt_flag == 1:
            output, head_desc_att, tail_desc_att = sess_db.run(
                [model.test_output, model.head_desc_att, model.tail_desc_att],
                feed_dict)
        else:
            output = sess_db.run(model.test_output, feed_dict)

        # np.save('./case_study/head_desc_att',head_desc_att)
        # np.save('./case_study/tail_desc_att',tail_desc_att)
        # output = sess_db.run(model.test_output, feed_dict)
        return output

    f = open('results.txt', 'w')
    f.write('iteration\taverage precision\tP@100\tP@300\tP@500\n')
    for iters in range(1, 15):
        print(iters)
        saver.restore(
            sess_db, FLAGS.checkpoint_path + FLAGS.save_name + '/' +
            FLAGS.model + str(FLAGS.katt_flag) + "-" + str(80 * iters))
        summary_writer = tf.summary.FileWriter(FLAGS.summary_dir,
                                               sess_db.graph)
        stack_output = []
        stack_label = []

        iteration = len(test_instance_scope) // FLAGS.test_batch_size

        for i in range(iteration):
            temp_str = 'running ' + str(i) + '/' + str(iteration) + '...'
            sys.stdout.write(temp_str + '\r')
            sys.stdout.flush()
            input_scope = test_instance_scope[i *
                                              FLAGS.test_batch_size:(i + 1) *
                                              FLAGS.test_batch_size]
            index = []
            scope = [0]
            label = []
            # print('input_scope:',input_scope)
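            # Expand each bag's [start, end] sentence range into flat indices,
            # record each bag's relation label, and build cumulative scope
            # offsets used to split the flattened batch back into bags.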
            for num in input_scope:
                index = index + list(range(num[0], num[1] + 1))
                label.append(test_label[num[0]])
                scope.append(scope[len(scope) - 1] + num[1] - num[0] + 1)

            label_ = np.zeros((FLAGS.test_batch_size, FLAGS.num_classes))
            label_[np.arange(FLAGS.test_batch_size), label] = 1
            output = test_step(test_head[index], test_tail[index],
                               test_word[index, :], test_pos1[index, :],
                               test_pos2[index, :], test_mask[index, :],
                               test_len[index], test_label[index], label_,
                               np.array(scope), test_desc_head[index],
                               test_desc_tail[index])
            stack_output.append(output)
            stack_label.append(label_)

        # print('attention score:',np.shape(attention_score))
        # np.save('attention_scpre',attention_score)
        print('evaluating...')
        # print(stack_output)

        # ff = open('attention.txt','w')
        # ff.write(attention_score)
        # ff.close()
        stack_output = np.concatenate(stack_output, axis=0)
        stack_label = np.concatenate(stack_label, axis=0)

        exclude_na_flatten_output = stack_output[:, 1:]
        exclude_na_flatten_label = stack_label[:, 1:]
        print(exclude_na_flatten_output.shape)
        print(exclude_na_flatten_label.shape)

        # print (exclude_na_flatten_output)

        np.save(
            './' + 'model' + str(FLAGS.alpha) + '/' + FLAGS.model +
            '+sen_att_all_prob_' + str(iters) + '.npy',
            exclude_na_flatten_output)
        np.save(
            './' + 'model' + str(FLAGS.alpha) + '/' + FLAGS.model +
            '+sen_att_all_label_' + str(iters) + '.npy',
            exclude_na_flatten_label)

        average_precision = average_precision_score(exclude_na_flatten_label,
                                                    exclude_na_flatten_output,
                                                    average="micro")
        exclude_na_flatten_label = np.reshape(exclude_na_flatten_label, -1)
        exclude_na_flatten_output = np.reshape(exclude_na_flatten_output, -1)
        order = np.argsort(-exclude_na_flatten_output)
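        # P@N: the fraction of correct labels among the N highest-scoring predictions.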
        p_100 = np.mean(exclude_na_flatten_label[order[:100]])
        p_300 = np.mean(exclude_na_flatten_label[order[:300]])
        p_500 = np.mean(exclude_na_flatten_label[order[:500]])
        print('pr: ' + str(average_precision))
        print('p@100:' + str(p_100))
        print('p@300:' + str(p_300))
        print('p@500:' + str(p_500))

        f.write(
            str(iters) + '\t' + str(average_precision) + '\t' + str(p_100) +
            '\t' + str(p_300) + '\t' + str(p_500) + '\n')
    f.close()
Example #2
def main(_):
    time_start = time.time()

    save_path = './model/'

    print('reading word embedding')
    word_embedding = np.load('./data/vec.npy')

    print('reading corpus')
    train_y = np.load('./data/small_y.npy')
    train_word = np.load('./data/small_word.npy')
    train_pos1 = np.load('./data/small_pos1.npy')
    train_pos2 = np.load('./data/small_pos2.npy')

    context_word, context_pos1, context_pos2, context_y = context_split(
        train_word, train_pos1, train_pos2, train_y
    )

    settings = network.Settings()
    settings.vocab_size = len(word_embedding)
    settings.num_classes = len(context_y[0])

    print(settings.num_classes)

    entity_count = settings.entity_count

    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            initializer = tf.contrib.layers.xavier_initializer()
            with tf.variable_scope('model', reuse=None,
                                   initializer=initializer):
                m = network.PCNN(is_training=True,
                                 word_embeddings=word_embedding,
                                 settings=settings)
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(0.001)
            train_op = optimizer.minimize(m.final_loss, global_step=global_step)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(max_to_keep=None)

            merged_summary = tf.summary.merge_all()
            summary_writer = tf.summary.FileWriter(
                FLAGS.summary_dir + '/train_loss', sess.graph
            )

            def train_step(word_batch, pos1_batch, pos2_batch, y_batch,
                           entity_count):
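                # Flatten each bag's sentences (split into left/mid/right
                # segments) into contiguous arrays; total_shape records the
                # cumulative bag boundaries so the model can recover each bag.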
                feed_dict = {}
                total_shape = []
                total_num = 0
                total_word = [[], [], []]
                total_pos1 = [[], [], []]
                total_pos2 = [[], [], []]
                for i in range(len(word_batch[0])):
                    total_shape.append(total_num)
                    total_num += len(word_batch[0][i])
                    for loc in range(3):
                        for word in word_batch[loc][i]:
                            total_word[loc].append(word)
                        for pos1 in pos1_batch[loc][i]:
                            total_pos1[loc].append(pos1)
                        for pos2 in pos2_batch[loc][i]:
                            total_pos2[loc].append(pos2)
                total_shape.append(total_num)
                total_shape = np.array(total_shape)

                feed_dict[m.total_shape] = np.array(total_shape)

                feed_dict[m.input_word_left] = np.array(total_word[0])
                feed_dict[m.input_word_mid] = np.array(total_word[1])
                feed_dict[m.input_word_right] = np.array(total_word[2])

                feed_dict[m.input_pos1_left] = np.array(total_pos1[0])
                feed_dict[m.input_pos1_mid] = np.array(total_pos1[1])
                feed_dict[m.input_pos1_right] = np.array(total_pos1[2])

                feed_dict[m.input_pos2_left] = np.array(total_pos2[0])
                feed_dict[m.input_pos2_mid] = np.array(total_pos2[1])
                feed_dict[m.input_pos2_right] = np.array(total_pos2[2])

                feed_dict[m.input_y] = y_batch

                _, step, loss, accuracy, summary, l2_loss, final_loss = \
                    sess.run(
                        [train_op, global_step, m.total_loss, m.accuracy,
                         merged_summary, m.l2_loss, m.final_loss],
                        feed_dict)
                time_str = datetime.datetime.now().isoformat()
                accuracy = np.reshape(np.array(accuracy), entity_count)
                acc = np.mean(accuracy)
                summary_writer.add_summary(summary, step)

                if step % 50 == 0:
                    tmp_str = '{}: step{}, softmax_loss {:g}, acc {:g}'.format(
                        time_str, step, loss, acc
                    )
                    print(tmp_str)

            for one_epoch in range(settings.num_epochs):
                tmp_order = np.arange(len(context_word[0]))
                np.random.shuffle(tmp_order)
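                # Each training step covers settings.entity_count bags; overly
                # long batches (more than 1500 flattened sentences) are skipped below.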
                for i in range(len(tmp_order) // settings.entity_count):
                    tmp_word = [[], [], []]
                    tmp_pos1 = [[], [], []]
                    tmp_pos2 = [[], [], []]
                    tmp_y = []
                    tmp_input = tmp_order[
                        i*settings.entity_count:(i + 1)*settings.entity_count
                    ]
                    for k in tmp_input:
                        for loc in range(3):
                            tmp_word[loc].append(context_word[loc][k])
                            tmp_pos1[loc].append(context_pos1[loc][k])
                            tmp_pos2[loc].append(context_pos2[loc][k])
                        tmp_y.append(context_y[k])
                    num = 0
                    for single_word in tmp_word[0]:
                        num += len(single_word)

                    if num > 1500:
                        print('out of range')
                        continue

                    train_step(tmp_word, tmp_pos1, tmp_pos2, tmp_y,
                               settings.entity_count)

                    current_step = tf.train.global_step(sess, global_step)
                    if current_step > 9000 and current_step % 500 == 0:
                        print('saving model')
                        path = saver.save(sess, save_path + 'PCNN_model',
                                          global_step=current_step)
                        tmpstr = 'saved model to ' + path
                        print(tmpstr)
    time_finish = time.time()
    time_elapsed = time_finish - time_start
    print('Time Used:', str(datetime.timedelta(seconds=time_elapsed)))
Example #3
def main(_):

    print 'reading word embedding'
    word_vec = np.load(export_path + 'vec.npy')
    print 'reading training data'

    instance_triple = np.load(export_path + 'train_instance_triple.npy')
    instance_scope = np.load(export_path + 'train_instance_scope.npy')
    train_len = np.load(export_path + 'train_len.npy')
    train_label = np.load(export_path + 'train_label.npy')
    train_word = np.load(export_path + 'train_word.npy')
    train_pos1 = np.load(export_path + 'train_pos1.npy')
    train_pos2 = np.load(export_path + 'train_pos2.npy')
    train_mask = np.load(export_path + 'train_mask.npy')
    train_head = np.load(export_path + 'train_head.npy')
    train_tail = np.load(export_path + 'train_tail.npy')

    print 'reading finished'
    print 'mentions 		: %d' % (len(instance_triple))
    print 'sentences		: %d' % (len(train_len))
    print 'relations		: %d' % (FLAGS.num_classes)
    print 'word size		: %d' % (len(word_vec[0]))
    print 'position size 	: %d' % (FLAGS.pos_size)
    print 'hidden size		: %d' % (FLAGS.hidden_size)
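    # Compute an inverse-frequency weight per relation: weight(r) = 1 / count(r)^0.05,
    # which softly down-weights very frequent relations in the training loss.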
    reltot = {}
    for index, i in enumerate(train_label):
        if not i in reltot:
            reltot[i] = 1.0
        else:
            reltot[i] += 1.0
    for i in reltot:
        reltot[i] = 1 / (reltot[i]**(0.05))
    print 'building network...'
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=True,
                            word_embeddings=word_vec,
                            cell_name="LSTM",
                            simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=True,
                            word_embeddings=word_vec,
                            cell_name="GRU",
                            simple_position=True)
    elif FLAGS.model.lower() == "bi-lstm" or FLAGS.model.lower() == "bilstm":
        model = network.BiRNN(is_training=True,
                              word_embeddings=word_vec,
                              cell_name="LSTM",
                              simple_position=True)
    elif FLAGS.model.lower() == "bi-gru" or FLAGS.model.lower() == "bigru":
        model = network.BiRNN(is_training=True,
                              word_embeddings=word_vec,
                              cell_name="GRU",
                              simple_position=True)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    global_step_kg = tf.Variable(0, name='global_step_kg', trainable=False)
    global_step_kg_satt = tf.Variable(0,
                                      name='global_step_kg_satt',
                                      trainable=False)
    tf.summary.scalar('learning_rate', FLAGS.learning_rate)
    tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)

    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    optimizer_kg = tf.train.GradientDescentOptimizer(FLAGS.learning_rate_kg)
    grads_and_vars_kg = optimizer_kg.compute_gradients(model.loss_kg)
    train_op_kg = optimizer_kg.apply_gradients(grads_and_vars_kg,
                                               global_step=global_step_kg)

    optimizer_kg_satt = tf.train.GradientDescentOptimizer(
        FLAGS.learning_rate_kg)
    grads_and_vars_kg_satt = optimizer_kg_satt.compute_gradients(
        model.loss_kg_att)
    train_op_kg_satt = optimizer_kg_satt.apply_gradients(
        grads_and_vars_kg_satt, global_step=global_step_kg_satt)

    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)

    print 'building finished'

    def train_kg_att(coord):
        def train_step_kg_att(h_batch, t_batch, r_batch, r_scope, r_label):
            feed_dict = {
                model.pos_h: h_batch,
                model.pos_t: t_batch,
                model.pos_r: r_batch,
                model.r_scope: r_scope,
                model.r_label: r_label,
                model.r_length: np.array([len(r_label)]),
            }
            _, loss = sess.run([train_op_kg_satt, model.loss_kg_att],
                               feed_dict)
            return loss

        def merge(head, tail, rel):
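            # Group the sampled positive triples by relation id (keeping only
            # ids below num_classes); return flattened head/tail/rel arrays,
            # per-relation scope offsets (rel_config), and the relation labels.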
            hash = {}
            for (h, t, r) in zip(head, tail, rel):
                if r < FLAGS.num_classes:
                    if not r in hash:
                        hash[r] = []
                    hash[r].append((h, t))
            rel = []
            head = []
            tail = []
            rel_label = []
            rel_config = [0]
            for r in hash:
                if len(hash[r]) != 0:
                    rel_config.append(rel_config[-1])
                    rel_label.append(r)
                    for h, t in hash[r]:
                        rel_config[-1] += 1
                        head.append(h)
                        tail.append(t)
                        rel.append(r)
            return np.array(head), np.array(tail), np.array(rel), np.array(
                rel_config), np.array(rel_label)

        batch_size = (FLAGS.tri_total / FLAGS.nbatch_kg)
        ph = np.zeros(batch_size, dtype=np.int32)
        pt = np.zeros(batch_size, dtype=np.int32)
        pr = np.zeros(batch_size, dtype=np.int32)
        nh = np.zeros(batch_size, dtype=np.int32)
        nt = np.zeros(batch_size, dtype=np.int32)
        nr = np.zeros(batch_size, dtype=np.int32)
        ph_addr = ph.__array_interface__['data'][0]
        pt_addr = pt.__array_interface__['data'][0]
        pr_addr = pr.__array_interface__['data'][0]
        nh_addr = nh.__array_interface__['data'][0]
        nt_addr = nt.__array_interface__['data'][0]
        nr_addr = nr.__array_interface__['data'][0]
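        # Hand the raw buffer addresses to the C sampling library; getBatch
        # fills these arrays in place with positive and corrupted triples.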
        lib.getBatch.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int
        ]
        times_kg = 0
        while not coord.should_stop():
            times_kg += 1
            # if times_kg == 3000:
            # 	coord.request_stop()
            res = 0.0
            for batch in range(FLAGS.nbatch_kg):
                lib.getBatch(ph_addr, pt_addr, pr_addr, nh_addr, nt_addr,
                             nr_addr, batch_size)
                h, t, r, r_range, r_label = merge(ph, pt, pr)
                res += train_step_kg_att(h, t, r, r_range, r_label)
            time_str = datetime.datetime.now().isoformat()
            print "batch %d time %s | loss : %f" % (times_kg, time_str, res)

    def train_kg(coord):
        def train_step_kg(pos_h_batch, pos_t_batch, pos_r_batch, neg_h_batch,
                          neg_t_batch, neg_r_batch):
            feed_dict = {
                model.pos_h: pos_h_batch,
                model.pos_t: pos_t_batch,
                model.pos_r: pos_r_batch,
                model.neg_h: neg_h_batch,
                model.neg_t: neg_t_batch,
                model.neg_r: neg_r_batch
            }
            _, step, loss = sess.run(
                [train_op_kg, global_step_kg, model.loss_kg], feed_dict)
            return loss

        batch_size = (FLAGS.tri_total / FLAGS.nbatch_kg)
        ph = np.zeros(batch_size, dtype=np.int32)
        pt = np.zeros(batch_size, dtype=np.int32)
        pr = np.zeros(batch_size, dtype=np.int32)
        nh = np.zeros(batch_size, dtype=np.int32)
        nt = np.zeros(batch_size, dtype=np.int32)
        nr = np.zeros(batch_size, dtype=np.int32)
        ph_addr = ph.__array_interface__['data'][0]
        pt_addr = pt.__array_interface__['data'][0]
        pr_addr = pr.__array_interface__['data'][0]
        nh_addr = nh.__array_interface__['data'][0]
        nt_addr = nt.__array_interface__['data'][0]
        nr_addr = nr.__array_interface__['data'][0]
        lib.getBatch.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int
        ]
        times_kg = 0
        while not coord.should_stop():
            times_kg += 1
            # if times_kg == 3000:
            # 	coord.request_stop()
            res = 0.0
            for batch in range(FLAGS.nbatch_kg):
                lib.getBatch(ph_addr, pt_addr, pr_addr, nh_addr, nt_addr,
                             nr_addr, batch_size)
                res += train_step_kg(ph, pt, pr, nh, nt, nr)
            time_str = datetime.datetime.now().isoformat()
            print "batch %d time %s | loss : %f" % (times_kg, time_str, res)

    def train_nn(coord):
        def train_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                       label, scope, weights):
            feed_dict = {
                model.head_index: head,
                model.tail_index: tail,
                model.word: word,
                model.pos1: pos1,
                model.pos2: pos2,
                model.mask: mask,
                model.len: leng,
                model.label_index: label_index,
                model.label: label,
                model.scope: scope,
                model.keep_prob: FLAGS.keep_prob,
                model.weights: weights
            }
            _, step, loss, summary, output, correct_predictions = sess.run([
                train_op, global_step, model.loss, merged_summary,
                model.output, model.correct_predictions
            ], feed_dict)
            summary_writer.add_summary(summary, step)
            return output, loss, correct_predictions

        stack_output = []
        stack_label = []
        stack_ce_loss = []

        train_order = range(len(instance_triple))

        save_epoch = 2
        eval_step = 300

        for one_epoch in range(FLAGS.max_epoch):

            print('epoch ' + str(one_epoch + 1) + ' starts!')
            np.random.shuffle(train_order)
            s1 = 0.0
            s2 = 0.0
            tot1 = 1.0
            tot2 = 1.0
            losstot = 0.0
            for i in range(int(len(train_order) / float(FLAGS.batch_size))):
                input_scope = np.take(
                    instance_scope,
                    train_order[i * FLAGS.batch_size:(i + 1) *
                                FLAGS.batch_size],
                    axis=0)
                index = []
                scope = [0]
                label = []
                weights = []
                for num in input_scope:
                    index = index + range(num[0], num[1] + 1)
                    label.append(train_label[num[0]])
                    scope.append(scope[len(scope) - 1] + num[1] - num[0] + 1)
                    weights.append(reltot[train_label[num[0]]])
                label_ = np.zeros((FLAGS.batch_size, FLAGS.num_classes))
                label_[np.arange(FLAGS.batch_size), label] = 1
                output, loss, correct_predictions = train_step(
                    train_head[index], train_tail[index], train_word[index, :],
                    train_pos1[index, :], train_pos2[index, :],
                    train_mask[index, :], train_len[index], train_label[index],
                    label_, np.array(scope), weights)
                num = 0
                s = 0
                losstot += loss
                for num in correct_predictions:
                    if label[s] == 0:
                        tot1 += 1.0
                        if num:
                            s1 += 1.0
                    else:
                        tot2 += 1.0
                        if num:
                            s2 += 1.0
                    s = s + 1

                time_str = datetime.datetime.now().isoformat()
                # print "batch %d step %d time %s | loss : %f, NA accuracy: %f, not NA accuracy: %f" % (one_epoch, i, time_str, loss, s1 / tot1, s2 / tot2)
                current_step = tf.train.global_step(sess, global_step)

            if (one_epoch + 1) % save_epoch == 0:
                print 'epoch ' + str(one_epoch + 1) + ' has finished'
                print 'saving model...'
                path = saver.save(sess,
                                  FLAGS.model_dir + FLAGS.model +
                                  str(FLAGS.katt_flag),
                                  global_step=current_step)
        coord.request_stop()

    coord = tf.train.Coordinator()
    threads = []
    threads.append(threading.Thread(target=train_kg, args=(coord, )))
    threads.append(threading.Thread(target=train_nn, args=(coord, )))
    threads.append(threading.Thread(target=train_kg_att, args=(coord, )))
    for t in threads:
        t.start()
    coord.join(threads)
    if (FLAGS.store_kg_flag != 0):
        print 'saving kg...'
        ent_embedding, rel_embedding = sess.run(
            [model.word_embedding, model.rel_embeddings])
        ent_embedding = ent_embedding.tolist()
        rel_embedding = rel_embedding.tolist()
        f = open("entity2vec", "w")
        f.write(json.dumps(ent_embedding))
        f.close()
        f = open("relation2vec", "w")
        f.write(json.dumps(rel_embedding))
        f.close()
Example #4
def main(_):

    print 'reading word embedding'
    word_vec = np.load(export_path + 'vec.npy')
    print 'reading test data'
    test_instance_triple = np.load(export_path + 'test_instance_triple.npy')
    test_instance_scope = np.load(export_path + 'test_instance_scope.npy')
    test_len = np.load(export_path + 'test_len.npy')
    test_label = np.load(export_path + 'test_label.npy')
    test_word = np.load(export_path + 'test_word.npy')
    test_pos1 = np.load(export_path + 'test_pos1.npy')
    test_pos2 = np.load(export_path + 'test_pos2.npy')
    test_mask = np.load(export_path + 'test_mask.npy')
    test_head = np.load(export_path + 'test_head.npy')
    test_tail = np.load(export_path + 'test_tail.npy')
    print 'reading finished'
    print 'mentions 		: %d' % (len(test_instance_triple))
    print 'sentences		: %d' % (len(test_len))
    print 'relations		: %d' % (FLAGS.num_classes)
    print 'word size		: %d' % (len(word_vec[0]))
    print 'position size 	: %d' % (FLAGS.pos_size)
    print 'hidden size		: %d' % (FLAGS.hidden_size)
    print 'reading finished'

    print 'building network...'
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=False, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=False, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=False,
                            word_embeddings=word_vec,
                            cell_name="LSTM",
                            simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=False,
                            word_embeddings=word_vec,
                            cell_name="GRU",
                            simple_position=True)
    elif FLAGS.model.lower() == "bi-lstm" or FLAGS.model.lower() == "bilstm":
        model = network.BiRNN(is_training=False,
                              word_embeddings=word_vec,
                              cell_name="LSTM",
                              simple_position=True)
    elif FLAGS.model.lower() == "bi-gru" or FLAGS.model.lower() == "bigru":
        model = network.BiRNN(is_training=False,
                              word_embeddings=word_vec,
                              cell_name="GRU",
                              simple_position=True)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    def test_step(head, tail, word, pos1, pos2, mask, leng, label_index, label,
                  scope):
        feed_dict = {
            model.head_index: head,
            model.tail_index: tail,
            model.word: word,
            model.pos1: pos1,
            model.pos2: pos2,
            model.mask: mask,
            model.len: leng,
            model.label_index: label_index,
            model.label: label,
            model.scope: scope,
            model.keep_prob: FLAGS.keep_prob
        }
        output = sess.run(model.test_output, feed_dict)
        return output

    FLAGS.test_batch_size, FLAGS.num_classes = int(FLAGS.test_batch_size), int(
        FLAGS.num_classes)
    f = open('results.txt', 'w')
    f.write('iteration\taverage precision\n')
    for iters in range(1, 30):
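        # Evaluate a sequence of saved checkpoints (one every 3664 training steps).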
        print iters
        saver.restore(
            sess, FLAGS.checkpoint_path + FLAGS.model +
            str(int(FLAGS.katt_flag)) + "-" + str(3664 * iters))

        stack_output = []
        stack_label = []

        iteration = len(test_instance_scope) / FLAGS.test_batch_size
        for i in range(int(iteration)):
            temp_str = 'running ' + str(i) + '/' + str(iteration) + '...'
            sys.stdout.write(temp_str + '\r')
            sys.stdout.flush()
            input_scope = test_instance_scope[i *
                                              FLAGS.test_batch_size:(i + 1) *
                                              FLAGS.test_batch_size]
            index = []
            scope = [0]
            label = []
            for num in input_scope:
                index = index + range(num[0], num[1] + 1)
                label.append(test_label[num[0]])
                scope.append(scope[len(scope) - 1] + num[1] - num[0] + 1)
            label_ = np.zeros((FLAGS.test_batch_size, FLAGS.num_classes))
            label_[np.arange(FLAGS.test_batch_size), label] = 1
            output = test_step(test_head[index], test_tail[index],
                               test_word[index, :], test_pos1[index, :],
                               test_pos2[index, :], test_mask[index, :],
                               test_len[index], test_label[index], label_,
                               np.array(scope))
            stack_output.append(output)
            stack_label.append(label_)

        print 'evaluating...'

        stack_output = np.concatenate(stack_output, axis=0)
        stack_label = np.concatenate(stack_label, axis=0)

        exclude_na_flatten_output = stack_output[:, 1:]
        exclude_na_flatten_label = stack_label[:, 1:]
        print exclude_na_flatten_output.shape
        print exclude_na_flatten_label.shape

        average_precision = average_precision_score(exclude_na_flatten_label,
                                                    exclude_na_flatten_output,
                                                    average="micro")

        np.save(
            './' + FLAGS.model + '+sen_att_all_prob_' + str(iters) + '.npy',
            exclude_na_flatten_output)
        np.save(
            './' + FLAGS.model + '+sen_att_all_label_' + str(iters) + '.npy',
            exclude_na_flatten_label)

        print 'average_precision: ' + str(average_precision)
        f.write(str(average_precision) + '\n')
    f.close()
Example #5
def main(_):
    pathname = './model/PCNN_model-'
    word_embedding = np.load('./data/vec.npy')
    test_settings = network.Settings()
    test_settings.vocab_size = 114044
    test_settings.num_classes = test_num_classes = 53
    test_settings.entity_count = test_entity_count = 262 * 9

    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            with tf.variable_scope('model'):
                mtest = network.PCNN(is_training=False,
                                     word_embeddings=word_embedding,
                                     settings=test_settings)
            saver = tf.train.Saver()

            def test_step(word_batch, pos1_batch, pos2_batch, y_batch):
                feed_dict = {}
                total_shape = []
                total_num = 0
                total_word = [[], [], []]
                total_pos1 = [[], [], []]
                total_pos2 = [[], [], []]
                for i in range(len(word_batch[0])):
                    total_shape.append(total_num)
                    total_num += len(word_batch[0][i])
                    for loc in range(3):
                        for word in word_batch[loc][i]:
                            total_word[loc].append(word)
                        for pos1 in pos1_batch[loc][i]:
                            total_pos1[loc].append(pos1)
                        for pos2 in pos2_batch[loc][i]:
                            total_pos2[loc].append(pos2)
                total_shape.append(total_num)
                total_shape = np.array(total_shape)

                feed_dict[mtest.total_shape] = np.array(total_shape)

                feed_dict[mtest.input_word_left] = np.array(total_word[0])
                feed_dict[mtest.input_word_mid] = np.array(total_word[1])
                feed_dict[mtest.input_word_right] = np.array(total_word[2])

                feed_dict[mtest.input_pos1_left] = np.array(total_pos1[0])
                feed_dict[mtest.input_pos1_mid] = np.array(total_pos1[1])
                feed_dict[mtest.input_pos1_right] = np.array(total_pos1[2])

                feed_dict[mtest.input_pos2_left] = np.array(total_pos2[0])
                feed_dict[mtest.input_pos2_mid] = np.array(total_pos2[1])
                feed_dict[mtest.input_pos2_right] = np.array(total_pos2[2])

                feed_dict[mtest.input_y] = y_batch

                loss, accuracy, prob = sess.run(
                    [mtest.loss, mtest.accuracy, mtest.prob], feed_dict)
                return prob, accuracy

            def generate_prob(test_y, test_word, test_pos1, test_pos2,
                              test_settings):
                all_prob = []
                acc = []
                entity_count = test_settings.entity_count
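                # Walk over the test set in chunks of entity_count bags and
                # collect each bag's probabilities for the non-NA classes.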
                for i in range(len(test_word[0]) // entity_count):
                    prob, accuracy = test_step(
                        slice_cascade_data(test_word, i * entity_count,
                                           (i + 1) * entity_count),
                        slice_cascade_data(test_pos1, i * entity_count,
                                           (i + 1) * entity_count),
                        slice_cascade_data(test_pos2, i * entity_count,
                                           (i + 1) * entity_count),
                        test_y[i * entity_count:(i + 1) * entity_count])
                    acc.append(
                        np.mean(np.reshape(np.array(accuracy), entity_count)))
                    prob = np.reshape(np.array(prob),
                                      (entity_count, test_num_classes))
                    for single_prob in prob:
                        all_prob.append(single_prob[1:])

                all_prob = np.reshape(np.array(all_prob), (-1))
                return all_prob

            def print_pn(all_ans, all_prob):
                # Report precision at the top 100/200/300 ranked predictions.
                order = np.argsort(-all_prob)
                for n in (100, 200, 300):
                    top_n = order[:n]
                    correct_num = 0.0
                    for i in top_n:
                        if all_ans[i] == 1:
                            correct_num += 1.0
                    print('P@%d:' % n)
                    print(correct_num / n)

            def eval_pn(test_y, test_word, test_pos1, test_pos2,
                        test_settings):
                all_prob = generate_prob(test_y, test_word, test_pos1,
                                         test_pos2, test_settings)
                eval_y = []
                for i in test_y:
                    eval_y.append(i[1:])
                all_ans = np.reshape(eval_y, -1)
                print_pn(all_ans, all_prob)

            test_list = [17000]

            for model_iter in test_list:
                saver.restore(sess, pathname + str(model_iter))
                print('Restore Complete')
                print('Evaluating P@N for iter' + str(model_iter))

                print('Evaluating P@N for one:')
                test_y = np.load('./data/pone_test_y.npy')
                test_word = np.load('./data/pone_test_word.npy')
                test_pos1 = np.load('./data/pone_test_pos1.npy')
                test_pos2 = np.load('./data/pone_test_pos2.npy')
                c_word, c_pos1, c_pos2, c_y = context_split(
                    test_word, test_pos1, test_pos2, test_y)
                eval_pn(c_y, c_word, c_pos1, c_pos2, test_settings)

                print('Evaluating P@N for two:')
                test_y = np.load('./data/ptwo_test_y.npy')
                test_word = np.load('./data/ptwo_test_word.npy')
                test_pos1 = np.load('./data/ptwo_test_pos1.npy')
                test_pos2 = np.load('./data/ptwo_test_pos2.npy')
                c_word, c_pos1, c_pos2, c_y = context_split(
                    test_word, test_pos1, test_pos2, test_y)
                eval_pn(c_y, c_word, c_pos1, c_pos2, test_settings)

                print('Evaluating P@N for all:')
                test_y = np.load('./data/pall_test_y.npy')
                test_word = np.load('./data/pall_test_word.npy')
                test_pos1 = np.load('./data/pall_test_pos1.npy')
                test_pos2 = np.load('./data/pall_test_pos2.npy')
                c_word, c_pos1, c_pos2, c_y = context_split(
                    test_word, test_pos1, test_pos2, test_y)
                eval_pn(c_y, c_word, c_pos1, c_pos2, test_settings)

                time_str = datetime.datetime.now().isoformat()
                print(time_str)

                print('Evaluating all test data and save data for PR curve')
                test_y = np.load('./data/testall_y.npy')
                test_word = np.load('./data/testall_word.npy')
                test_pos1 = np.load('./data/testall_pos1.npy')
                test_pos2 = np.load('./data/testall_pos2.npy')
                c_word, c_pos1, c_pos2, c_y = context_split(
                    test_word, test_pos1, test_pos2, test_y)

                print('Sanity Check')
                print(len(c_word[0]), len(c_y))

                all_prob_ = generate_prob(c_y, c_word, c_pos1, c_pos2,
                                          test_settings)
                # all_ans_ = np.load('./data/allans.npy')
                eval_y = []
                for i in c_y:
                    eval_y.append(i[1:])
                all_ans_ = np.reshape(eval_y, -1)

                print('P@N for all test data:')
                print_pn(all_ans_, all_prob_)

                print('saving all test result...')
                current_step = model_iter
                np.save('./out/all_prob_iter_' + str(current_step) + '.npy',
                        all_prob_)

                # print(np.shape(all_prob_), np.shape(all_ans_))
                # length of all_prob_ is shorter than all_ans_
                # because of batching

                all_ans_trimmed = all_ans_[:all_prob_.size]
                avg_precision = average_precision_score(
                    all_ans_trimmed, all_prob_)
                print('PR curve area:', str(avg_precision))

                time_str = datetime.datetime.now().isoformat()
                print(time_str)
Example #6
def main(_):

	print ('reading word embedding')
	word_vec = np.load(export_path + 'vec.npy') if use_embedding else None
	print ('reading training data')
	
	instance_triple = np.load(export_path + 'train_instance_triple.npy')
	instance_scope = np.load(export_path + 'train_instance_scope.npy')
	train_len = np.load(export_path + 'train_len.npy')
	train_label = np.load(export_path + 'train_label.npy') # relation idx for each sample
	train_word = np.load(export_path + 'train_word.npy')
	train_pos1 = np.load(export_path + 'train_pos1.npy')
	train_pos2 = np.load(export_path + 'train_pos2.npy')
	train_mask = np.load(export_path + 'train_mask.npy')
	train_head = np.load(export_path + 'train_head.npy')
	train_tail = np.load(export_path + 'train_tail.npy')

	print ('reading finished')
	print ('mentions 		: %d' % (len(instance_triple)))
	print ('sentences		: %d' % (len(train_len)))
	print ('relations		: %d' % (FLAGS.num_classes))
	print ('position size 	: %d' % (FLAGS.pos_size))
	print ('hidden size		: %d' % (FLAGS.hidden_size))
	# train_label holds each sample's relation index; count how often each
	# relation occurs so that frequent relations can be down-weighted below.
	reltot = {}
	for index, i in enumerate(train_label):
		if not i in reltot:
			reltot[i] = 1.0
		else:
			reltot[i] += 1.0
	for i in reltot:
		reltot[i] = 1/(reltot[i] ** (0.05)) 
	print ('building network...')
	sess = tf.Session()
	if FLAGS.model.lower() == "cnn":
		model = network.CNN(use_embedding,embedding_size,is_training = True,word_embeddings = word_vec)
	elif FLAGS.model.lower() == "pcnn":
		model = network.PCNN(use_embedding,embedding_size,is_training = True,word_embeddings = word_vec)
	elif FLAGS.model.lower() == "lstm":
		model = network.RNN(use_embedding,embedding_size,is_training = True,word_embeddings = word_vec, cell_name = "LSTM", simple_position = True)
	elif FLAGS.model.lower() == "gru":
		model = network.RNN(use_embedding,embedding_size,is_training = True,word_embeddings = word_vec, cell_name = "GRU", simple_position = True)
	elif FLAGS.model.lower() == "bi-lstm" or FLAGS.model.lower() == "bilstm":
		model = network.BiRNN(use_embedding,embedding_size,is_training = True, word_embeddings = word_vec, cell_name = "LSTM", simple_position = True)
	elif FLAGS.model.lower() == "bi-gru" or FLAGS.model.lower() == "bigru":
		model = network.BiRNN(use_embedding,embedding_size,is_training = True, word_embeddings = word_vec, cell_name = "GRU", simple_position = True)
	# Once the model type is fixed, a single model instance is trained; the
	# text and KG objectives just get separate optimizers and step counters.

	global_step = tf.Variable(0,name='global_step',trainable=False)
	global_step_kg = tf.Variable(0,name='global_step_kg',trainable=False)
	tf.summary.scalar('learning_rate', FLAGS.learning_rate)
	tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)

	# relation-extraction (text) training op
	optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)   # sgd
	grads_and_vars = optimizer.compute_gradients(model.loss)
	train_op = optimizer.apply_gradients(grads_and_vars, global_step = global_step)
	# knowledge-graph (TransX) training op
	optimizer_kg = tf.train.GradientDescentOptimizer(FLAGS.learning_rate_kg)
	grads_and_vars_kg = optimizer_kg.compute_gradients(model.loss_kg)
	train_op_kg = optimizer_kg.apply_gradients(grads_and_vars_kg, global_step = global_step_kg)

	merged_summary = tf.summary.merge_all()
	summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
	sess.run(tf.global_variables_initializer())
	#saver = tf.train.Saver(max_to_keep=None)
	saver = tf.train.Saver()
	print ('building finished')

	def train_kg(coord):
		# train_step_kg consumes the (ph, pt, pr, nh, nt, nr) batches filled
		# in by the C++ sampling library.
		def train_step_kg(pos_h_batch, pos_t_batch, pos_r_batch, neg_h_batch, neg_t_batch, neg_r_batch):
			feed_dict = {
				model.pos_h: pos_h_batch,
				model.pos_t: pos_t_batch,
				model.pos_r: pos_r_batch,
				model.neg_h: neg_h_batch,
				model.neg_t: neg_t_batch,
				model.neg_r: neg_r_batch
			}
			_, step, loss = sess.run(
				[train_op_kg, global_step_kg, model.loss_kg], feed_dict)
			return loss

		batch_size = (int(FLAGS.tri_total) // int(FLAGS.nbatch_kg))   # triples per KG batch
		#batch_size = (int(FLAGS.ent_total) // int(FLAGS.nbatch_kg))  # should not be FLAGS.tri_total
		# Allocate per-batch buffers; each element is a 32-bit integer.
		ph = np.zeros(batch_size, dtype = np.int32)                   # positive head entities
		pt = np.zeros(batch_size, dtype = np.int32)                   # positive tail entities
		pr = np.zeros(batch_size, dtype = np.int32)                   # positive relations
		nh = np.zeros(batch_size, dtype = np.int32)                   # negative (corrupted) heads
		nt = np.zeros(batch_size, dtype = np.int32)                   # negative (corrupted) tails
		nr = np.zeros(batch_size, dtype = np.int32)                   # negative relations
		# __array_interface__['data'] is a 2-tuple whose first element is the
		# integer address of the array's data buffer; the C library writes
		# batches directly into these numpy arrays through these addresses.
		ph_addr = ph.__array_interface__['data'][0]
		pt_addr = pt.__array_interface__['data'][0]
		pr_addr = pr.__array_interface__['data'][0]
		nh_addr = nh.__array_interface__['data'][0]
		nt_addr = nt.__array_interface__['data'][0]
		nr_addr = nr.__array_interface__['data'][0]
		# Declare the C signature: six void* buffer addresses plus an int batch size.
		lib.getBatch.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int]
		times_kg = 0
		# The KG side trains as a continuous stream of batches (no epochs) and
		# stops when the coordinator requests it.
		while not coord.should_stop():
			times_kg += 1
			res = 0.0
			#print(type(FLAGS.nbatch_kg))
			#print(FLAGS.nbatch_kg)
			for batch in range(int(FLAGS.nbatch_kg)):
				lib.getBatch(ph_addr, pt_addr, pr_addr, nh_addr, nt_addr, nr_addr, batch_size)
                
				res += train_step_kg(ph, pt, pr, nh, nt, nr)
			time_str = datetime.datetime.now().isoformat()
			print ("KB batch %d time %s | loss : %f" % (times_kg, time_str, res))
			if pure_KB and times_kg % 20000 == 0:
				print ('saving model...')
				# path = saver.save(sess,FLAGS.model_dir+FLAGS.model+str(FLAGS.katt_flag)+transX, global_step=current_step)
				path = saver.save(sess,FLAGS.model_dir+FLAGS.model+str(FLAGS.katt_flag)+transX+'_pureKB_'+str(pure_KB)+'_epoch'+str(times_kg)+'_nkb'+str(FLAGS.nbatch_kg)+'_win'+str(FLAGS.win_size)+'_'+str(embedding_size))
              # tf.app.flags.DEFINE_string('model_dir','./model/','path to store model')
              # tf.app.flags.DEFINE_string('checkpoint_path','./model/','path to store model')
              # saver.restore(sess, FLAGS.checkpoint_path + FLAGS.model+str(FLAGS.katt_flag)+"-"+str(3664*iters))
				print ('saved model to ' + path)
			if pure_KB and times_kg==160000:
				coord.request_stop()


	def train_nn(coord):
		# train_step is fed flat numpy arrays: the flattened sentences of a
		# batch of bags plus the scope offsets and per-bag labels/weights.
		def train_step(head, tail, word, pos1, pos2, mask, leng, label_index, label, scope, weights):
			feed_dict = {
				model.head_index: head,
				model.tail_index: tail,
				model.word: word,
				model.pos1: pos1,
				model.pos2: pos2,
				model.mask: mask,
				model.len : leng,
				model.label_index: label_index, # [B] true relation index per bag
				model.label: label,             # [B, num_classes] one-hot labels
				model.scope: scope,
				model.keep_prob: FLAGS.keep_prob,
				model.weights: weights
			}
			_, step, loss, summary, output, correct_predictions = sess.run([train_op, global_step, model.loss, merged_summary, model.output, model.correct_predictions], feed_dict)
			summary_writer.add_summary(summary, step)
			return output, loss, correct_predictions

		train_order = list(range(len(instance_triple)))

		save_epoch = 150

		for one_epoch in range(FLAGS.max_epoch):

			print('epoch '+str(one_epoch+1)+' starts!')
			np.random.shuffle(train_order)
			s1 = 0.0
			s2 = 0.0
			tot1 = 0.0
			tot2 = 0.0
			losstot = 0.0
			for i in range(int(len(train_order)/float(FLAGS.batch_size))):
				input_scope = np.take(instance_scope, train_order[i * FLAGS.batch_size:(i+1)*FLAGS.batch_size], axis=0)
				index = []
				scope = [0]
				label = []      # sample's true relation idx
				weights = []
				for num in input_scope:
					index = index + list(range(num[0], num[1] + 1))
					label.append(train_label[num[0]])
					if train_label[num[0]] > 53:
						pass
					scope.append(scope[len(scope)-1] + num[1] - num[0] + 1)
					weights.append(reltot[train_label[num[0]]])
				label_ = np.zeros((FLAGS.batch_size, FLAGS.num_classes))
				label_[np.arange(FLAGS.batch_size), label] = 1
				# correct_predictions: [B] 1 if a bag's prediction matches its label, else 0
				output, loss, correct_predictions = train_step(train_head[index], train_tail[index], train_word[index,:], train_pos1[index,:], train_pos2[index,:], train_mask[index,:], train_len[index],train_label[index], label_, np.array(scope), weights)
				num = 0
				s = 0
				losstot += loss
				for num in correct_predictions:
#					if label[s] == 0:
#						tot1 += 1.0
#						if num:
#							s1+= 1.0
					tot2 += 1.0
					if num:
						s2 += 1.0
					s = s + 1

				time_str = datetime.datetime.now().isoformat()
				print ("epoch %d batch %d time %s | loss : %f, accuracy: %f" % (one_epoch, i, time_str, loss, s2 / tot2))
				current_step = tf.train.global_step(sess, global_step)

			if (one_epoch + 1) % save_epoch == 0:
				print ('epoch '+str(one_epoch+1)+' has finished')
				print ('saving model...')
				# path = saver.save(sess,FLAGS.model_dir+FLAGS.model+str(FLAGS.katt_flag)+transX, global_step=current_step)
				path = saver.save(sess,FLAGS.model_dir+FLAGS.model+str(FLAGS.katt_flag)+transX+'_pureKB_'+str(pure_KB)+'_epoch'+str(one_epoch)+'_nkb'+str(FLAGS.nbatch_kg)+'_win'+str(FLAGS.win_size)+'_'+str(embedding_size))
              # tf.app.flags.DEFINE_string('model_dir','./model/','path to store model')
              # tf.app.flags.DEFINE_string('checkpoint_path','./model/','path to store model')
              # saver.restore(sess, FLAGS.checkpoint_path + FLAGS.model+str(FLAGS.katt_flag)+"-"+str(3664*iters))
				print ('saved model to ' + path)

		coord.request_stop()


	coord = tf.train.Coordinator()
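	# Run the KG objective (and, unless pure_KB is set, the sentence-level
	# relation extraction objective) in parallel threads under this coordinator.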
	threads = []
	threads.append(threading.Thread(target=train_kg, args=(coord,)))
	if not pure_KB:
		threads.append(threading.Thread(target=train_nn, args=(coord,)))
	for t in threads: t.start()
	coord.join(threads)
Example #7
def main(_):

    print 'reading word embedding'
    word_vec = np.load(export_path + 'vec.npy')
    print 'reading training data'

    instance_triple = np.load(export_path + 'train_instance_triple.npy')
    instance_scope = np.load(export_path + 'train_instance_scope.npy')
    train_len = np.load(export_path + 'train_len.npy')
    train_label = np.load(export_path + 'train_label.npy')
    train_word = np.load(export_path + 'train_word.npy')
    train_pos1 = np.load(export_path + 'train_pos1.npy')
    train_pos2 = np.load(export_path + 'train_pos2.npy')
    train_mask = np.load(export_path + 'train_mask.npy')
    train_head = np.load(export_path + 'train_head.npy')
    train_tail = np.load(export_path + 'train_tail.npy')

    print 'reading finished'
    print 'mentions 		: %d' % (len(instance_triple))
    print 'sentences		: %d' % (len(train_len))
    print 'relations		: %d' % (FLAGS.num_classes)
    print 'word size		: %d' % (len(word_vec[0]))
    print 'position size 	: %d' % (FLAGS.pos_size)
    print 'hidden size		: %d' % (FLAGS.hidden_size)
    reltot = {}
    for index, i in enumerate(train_label):
        if not i in reltot:
            reltot[i] = 1.0
        else:
            reltot[i] += 1.0
    for i in reltot:
        reltot[i] = 1 / (reltot[i]**(0.05))
    print 'building network...'
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=True,
                            word_embeddings=word_vec,
                            cell_name="LSTM",
                            simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=True,
                            word_embeddings=word_vec,
                            cell_name="GRU",
                            simple_position=True)
    elif FLAGS.model.lower() == "bi-lstm" or FLAGS.model.lower() == "bilstm":
        model = network.BiRNN(is_training=True,
                              word_embeddings=word_vec,
                              cell_name="LSTM",
                              simple_position=True)
    elif FLAGS.model.lower() == "bi-gru" or FLAGS.model.lower() == "bigru":
        model = network.BiRNN(is_training=True,
                              word_embeddings=word_vec,
                              cell_name="GRU",
                              simple_position=True)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    global_step_kg = tf.Variable(0, name='global_step_kg', trainable=False)
    tf.summary.scalar('learning_rate', FLAGS.learning_rate)
    tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)

    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    optimizer_kg = tf.train.GradientDescentOptimizer(FLAGS.learning_rate_kg)
    grads_and_vars_kg = optimizer_kg.compute_gradients(model.loss_kg)
    train_op_kg = optimizer_kg.apply_gradients(grads_and_vars_kg,
                                               global_step=global_step_kg)

    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)
    print("================================================")
    saver.restore(
        sess, FLAGS.model_dir + sys.argv[1] + FLAGS.model +
        str(FLAGS.katt_flag) + "-" + str(itera))
    ent_embedding, rel_embedding = sess.run(
        [model.word_embedding, model.rel_embeddings])
    ent_embedding = ent_embedding.tolist()
    rel_embedding = rel_embedding.tolist()
    f = open(export_path + "entity2vec", "w")
    f.write(json.dumps(ent_embedding))
    f.close()
    f = open(export_path + "relation2vec", "w")
    f.write(json.dumps(rel_embedding))
    f.close()